out-glue-mnli / trainer_state.json
Tural's picture
End of training
b2fe532
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 2.0,
"global_step": 40920,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.9987781036168134e-05,
"loss": 1.0996,
"step": 25
},
{
"epoch": 0.02,
"learning_rate": 1.9975562072336266e-05,
"loss": 1.0049,
"step": 50
},
{
"epoch": 0.04,
"learning_rate": 1.99633431085044e-05,
"loss": 0.8837,
"step": 75
},
{
"epoch": 0.05,
"learning_rate": 1.9951124144672535e-05,
"loss": 0.7945,
"step": 100
},
{
"epoch": 0.06,
"learning_rate": 1.9938905180840667e-05,
"loss": 0.7541,
"step": 125
},
{
"epoch": 0.07,
"learning_rate": 1.99266862170088e-05,
"loss": 0.694,
"step": 150
},
{
"epoch": 0.09,
"learning_rate": 1.9914467253176932e-05,
"loss": 0.6936,
"step": 175
},
{
"epoch": 0.1,
"learning_rate": 1.9902248289345064e-05,
"loss": 0.6625,
"step": 200
},
{
"epoch": 0.11,
"learning_rate": 1.9890029325513197e-05,
"loss": 0.661,
"step": 225
},
{
"epoch": 0.12,
"learning_rate": 1.9877810361681332e-05,
"loss": 0.6525,
"step": 250
},
{
"epoch": 0.13,
"learning_rate": 1.9865591397849465e-05,
"loss": 0.6298,
"step": 275
},
{
"epoch": 0.15,
"learning_rate": 1.9853372434017597e-05,
"loss": 0.6111,
"step": 300
},
{
"epoch": 0.16,
"learning_rate": 1.984115347018573e-05,
"loss": 0.62,
"step": 325
},
{
"epoch": 0.17,
"learning_rate": 1.9828934506353862e-05,
"loss": 0.611,
"step": 350
},
{
"epoch": 0.18,
"learning_rate": 1.9816715542521994e-05,
"loss": 0.5751,
"step": 375
},
{
"epoch": 0.2,
"learning_rate": 1.980449657869013e-05,
"loss": 0.6004,
"step": 400
},
{
"epoch": 0.21,
"learning_rate": 1.9792277614858263e-05,
"loss": 0.5816,
"step": 425
},
{
"epoch": 0.22,
"learning_rate": 1.9780058651026395e-05,
"loss": 0.5813,
"step": 450
},
{
"epoch": 0.23,
"learning_rate": 1.9767839687194527e-05,
"loss": 0.5784,
"step": 475
},
{
"epoch": 0.24,
"learning_rate": 1.975562072336266e-05,
"loss": 0.5869,
"step": 500
},
{
"epoch": 0.26,
"learning_rate": 1.9743401759530792e-05,
"loss": 0.5793,
"step": 525
},
{
"epoch": 0.27,
"learning_rate": 1.9731182795698928e-05,
"loss": 0.5782,
"step": 550
},
{
"epoch": 0.28,
"learning_rate": 1.971896383186706e-05,
"loss": 0.5638,
"step": 575
},
{
"epoch": 0.29,
"learning_rate": 1.9706744868035193e-05,
"loss": 0.5645,
"step": 600
},
{
"epoch": 0.31,
"learning_rate": 1.9694525904203325e-05,
"loss": 0.552,
"step": 625
},
{
"epoch": 0.32,
"learning_rate": 1.9682306940371458e-05,
"loss": 0.539,
"step": 650
},
{
"epoch": 0.33,
"learning_rate": 1.967008797653959e-05,
"loss": 0.5464,
"step": 675
},
{
"epoch": 0.34,
"learning_rate": 1.9657869012707726e-05,
"loss": 0.538,
"step": 700
},
{
"epoch": 0.35,
"learning_rate": 1.9645650048875858e-05,
"loss": 0.5394,
"step": 725
},
{
"epoch": 0.37,
"learning_rate": 1.963343108504399e-05,
"loss": 0.5586,
"step": 750
},
{
"epoch": 0.38,
"learning_rate": 1.9621212121212123e-05,
"loss": 0.5241,
"step": 775
},
{
"epoch": 0.39,
"learning_rate": 1.9608993157380255e-05,
"loss": 0.5304,
"step": 800
},
{
"epoch": 0.4,
"learning_rate": 1.9596774193548388e-05,
"loss": 0.5313,
"step": 825
},
{
"epoch": 0.42,
"learning_rate": 1.9584555229716524e-05,
"loss": 0.5423,
"step": 850
},
{
"epoch": 0.43,
"learning_rate": 1.9572336265884656e-05,
"loss": 0.5086,
"step": 875
},
{
"epoch": 0.44,
"learning_rate": 1.956011730205279e-05,
"loss": 0.5172,
"step": 900
},
{
"epoch": 0.45,
"learning_rate": 1.954789833822092e-05,
"loss": 0.5081,
"step": 925
},
{
"epoch": 0.46,
"learning_rate": 1.9535679374389053e-05,
"loss": 0.5244,
"step": 950
},
{
"epoch": 0.48,
"learning_rate": 1.9523460410557186e-05,
"loss": 0.5371,
"step": 975
},
{
"epoch": 0.49,
"learning_rate": 1.951124144672532e-05,
"loss": 0.5278,
"step": 1000
},
{
"epoch": 0.5,
"learning_rate": 1.9499022482893454e-05,
"loss": 0.5325,
"step": 1025
},
{
"epoch": 0.51,
"learning_rate": 1.9486803519061586e-05,
"loss": 0.5019,
"step": 1050
},
{
"epoch": 0.53,
"learning_rate": 1.947458455522972e-05,
"loss": 0.504,
"step": 1075
},
{
"epoch": 0.54,
"learning_rate": 1.946236559139785e-05,
"loss": 0.5225,
"step": 1100
},
{
"epoch": 0.55,
"learning_rate": 1.9450146627565983e-05,
"loss": 0.5232,
"step": 1125
},
{
"epoch": 0.56,
"learning_rate": 1.9437927663734116e-05,
"loss": 0.5282,
"step": 1150
},
{
"epoch": 0.57,
"learning_rate": 1.942570869990225e-05,
"loss": 0.514,
"step": 1175
},
{
"epoch": 0.59,
"learning_rate": 1.9413489736070384e-05,
"loss": 0.4916,
"step": 1200
},
{
"epoch": 0.6,
"learning_rate": 1.9401270772238516e-05,
"loss": 0.5079,
"step": 1225
},
{
"epoch": 0.61,
"learning_rate": 1.938905180840665e-05,
"loss": 0.5221,
"step": 1250
},
{
"epoch": 0.62,
"learning_rate": 1.937683284457478e-05,
"loss": 0.5097,
"step": 1275
},
{
"epoch": 0.64,
"learning_rate": 1.9364613880742914e-05,
"loss": 0.512,
"step": 1300
},
{
"epoch": 0.65,
"learning_rate": 1.935239491691105e-05,
"loss": 0.5001,
"step": 1325
},
{
"epoch": 0.66,
"learning_rate": 1.9340175953079182e-05,
"loss": 0.5021,
"step": 1350
},
{
"epoch": 0.67,
"learning_rate": 1.9327956989247314e-05,
"loss": 0.5044,
"step": 1375
},
{
"epoch": 0.68,
"learning_rate": 1.9315738025415447e-05,
"loss": 0.4943,
"step": 1400
},
{
"epoch": 0.7,
"learning_rate": 1.930351906158358e-05,
"loss": 0.4935,
"step": 1425
},
{
"epoch": 0.71,
"learning_rate": 1.929130009775171e-05,
"loss": 0.5144,
"step": 1450
},
{
"epoch": 0.72,
"learning_rate": 1.9279081133919847e-05,
"loss": 0.506,
"step": 1475
},
{
"epoch": 0.73,
"learning_rate": 1.926686217008798e-05,
"loss": 0.5079,
"step": 1500
},
{
"epoch": 0.75,
"learning_rate": 1.9254643206256112e-05,
"loss": 0.4939,
"step": 1525
},
{
"epoch": 0.76,
"learning_rate": 1.9242424242424244e-05,
"loss": 0.4874,
"step": 1550
},
{
"epoch": 0.77,
"learning_rate": 1.9230205278592377e-05,
"loss": 0.4994,
"step": 1575
},
{
"epoch": 0.78,
"learning_rate": 1.921798631476051e-05,
"loss": 0.4818,
"step": 1600
},
{
"epoch": 0.79,
"learning_rate": 1.9205767350928645e-05,
"loss": 0.5026,
"step": 1625
},
{
"epoch": 0.81,
"learning_rate": 1.9193548387096777e-05,
"loss": 0.4634,
"step": 1650
},
{
"epoch": 0.82,
"learning_rate": 1.918132942326491e-05,
"loss": 0.4824,
"step": 1675
},
{
"epoch": 0.83,
"learning_rate": 1.9169110459433042e-05,
"loss": 0.472,
"step": 1700
},
{
"epoch": 0.84,
"learning_rate": 1.9156891495601175e-05,
"loss": 0.4811,
"step": 1725
},
{
"epoch": 0.86,
"learning_rate": 1.9144672531769307e-05,
"loss": 0.4763,
"step": 1750
},
{
"epoch": 0.87,
"learning_rate": 1.9132453567937443e-05,
"loss": 0.4809,
"step": 1775
},
{
"epoch": 0.88,
"learning_rate": 1.9120234604105575e-05,
"loss": 0.5,
"step": 1800
},
{
"epoch": 0.89,
"learning_rate": 1.9108015640273708e-05,
"loss": 0.4846,
"step": 1825
},
{
"epoch": 0.9,
"learning_rate": 1.909579667644184e-05,
"loss": 0.4624,
"step": 1850
},
{
"epoch": 0.92,
"learning_rate": 1.9083577712609972e-05,
"loss": 0.4824,
"step": 1875
},
{
"epoch": 0.93,
"learning_rate": 1.9071358748778105e-05,
"loss": 0.4708,
"step": 1900
},
{
"epoch": 0.94,
"learning_rate": 1.905913978494624e-05,
"loss": 0.4884,
"step": 1925
},
{
"epoch": 0.95,
"learning_rate": 1.9046920821114373e-05,
"loss": 0.4768,
"step": 1950
},
{
"epoch": 0.97,
"learning_rate": 1.9034701857282505e-05,
"loss": 0.4583,
"step": 1975
},
{
"epoch": 0.98,
"learning_rate": 1.9022482893450638e-05,
"loss": 0.4634,
"step": 2000
},
{
"epoch": 0.99,
"learning_rate": 1.901026392961877e-05,
"loss": 0.4741,
"step": 2025
},
{
"epoch": 1.0,
"learning_rate": 1.8998044965786903e-05,
"loss": 0.458,
"step": 2050
},
{
"epoch": 1.01,
"learning_rate": 1.8985826001955038e-05,
"loss": 0.3961,
"step": 2075
},
{
"epoch": 1.03,
"learning_rate": 1.897360703812317e-05,
"loss": 0.3929,
"step": 2100
},
{
"epoch": 1.04,
"learning_rate": 1.8961388074291303e-05,
"loss": 0.4265,
"step": 2125
},
{
"epoch": 1.05,
"learning_rate": 1.8949169110459435e-05,
"loss": 0.4049,
"step": 2150
},
{
"epoch": 1.06,
"learning_rate": 1.8936950146627568e-05,
"loss": 0.4171,
"step": 2175
},
{
"epoch": 1.08,
"learning_rate": 1.89247311827957e-05,
"loss": 0.4024,
"step": 2200
},
{
"epoch": 1.09,
"learning_rate": 1.8912512218963833e-05,
"loss": 0.403,
"step": 2225
},
{
"epoch": 1.1,
"learning_rate": 1.890029325513197e-05,
"loss": 0.4105,
"step": 2250
},
{
"epoch": 1.11,
"learning_rate": 1.88880742913001e-05,
"loss": 0.3956,
"step": 2275
},
{
"epoch": 1.12,
"learning_rate": 1.8875855327468233e-05,
"loss": 0.397,
"step": 2300
},
{
"epoch": 1.14,
"learning_rate": 1.8863636363636366e-05,
"loss": 0.3883,
"step": 2325
},
{
"epoch": 1.15,
"learning_rate": 1.8851417399804498e-05,
"loss": 0.4056,
"step": 2350
},
{
"epoch": 1.16,
"learning_rate": 1.883919843597263e-05,
"loss": 0.3884,
"step": 2375
},
{
"epoch": 1.17,
"learning_rate": 1.8826979472140766e-05,
"loss": 0.4129,
"step": 2400
},
{
"epoch": 1.19,
"learning_rate": 1.88147605083089e-05,
"loss": 0.4097,
"step": 2425
},
{
"epoch": 1.2,
"learning_rate": 1.880254154447703e-05,
"loss": 0.4036,
"step": 2450
},
{
"epoch": 1.21,
"learning_rate": 1.8790322580645163e-05,
"loss": 0.3894,
"step": 2475
},
{
"epoch": 1.22,
"learning_rate": 1.8778103616813296e-05,
"loss": 0.4147,
"step": 2500
},
{
"epoch": 1.23,
"learning_rate": 1.8765884652981428e-05,
"loss": 0.409,
"step": 2525
},
{
"epoch": 1.25,
"learning_rate": 1.8753665689149564e-05,
"loss": 0.4106,
"step": 2550
},
{
"epoch": 1.26,
"learning_rate": 1.8741446725317693e-05,
"loss": 0.3971,
"step": 2575
},
{
"epoch": 1.27,
"learning_rate": 1.8729227761485825e-05,
"loss": 0.3995,
"step": 2600
},
{
"epoch": 1.28,
"learning_rate": 1.8717008797653958e-05,
"loss": 0.3947,
"step": 2625
},
{
"epoch": 1.3,
"learning_rate": 1.870527859237537e-05,
"loss": 0.4141,
"step": 2650
},
{
"epoch": 1.31,
"learning_rate": 1.8693059628543502e-05,
"loss": 0.4024,
"step": 2675
},
{
"epoch": 1.32,
"learning_rate": 1.8680840664711635e-05,
"loss": 0.4074,
"step": 2700
},
{
"epoch": 1.33,
"learning_rate": 1.8668621700879767e-05,
"loss": 0.3953,
"step": 2725
},
{
"epoch": 1.34,
"learning_rate": 1.86564027370479e-05,
"loss": 0.3861,
"step": 2750
},
{
"epoch": 1.36,
"learning_rate": 1.8644183773216032e-05,
"loss": 0.3939,
"step": 2775
},
{
"epoch": 1.37,
"learning_rate": 1.8631964809384168e-05,
"loss": 0.4088,
"step": 2800
},
{
"epoch": 1.38,
"learning_rate": 1.86197458455523e-05,
"loss": 0.4089,
"step": 2825
},
{
"epoch": 1.39,
"learning_rate": 1.8607526881720433e-05,
"loss": 0.3987,
"step": 2850
},
{
"epoch": 1.41,
"learning_rate": 1.8595307917888565e-05,
"loss": 0.3881,
"step": 2875
},
{
"epoch": 1.42,
"learning_rate": 1.8583088954056697e-05,
"loss": 0.3877,
"step": 2900
},
{
"epoch": 1.43,
"learning_rate": 1.857086999022483e-05,
"loss": 0.3996,
"step": 2925
},
{
"epoch": 1.44,
"learning_rate": 1.8558651026392966e-05,
"loss": 0.3979,
"step": 2950
},
{
"epoch": 1.45,
"learning_rate": 1.8546432062561098e-05,
"loss": 0.4009,
"step": 2975
},
{
"epoch": 1.47,
"learning_rate": 1.853421309872923e-05,
"loss": 0.4069,
"step": 3000
},
{
"epoch": 1.48,
"learning_rate": 1.8521994134897363e-05,
"loss": 0.3725,
"step": 3025
},
{
"epoch": 1.49,
"learning_rate": 1.8509775171065495e-05,
"loss": 0.3899,
"step": 3050
},
{
"epoch": 1.5,
"learning_rate": 1.8497556207233628e-05,
"loss": 0.3916,
"step": 3075
},
{
"epoch": 1.52,
"learning_rate": 1.8485337243401763e-05,
"loss": 0.4114,
"step": 3100
},
{
"epoch": 1.53,
"learning_rate": 1.8473118279569896e-05,
"loss": 0.3945,
"step": 3125
},
{
"epoch": 1.54,
"learning_rate": 1.8460899315738028e-05,
"loss": 0.3984,
"step": 3150
},
{
"epoch": 1.55,
"learning_rate": 1.844868035190616e-05,
"loss": 0.3871,
"step": 3175
},
{
"epoch": 1.56,
"learning_rate": 1.8436461388074293e-05,
"loss": 0.3859,
"step": 3200
},
{
"epoch": 1.58,
"learning_rate": 1.8424242424242425e-05,
"loss": 0.3926,
"step": 3225
},
{
"epoch": 1.59,
"learning_rate": 1.841202346041056e-05,
"loss": 0.3941,
"step": 3250
},
{
"epoch": 1.6,
"learning_rate": 1.8399804496578694e-05,
"loss": 0.3922,
"step": 3275
},
{
"epoch": 1.61,
"learning_rate": 1.8387585532746826e-05,
"loss": 0.3847,
"step": 3300
},
{
"epoch": 1.63,
"learning_rate": 1.837536656891496e-05,
"loss": 0.3709,
"step": 3325
},
{
"epoch": 1.64,
"learning_rate": 1.836314760508309e-05,
"loss": 0.3963,
"step": 3350
},
{
"epoch": 1.65,
"learning_rate": 1.8350928641251223e-05,
"loss": 0.3639,
"step": 3375
},
{
"epoch": 1.66,
"learning_rate": 1.833870967741936e-05,
"loss": 0.4072,
"step": 3400
},
{
"epoch": 1.67,
"learning_rate": 1.832649071358749e-05,
"loss": 0.3918,
"step": 3425
},
{
"epoch": 1.69,
"learning_rate": 1.8314271749755624e-05,
"loss": 0.3847,
"step": 3450
},
{
"epoch": 1.7,
"learning_rate": 1.8302052785923756e-05,
"loss": 0.393,
"step": 3475
},
{
"epoch": 1.71,
"learning_rate": 1.828983382209189e-05,
"loss": 0.4028,
"step": 3500
},
{
"epoch": 1.72,
"learning_rate": 1.827761485826002e-05,
"loss": 0.3967,
"step": 3525
},
{
"epoch": 1.74,
"learning_rate": 1.8265395894428157e-05,
"loss": 0.3993,
"step": 3550
},
{
"epoch": 1.75,
"learning_rate": 1.825317693059629e-05,
"loss": 0.3854,
"step": 3575
},
{
"epoch": 1.76,
"learning_rate": 1.8240957966764418e-05,
"loss": 0.3644,
"step": 3600
},
{
"epoch": 1.77,
"learning_rate": 1.822873900293255e-05,
"loss": 0.4081,
"step": 3625
},
{
"epoch": 1.78,
"learning_rate": 1.8216520039100686e-05,
"loss": 0.3836,
"step": 3650
},
{
"epoch": 1.8,
"learning_rate": 1.820430107526882e-05,
"loss": 0.3927,
"step": 3675
},
{
"epoch": 1.81,
"learning_rate": 1.819208211143695e-05,
"loss": 0.4068,
"step": 3700
},
{
"epoch": 1.82,
"learning_rate": 1.8179863147605084e-05,
"loss": 0.4027,
"step": 3725
},
{
"epoch": 1.83,
"learning_rate": 1.8167644183773216e-05,
"loss": 0.3736,
"step": 3750
},
{
"epoch": 1.85,
"learning_rate": 1.815542521994135e-05,
"loss": 0.3916,
"step": 3775
},
{
"epoch": 1.86,
"learning_rate": 1.814320625610948e-05,
"loss": 0.3872,
"step": 3800
},
{
"epoch": 1.87,
"learning_rate": 1.8130987292277617e-05,
"loss": 0.3867,
"step": 3825
},
{
"epoch": 1.88,
"learning_rate": 1.811876832844575e-05,
"loss": 0.3879,
"step": 3850
},
{
"epoch": 1.89,
"learning_rate": 1.810654936461388e-05,
"loss": 0.4013,
"step": 3875
},
{
"epoch": 1.91,
"learning_rate": 1.8094330400782014e-05,
"loss": 0.3964,
"step": 3900
},
{
"epoch": 1.92,
"learning_rate": 1.8082111436950146e-05,
"loss": 0.4086,
"step": 3925
},
{
"epoch": 1.93,
"learning_rate": 1.806989247311828e-05,
"loss": 0.3772,
"step": 3950
},
{
"epoch": 1.94,
"learning_rate": 1.8057673509286414e-05,
"loss": 0.3886,
"step": 3975
},
{
"epoch": 1.96,
"learning_rate": 1.8045454545454547e-05,
"loss": 0.3952,
"step": 4000
},
{
"epoch": 1.97,
"learning_rate": 1.803323558162268e-05,
"loss": 0.3962,
"step": 4025
},
{
"epoch": 1.98,
"learning_rate": 1.802101661779081e-05,
"loss": 0.3772,
"step": 4050
},
{
"epoch": 1.99,
"learning_rate": 1.8008797653958944e-05,
"loss": 0.3781,
"step": 4075
},
{
"epoch": 2.0,
"learning_rate": 1.7996578690127076e-05,
"loss": 0.3505,
"step": 4100
},
{
"epoch": 2.02,
"learning_rate": 1.7984359726295212e-05,
"loss": 0.3105,
"step": 4125
},
{
"epoch": 2.03,
"learning_rate": 1.7972140762463344e-05,
"loss": 0.3035,
"step": 4150
},
{
"epoch": 2.04,
"learning_rate": 1.7959921798631477e-05,
"loss": 0.3081,
"step": 4175
},
{
"epoch": 2.05,
"learning_rate": 1.794770283479961e-05,
"loss": 0.3007,
"step": 4200
},
{
"epoch": 2.07,
"learning_rate": 1.7935483870967742e-05,
"loss": 0.3166,
"step": 4225
},
{
"epoch": 2.08,
"learning_rate": 1.7923264907135874e-05,
"loss": 0.3119,
"step": 4250
},
{
"epoch": 2.09,
"learning_rate": 1.791104594330401e-05,
"loss": 0.2894,
"step": 4275
},
{
"epoch": 2.1,
"learning_rate": 1.7898826979472142e-05,
"loss": 0.3103,
"step": 4300
},
{
"epoch": 2.11,
"learning_rate": 1.7886608015640275e-05,
"loss": 0.3049,
"step": 4325
},
{
"epoch": 2.13,
"learning_rate": 1.7874389051808407e-05,
"loss": 0.2996,
"step": 4350
},
{
"epoch": 2.14,
"learning_rate": 1.786217008797654e-05,
"loss": 0.3012,
"step": 4375
},
{
"epoch": 2.15,
"learning_rate": 1.7849951124144672e-05,
"loss": 0.2915,
"step": 4400
},
{
"epoch": 2.16,
"learning_rate": 1.7837732160312808e-05,
"loss": 0.3193,
"step": 4425
},
{
"epoch": 2.17,
"learning_rate": 1.782551319648094e-05,
"loss": 0.3132,
"step": 4450
},
{
"epoch": 2.19,
"learning_rate": 1.7813294232649072e-05,
"loss": 0.3209,
"step": 4475
},
{
"epoch": 2.2,
"learning_rate": 1.7801075268817205e-05,
"loss": 0.2968,
"step": 4500
},
{
"epoch": 2.21,
"learning_rate": 1.7788856304985337e-05,
"loss": 0.3092,
"step": 4525
},
{
"epoch": 2.22,
"learning_rate": 1.777663734115347e-05,
"loss": 0.2926,
"step": 4550
},
{
"epoch": 2.24,
"learning_rate": 1.7764418377321605e-05,
"loss": 0.3175,
"step": 4575
},
{
"epoch": 2.25,
"learning_rate": 1.7752199413489738e-05,
"loss": 0.3062,
"step": 4600
},
{
"epoch": 2.26,
"learning_rate": 1.773998044965787e-05,
"loss": 0.3107,
"step": 4625
},
{
"epoch": 2.27,
"learning_rate": 1.7727761485826003e-05,
"loss": 0.3072,
"step": 4650
},
{
"epoch": 2.28,
"learning_rate": 1.7715542521994135e-05,
"loss": 0.3166,
"step": 4675
},
{
"epoch": 2.3,
"learning_rate": 1.7703323558162267e-05,
"loss": 0.3181,
"step": 4700
},
{
"epoch": 2.31,
"learning_rate": 1.7691104594330403e-05,
"loss": 0.2843,
"step": 4725
},
{
"epoch": 2.32,
"learning_rate": 1.7678885630498536e-05,
"loss": 0.3103,
"step": 4750
},
{
"epoch": 2.33,
"learning_rate": 1.7666666666666668e-05,
"loss": 0.3039,
"step": 4775
},
{
"epoch": 2.35,
"learning_rate": 1.76544477028348e-05,
"loss": 0.3099,
"step": 4800
},
{
"epoch": 2.36,
"learning_rate": 1.7642228739002933e-05,
"loss": 0.2989,
"step": 4825
},
{
"epoch": 2.37,
"learning_rate": 1.7630009775171065e-05,
"loss": 0.3132,
"step": 4850
},
{
"epoch": 2.38,
"learning_rate": 1.7617790811339198e-05,
"loss": 0.2937,
"step": 4875
},
{
"epoch": 2.39,
"learning_rate": 1.7605571847507333e-05,
"loss": 0.3067,
"step": 4900
},
{
"epoch": 2.41,
"learning_rate": 1.7593352883675466e-05,
"loss": 0.3163,
"step": 4925
},
{
"epoch": 2.42,
"learning_rate": 1.7581133919843598e-05,
"loss": 0.3078,
"step": 4950
},
{
"epoch": 2.43,
"learning_rate": 1.756891495601173e-05,
"loss": 0.3114,
"step": 4975
},
{
"epoch": 2.44,
"learning_rate": 1.7556695992179863e-05,
"loss": 0.3121,
"step": 5000
},
{
"epoch": 2.46,
"learning_rate": 1.7544965786901272e-05,
"loss": 0.3018,
"step": 5025
},
{
"epoch": 2.47,
"learning_rate": 1.7532746823069404e-05,
"loss": 0.3122,
"step": 5050
},
{
"epoch": 2.48,
"learning_rate": 1.7520527859237537e-05,
"loss": 0.3052,
"step": 5075
},
{
"epoch": 2.49,
"learning_rate": 1.750830889540567e-05,
"loss": 0.3206,
"step": 5100
},
{
"epoch": 2.5,
"learning_rate": 1.7496089931573805e-05,
"loss": 0.3136,
"step": 5125
},
{
"epoch": 2.52,
"learning_rate": 1.7483870967741937e-05,
"loss": 0.316,
"step": 5150
},
{
"epoch": 2.53,
"learning_rate": 1.747165200391007e-05,
"loss": 0.3142,
"step": 5175
},
{
"epoch": 2.54,
"learning_rate": 1.7459433040078202e-05,
"loss": 0.3104,
"step": 5200
},
{
"epoch": 2.55,
"learning_rate": 1.7447214076246334e-05,
"loss": 0.3,
"step": 5225
},
{
"epoch": 2.57,
"learning_rate": 1.7434995112414467e-05,
"loss": 0.3027,
"step": 5250
},
{
"epoch": 2.58,
"learning_rate": 1.74227761485826e-05,
"loss": 0.3097,
"step": 5275
},
{
"epoch": 2.59,
"learning_rate": 1.7410557184750735e-05,
"loss": 0.314,
"step": 5300
},
{
"epoch": 2.6,
"learning_rate": 1.7398338220918867e-05,
"loss": 0.3252,
"step": 5325
},
{
"epoch": 2.61,
"learning_rate": 1.7386119257087e-05,
"loss": 0.3158,
"step": 5350
},
{
"epoch": 2.63,
"learning_rate": 1.7373900293255132e-05,
"loss": 0.3041,
"step": 5375
},
{
"epoch": 2.64,
"learning_rate": 1.7361681329423265e-05,
"loss": 0.3214,
"step": 5400
},
{
"epoch": 2.65,
"learning_rate": 1.7349462365591397e-05,
"loss": 0.3352,
"step": 5425
},
{
"epoch": 2.66,
"learning_rate": 1.7337243401759533e-05,
"loss": 0.3188,
"step": 5450
},
{
"epoch": 2.68,
"learning_rate": 1.7325024437927665e-05,
"loss": 0.3087,
"step": 5475
},
{
"epoch": 2.69,
"learning_rate": 1.7312805474095798e-05,
"loss": 0.3064,
"step": 5500
},
{
"epoch": 2.7,
"learning_rate": 1.730058651026393e-05,
"loss": 0.3153,
"step": 5525
},
{
"epoch": 2.71,
"learning_rate": 1.7288367546432062e-05,
"loss": 0.3084,
"step": 5550
},
{
"epoch": 2.72,
"learning_rate": 1.7276148582600195e-05,
"loss": 0.292,
"step": 5575
},
{
"epoch": 2.74,
"learning_rate": 1.726392961876833e-05,
"loss": 0.3167,
"step": 5600
},
{
"epoch": 2.75,
"learning_rate": 1.7251710654936463e-05,
"loss": 0.3141,
"step": 5625
},
{
"epoch": 2.76,
"learning_rate": 1.7239491691104595e-05,
"loss": 0.2913,
"step": 5650
},
{
"epoch": 2.77,
"learning_rate": 1.7227272727272728e-05,
"loss": 0.3279,
"step": 5675
},
{
"epoch": 2.79,
"learning_rate": 1.721505376344086e-05,
"loss": 0.3225,
"step": 5700
},
{
"epoch": 2.8,
"learning_rate": 1.7202834799608993e-05,
"loss": 0.306,
"step": 5725
},
{
"epoch": 2.81,
"learning_rate": 1.719061583577713e-05,
"loss": 0.3322,
"step": 5750
},
{
"epoch": 2.82,
"learning_rate": 1.717839687194526e-05,
"loss": 0.3102,
"step": 5775
},
{
"epoch": 2.83,
"learning_rate": 1.7166177908113393e-05,
"loss": 0.3123,
"step": 5800
},
{
"epoch": 2.85,
"learning_rate": 1.7153958944281526e-05,
"loss": 0.3077,
"step": 5825
},
{
"epoch": 2.86,
"learning_rate": 1.7141739980449658e-05,
"loss": 0.3223,
"step": 5850
},
{
"epoch": 2.87,
"learning_rate": 1.712952101661779e-05,
"loss": 0.3178,
"step": 5875
},
{
"epoch": 2.88,
"learning_rate": 1.7117302052785926e-05,
"loss": 0.3097,
"step": 5900
},
{
"epoch": 2.9,
"learning_rate": 1.710508308895406e-05,
"loss": 0.3047,
"step": 5925
},
{
"epoch": 2.91,
"learning_rate": 1.709286412512219e-05,
"loss": 0.3316,
"step": 5950
},
{
"epoch": 2.92,
"learning_rate": 1.7080645161290323e-05,
"loss": 0.3107,
"step": 5975
},
{
"epoch": 2.93,
"learning_rate": 1.7068426197458456e-05,
"loss": 0.3037,
"step": 6000
},
{
"epoch": 2.94,
"learning_rate": 1.7056207233626588e-05,
"loss": 0.3075,
"step": 6025
},
{
"epoch": 2.96,
"learning_rate": 1.7043988269794724e-05,
"loss": 0.3249,
"step": 6050
},
{
"epoch": 2.97,
"learning_rate": 1.7031769305962856e-05,
"loss": 0.3078,
"step": 6075
},
{
"epoch": 2.98,
"learning_rate": 1.701955034213099e-05,
"loss": 0.3015,
"step": 6100
},
{
"epoch": 2.99,
"learning_rate": 1.700733137829912e-05,
"loss": 0.3186,
"step": 6125
},
{
"epoch": 3.01,
"learning_rate": 1.6995112414467254e-05,
"loss": 0.2948,
"step": 6150
},
{
"epoch": 3.02,
"learning_rate": 1.6982893450635386e-05,
"loss": 0.2312,
"step": 6175
},
{
"epoch": 3.03,
"learning_rate": 1.697067448680352e-05,
"loss": 0.2379,
"step": 6200
},
{
"epoch": 3.04,
"learning_rate": 1.6958455522971654e-05,
"loss": 0.24,
"step": 6225
},
{
"epoch": 3.05,
"learning_rate": 1.6946236559139786e-05,
"loss": 0.2301,
"step": 6250
},
{
"epoch": 3.07,
"learning_rate": 1.693401759530792e-05,
"loss": 0.2257,
"step": 6275
},
{
"epoch": 3.08,
"learning_rate": 1.692179863147605e-05,
"loss": 0.2318,
"step": 6300
},
{
"epoch": 3.09,
"learning_rate": 1.6909579667644184e-05,
"loss": 0.2417,
"step": 6325
},
{
"epoch": 3.1,
"learning_rate": 1.6897360703812316e-05,
"loss": 0.2334,
"step": 6350
},
{
"epoch": 3.12,
"learning_rate": 1.6885141739980452e-05,
"loss": 0.2507,
"step": 6375
},
{
"epoch": 3.13,
"learning_rate": 1.6872922776148584e-05,
"loss": 0.2401,
"step": 6400
},
{
"epoch": 3.14,
"learning_rate": 1.6860703812316717e-05,
"loss": 0.2264,
"step": 6425
},
{
"epoch": 3.15,
"learning_rate": 1.684848484848485e-05,
"loss": 0.2395,
"step": 6450
},
{
"epoch": 3.16,
"learning_rate": 1.683626588465298e-05,
"loss": 0.2282,
"step": 6475
},
{
"epoch": 3.18,
"learning_rate": 1.6824046920821114e-05,
"loss": 0.237,
"step": 6500
},
{
"epoch": 3.19,
"learning_rate": 1.681182795698925e-05,
"loss": 0.2148,
"step": 6525
},
{
"epoch": 3.2,
"learning_rate": 1.6799608993157382e-05,
"loss": 0.2392,
"step": 6550
},
{
"epoch": 3.21,
"learning_rate": 1.6787390029325514e-05,
"loss": 0.2331,
"step": 6575
},
{
"epoch": 3.23,
"learning_rate": 1.6775171065493647e-05,
"loss": 0.2397,
"step": 6600
},
{
"epoch": 3.24,
"learning_rate": 1.676295210166178e-05,
"loss": 0.2398,
"step": 6625
},
{
"epoch": 3.25,
"learning_rate": 1.675073313782991e-05,
"loss": 0.2326,
"step": 6650
},
{
"epoch": 3.26,
"learning_rate": 1.6738514173998047e-05,
"loss": 0.2549,
"step": 6675
},
{
"epoch": 3.27,
"learning_rate": 1.672629521016618e-05,
"loss": 0.2261,
"step": 6700
},
{
"epoch": 3.29,
"learning_rate": 1.6714076246334312e-05,
"loss": 0.2366,
"step": 6725
},
{
"epoch": 3.3,
"learning_rate": 1.6701857282502445e-05,
"loss": 0.2504,
"step": 6750
},
{
"epoch": 3.31,
"learning_rate": 1.6689638318670577e-05,
"loss": 0.2381,
"step": 6775
},
{
"epoch": 3.32,
"learning_rate": 1.667741935483871e-05,
"loss": 0.2503,
"step": 6800
},
{
"epoch": 3.34,
"learning_rate": 1.6665200391006845e-05,
"loss": 0.2169,
"step": 6825
},
{
"epoch": 3.35,
"learning_rate": 1.6652981427174978e-05,
"loss": 0.2441,
"step": 6850
},
{
"epoch": 3.36,
"learning_rate": 1.664076246334311e-05,
"loss": 0.2348,
"step": 6875
},
{
"epoch": 3.37,
"learning_rate": 1.6628543499511242e-05,
"loss": 0.2479,
"step": 6900
},
{
"epoch": 3.38,
"learning_rate": 1.6616324535679375e-05,
"loss": 0.2479,
"step": 6925
},
{
"epoch": 3.4,
"learning_rate": 1.6604105571847507e-05,
"loss": 0.258,
"step": 6950
},
{
"epoch": 3.41,
"learning_rate": 1.6591886608015643e-05,
"loss": 0.2269,
"step": 6975
},
{
"epoch": 3.42,
"learning_rate": 1.6579667644183775e-05,
"loss": 0.2448,
"step": 7000
},
{
"epoch": 3.43,
"learning_rate": 1.6567448680351908e-05,
"loss": 0.2293,
"step": 7025
},
{
"epoch": 3.45,
"learning_rate": 1.655522971652004e-05,
"loss": 0.2407,
"step": 7050
},
{
"epoch": 3.46,
"learning_rate": 1.6543010752688173e-05,
"loss": 0.2318,
"step": 7075
},
{
"epoch": 3.47,
"learning_rate": 1.6530791788856305e-05,
"loss": 0.2363,
"step": 7100
},
{
"epoch": 3.48,
"learning_rate": 1.651857282502444e-05,
"loss": 0.2474,
"step": 7125
},
{
"epoch": 3.49,
"learning_rate": 1.6506353861192573e-05,
"loss": 0.2394,
"step": 7150
},
{
"epoch": 3.51,
"learning_rate": 1.6494134897360706e-05,
"loss": 0.2434,
"step": 7175
},
{
"epoch": 3.52,
"learning_rate": 1.6481915933528838e-05,
"loss": 0.2403,
"step": 7200
},
{
"epoch": 3.53,
"learning_rate": 1.646969696969697e-05,
"loss": 0.2456,
"step": 7225
},
{
"epoch": 3.54,
"learning_rate": 1.6457478005865103e-05,
"loss": 0.2441,
"step": 7250
},
{
"epoch": 3.56,
"learning_rate": 1.644525904203324e-05,
"loss": 0.2332,
"step": 7275
},
{
"epoch": 3.57,
"learning_rate": 1.643304007820137e-05,
"loss": 0.23,
"step": 7300
},
{
"epoch": 3.58,
"learning_rate": 1.6420821114369503e-05,
"loss": 0.2293,
"step": 7325
},
{
"epoch": 3.59,
"learning_rate": 1.640909090909091e-05,
"loss": 0.2224,
"step": 7350
},
{
"epoch": 3.6,
"learning_rate": 1.6396871945259045e-05,
"loss": 0.2324,
"step": 7375
},
{
"epoch": 3.62,
"learning_rate": 1.6384652981427177e-05,
"loss": 0.2409,
"step": 7400
},
{
"epoch": 3.63,
"learning_rate": 1.637243401759531e-05,
"loss": 0.2462,
"step": 7425
},
{
"epoch": 3.64,
"learning_rate": 1.6360215053763442e-05,
"loss": 0.2375,
"step": 7450
},
{
"epoch": 3.65,
"learning_rate": 1.6347996089931574e-05,
"loss": 0.2445,
"step": 7475
},
{
"epoch": 3.67,
"learning_rate": 1.6335777126099707e-05,
"loss": 0.2364,
"step": 7500
},
{
"epoch": 3.68,
"learning_rate": 1.6323558162267842e-05,
"loss": 0.2374,
"step": 7525
},
{
"epoch": 3.69,
"learning_rate": 1.6311339198435975e-05,
"loss": 0.2383,
"step": 7550
},
{
"epoch": 3.7,
"learning_rate": 1.6299120234604107e-05,
"loss": 0.2312,
"step": 7575
},
{
"epoch": 3.71,
"learning_rate": 1.628690127077224e-05,
"loss": 0.2475,
"step": 7600
},
{
"epoch": 3.73,
"learning_rate": 1.6274682306940372e-05,
"loss": 0.2552,
"step": 7625
},
{
"epoch": 3.74,
"learning_rate": 1.6262463343108504e-05,
"loss": 0.2507,
"step": 7650
},
{
"epoch": 3.75,
"learning_rate": 1.625024437927664e-05,
"loss": 0.248,
"step": 7675
},
{
"epoch": 3.76,
"learning_rate": 1.6238025415444773e-05,
"loss": 0.2333,
"step": 7700
},
{
"epoch": 3.78,
"learning_rate": 1.6225806451612905e-05,
"loss": 0.2424,
"step": 7725
},
{
"epoch": 3.79,
"learning_rate": 1.6213587487781037e-05,
"loss": 0.2565,
"step": 7750
},
{
"epoch": 3.8,
"learning_rate": 1.620136852394917e-05,
"loss": 0.2568,
"step": 7775
},
{
"epoch": 3.81,
"learning_rate": 1.6189149560117302e-05,
"loss": 0.2409,
"step": 7800
},
{
"epoch": 3.82,
"learning_rate": 1.6176930596285435e-05,
"loss": 0.2444,
"step": 7825
},
{
"epoch": 3.84,
"learning_rate": 1.616471163245357e-05,
"loss": 0.243,
"step": 7850
},
{
"epoch": 3.85,
"learning_rate": 1.6152492668621703e-05,
"loss": 0.2417,
"step": 7875
},
{
"epoch": 3.86,
"learning_rate": 1.6140273704789835e-05,
"loss": 0.2246,
"step": 7900
},
{
"epoch": 3.87,
"learning_rate": 1.6128054740957968e-05,
"loss": 0.2533,
"step": 7925
},
{
"epoch": 3.89,
"learning_rate": 1.61158357771261e-05,
"loss": 0.2508,
"step": 7950
},
{
"epoch": 3.9,
"learning_rate": 1.6103616813294232e-05,
"loss": 0.2384,
"step": 7975
},
{
"epoch": 3.91,
"learning_rate": 1.6091397849462368e-05,
"loss": 0.2531,
"step": 8000
},
{
"epoch": 3.92,
"learning_rate": 1.60791788856305e-05,
"loss": 0.2448,
"step": 8025
},
{
"epoch": 3.93,
"learning_rate": 1.6066959921798633e-05,
"loss": 0.241,
"step": 8050
},
{
"epoch": 3.95,
"learning_rate": 1.6054740957966765e-05,
"loss": 0.2566,
"step": 8075
},
{
"epoch": 3.96,
"learning_rate": 1.6042521994134898e-05,
"loss": 0.2297,
"step": 8100
},
{
"epoch": 3.97,
"learning_rate": 1.603030303030303e-05,
"loss": 0.2446,
"step": 8125
},
{
"epoch": 3.98,
"learning_rate": 1.6018084066471166e-05,
"loss": 0.2305,
"step": 8150
},
{
"epoch": 4.0,
"learning_rate": 1.6005865102639298e-05,
"loss": 0.2438,
"step": 8175
},
{
"epoch": 4.01,
"learning_rate": 1.599364613880743e-05,
"loss": 0.2059,
"step": 8200
},
{
"epoch": 4.02,
"learning_rate": 1.5981427174975563e-05,
"loss": 0.1706,
"step": 8225
},
{
"epoch": 4.03,
"learning_rate": 1.5969208211143695e-05,
"loss": 0.1822,
"step": 8250
},
{
"epoch": 4.04,
"learning_rate": 1.5956989247311828e-05,
"loss": 0.1947,
"step": 8275
},
{
"epoch": 4.06,
"learning_rate": 1.5944770283479964e-05,
"loss": 0.1714,
"step": 8300
},
{
"epoch": 4.07,
"learning_rate": 1.5932551319648096e-05,
"loss": 0.1776,
"step": 8325
},
{
"epoch": 4.08,
"learning_rate": 1.592033235581623e-05,
"loss": 0.1974,
"step": 8350
},
{
"epoch": 4.09,
"learning_rate": 1.590811339198436e-05,
"loss": 0.1898,
"step": 8375
},
{
"epoch": 4.11,
"learning_rate": 1.5895894428152493e-05,
"loss": 0.1747,
"step": 8400
},
{
"epoch": 4.12,
"learning_rate": 1.5883675464320626e-05,
"loss": 0.1692,
"step": 8425
},
{
"epoch": 4.13,
"learning_rate": 1.587145650048876e-05,
"loss": 0.1773,
"step": 8450
},
{
"epoch": 4.14,
"learning_rate": 1.5859237536656894e-05,
"loss": 0.1605,
"step": 8475
},
{
"epoch": 4.15,
"learning_rate": 1.5847018572825026e-05,
"loss": 0.1773,
"step": 8500
},
{
"epoch": 4.17,
"learning_rate": 1.583479960899316e-05,
"loss": 0.1778,
"step": 8525
},
{
"epoch": 4.18,
"learning_rate": 1.582258064516129e-05,
"loss": 0.1865,
"step": 8550
},
{
"epoch": 4.19,
"learning_rate": 1.5810361681329423e-05,
"loss": 0.1844,
"step": 8575
},
{
"epoch": 4.2,
"learning_rate": 1.579814271749756e-05,
"loss": 0.1694,
"step": 8600
},
{
"epoch": 4.22,
"learning_rate": 1.578592375366569e-05,
"loss": 0.1813,
"step": 8625
},
{
"epoch": 4.23,
"learning_rate": 1.5773704789833824e-05,
"loss": 0.1899,
"step": 8650
},
{
"epoch": 4.24,
"learning_rate": 1.5761485826001956e-05,
"loss": 0.1691,
"step": 8675
},
{
"epoch": 4.25,
"learning_rate": 1.574926686217009e-05,
"loss": 0.1777,
"step": 8700
},
{
"epoch": 4.26,
"learning_rate": 1.573704789833822e-05,
"loss": 0.1877,
"step": 8725
},
{
"epoch": 4.28,
"learning_rate": 1.5724828934506357e-05,
"loss": 0.1846,
"step": 8750
},
{
"epoch": 4.29,
"learning_rate": 1.571260997067449e-05,
"loss": 0.1963,
"step": 8775
},
{
"epoch": 4.3,
"learning_rate": 1.5700391006842622e-05,
"loss": 0.1771,
"step": 8800
},
{
"epoch": 4.31,
"learning_rate": 1.5688172043010754e-05,
"loss": 0.1865,
"step": 8825
},
{
"epoch": 4.33,
"learning_rate": 1.5675953079178887e-05,
"loss": 0.1763,
"step": 8850
},
{
"epoch": 4.34,
"learning_rate": 1.566373411534702e-05,
"loss": 0.1768,
"step": 8875
},
{
"epoch": 4.35,
"learning_rate": 1.565151515151515e-05,
"loss": 0.1645,
"step": 8900
},
{
"epoch": 4.36,
"learning_rate": 1.5639296187683287e-05,
"loss": 0.1816,
"step": 8925
},
{
"epoch": 4.37,
"learning_rate": 1.562707722385142e-05,
"loss": 0.1793,
"step": 8950
},
{
"epoch": 4.39,
"learning_rate": 1.5614858260019552e-05,
"loss": 0.1888,
"step": 8975
},
{
"epoch": 4.4,
"learning_rate": 1.5602639296187684e-05,
"loss": 0.1769,
"step": 9000
},
{
"epoch": 4.41,
"learning_rate": 1.5590420332355817e-05,
"loss": 0.1791,
"step": 9025
},
{
"epoch": 4.42,
"learning_rate": 1.557820136852395e-05,
"loss": 0.1823,
"step": 9050
},
{
"epoch": 4.44,
"learning_rate": 1.5565982404692085e-05,
"loss": 0.1835,
"step": 9075
},
{
"epoch": 4.45,
"learning_rate": 1.5553763440860217e-05,
"loss": 0.1875,
"step": 9100
},
{
"epoch": 4.46,
"learning_rate": 1.554154447702835e-05,
"loss": 0.1761,
"step": 9125
},
{
"epoch": 4.47,
"learning_rate": 1.5529325513196482e-05,
"loss": 0.1838,
"step": 9150
},
{
"epoch": 4.48,
"learning_rate": 1.5517106549364615e-05,
"loss": 0.1966,
"step": 9175
},
{
"epoch": 4.5,
"learning_rate": 1.5504887585532747e-05,
"loss": 0.1874,
"step": 9200
},
{
"epoch": 4.51,
"learning_rate": 1.5492668621700883e-05,
"loss": 0.1841,
"step": 9225
},
{
"epoch": 4.52,
"learning_rate": 1.5480449657869015e-05,
"loss": 0.2053,
"step": 9250
},
{
"epoch": 4.53,
"learning_rate": 1.5468230694037148e-05,
"loss": 0.1925,
"step": 9275
},
{
"epoch": 4.55,
"learning_rate": 1.545601173020528e-05,
"loss": 0.1784,
"step": 9300
},
{
"epoch": 4.56,
"learning_rate": 1.5443792766373412e-05,
"loss": 0.1914,
"step": 9325
},
{
"epoch": 4.57,
"learning_rate": 1.5431573802541545e-05,
"loss": 0.1875,
"step": 9350
},
{
"epoch": 4.58,
"learning_rate": 1.541935483870968e-05,
"loss": 0.183,
"step": 9375
},
{
"epoch": 4.59,
"learning_rate": 1.5407135874877813e-05,
"loss": 0.1964,
"step": 9400
},
{
"epoch": 4.61,
"learning_rate": 1.539540566959922e-05,
"loss": 0.1753,
"step": 9425
},
{
"epoch": 4.62,
"learning_rate": 1.538318670576735e-05,
"loss": 0.1911,
"step": 9450
},
{
"epoch": 4.63,
"learning_rate": 1.5370967741935487e-05,
"loss": 0.1957,
"step": 9475
},
{
"epoch": 4.64,
"learning_rate": 1.535874877810362e-05,
"loss": 0.1861,
"step": 9500
},
{
"epoch": 4.66,
"learning_rate": 1.534652981427175e-05,
"loss": 0.191,
"step": 9525
},
{
"epoch": 4.67,
"learning_rate": 1.5334310850439884e-05,
"loss": 0.1844,
"step": 9550
},
{
"epoch": 4.68,
"learning_rate": 1.5322091886608016e-05,
"loss": 0.2053,
"step": 9575
},
{
"epoch": 4.69,
"learning_rate": 1.530987292277615e-05,
"loss": 0.1857,
"step": 9600
},
{
"epoch": 4.7,
"learning_rate": 1.5297653958944284e-05,
"loss": 0.1783,
"step": 9625
},
{
"epoch": 4.72,
"learning_rate": 1.5285434995112417e-05,
"loss": 0.1917,
"step": 9650
},
{
"epoch": 4.73,
"learning_rate": 1.527321603128055e-05,
"loss": 0.1816,
"step": 9675
},
{
"epoch": 4.74,
"learning_rate": 1.526099706744868e-05,
"loss": 0.1925,
"step": 9700
},
{
"epoch": 4.75,
"learning_rate": 1.5248778103616814e-05,
"loss": 0.2035,
"step": 9725
},
{
"epoch": 4.77,
"learning_rate": 1.5236559139784948e-05,
"loss": 0.1839,
"step": 9750
},
{
"epoch": 4.78,
"learning_rate": 1.522434017595308e-05,
"loss": 0.1897,
"step": 9775
},
{
"epoch": 4.79,
"learning_rate": 1.5212121212121213e-05,
"loss": 0.188,
"step": 9800
},
{
"epoch": 4.8,
"learning_rate": 1.5199902248289347e-05,
"loss": 0.1791,
"step": 9825
},
{
"epoch": 4.81,
"learning_rate": 1.518768328445748e-05,
"loss": 0.1894,
"step": 9850
},
{
"epoch": 4.83,
"learning_rate": 1.5175464320625612e-05,
"loss": 0.1975,
"step": 9875
},
{
"epoch": 4.84,
"learning_rate": 1.5163245356793746e-05,
"loss": 0.181,
"step": 9900
},
{
"epoch": 4.85,
"learning_rate": 1.5151026392961878e-05,
"loss": 0.2079,
"step": 9925
},
{
"epoch": 4.86,
"learning_rate": 1.513880742913001e-05,
"loss": 0.1866,
"step": 9950
},
{
"epoch": 4.88,
"learning_rate": 1.5126588465298145e-05,
"loss": 0.1868,
"step": 9975
},
{
"epoch": 4.89,
"learning_rate": 1.5114369501466277e-05,
"loss": 0.1956,
"step": 10000
},
{
"epoch": 4.9,
"learning_rate": 1.510215053763441e-05,
"loss": 0.1871,
"step": 10025
},
{
"epoch": 4.91,
"learning_rate": 1.5089931573802544e-05,
"loss": 0.1954,
"step": 10050
},
{
"epoch": 4.92,
"learning_rate": 1.5077712609970676e-05,
"loss": 0.1872,
"step": 10075
},
{
"epoch": 4.94,
"learning_rate": 1.5065493646138808e-05,
"loss": 0.2023,
"step": 10100
},
{
"epoch": 4.95,
"learning_rate": 1.5053274682306943e-05,
"loss": 0.2043,
"step": 10125
},
{
"epoch": 4.96,
"learning_rate": 1.5041055718475075e-05,
"loss": 0.1863,
"step": 10150
},
{
"epoch": 4.97,
"learning_rate": 1.5028836754643207e-05,
"loss": 0.1987,
"step": 10175
},
{
"epoch": 4.99,
"learning_rate": 1.5016617790811341e-05,
"loss": 0.2,
"step": 10200
},
{
"epoch": 5.0,
"learning_rate": 1.5004398826979474e-05,
"loss": 0.1963,
"step": 10225
},
{
"epoch": 5.01,
"learning_rate": 1.4992179863147606e-05,
"loss": 0.1458,
"step": 10250
},
{
"epoch": 5.02,
"learning_rate": 1.497996089931574e-05,
"loss": 0.1321,
"step": 10275
},
{
"epoch": 5.03,
"learning_rate": 1.4967741935483873e-05,
"loss": 0.1246,
"step": 10300
},
{
"epoch": 5.05,
"learning_rate": 1.4955522971652005e-05,
"loss": 0.1298,
"step": 10325
},
{
"epoch": 5.06,
"learning_rate": 1.494330400782014e-05,
"loss": 0.1401,
"step": 10350
},
{
"epoch": 5.07,
"learning_rate": 1.4931085043988272e-05,
"loss": 0.1299,
"step": 10375
},
{
"epoch": 5.08,
"learning_rate": 1.4918866080156404e-05,
"loss": 0.128,
"step": 10400
},
{
"epoch": 5.1,
"learning_rate": 1.4906647116324538e-05,
"loss": 0.134,
"step": 10425
},
{
"epoch": 5.11,
"learning_rate": 1.489442815249267e-05,
"loss": 0.137,
"step": 10450
},
{
"epoch": 5.12,
"learning_rate": 1.4882209188660803e-05,
"loss": 0.1393,
"step": 10475
},
{
"epoch": 5.13,
"learning_rate": 1.4869990224828937e-05,
"loss": 0.1303,
"step": 10500
},
{
"epoch": 5.14,
"learning_rate": 1.485777126099707e-05,
"loss": 0.1412,
"step": 10525
},
{
"epoch": 5.16,
"learning_rate": 1.4845552297165202e-05,
"loss": 0.1454,
"step": 10550
},
{
"epoch": 5.17,
"learning_rate": 1.4833333333333336e-05,
"loss": 0.1464,
"step": 10575
},
{
"epoch": 5.18,
"learning_rate": 1.4821114369501468e-05,
"loss": 0.138,
"step": 10600
},
{
"epoch": 5.19,
"learning_rate": 1.48088954056696e-05,
"loss": 0.1256,
"step": 10625
},
{
"epoch": 5.21,
"learning_rate": 1.4796676441837735e-05,
"loss": 0.1352,
"step": 10650
},
{
"epoch": 5.22,
"learning_rate": 1.4784457478005867e-05,
"loss": 0.138,
"step": 10675
},
{
"epoch": 5.23,
"learning_rate": 1.4772238514174e-05,
"loss": 0.1388,
"step": 10700
},
{
"epoch": 5.24,
"learning_rate": 1.4760019550342134e-05,
"loss": 0.1364,
"step": 10725
},
{
"epoch": 5.25,
"learning_rate": 1.4747800586510266e-05,
"loss": 0.1414,
"step": 10750
},
{
"epoch": 5.27,
"learning_rate": 1.4735581622678398e-05,
"loss": 0.1409,
"step": 10775
},
{
"epoch": 5.28,
"learning_rate": 1.4723362658846531e-05,
"loss": 0.1475,
"step": 10800
},
{
"epoch": 5.29,
"learning_rate": 1.4711143695014665e-05,
"loss": 0.1465,
"step": 10825
},
{
"epoch": 5.3,
"learning_rate": 1.4698924731182797e-05,
"loss": 0.1565,
"step": 10850
},
{
"epoch": 5.32,
"learning_rate": 1.468670576735093e-05,
"loss": 0.1534,
"step": 10875
},
{
"epoch": 5.33,
"learning_rate": 1.4674486803519064e-05,
"loss": 0.1549,
"step": 10900
},
{
"epoch": 5.34,
"learning_rate": 1.4662267839687196e-05,
"loss": 0.1404,
"step": 10925
},
{
"epoch": 5.35,
"learning_rate": 1.4650048875855329e-05,
"loss": 0.1352,
"step": 10950
},
{
"epoch": 5.36,
"learning_rate": 1.4637829912023463e-05,
"loss": 0.1455,
"step": 10975
},
{
"epoch": 5.38,
"learning_rate": 1.4625610948191595e-05,
"loss": 0.1359,
"step": 11000
},
{
"epoch": 5.39,
"learning_rate": 1.4613391984359728e-05,
"loss": 0.1395,
"step": 11025
},
{
"epoch": 5.4,
"learning_rate": 1.4601173020527862e-05,
"loss": 0.1544,
"step": 11050
},
{
"epoch": 5.41,
"learning_rate": 1.4588954056695994e-05,
"loss": 0.1603,
"step": 11075
},
{
"epoch": 5.43,
"learning_rate": 1.4576735092864126e-05,
"loss": 0.141,
"step": 11100
},
{
"epoch": 5.44,
"learning_rate": 1.456451612903226e-05,
"loss": 0.1459,
"step": 11125
},
{
"epoch": 5.45,
"learning_rate": 1.4552297165200393e-05,
"loss": 0.1327,
"step": 11150
},
{
"epoch": 5.46,
"learning_rate": 1.4540078201368525e-05,
"loss": 0.1393,
"step": 11175
},
{
"epoch": 5.47,
"learning_rate": 1.452785923753666e-05,
"loss": 0.1569,
"step": 11200
},
{
"epoch": 5.49,
"learning_rate": 1.4515640273704792e-05,
"loss": 0.1387,
"step": 11225
},
{
"epoch": 5.5,
"learning_rate": 1.4503421309872924e-05,
"loss": 0.1332,
"step": 11250
},
{
"epoch": 5.51,
"learning_rate": 1.4491202346041058e-05,
"loss": 0.1439,
"step": 11275
},
{
"epoch": 5.52,
"learning_rate": 1.447898338220919e-05,
"loss": 0.1252,
"step": 11300
},
{
"epoch": 5.54,
"learning_rate": 1.4466764418377323e-05,
"loss": 0.1516,
"step": 11325
},
{
"epoch": 5.55,
"learning_rate": 1.4454545454545457e-05,
"loss": 0.1378,
"step": 11350
},
{
"epoch": 5.56,
"learning_rate": 1.4442815249266864e-05,
"loss": 0.1571,
"step": 11375
},
{
"epoch": 5.57,
"learning_rate": 1.4430596285434997e-05,
"loss": 0.1519,
"step": 11400
},
{
"epoch": 5.58,
"learning_rate": 1.4418377321603129e-05,
"loss": 0.1498,
"step": 11425
},
{
"epoch": 5.6,
"learning_rate": 1.4406158357771263e-05,
"loss": 0.1408,
"step": 11450
},
{
"epoch": 5.61,
"learning_rate": 1.4393939393939396e-05,
"loss": 0.1336,
"step": 11475
},
{
"epoch": 5.62,
"learning_rate": 1.4381720430107528e-05,
"loss": 0.1423,
"step": 11500
},
{
"epoch": 5.63,
"learning_rate": 1.4369501466275662e-05,
"loss": 0.1498,
"step": 11525
},
{
"epoch": 5.65,
"learning_rate": 1.4357282502443794e-05,
"loss": 0.1476,
"step": 11550
},
{
"epoch": 5.66,
"learning_rate": 1.4345063538611927e-05,
"loss": 0.1363,
"step": 11575
},
{
"epoch": 5.67,
"learning_rate": 1.4332844574780061e-05,
"loss": 0.141,
"step": 11600
},
{
"epoch": 5.68,
"learning_rate": 1.4320625610948193e-05,
"loss": 0.1329,
"step": 11625
},
{
"epoch": 5.69,
"learning_rate": 1.4308406647116326e-05,
"loss": 0.153,
"step": 11650
},
{
"epoch": 5.71,
"learning_rate": 1.429618768328446e-05,
"loss": 0.1587,
"step": 11675
},
{
"epoch": 5.72,
"learning_rate": 1.4283968719452592e-05,
"loss": 0.1518,
"step": 11700
},
{
"epoch": 5.73,
"learning_rate": 1.4271749755620725e-05,
"loss": 0.1449,
"step": 11725
},
{
"epoch": 5.74,
"learning_rate": 1.4259530791788859e-05,
"loss": 0.154,
"step": 11750
},
{
"epoch": 5.76,
"learning_rate": 1.4247311827956991e-05,
"loss": 0.1519,
"step": 11775
},
{
"epoch": 5.77,
"learning_rate": 1.4235092864125124e-05,
"loss": 0.1417,
"step": 11800
},
{
"epoch": 5.78,
"learning_rate": 1.4222873900293258e-05,
"loss": 0.1479,
"step": 11825
},
{
"epoch": 5.79,
"learning_rate": 1.421065493646139e-05,
"loss": 0.1499,
"step": 11850
},
{
"epoch": 5.8,
"learning_rate": 1.4198435972629522e-05,
"loss": 0.1362,
"step": 11875
},
{
"epoch": 5.82,
"learning_rate": 1.4186217008797657e-05,
"loss": 0.1522,
"step": 11900
},
{
"epoch": 5.83,
"learning_rate": 1.4173998044965789e-05,
"loss": 0.149,
"step": 11925
},
{
"epoch": 5.84,
"learning_rate": 1.4161779081133921e-05,
"loss": 0.1448,
"step": 11950
},
{
"epoch": 5.85,
"learning_rate": 1.4149560117302055e-05,
"loss": 0.1602,
"step": 11975
},
{
"epoch": 5.87,
"learning_rate": 1.4137341153470188e-05,
"loss": 0.1481,
"step": 12000
},
{
"epoch": 5.88,
"learning_rate": 1.412512218963832e-05,
"loss": 0.1631,
"step": 12025
},
{
"epoch": 5.89,
"learning_rate": 1.4112903225806454e-05,
"loss": 0.1516,
"step": 12050
},
{
"epoch": 5.9,
"learning_rate": 1.4100684261974587e-05,
"loss": 0.1449,
"step": 12075
},
{
"epoch": 5.91,
"learning_rate": 1.4088465298142719e-05,
"loss": 0.1549,
"step": 12100
},
{
"epoch": 5.93,
"learning_rate": 1.4076246334310853e-05,
"loss": 0.1456,
"step": 12125
},
{
"epoch": 5.94,
"learning_rate": 1.4064027370478986e-05,
"loss": 0.1443,
"step": 12150
},
{
"epoch": 5.95,
"learning_rate": 1.4051808406647118e-05,
"loss": 0.1445,
"step": 12175
},
{
"epoch": 5.96,
"learning_rate": 1.4039589442815252e-05,
"loss": 0.15,
"step": 12200
},
{
"epoch": 5.98,
"learning_rate": 1.4027370478983385e-05,
"loss": 0.1459,
"step": 12225
},
{
"epoch": 5.99,
"learning_rate": 1.4015151515151517e-05,
"loss": 0.1594,
"step": 12250
},
{
"epoch": 6.0,
"learning_rate": 1.400293255131965e-05,
"loss": 0.1511,
"step": 12275
},
{
"epoch": 6.01,
"learning_rate": 1.3990713587487783e-05,
"loss": 0.1114,
"step": 12300
},
{
"epoch": 6.02,
"learning_rate": 1.3978494623655916e-05,
"loss": 0.1002,
"step": 12325
},
{
"epoch": 6.04,
"learning_rate": 1.3966275659824048e-05,
"loss": 0.117,
"step": 12350
},
{
"epoch": 6.05,
"learning_rate": 1.3954056695992182e-05,
"loss": 0.1082,
"step": 12375
},
{
"epoch": 6.06,
"learning_rate": 1.3941837732160315e-05,
"loss": 0.1215,
"step": 12400
},
{
"epoch": 6.07,
"learning_rate": 1.3929618768328447e-05,
"loss": 0.1046,
"step": 12425
},
{
"epoch": 6.09,
"learning_rate": 1.3917399804496581e-05,
"loss": 0.1063,
"step": 12450
},
{
"epoch": 6.1,
"learning_rate": 1.3905180840664714e-05,
"loss": 0.1178,
"step": 12475
},
{
"epoch": 6.11,
"learning_rate": 1.3892961876832846e-05,
"loss": 0.1093,
"step": 12500
},
{
"epoch": 6.12,
"learning_rate": 1.388074291300098e-05,
"loss": 0.1085,
"step": 12525
},
{
"epoch": 6.13,
"learning_rate": 1.3868523949169112e-05,
"loss": 0.1171,
"step": 12550
},
{
"epoch": 6.15,
"learning_rate": 1.3856304985337245e-05,
"loss": 0.1145,
"step": 12575
},
{
"epoch": 6.16,
"learning_rate": 1.3844086021505379e-05,
"loss": 0.1042,
"step": 12600
},
{
"epoch": 6.17,
"learning_rate": 1.3831867057673511e-05,
"loss": 0.1059,
"step": 12625
},
{
"epoch": 6.18,
"learning_rate": 1.3819648093841644e-05,
"loss": 0.1144,
"step": 12650
},
{
"epoch": 6.2,
"learning_rate": 1.3807429130009778e-05,
"loss": 0.1023,
"step": 12675
},
{
"epoch": 6.21,
"learning_rate": 1.379521016617791e-05,
"loss": 0.1188,
"step": 12700
},
{
"epoch": 6.22,
"learning_rate": 1.3782991202346043e-05,
"loss": 0.1028,
"step": 12725
},
{
"epoch": 6.23,
"learning_rate": 1.3770772238514177e-05,
"loss": 0.1174,
"step": 12750
},
{
"epoch": 6.24,
"learning_rate": 1.375855327468231e-05,
"loss": 0.1113,
"step": 12775
},
{
"epoch": 6.26,
"learning_rate": 1.374633431085044e-05,
"loss": 0.1183,
"step": 12800
},
{
"epoch": 6.27,
"learning_rate": 1.3734115347018572e-05,
"loss": 0.1145,
"step": 12825
},
{
"epoch": 6.28,
"learning_rate": 1.3721896383186706e-05,
"loss": 0.1075,
"step": 12850
},
{
"epoch": 6.29,
"learning_rate": 1.3709677419354839e-05,
"loss": 0.1083,
"step": 12875
},
{
"epoch": 6.3,
"learning_rate": 1.3697458455522971e-05,
"loss": 0.1123,
"step": 12900
},
{
"epoch": 6.32,
"learning_rate": 1.3685239491691105e-05,
"loss": 0.1097,
"step": 12925
},
{
"epoch": 6.33,
"learning_rate": 1.3673020527859238e-05,
"loss": 0.1262,
"step": 12950
},
{
"epoch": 6.34,
"learning_rate": 1.366080156402737e-05,
"loss": 0.1156,
"step": 12975
},
{
"epoch": 6.35,
"learning_rate": 1.3648582600195504e-05,
"loss": 0.1229,
"step": 13000
},
{
"epoch": 6.37,
"learning_rate": 1.3636363636363637e-05,
"loss": 0.1173,
"step": 13025
},
{
"epoch": 6.38,
"learning_rate": 1.3624144672531769e-05,
"loss": 0.113,
"step": 13050
},
{
"epoch": 6.39,
"learning_rate": 1.3611925708699903e-05,
"loss": 0.1119,
"step": 13075
},
{
"epoch": 6.4,
"learning_rate": 1.3599706744868035e-05,
"loss": 0.1205,
"step": 13100
},
{
"epoch": 6.41,
"learning_rate": 1.3587487781036168e-05,
"loss": 0.1074,
"step": 13125
},
{
"epoch": 6.43,
"learning_rate": 1.3575268817204302e-05,
"loss": 0.1134,
"step": 13150
},
{
"epoch": 6.44,
"learning_rate": 1.3563049853372434e-05,
"loss": 0.1146,
"step": 13175
},
{
"epoch": 6.45,
"learning_rate": 1.3550830889540567e-05,
"loss": 0.1055,
"step": 13200
},
{
"epoch": 6.46,
"learning_rate": 1.35386119257087e-05,
"loss": 0.1067,
"step": 13225
},
{
"epoch": 6.48,
"learning_rate": 1.3526392961876833e-05,
"loss": 0.1141,
"step": 13250
},
{
"epoch": 6.49,
"learning_rate": 1.3514173998044966e-05,
"loss": 0.1143,
"step": 13275
},
{
"epoch": 6.5,
"learning_rate": 1.35019550342131e-05,
"loss": 0.115,
"step": 13300
},
{
"epoch": 6.51,
"learning_rate": 1.3489736070381232e-05,
"loss": 0.1158,
"step": 13325
},
{
"epoch": 6.52,
"learning_rate": 1.3477517106549365e-05,
"loss": 0.1119,
"step": 13350
},
{
"epoch": 6.54,
"learning_rate": 1.3465298142717499e-05,
"loss": 0.1225,
"step": 13375
},
{
"epoch": 6.55,
"learning_rate": 1.3453079178885631e-05,
"loss": 0.1171,
"step": 13400
},
{
"epoch": 6.56,
"learning_rate": 1.3440860215053763e-05,
"loss": 0.1235,
"step": 13425
},
{
"epoch": 6.57,
"learning_rate": 1.3428641251221896e-05,
"loss": 0.1156,
"step": 13450
},
{
"epoch": 6.59,
"learning_rate": 1.341642228739003e-05,
"loss": 0.1129,
"step": 13475
},
{
"epoch": 6.6,
"learning_rate": 1.3404203323558162e-05,
"loss": 0.1219,
"step": 13500
},
{
"epoch": 6.61,
"learning_rate": 1.3391984359726295e-05,
"loss": 0.1131,
"step": 13525
},
{
"epoch": 6.62,
"learning_rate": 1.3379765395894429e-05,
"loss": 0.1163,
"step": 13550
},
{
"epoch": 6.63,
"learning_rate": 1.3367546432062561e-05,
"loss": 0.1073,
"step": 13575
},
{
"epoch": 6.65,
"learning_rate": 1.3355327468230694e-05,
"loss": 0.1082,
"step": 13600
},
{
"epoch": 6.66,
"learning_rate": 1.3343108504398828e-05,
"loss": 0.1161,
"step": 13625
},
{
"epoch": 6.67,
"learning_rate": 1.333088954056696e-05,
"loss": 0.1021,
"step": 13650
},
{
"epoch": 6.68,
"learning_rate": 1.3318670576735093e-05,
"loss": 0.117,
"step": 13675
},
{
"epoch": 6.7,
"learning_rate": 1.3306451612903227e-05,
"loss": 0.1294,
"step": 13700
},
{
"epoch": 6.71,
"learning_rate": 1.3294232649071359e-05,
"loss": 0.1148,
"step": 13725
},
{
"epoch": 6.72,
"learning_rate": 1.3282013685239491e-05,
"loss": 0.1037,
"step": 13750
},
{
"epoch": 6.73,
"learning_rate": 1.3269794721407626e-05,
"loss": 0.1125,
"step": 13775
},
{
"epoch": 6.74,
"learning_rate": 1.3257575757575758e-05,
"loss": 0.119,
"step": 13800
},
{
"epoch": 6.76,
"learning_rate": 1.324535679374389e-05,
"loss": 0.1161,
"step": 13825
},
{
"epoch": 6.77,
"learning_rate": 1.3233137829912024e-05,
"loss": 0.112,
"step": 13850
},
{
"epoch": 6.78,
"learning_rate": 1.3220918866080157e-05,
"loss": 0.1124,
"step": 13875
},
{
"epoch": 6.79,
"learning_rate": 1.320869990224829e-05,
"loss": 0.1352,
"step": 13900
},
{
"epoch": 6.81,
"learning_rate": 1.3196480938416423e-05,
"loss": 0.1252,
"step": 13925
},
{
"epoch": 6.82,
"learning_rate": 1.3184261974584556e-05,
"loss": 0.1054,
"step": 13950
},
{
"epoch": 6.83,
"learning_rate": 1.3172043010752688e-05,
"loss": 0.1153,
"step": 13975
},
{
"epoch": 6.84,
"learning_rate": 1.3159824046920822e-05,
"loss": 0.1323,
"step": 14000
},
{
"epoch": 6.85,
"learning_rate": 1.3147605083088955e-05,
"loss": 0.1227,
"step": 14025
},
{
"epoch": 6.87,
"learning_rate": 1.3135386119257087e-05,
"loss": 0.1197,
"step": 14050
},
{
"epoch": 6.88,
"learning_rate": 1.3123167155425221e-05,
"loss": 0.1202,
"step": 14075
},
{
"epoch": 6.89,
"learning_rate": 1.3110948191593353e-05,
"loss": 0.1119,
"step": 14100
},
{
"epoch": 6.9,
"learning_rate": 1.3098729227761486e-05,
"loss": 0.115,
"step": 14125
},
{
"epoch": 6.92,
"learning_rate": 1.308651026392962e-05,
"loss": 0.1209,
"step": 14150
},
{
"epoch": 6.93,
"learning_rate": 1.3074291300097752e-05,
"loss": 0.1201,
"step": 14175
},
{
"epoch": 6.94,
"learning_rate": 1.3062072336265885e-05,
"loss": 0.1227,
"step": 14200
},
{
"epoch": 6.95,
"learning_rate": 1.3049853372434019e-05,
"loss": 0.1132,
"step": 14225
},
{
"epoch": 6.96,
"learning_rate": 1.3037634408602151e-05,
"loss": 0.1326,
"step": 14250
},
{
"epoch": 6.98,
"learning_rate": 1.3025415444770284e-05,
"loss": 0.1098,
"step": 14275
},
{
"epoch": 6.99,
"learning_rate": 1.3013196480938418e-05,
"loss": 0.1168,
"step": 14300
},
{
"epoch": 7.0,
"learning_rate": 1.300097751710655e-05,
"loss": 0.132,
"step": 14325
},
{
"epoch": 7.01,
"learning_rate": 1.2988758553274683e-05,
"loss": 0.0912,
"step": 14350
},
{
"epoch": 7.03,
"learning_rate": 1.2976539589442817e-05,
"loss": 0.091,
"step": 14375
},
{
"epoch": 7.04,
"learning_rate": 1.2964320625610949e-05,
"loss": 0.0789,
"step": 14400
},
{
"epoch": 7.05,
"learning_rate": 1.2952101661779081e-05,
"loss": 0.0815,
"step": 14425
},
{
"epoch": 7.06,
"learning_rate": 1.2939882697947216e-05,
"loss": 0.0935,
"step": 14450
},
{
"epoch": 7.07,
"learning_rate": 1.2927663734115348e-05,
"loss": 0.0863,
"step": 14475
},
{
"epoch": 7.09,
"learning_rate": 1.291544477028348e-05,
"loss": 0.0877,
"step": 14500
},
{
"epoch": 7.1,
"learning_rate": 1.2903225806451613e-05,
"loss": 0.0832,
"step": 14525
},
{
"epoch": 7.11,
"learning_rate": 1.2891006842619747e-05,
"loss": 0.0888,
"step": 14550
},
{
"epoch": 7.12,
"learning_rate": 1.287878787878788e-05,
"loss": 0.0901,
"step": 14575
},
{
"epoch": 7.14,
"learning_rate": 1.2866568914956012e-05,
"loss": 0.0891,
"step": 14600
},
{
"epoch": 7.15,
"learning_rate": 1.2854349951124146e-05,
"loss": 0.0887,
"step": 14625
},
{
"epoch": 7.16,
"learning_rate": 1.2842130987292278e-05,
"loss": 0.0916,
"step": 14650
},
{
"epoch": 7.17,
"learning_rate": 1.282991202346041e-05,
"loss": 0.0853,
"step": 14675
},
{
"epoch": 7.18,
"learning_rate": 1.2817693059628545e-05,
"loss": 0.0858,
"step": 14700
},
{
"epoch": 7.2,
"learning_rate": 1.2805474095796677e-05,
"loss": 0.0876,
"step": 14725
},
{
"epoch": 7.21,
"learning_rate": 1.279325513196481e-05,
"loss": 0.1036,
"step": 14750
},
{
"epoch": 7.22,
"learning_rate": 1.2781036168132944e-05,
"loss": 0.0959,
"step": 14775
},
{
"epoch": 7.23,
"learning_rate": 1.2768817204301076e-05,
"loss": 0.0789,
"step": 14800
},
{
"epoch": 7.25,
"learning_rate": 1.2756598240469208e-05,
"loss": 0.0843,
"step": 14825
},
{
"epoch": 7.26,
"learning_rate": 1.2744379276637342e-05,
"loss": 0.0895,
"step": 14850
},
{
"epoch": 7.27,
"learning_rate": 1.2732160312805475e-05,
"loss": 0.0985,
"step": 14875
},
{
"epoch": 7.28,
"learning_rate": 1.2719941348973607e-05,
"loss": 0.0793,
"step": 14900
},
{
"epoch": 7.29,
"learning_rate": 1.2707722385141741e-05,
"loss": 0.0808,
"step": 14925
},
{
"epoch": 7.31,
"learning_rate": 1.2695503421309874e-05,
"loss": 0.0843,
"step": 14950
},
{
"epoch": 7.32,
"learning_rate": 1.2683284457478006e-05,
"loss": 0.0979,
"step": 14975
},
{
"epoch": 7.33,
"learning_rate": 1.267106549364614e-05,
"loss": 0.1021,
"step": 15000
},
{
"epoch": 7.34,
"learning_rate": 1.2658846529814273e-05,
"loss": 0.0892,
"step": 15025
},
{
"epoch": 7.36,
"learning_rate": 1.2646627565982405e-05,
"loss": 0.0842,
"step": 15050
},
{
"epoch": 7.37,
"learning_rate": 1.2634408602150539e-05,
"loss": 0.0836,
"step": 15075
},
{
"epoch": 7.38,
"learning_rate": 1.2622189638318672e-05,
"loss": 0.0844,
"step": 15100
},
{
"epoch": 7.39,
"learning_rate": 1.2609970674486804e-05,
"loss": 0.1078,
"step": 15125
},
{
"epoch": 7.4,
"learning_rate": 1.2597751710654938e-05,
"loss": 0.0929,
"step": 15150
},
{
"epoch": 7.42,
"learning_rate": 1.258553274682307e-05,
"loss": 0.089,
"step": 15175
},
{
"epoch": 7.43,
"learning_rate": 1.2573313782991203e-05,
"loss": 0.1009,
"step": 15200
},
{
"epoch": 7.44,
"learning_rate": 1.2561094819159337e-05,
"loss": 0.0925,
"step": 15225
},
{
"epoch": 7.45,
"learning_rate": 1.254887585532747e-05,
"loss": 0.0892,
"step": 15250
},
{
"epoch": 7.47,
"learning_rate": 1.2536656891495602e-05,
"loss": 0.0887,
"step": 15275
},
{
"epoch": 7.48,
"learning_rate": 1.2524437927663736e-05,
"loss": 0.0964,
"step": 15300
},
{
"epoch": 7.49,
"learning_rate": 1.2512218963831868e-05,
"loss": 0.0894,
"step": 15325
},
{
"epoch": 7.5,
"learning_rate": 1.25e-05,
"loss": 0.0946,
"step": 15350
},
{
"epoch": 7.51,
"learning_rate": 1.2487781036168135e-05,
"loss": 0.0921,
"step": 15375
},
{
"epoch": 7.53,
"learning_rate": 1.2475562072336267e-05,
"loss": 0.0926,
"step": 15400
},
{
"epoch": 7.54,
"learning_rate": 1.24633431085044e-05,
"loss": 0.0959,
"step": 15425
},
{
"epoch": 7.55,
"learning_rate": 1.2451124144672534e-05,
"loss": 0.0936,
"step": 15450
},
{
"epoch": 7.56,
"learning_rate": 1.243939393939394e-05,
"loss": 0.0971,
"step": 15475
},
{
"epoch": 7.58,
"learning_rate": 1.2427174975562073e-05,
"loss": 0.0852,
"step": 15500
},
{
"epoch": 7.59,
"learning_rate": 1.2414956011730205e-05,
"loss": 0.0913,
"step": 15525
},
{
"epoch": 7.6,
"learning_rate": 1.240273704789834e-05,
"loss": 0.0922,
"step": 15550
},
{
"epoch": 7.61,
"learning_rate": 1.2390518084066472e-05,
"loss": 0.0918,
"step": 15575
},
{
"epoch": 7.62,
"learning_rate": 1.2378299120234604e-05,
"loss": 0.1045,
"step": 15600
},
{
"epoch": 7.64,
"learning_rate": 1.2366080156402738e-05,
"loss": 0.1003,
"step": 15625
},
{
"epoch": 7.65,
"learning_rate": 1.235386119257087e-05,
"loss": 0.087,
"step": 15650
},
{
"epoch": 7.66,
"learning_rate": 1.2341642228739003e-05,
"loss": 0.0965,
"step": 15675
},
{
"epoch": 7.67,
"learning_rate": 1.232991202346041e-05,
"loss": 0.0913,
"step": 15700
},
{
"epoch": 7.69,
"learning_rate": 1.2317693059628544e-05,
"loss": 0.0866,
"step": 15725
},
{
"epoch": 7.7,
"learning_rate": 1.2305474095796677e-05,
"loss": 0.091,
"step": 15750
},
{
"epoch": 7.71,
"learning_rate": 1.2293255131964809e-05,
"loss": 0.101,
"step": 15775
},
{
"epoch": 7.72,
"learning_rate": 1.2281036168132943e-05,
"loss": 0.1011,
"step": 15800
},
{
"epoch": 7.73,
"learning_rate": 1.2268817204301076e-05,
"loss": 0.0854,
"step": 15825
},
{
"epoch": 7.75,
"learning_rate": 1.2256598240469208e-05,
"loss": 0.0872,
"step": 15850
},
{
"epoch": 7.76,
"learning_rate": 1.2244379276637342e-05,
"loss": 0.0904,
"step": 15875
},
{
"epoch": 7.77,
"learning_rate": 1.2232160312805475e-05,
"loss": 0.1043,
"step": 15900
},
{
"epoch": 7.78,
"learning_rate": 1.2219941348973607e-05,
"loss": 0.0815,
"step": 15925
},
{
"epoch": 7.8,
"learning_rate": 1.2207722385141741e-05,
"loss": 0.0909,
"step": 15950
},
{
"epoch": 7.81,
"learning_rate": 1.2195503421309873e-05,
"loss": 0.0998,
"step": 15975
},
{
"epoch": 7.82,
"learning_rate": 1.2183284457478006e-05,
"loss": 0.0947,
"step": 16000
},
{
"epoch": 7.83,
"learning_rate": 1.217106549364614e-05,
"loss": 0.1099,
"step": 16025
},
{
"epoch": 7.84,
"learning_rate": 1.2158846529814272e-05,
"loss": 0.0971,
"step": 16050
},
{
"epoch": 7.86,
"learning_rate": 1.2146627565982405e-05,
"loss": 0.0944,
"step": 16075
},
{
"epoch": 7.87,
"learning_rate": 1.2134408602150539e-05,
"loss": 0.1006,
"step": 16100
},
{
"epoch": 7.88,
"learning_rate": 1.2122189638318671e-05,
"loss": 0.1007,
"step": 16125
},
{
"epoch": 7.89,
"learning_rate": 1.2109970674486804e-05,
"loss": 0.1004,
"step": 16150
},
{
"epoch": 7.91,
"learning_rate": 1.2097751710654938e-05,
"loss": 0.0946,
"step": 16175
},
{
"epoch": 7.92,
"learning_rate": 1.208553274682307e-05,
"loss": 0.0988,
"step": 16200
},
{
"epoch": 7.93,
"learning_rate": 1.2073313782991203e-05,
"loss": 0.0994,
"step": 16225
},
{
"epoch": 7.94,
"learning_rate": 1.2061094819159337e-05,
"loss": 0.0912,
"step": 16250
},
{
"epoch": 7.95,
"learning_rate": 1.2048875855327469e-05,
"loss": 0.094,
"step": 16275
},
{
"epoch": 7.97,
"learning_rate": 1.2036656891495601e-05,
"loss": 0.1037,
"step": 16300
},
{
"epoch": 7.98,
"learning_rate": 1.2024437927663734e-05,
"loss": 0.1082,
"step": 16325
},
{
"epoch": 7.99,
"learning_rate": 1.2012218963831868e-05,
"loss": 0.0959,
"step": 16350
},
{
"epoch": 8.0,
"learning_rate": 1.2e-05,
"loss": 0.0848,
"step": 16375
},
{
"epoch": 8.02,
"learning_rate": 1.1987781036168133e-05,
"loss": 0.07,
"step": 16400
},
{
"epoch": 8.03,
"learning_rate": 1.1975562072336267e-05,
"loss": 0.0713,
"step": 16425
},
{
"epoch": 8.04,
"learning_rate": 1.19633431085044e-05,
"loss": 0.0816,
"step": 16450
},
{
"epoch": 8.05,
"learning_rate": 1.1951124144672532e-05,
"loss": 0.0823,
"step": 16475
},
{
"epoch": 8.06,
"learning_rate": 1.1938905180840666e-05,
"loss": 0.0624,
"step": 16500
},
{
"epoch": 8.08,
"learning_rate": 1.1926686217008798e-05,
"loss": 0.0606,
"step": 16525
},
{
"epoch": 8.09,
"learning_rate": 1.191446725317693e-05,
"loss": 0.063,
"step": 16550
},
{
"epoch": 8.1,
"learning_rate": 1.1902248289345065e-05,
"loss": 0.0679,
"step": 16575
},
{
"epoch": 8.11,
"learning_rate": 1.1890029325513197e-05,
"loss": 0.0748,
"step": 16600
},
{
"epoch": 8.13,
"learning_rate": 1.187781036168133e-05,
"loss": 0.0666,
"step": 16625
},
{
"epoch": 8.14,
"learning_rate": 1.1865591397849463e-05,
"loss": 0.0703,
"step": 16650
},
{
"epoch": 8.15,
"learning_rate": 1.1853372434017596e-05,
"loss": 0.0756,
"step": 16675
},
{
"epoch": 8.16,
"learning_rate": 1.1841153470185728e-05,
"loss": 0.0611,
"step": 16700
},
{
"epoch": 8.17,
"learning_rate": 1.1828934506353862e-05,
"loss": 0.0766,
"step": 16725
},
{
"epoch": 8.19,
"learning_rate": 1.1816715542521995e-05,
"loss": 0.067,
"step": 16750
},
{
"epoch": 8.2,
"learning_rate": 1.1804496578690127e-05,
"loss": 0.075,
"step": 16775
},
{
"epoch": 8.21,
"learning_rate": 1.1792277614858261e-05,
"loss": 0.0887,
"step": 16800
},
{
"epoch": 8.22,
"learning_rate": 1.1780058651026394e-05,
"loss": 0.0652,
"step": 16825
},
{
"epoch": 8.24,
"learning_rate": 1.1767839687194526e-05,
"loss": 0.0787,
"step": 16850
},
{
"epoch": 8.25,
"learning_rate": 1.175562072336266e-05,
"loss": 0.0753,
"step": 16875
},
{
"epoch": 8.26,
"learning_rate": 1.1743401759530793e-05,
"loss": 0.0747,
"step": 16900
},
{
"epoch": 8.27,
"learning_rate": 1.1731182795698925e-05,
"loss": 0.0681,
"step": 16925
},
{
"epoch": 8.28,
"learning_rate": 1.1718963831867059e-05,
"loss": 0.0716,
"step": 16950
},
{
"epoch": 8.3,
"learning_rate": 1.1706744868035191e-05,
"loss": 0.0696,
"step": 16975
},
{
"epoch": 8.31,
"learning_rate": 1.1694525904203324e-05,
"loss": 0.0787,
"step": 17000
},
{
"epoch": 8.32,
"learning_rate": 1.1682306940371458e-05,
"loss": 0.0835,
"step": 17025
},
{
"epoch": 8.33,
"learning_rate": 1.167008797653959e-05,
"loss": 0.0712,
"step": 17050
},
{
"epoch": 8.35,
"learning_rate": 1.1657869012707723e-05,
"loss": 0.0728,
"step": 17075
},
{
"epoch": 8.36,
"learning_rate": 1.1645650048875857e-05,
"loss": 0.0765,
"step": 17100
},
{
"epoch": 8.37,
"learning_rate": 1.163343108504399e-05,
"loss": 0.0742,
"step": 17125
},
{
"epoch": 8.38,
"learning_rate": 1.1621212121212122e-05,
"loss": 0.0949,
"step": 17150
},
{
"epoch": 8.39,
"learning_rate": 1.1608993157380256e-05,
"loss": 0.0709,
"step": 17175
},
{
"epoch": 8.41,
"learning_rate": 1.1596774193548388e-05,
"loss": 0.0712,
"step": 17200
},
{
"epoch": 8.42,
"learning_rate": 1.158455522971652e-05,
"loss": 0.082,
"step": 17225
},
{
"epoch": 8.43,
"learning_rate": 1.1572336265884655e-05,
"loss": 0.0783,
"step": 17250
},
{
"epoch": 8.44,
"learning_rate": 1.1560117302052787e-05,
"loss": 0.068,
"step": 17275
},
{
"epoch": 8.46,
"learning_rate": 1.154789833822092e-05,
"loss": 0.0788,
"step": 17300
},
{
"epoch": 8.47,
"learning_rate": 1.1535679374389054e-05,
"loss": 0.0783,
"step": 17325
},
{
"epoch": 8.48,
"learning_rate": 1.1523460410557186e-05,
"loss": 0.0796,
"step": 17350
},
{
"epoch": 8.49,
"learning_rate": 1.1511241446725318e-05,
"loss": 0.0751,
"step": 17375
},
{
"epoch": 8.5,
"learning_rate": 1.149902248289345e-05,
"loss": 0.0754,
"step": 17400
},
{
"epoch": 8.52,
"learning_rate": 1.1486803519061585e-05,
"loss": 0.0855,
"step": 17425
},
{
"epoch": 8.53,
"learning_rate": 1.1474584555229717e-05,
"loss": 0.0773,
"step": 17450
},
{
"epoch": 8.54,
"learning_rate": 1.146236559139785e-05,
"loss": 0.0844,
"step": 17475
},
{
"epoch": 8.55,
"learning_rate": 1.1450146627565984e-05,
"loss": 0.0797,
"step": 17500
},
{
"epoch": 8.57,
"learning_rate": 1.1437927663734116e-05,
"loss": 0.0675,
"step": 17525
},
{
"epoch": 8.58,
"learning_rate": 1.1425708699902249e-05,
"loss": 0.0742,
"step": 17550
},
{
"epoch": 8.59,
"learning_rate": 1.1413489736070383e-05,
"loss": 0.0761,
"step": 17575
},
{
"epoch": 8.6,
"learning_rate": 1.1401270772238515e-05,
"loss": 0.0806,
"step": 17600
},
{
"epoch": 8.61,
"learning_rate": 1.1389051808406647e-05,
"loss": 0.071,
"step": 17625
},
{
"epoch": 8.63,
"learning_rate": 1.1376832844574782e-05,
"loss": 0.0782,
"step": 17650
},
{
"epoch": 8.64,
"learning_rate": 1.1364613880742914e-05,
"loss": 0.067,
"step": 17675
},
{
"epoch": 8.65,
"learning_rate": 1.1352394916911046e-05,
"loss": 0.0778,
"step": 17700
},
{
"epoch": 8.66,
"learning_rate": 1.134017595307918e-05,
"loss": 0.0729,
"step": 17725
},
{
"epoch": 8.68,
"learning_rate": 1.1327956989247313e-05,
"loss": 0.0775,
"step": 17750
},
{
"epoch": 8.69,
"learning_rate": 1.1315738025415445e-05,
"loss": 0.0726,
"step": 17775
},
{
"epoch": 8.7,
"learning_rate": 1.130351906158358e-05,
"loss": 0.0761,
"step": 17800
},
{
"epoch": 8.71,
"learning_rate": 1.1291300097751712e-05,
"loss": 0.0824,
"step": 17825
},
{
"epoch": 8.72,
"learning_rate": 1.1279081133919844e-05,
"loss": 0.0753,
"step": 17850
},
{
"epoch": 8.74,
"learning_rate": 1.1266862170087978e-05,
"loss": 0.069,
"step": 17875
},
{
"epoch": 8.75,
"learning_rate": 1.125464320625611e-05,
"loss": 0.0848,
"step": 17900
},
{
"epoch": 8.76,
"learning_rate": 1.1242424242424243e-05,
"loss": 0.0845,
"step": 17925
},
{
"epoch": 8.77,
"learning_rate": 1.1230205278592377e-05,
"loss": 0.0823,
"step": 17950
},
{
"epoch": 8.79,
"learning_rate": 1.121798631476051e-05,
"loss": 0.0764,
"step": 17975
},
{
"epoch": 8.8,
"learning_rate": 1.1205767350928642e-05,
"loss": 0.0899,
"step": 18000
},
{
"epoch": 8.81,
"learning_rate": 1.1193548387096776e-05,
"loss": 0.0819,
"step": 18025
},
{
"epoch": 8.82,
"learning_rate": 1.1181329423264908e-05,
"loss": 0.0823,
"step": 18050
},
{
"epoch": 8.83,
"learning_rate": 1.1169599217986315e-05,
"loss": 0.0717,
"step": 18075
},
{
"epoch": 8.85,
"learning_rate": 1.1157380254154448e-05,
"loss": 0.0776,
"step": 18100
},
{
"epoch": 8.86,
"learning_rate": 1.1145161290322582e-05,
"loss": 0.0786,
"step": 18125
},
{
"epoch": 8.87,
"learning_rate": 1.1132942326490714e-05,
"loss": 0.0759,
"step": 18150
},
{
"epoch": 8.88,
"learning_rate": 1.1120723362658847e-05,
"loss": 0.0759,
"step": 18175
},
{
"epoch": 8.9,
"learning_rate": 1.110850439882698e-05,
"loss": 0.0659,
"step": 18200
},
{
"epoch": 8.91,
"learning_rate": 1.1096285434995113e-05,
"loss": 0.0849,
"step": 18225
},
{
"epoch": 8.92,
"learning_rate": 1.1084066471163246e-05,
"loss": 0.0795,
"step": 18250
},
{
"epoch": 8.93,
"learning_rate": 1.107184750733138e-05,
"loss": 0.0871,
"step": 18275
},
{
"epoch": 8.94,
"learning_rate": 1.1059628543499512e-05,
"loss": 0.0786,
"step": 18300
},
{
"epoch": 8.96,
"learning_rate": 1.1047409579667645e-05,
"loss": 0.0857,
"step": 18325
},
{
"epoch": 8.97,
"learning_rate": 1.1035190615835779e-05,
"loss": 0.0847,
"step": 18350
},
{
"epoch": 8.98,
"learning_rate": 1.1022971652003911e-05,
"loss": 0.0869,
"step": 18375
},
{
"epoch": 8.99,
"learning_rate": 1.1010752688172043e-05,
"loss": 0.0711,
"step": 18400
},
{
"epoch": 9.01,
"learning_rate": 1.0998533724340177e-05,
"loss": 0.0767,
"step": 18425
},
{
"epoch": 9.02,
"learning_rate": 1.098631476050831e-05,
"loss": 0.063,
"step": 18450
},
{
"epoch": 9.03,
"learning_rate": 1.0974095796676442e-05,
"loss": 0.0544,
"step": 18475
},
{
"epoch": 9.04,
"learning_rate": 1.0961876832844576e-05,
"loss": 0.0664,
"step": 18500
},
{
"epoch": 9.05,
"learning_rate": 1.0949657869012709e-05,
"loss": 0.0577,
"step": 18525
},
{
"epoch": 9.07,
"learning_rate": 1.0937438905180841e-05,
"loss": 0.0518,
"step": 18550
},
{
"epoch": 9.08,
"learning_rate": 1.0925219941348975e-05,
"loss": 0.0689,
"step": 18575
},
{
"epoch": 9.09,
"learning_rate": 1.0913000977517108e-05,
"loss": 0.0557,
"step": 18600
},
{
"epoch": 9.1,
"learning_rate": 1.090078201368524e-05,
"loss": 0.0646,
"step": 18625
},
{
"epoch": 9.12,
"learning_rate": 1.0888563049853374e-05,
"loss": 0.0705,
"step": 18650
},
{
"epoch": 9.13,
"learning_rate": 1.0876344086021507e-05,
"loss": 0.049,
"step": 18675
},
{
"epoch": 9.14,
"learning_rate": 1.0864125122189639e-05,
"loss": 0.0568,
"step": 18700
},
{
"epoch": 9.15,
"learning_rate": 1.0851906158357773e-05,
"loss": 0.0547,
"step": 18725
},
{
"epoch": 9.16,
"learning_rate": 1.0839687194525905e-05,
"loss": 0.0629,
"step": 18750
},
{
"epoch": 9.18,
"learning_rate": 1.0827468230694038e-05,
"loss": 0.0647,
"step": 18775
},
{
"epoch": 9.19,
"learning_rate": 1.0815249266862172e-05,
"loss": 0.0692,
"step": 18800
},
{
"epoch": 9.2,
"learning_rate": 1.0803030303030304e-05,
"loss": 0.0495,
"step": 18825
},
{
"epoch": 9.21,
"learning_rate": 1.0790811339198437e-05,
"loss": 0.0628,
"step": 18850
},
{
"epoch": 9.23,
"learning_rate": 1.077859237536657e-05,
"loss": 0.0563,
"step": 18875
},
{
"epoch": 9.24,
"learning_rate": 1.0766373411534703e-05,
"loss": 0.0657,
"step": 18900
},
{
"epoch": 9.25,
"learning_rate": 1.0754154447702836e-05,
"loss": 0.0535,
"step": 18925
},
{
"epoch": 9.26,
"learning_rate": 1.0741935483870968e-05,
"loss": 0.0667,
"step": 18950
},
{
"epoch": 9.27,
"learning_rate": 1.0729716520039102e-05,
"loss": 0.0594,
"step": 18975
},
{
"epoch": 9.29,
"learning_rate": 1.0717497556207235e-05,
"loss": 0.0628,
"step": 19000
},
{
"epoch": 9.3,
"learning_rate": 1.0705278592375367e-05,
"loss": 0.058,
"step": 19025
},
{
"epoch": 9.31,
"learning_rate": 1.0693059628543501e-05,
"loss": 0.0677,
"step": 19050
},
{
"epoch": 9.32,
"learning_rate": 1.0680840664711633e-05,
"loss": 0.0623,
"step": 19075
},
{
"epoch": 9.34,
"learning_rate": 1.0668621700879766e-05,
"loss": 0.0631,
"step": 19100
},
{
"epoch": 9.35,
"learning_rate": 1.06564027370479e-05,
"loss": 0.0616,
"step": 19125
},
{
"epoch": 9.36,
"learning_rate": 1.0644183773216032e-05,
"loss": 0.0613,
"step": 19150
},
{
"epoch": 9.37,
"learning_rate": 1.0631964809384165e-05,
"loss": 0.066,
"step": 19175
},
{
"epoch": 9.38,
"learning_rate": 1.0619745845552299e-05,
"loss": 0.0653,
"step": 19200
},
{
"epoch": 9.4,
"learning_rate": 1.0607526881720431e-05,
"loss": 0.0701,
"step": 19225
},
{
"epoch": 9.41,
"learning_rate": 1.0595307917888564e-05,
"loss": 0.0612,
"step": 19250
},
{
"epoch": 9.42,
"learning_rate": 1.0583088954056698e-05,
"loss": 0.055,
"step": 19275
},
{
"epoch": 9.43,
"learning_rate": 1.057086999022483e-05,
"loss": 0.0603,
"step": 19300
},
{
"epoch": 9.45,
"learning_rate": 1.0558651026392963e-05,
"loss": 0.0648,
"step": 19325
},
{
"epoch": 9.46,
"learning_rate": 1.0546432062561097e-05,
"loss": 0.0659,
"step": 19350
},
{
"epoch": 9.47,
"learning_rate": 1.0534213098729229e-05,
"loss": 0.0548,
"step": 19375
},
{
"epoch": 9.48,
"learning_rate": 1.0521994134897361e-05,
"loss": 0.0605,
"step": 19400
},
{
"epoch": 9.49,
"learning_rate": 1.0509775171065496e-05,
"loss": 0.0542,
"step": 19425
},
{
"epoch": 9.51,
"learning_rate": 1.0497556207233628e-05,
"loss": 0.0647,
"step": 19450
},
{
"epoch": 9.52,
"learning_rate": 1.048533724340176e-05,
"loss": 0.0677,
"step": 19475
},
{
"epoch": 9.53,
"learning_rate": 1.0473118279569894e-05,
"loss": 0.06,
"step": 19500
},
{
"epoch": 9.54,
"learning_rate": 1.0460899315738027e-05,
"loss": 0.0664,
"step": 19525
},
{
"epoch": 9.56,
"learning_rate": 1.044868035190616e-05,
"loss": 0.0666,
"step": 19550
},
{
"epoch": 9.57,
"learning_rate": 1.0436461388074293e-05,
"loss": 0.0576,
"step": 19575
},
{
"epoch": 9.58,
"learning_rate": 1.0424242424242426e-05,
"loss": 0.0586,
"step": 19600
},
{
"epoch": 9.59,
"learning_rate": 1.0412023460410558e-05,
"loss": 0.0631,
"step": 19625
},
{
"epoch": 9.6,
"learning_rate": 1.0399804496578692e-05,
"loss": 0.0688,
"step": 19650
},
{
"epoch": 9.62,
"learning_rate": 1.0387585532746825e-05,
"loss": 0.0619,
"step": 19675
},
{
"epoch": 9.63,
"learning_rate": 1.0375366568914957e-05,
"loss": 0.0695,
"step": 19700
},
{
"epoch": 9.64,
"learning_rate": 1.0363147605083091e-05,
"loss": 0.0694,
"step": 19725
},
{
"epoch": 9.65,
"learning_rate": 1.0350928641251224e-05,
"loss": 0.0654,
"step": 19750
},
{
"epoch": 9.67,
"learning_rate": 1.0338709677419356e-05,
"loss": 0.0688,
"step": 19775
},
{
"epoch": 9.68,
"learning_rate": 1.032649071358749e-05,
"loss": 0.0657,
"step": 19800
},
{
"epoch": 9.69,
"learning_rate": 1.0314271749755622e-05,
"loss": 0.0575,
"step": 19825
},
{
"epoch": 9.7,
"learning_rate": 1.0302052785923755e-05,
"loss": 0.0545,
"step": 19850
},
{
"epoch": 9.71,
"learning_rate": 1.0289833822091889e-05,
"loss": 0.0694,
"step": 19875
},
{
"epoch": 9.73,
"learning_rate": 1.0277614858260021e-05,
"loss": 0.0706,
"step": 19900
},
{
"epoch": 9.74,
"learning_rate": 1.0265395894428154e-05,
"loss": 0.061,
"step": 19925
},
{
"epoch": 9.75,
"learning_rate": 1.0253176930596286e-05,
"loss": 0.064,
"step": 19950
},
{
"epoch": 9.76,
"learning_rate": 1.024095796676442e-05,
"loss": 0.0629,
"step": 19975
},
{
"epoch": 9.78,
"learning_rate": 1.0228739002932553e-05,
"loss": 0.0654,
"step": 20000
},
{
"epoch": 9.79,
"learning_rate": 1.0216520039100685e-05,
"loss": 0.0594,
"step": 20025
},
{
"epoch": 9.8,
"learning_rate": 1.0204301075268819e-05,
"loss": 0.0644,
"step": 20050
},
{
"epoch": 9.81,
"learning_rate": 1.0192082111436951e-05,
"loss": 0.0781,
"step": 20075
},
{
"epoch": 9.82,
"learning_rate": 1.0179863147605084e-05,
"loss": 0.0659,
"step": 20100
},
{
"epoch": 9.84,
"learning_rate": 1.0167644183773218e-05,
"loss": 0.072,
"step": 20125
},
{
"epoch": 9.85,
"learning_rate": 1.015542521994135e-05,
"loss": 0.0661,
"step": 20150
},
{
"epoch": 9.86,
"learning_rate": 1.0143206256109483e-05,
"loss": 0.0615,
"step": 20175
},
{
"epoch": 9.87,
"learning_rate": 1.0130987292277617e-05,
"loss": 0.0625,
"step": 20200
},
{
"epoch": 9.89,
"learning_rate": 1.011876832844575e-05,
"loss": 0.0634,
"step": 20225
},
{
"epoch": 9.9,
"learning_rate": 1.0106549364613882e-05,
"loss": 0.0616,
"step": 20250
},
{
"epoch": 9.91,
"learning_rate": 1.0094330400782016e-05,
"loss": 0.0602,
"step": 20275
},
{
"epoch": 9.92,
"learning_rate": 1.0082111436950148e-05,
"loss": 0.0739,
"step": 20300
},
{
"epoch": 9.93,
"learning_rate": 1.006989247311828e-05,
"loss": 0.0654,
"step": 20325
},
{
"epoch": 9.95,
"learning_rate": 1.0057673509286415e-05,
"loss": 0.0724,
"step": 20350
},
{
"epoch": 9.96,
"learning_rate": 1.0045454545454547e-05,
"loss": 0.0735,
"step": 20375
},
{
"epoch": 9.97,
"learning_rate": 1.003323558162268e-05,
"loss": 0.0652,
"step": 20400
},
{
"epoch": 9.98,
"learning_rate": 1.0021016617790814e-05,
"loss": 0.0694,
"step": 20425
},
{
"epoch": 10.0,
"learning_rate": 1.0008797653958946e-05,
"loss": 0.071,
"step": 20450
},
{
"epoch": 10.01,
"learning_rate": 9.996578690127078e-06,
"loss": 0.0609,
"step": 20475
},
{
"epoch": 10.02,
"learning_rate": 9.98435972629521e-06,
"loss": 0.0383,
"step": 20500
},
{
"epoch": 10.03,
"learning_rate": 9.972140762463343e-06,
"loss": 0.0541,
"step": 20525
},
{
"epoch": 10.04,
"learning_rate": 9.959921798631477e-06,
"loss": 0.0552,
"step": 20550
},
{
"epoch": 10.06,
"learning_rate": 9.94770283479961e-06,
"loss": 0.052,
"step": 20575
},
{
"epoch": 10.07,
"learning_rate": 9.935483870967742e-06,
"loss": 0.0512,
"step": 20600
},
{
"epoch": 10.08,
"learning_rate": 9.923264907135876e-06,
"loss": 0.0453,
"step": 20625
},
{
"epoch": 10.09,
"learning_rate": 9.911045943304009e-06,
"loss": 0.0465,
"step": 20650
},
{
"epoch": 10.11,
"learning_rate": 9.898826979472141e-06,
"loss": 0.0522,
"step": 20675
},
{
"epoch": 10.12,
"learning_rate": 9.886608015640275e-06,
"loss": 0.054,
"step": 20700
},
{
"epoch": 10.13,
"learning_rate": 9.874389051808407e-06,
"loss": 0.0575,
"step": 20725
},
{
"epoch": 10.14,
"learning_rate": 9.86217008797654e-06,
"loss": 0.0526,
"step": 20750
},
{
"epoch": 10.15,
"learning_rate": 9.849951124144674e-06,
"loss": 0.0572,
"step": 20775
},
{
"epoch": 10.17,
"learning_rate": 9.837732160312806e-06,
"loss": 0.0539,
"step": 20800
},
{
"epoch": 10.18,
"learning_rate": 9.825513196480939e-06,
"loss": 0.0505,
"step": 20825
},
{
"epoch": 10.19,
"learning_rate": 9.813294232649073e-06,
"loss": 0.0607,
"step": 20850
},
{
"epoch": 10.2,
"learning_rate": 9.801075268817205e-06,
"loss": 0.0487,
"step": 20875
},
{
"epoch": 10.22,
"learning_rate": 9.788856304985338e-06,
"loss": 0.0438,
"step": 20900
},
{
"epoch": 10.23,
"learning_rate": 9.776637341153472e-06,
"loss": 0.0585,
"step": 20925
},
{
"epoch": 10.24,
"learning_rate": 9.764418377321604e-06,
"loss": 0.0608,
"step": 20950
},
{
"epoch": 10.25,
"learning_rate": 9.752199413489737e-06,
"loss": 0.0421,
"step": 20975
},
{
"epoch": 10.26,
"learning_rate": 9.73998044965787e-06,
"loss": 0.0511,
"step": 21000
},
{
"epoch": 10.28,
"learning_rate": 9.727761485826003e-06,
"loss": 0.0517,
"step": 21025
},
{
"epoch": 10.29,
"learning_rate": 9.715542521994135e-06,
"loss": 0.0639,
"step": 21050
},
{
"epoch": 10.3,
"learning_rate": 9.703323558162268e-06,
"loss": 0.0544,
"step": 21075
},
{
"epoch": 10.31,
"learning_rate": 9.691104594330402e-06,
"loss": 0.049,
"step": 21100
},
{
"epoch": 10.33,
"learning_rate": 9.678885630498534e-06,
"loss": 0.0522,
"step": 21125
},
{
"epoch": 10.34,
"learning_rate": 9.666666666666667e-06,
"loss": 0.0596,
"step": 21150
},
{
"epoch": 10.35,
"learning_rate": 9.6544477028348e-06,
"loss": 0.0452,
"step": 21175
},
{
"epoch": 10.36,
"learning_rate": 9.642228739002933e-06,
"loss": 0.0555,
"step": 21200
},
{
"epoch": 10.37,
"learning_rate": 9.630009775171066e-06,
"loss": 0.0507,
"step": 21225
},
{
"epoch": 10.39,
"learning_rate": 9.6177908113392e-06,
"loss": 0.0371,
"step": 21250
},
{
"epoch": 10.4,
"learning_rate": 9.605571847507332e-06,
"loss": 0.0534,
"step": 21275
},
{
"epoch": 10.41,
"learning_rate": 9.593352883675465e-06,
"loss": 0.0482,
"step": 21300
},
{
"epoch": 10.42,
"learning_rate": 9.581133919843599e-06,
"loss": 0.0473,
"step": 21325
},
{
"epoch": 10.43,
"learning_rate": 9.568914956011731e-06,
"loss": 0.0538,
"step": 21350
},
{
"epoch": 10.45,
"learning_rate": 9.556695992179863e-06,
"loss": 0.0545,
"step": 21375
},
{
"epoch": 10.46,
"learning_rate": 9.544477028347998e-06,
"loss": 0.0528,
"step": 21400
},
{
"epoch": 10.47,
"learning_rate": 9.53225806451613e-06,
"loss": 0.0476,
"step": 21425
},
{
"epoch": 10.48,
"learning_rate": 9.520039100684262e-06,
"loss": 0.0582,
"step": 21450
},
{
"epoch": 10.5,
"learning_rate": 9.507820136852396e-06,
"loss": 0.05,
"step": 21475
},
{
"epoch": 10.51,
"learning_rate": 9.495601173020529e-06,
"loss": 0.0498,
"step": 21500
},
{
"epoch": 10.52,
"learning_rate": 9.483382209188661e-06,
"loss": 0.0537,
"step": 21525
},
{
"epoch": 10.53,
"learning_rate": 9.471163245356795e-06,
"loss": 0.0596,
"step": 21550
},
{
"epoch": 10.54,
"learning_rate": 9.458944281524928e-06,
"loss": 0.0495,
"step": 21575
},
{
"epoch": 10.56,
"learning_rate": 9.44672531769306e-06,
"loss": 0.056,
"step": 21600
},
{
"epoch": 10.57,
"learning_rate": 9.434506353861194e-06,
"loss": 0.0493,
"step": 21625
},
{
"epoch": 10.58,
"learning_rate": 9.422776148582601e-06,
"loss": 0.0561,
"step": 21650
},
{
"epoch": 10.59,
"learning_rate": 9.410557184750734e-06,
"loss": 0.0483,
"step": 21675
},
{
"epoch": 10.61,
"learning_rate": 9.398338220918866e-06,
"loss": 0.0545,
"step": 21700
},
{
"epoch": 10.62,
"learning_rate": 9.386119257087e-06,
"loss": 0.0519,
"step": 21725
},
{
"epoch": 10.63,
"learning_rate": 9.373900293255133e-06,
"loss": 0.051,
"step": 21750
},
{
"epoch": 10.64,
"learning_rate": 9.361681329423265e-06,
"loss": 0.054,
"step": 21775
},
{
"epoch": 10.65,
"learning_rate": 9.349462365591399e-06,
"loss": 0.0594,
"step": 21800
},
{
"epoch": 10.67,
"learning_rate": 9.337243401759531e-06,
"loss": 0.0561,
"step": 21825
},
{
"epoch": 10.68,
"learning_rate": 9.325024437927664e-06,
"loss": 0.0607,
"step": 21850
},
{
"epoch": 10.69,
"learning_rate": 9.312805474095798e-06,
"loss": 0.0416,
"step": 21875
},
{
"epoch": 10.7,
"learning_rate": 9.30058651026393e-06,
"loss": 0.053,
"step": 21900
},
{
"epoch": 10.72,
"learning_rate": 9.288367546432063e-06,
"loss": 0.0555,
"step": 21925
},
{
"epoch": 10.73,
"learning_rate": 9.276148582600197e-06,
"loss": 0.0633,
"step": 21950
},
{
"epoch": 10.74,
"learning_rate": 9.26392961876833e-06,
"loss": 0.0621,
"step": 21975
},
{
"epoch": 10.75,
"learning_rate": 9.251710654936462e-06,
"loss": 0.052,
"step": 22000
},
{
"epoch": 10.76,
"learning_rate": 9.239491691104596e-06,
"loss": 0.0548,
"step": 22025
},
{
"epoch": 10.78,
"learning_rate": 9.227272727272728e-06,
"loss": 0.06,
"step": 22050
},
{
"epoch": 10.79,
"learning_rate": 9.21505376344086e-06,
"loss": 0.0528,
"step": 22075
},
{
"epoch": 10.8,
"learning_rate": 9.202834799608995e-06,
"loss": 0.06,
"step": 22100
},
{
"epoch": 10.81,
"learning_rate": 9.190615835777127e-06,
"loss": 0.0561,
"step": 22125
},
{
"epoch": 10.83,
"learning_rate": 9.17839687194526e-06,
"loss": 0.057,
"step": 22150
},
{
"epoch": 10.84,
"learning_rate": 9.166177908113393e-06,
"loss": 0.0546,
"step": 22175
},
{
"epoch": 10.85,
"learning_rate": 9.153958944281526e-06,
"loss": 0.0549,
"step": 22200
},
{
"epoch": 10.86,
"learning_rate": 9.141739980449658e-06,
"loss": 0.0615,
"step": 22225
},
{
"epoch": 10.87,
"learning_rate": 9.129521016617792e-06,
"loss": 0.0557,
"step": 22250
},
{
"epoch": 10.89,
"learning_rate": 9.117302052785925e-06,
"loss": 0.0605,
"step": 22275
},
{
"epoch": 10.9,
"learning_rate": 9.105083088954057e-06,
"loss": 0.0611,
"step": 22300
},
{
"epoch": 10.91,
"learning_rate": 9.092864125122191e-06,
"loss": 0.0596,
"step": 22325
},
{
"epoch": 10.92,
"learning_rate": 9.080645161290324e-06,
"loss": 0.057,
"step": 22350
},
{
"epoch": 10.94,
"learning_rate": 9.068426197458456e-06,
"loss": 0.0543,
"step": 22375
},
{
"epoch": 10.95,
"learning_rate": 9.05620723362659e-06,
"loss": 0.0604,
"step": 22400
},
{
"epoch": 10.96,
"learning_rate": 9.043988269794723e-06,
"loss": 0.0546,
"step": 22425
},
{
"epoch": 10.97,
"learning_rate": 9.031769305962855e-06,
"loss": 0.0552,
"step": 22450
},
{
"epoch": 10.98,
"learning_rate": 9.019550342130989e-06,
"loss": 0.065,
"step": 22475
},
{
"epoch": 11.0,
"learning_rate": 9.007331378299121e-06,
"loss": 0.0511,
"step": 22500
},
{
"epoch": 11.01,
"learning_rate": 8.995112414467254e-06,
"loss": 0.0367,
"step": 22525
},
{
"epoch": 11.02,
"learning_rate": 8.982893450635386e-06,
"loss": 0.0531,
"step": 22550
},
{
"epoch": 11.03,
"learning_rate": 8.97067448680352e-06,
"loss": 0.0524,
"step": 22575
},
{
"epoch": 11.05,
"learning_rate": 8.958455522971653e-06,
"loss": 0.0442,
"step": 22600
},
{
"epoch": 11.06,
"learning_rate": 8.946236559139785e-06,
"loss": 0.0443,
"step": 22625
},
{
"epoch": 11.07,
"learning_rate": 8.93401759530792e-06,
"loss": 0.0395,
"step": 22650
},
{
"epoch": 11.08,
"learning_rate": 8.921798631476052e-06,
"loss": 0.0429,
"step": 22675
},
{
"epoch": 11.09,
"learning_rate": 8.909579667644184e-06,
"loss": 0.0411,
"step": 22700
},
{
"epoch": 11.11,
"learning_rate": 8.897360703812318e-06,
"loss": 0.0516,
"step": 22725
},
{
"epoch": 11.12,
"learning_rate": 8.88514173998045e-06,
"loss": 0.0428,
"step": 22750
},
{
"epoch": 11.13,
"learning_rate": 8.872922776148583e-06,
"loss": 0.0367,
"step": 22775
},
{
"epoch": 11.14,
"learning_rate": 8.860703812316717e-06,
"loss": 0.045,
"step": 22800
},
{
"epoch": 11.16,
"learning_rate": 8.84848484848485e-06,
"loss": 0.0447,
"step": 22825
},
{
"epoch": 11.17,
"learning_rate": 8.836265884652982e-06,
"loss": 0.0475,
"step": 22850
},
{
"epoch": 11.18,
"learning_rate": 8.824046920821116e-06,
"loss": 0.0441,
"step": 22875
},
{
"epoch": 11.19,
"learning_rate": 8.811827956989248e-06,
"loss": 0.0416,
"step": 22900
},
{
"epoch": 11.2,
"learning_rate": 8.79960899315738e-06,
"loss": 0.0555,
"step": 22925
},
{
"epoch": 11.22,
"learning_rate": 8.787390029325515e-06,
"loss": 0.035,
"step": 22950
},
{
"epoch": 11.23,
"learning_rate": 8.775171065493647e-06,
"loss": 0.0486,
"step": 22975
},
{
"epoch": 11.24,
"learning_rate": 8.76295210166178e-06,
"loss": 0.0478,
"step": 23000
},
{
"epoch": 11.25,
"learning_rate": 8.750733137829914e-06,
"loss": 0.0494,
"step": 23025
},
{
"epoch": 11.27,
"learning_rate": 8.738514173998046e-06,
"loss": 0.0452,
"step": 23050
},
{
"epoch": 11.28,
"learning_rate": 8.726295210166179e-06,
"loss": 0.0398,
"step": 23075
},
{
"epoch": 11.29,
"learning_rate": 8.714076246334313e-06,
"loss": 0.0438,
"step": 23100
},
{
"epoch": 11.3,
"learning_rate": 8.701857282502445e-06,
"loss": 0.0479,
"step": 23125
},
{
"epoch": 11.31,
"learning_rate": 8.689638318670577e-06,
"loss": 0.0484,
"step": 23150
},
{
"epoch": 11.33,
"learning_rate": 8.677419354838712e-06,
"loss": 0.04,
"step": 23175
},
{
"epoch": 11.34,
"learning_rate": 8.665200391006844e-06,
"loss": 0.043,
"step": 23200
},
{
"epoch": 11.35,
"learning_rate": 8.652981427174976e-06,
"loss": 0.0434,
"step": 23225
},
{
"epoch": 11.36,
"learning_rate": 8.64076246334311e-06,
"loss": 0.0462,
"step": 23250
},
{
"epoch": 11.38,
"learning_rate": 8.628543499511243e-06,
"loss": 0.0479,
"step": 23275
},
{
"epoch": 11.39,
"learning_rate": 8.616324535679375e-06,
"loss": 0.0419,
"step": 23300
},
{
"epoch": 11.4,
"learning_rate": 8.604105571847508e-06,
"loss": 0.0459,
"step": 23325
},
{
"epoch": 11.41,
"learning_rate": 8.59188660801564e-06,
"loss": 0.0449,
"step": 23350
},
{
"epoch": 11.42,
"learning_rate": 8.579667644183774e-06,
"loss": 0.0398,
"step": 23375
},
{
"epoch": 11.44,
"learning_rate": 8.567448680351907e-06,
"loss": 0.0443,
"step": 23400
},
{
"epoch": 11.45,
"learning_rate": 8.555229716520039e-06,
"loss": 0.0518,
"step": 23425
},
{
"epoch": 11.46,
"learning_rate": 8.543010752688173e-06,
"loss": 0.0377,
"step": 23450
},
{
"epoch": 11.47,
"learning_rate": 8.530791788856305e-06,
"loss": 0.0547,
"step": 23475
},
{
"epoch": 11.49,
"learning_rate": 8.518572825024438e-06,
"loss": 0.045,
"step": 23500
},
{
"epoch": 11.5,
"learning_rate": 8.506353861192572e-06,
"loss": 0.0444,
"step": 23525
},
{
"epoch": 11.51,
"learning_rate": 8.494134897360704e-06,
"loss": 0.0483,
"step": 23550
},
{
"epoch": 11.52,
"learning_rate": 8.481915933528837e-06,
"loss": 0.0436,
"step": 23575
},
{
"epoch": 11.53,
"learning_rate": 8.46969696969697e-06,
"loss": 0.0505,
"step": 23600
},
{
"epoch": 11.55,
"learning_rate": 8.457478005865103e-06,
"loss": 0.0395,
"step": 23625
},
{
"epoch": 11.56,
"learning_rate": 8.445259042033236e-06,
"loss": 0.0417,
"step": 23650
},
{
"epoch": 11.57,
"learning_rate": 8.433040078201368e-06,
"loss": 0.0577,
"step": 23675
},
{
"epoch": 11.58,
"learning_rate": 8.420821114369502e-06,
"loss": 0.0476,
"step": 23700
},
{
"epoch": 11.6,
"learning_rate": 8.408602150537634e-06,
"loss": 0.0415,
"step": 23725
},
{
"epoch": 11.61,
"learning_rate": 8.396383186705767e-06,
"loss": 0.0455,
"step": 23750
},
{
"epoch": 11.62,
"learning_rate": 8.384164222873901e-06,
"loss": 0.0572,
"step": 23775
},
{
"epoch": 11.63,
"learning_rate": 8.371945259042033e-06,
"loss": 0.0442,
"step": 23800
},
{
"epoch": 11.64,
"learning_rate": 8.359726295210166e-06,
"loss": 0.0376,
"step": 23825
},
{
"epoch": 11.66,
"learning_rate": 8.3475073313783e-06,
"loss": 0.0455,
"step": 23850
},
{
"epoch": 11.67,
"learning_rate": 8.335288367546432e-06,
"loss": 0.0514,
"step": 23875
},
{
"epoch": 11.68,
"learning_rate": 8.323069403714565e-06,
"loss": 0.0496,
"step": 23900
},
{
"epoch": 11.69,
"learning_rate": 8.310850439882699e-06,
"loss": 0.0498,
"step": 23925
},
{
"epoch": 11.71,
"learning_rate": 8.298631476050831e-06,
"loss": 0.0479,
"step": 23950
},
{
"epoch": 11.72,
"learning_rate": 8.286412512218964e-06,
"loss": 0.0423,
"step": 23975
},
{
"epoch": 11.73,
"learning_rate": 8.274193548387098e-06,
"loss": 0.0444,
"step": 24000
},
{
"epoch": 11.74,
"learning_rate": 8.26197458455523e-06,
"loss": 0.0482,
"step": 24025
},
{
"epoch": 11.75,
"learning_rate": 8.249755620723362e-06,
"loss": 0.0502,
"step": 24050
},
{
"epoch": 11.77,
"learning_rate": 8.237536656891497e-06,
"loss": 0.0503,
"step": 24075
},
{
"epoch": 11.78,
"learning_rate": 8.225317693059629e-06,
"loss": 0.0503,
"step": 24100
},
{
"epoch": 11.79,
"learning_rate": 8.213098729227761e-06,
"loss": 0.0424,
"step": 24125
},
{
"epoch": 11.8,
"learning_rate": 8.200879765395895e-06,
"loss": 0.0543,
"step": 24150
},
{
"epoch": 11.82,
"learning_rate": 8.188660801564028e-06,
"loss": 0.0505,
"step": 24175
},
{
"epoch": 11.83,
"learning_rate": 8.17644183773216e-06,
"loss": 0.0513,
"step": 24200
},
{
"epoch": 11.84,
"learning_rate": 8.164222873900294e-06,
"loss": 0.0537,
"step": 24225
},
{
"epoch": 11.85,
"learning_rate": 8.152003910068427e-06,
"loss": 0.0449,
"step": 24250
},
{
"epoch": 11.86,
"learning_rate": 8.139784946236559e-06,
"loss": 0.0469,
"step": 24275
},
{
"epoch": 11.88,
"learning_rate": 8.127565982404693e-06,
"loss": 0.055,
"step": 24300
},
{
"epoch": 11.89,
"learning_rate": 8.115347018572826e-06,
"loss": 0.0455,
"step": 24325
},
{
"epoch": 11.9,
"learning_rate": 8.103128054740958e-06,
"loss": 0.049,
"step": 24350
},
{
"epoch": 11.91,
"learning_rate": 8.090909090909092e-06,
"loss": 0.0457,
"step": 24375
},
{
"epoch": 11.93,
"learning_rate": 8.078690127077225e-06,
"loss": 0.0512,
"step": 24400
},
{
"epoch": 11.94,
"learning_rate": 8.066471163245357e-06,
"loss": 0.052,
"step": 24425
},
{
"epoch": 11.95,
"learning_rate": 8.054252199413491e-06,
"loss": 0.0468,
"step": 24450
},
{
"epoch": 11.96,
"learning_rate": 8.042033235581623e-06,
"loss": 0.0451,
"step": 24475
},
{
"epoch": 11.97,
"learning_rate": 8.029814271749756e-06,
"loss": 0.0443,
"step": 24500
},
{
"epoch": 11.99,
"learning_rate": 8.01759530791789e-06,
"loss": 0.0454,
"step": 24525
},
{
"epoch": 12.0,
"learning_rate": 8.005376344086022e-06,
"loss": 0.0415,
"step": 24550
},
{
"epoch": 12.01,
"learning_rate": 7.993157380254155e-06,
"loss": 0.0313,
"step": 24575
},
{
"epoch": 12.02,
"learning_rate": 7.980938416422289e-06,
"loss": 0.0428,
"step": 24600
},
{
"epoch": 12.04,
"learning_rate": 7.968719452590421e-06,
"loss": 0.0386,
"step": 24625
},
{
"epoch": 12.05,
"learning_rate": 7.956500488758554e-06,
"loss": 0.0357,
"step": 24650
},
{
"epoch": 12.06,
"learning_rate": 7.944281524926688e-06,
"loss": 0.0376,
"step": 24675
},
{
"epoch": 12.07,
"learning_rate": 7.93206256109482e-06,
"loss": 0.0398,
"step": 24700
},
{
"epoch": 12.08,
"learning_rate": 7.919843597262953e-06,
"loss": 0.0413,
"step": 24725
},
{
"epoch": 12.1,
"learning_rate": 7.907624633431087e-06,
"loss": 0.0261,
"step": 24750
},
{
"epoch": 12.11,
"learning_rate": 7.895405669599219e-06,
"loss": 0.0375,
"step": 24775
},
{
"epoch": 12.12,
"learning_rate": 7.883186705767351e-06,
"loss": 0.0396,
"step": 24800
},
{
"epoch": 12.13,
"learning_rate": 7.870967741935484e-06,
"loss": 0.0356,
"step": 24825
},
{
"epoch": 12.15,
"learning_rate": 7.858748778103618e-06,
"loss": 0.0284,
"step": 24850
},
{
"epoch": 12.16,
"learning_rate": 7.84652981427175e-06,
"loss": 0.0369,
"step": 24875
},
{
"epoch": 12.17,
"learning_rate": 7.834310850439883e-06,
"loss": 0.033,
"step": 24900
},
{
"epoch": 12.18,
"learning_rate": 7.822091886608017e-06,
"loss": 0.0361,
"step": 24925
},
{
"epoch": 12.19,
"learning_rate": 7.80987292277615e-06,
"loss": 0.0384,
"step": 24950
},
{
"epoch": 12.21,
"learning_rate": 7.797653958944282e-06,
"loss": 0.0418,
"step": 24975
},
{
"epoch": 12.22,
"learning_rate": 7.785434995112416e-06,
"loss": 0.0395,
"step": 25000
},
{
"epoch": 12.23,
"learning_rate": 7.773216031280548e-06,
"loss": 0.0356,
"step": 25025
},
{
"epoch": 12.24,
"learning_rate": 7.76099706744868e-06,
"loss": 0.0326,
"step": 25050
},
{
"epoch": 12.26,
"learning_rate": 7.748778103616815e-06,
"loss": 0.0402,
"step": 25075
},
{
"epoch": 12.27,
"learning_rate": 7.736559139784947e-06,
"loss": 0.0372,
"step": 25100
},
{
"epoch": 12.28,
"learning_rate": 7.72434017595308e-06,
"loss": 0.0486,
"step": 25125
},
{
"epoch": 12.29,
"learning_rate": 7.712121212121213e-06,
"loss": 0.0351,
"step": 25150
},
{
"epoch": 12.3,
"learning_rate": 7.699902248289346e-06,
"loss": 0.0388,
"step": 25175
},
{
"epoch": 12.32,
"learning_rate": 7.687683284457478e-06,
"loss": 0.05,
"step": 25200
},
{
"epoch": 12.33,
"learning_rate": 7.675464320625612e-06,
"loss": 0.0448,
"step": 25225
},
{
"epoch": 12.34,
"learning_rate": 7.663245356793745e-06,
"loss": 0.0338,
"step": 25250
},
{
"epoch": 12.35,
"learning_rate": 7.651026392961877e-06,
"loss": 0.0393,
"step": 25275
},
{
"epoch": 12.37,
"learning_rate": 7.638807429130011e-06,
"loss": 0.0363,
"step": 25300
},
{
"epoch": 12.38,
"learning_rate": 7.626588465298144e-06,
"loss": 0.039,
"step": 25325
},
{
"epoch": 12.39,
"learning_rate": 7.614369501466277e-06,
"loss": 0.0343,
"step": 25350
},
{
"epoch": 12.4,
"learning_rate": 7.602150537634409e-06,
"loss": 0.0416,
"step": 25375
},
{
"epoch": 12.41,
"learning_rate": 7.5899315738025426e-06,
"loss": 0.0472,
"step": 25400
},
{
"epoch": 12.43,
"learning_rate": 7.577712609970676e-06,
"loss": 0.0384,
"step": 25425
},
{
"epoch": 12.44,
"learning_rate": 7.565493646138808e-06,
"loss": 0.0376,
"step": 25450
},
{
"epoch": 12.45,
"learning_rate": 7.5532746823069415e-06,
"loss": 0.0386,
"step": 25475
},
{
"epoch": 12.46,
"learning_rate": 7.541055718475075e-06,
"loss": 0.0413,
"step": 25500
},
{
"epoch": 12.48,
"learning_rate": 7.528836754643207e-06,
"loss": 0.0372,
"step": 25525
},
{
"epoch": 12.49,
"learning_rate": 7.51661779081134e-06,
"loss": 0.04,
"step": 25550
},
{
"epoch": 12.5,
"learning_rate": 7.504398826979474e-06,
"loss": 0.0416,
"step": 25575
},
{
"epoch": 12.51,
"learning_rate": 7.492179863147606e-06,
"loss": 0.0443,
"step": 25600
},
{
"epoch": 12.52,
"learning_rate": 7.479960899315739e-06,
"loss": 0.038,
"step": 25625
},
{
"epoch": 12.54,
"learning_rate": 7.467741935483872e-06,
"loss": 0.0452,
"step": 25650
},
{
"epoch": 12.55,
"learning_rate": 7.455522971652005e-06,
"loss": 0.0366,
"step": 25675
},
{
"epoch": 12.56,
"learning_rate": 7.443304007820138e-06,
"loss": 0.0463,
"step": 25700
},
{
"epoch": 12.57,
"learning_rate": 7.4310850439882706e-06,
"loss": 0.0447,
"step": 25725
},
{
"epoch": 12.59,
"learning_rate": 7.418866080156404e-06,
"loss": 0.0411,
"step": 25750
},
{
"epoch": 12.6,
"learning_rate": 7.406647116324537e-06,
"loss": 0.0413,
"step": 25775
},
{
"epoch": 12.61,
"learning_rate": 7.3944281524926694e-06,
"loss": 0.0322,
"step": 25800
},
{
"epoch": 12.62,
"learning_rate": 7.382697947214077e-06,
"loss": 0.0464,
"step": 25825
},
{
"epoch": 12.63,
"learning_rate": 7.37047898338221e-06,
"loss": 0.0337,
"step": 25850
},
{
"epoch": 12.65,
"learning_rate": 7.358260019550343e-06,
"loss": 0.0365,
"step": 25875
},
{
"epoch": 12.66,
"learning_rate": 7.346041055718476e-06,
"loss": 0.0432,
"step": 25900
},
{
"epoch": 12.67,
"learning_rate": 7.333822091886609e-06,
"loss": 0.0457,
"step": 25925
},
{
"epoch": 12.68,
"learning_rate": 7.321603128054742e-06,
"loss": 0.0368,
"step": 25950
},
{
"epoch": 12.7,
"learning_rate": 7.309384164222874e-06,
"loss": 0.0385,
"step": 25975
},
{
"epoch": 12.71,
"learning_rate": 7.2971652003910075e-06,
"loss": 0.044,
"step": 26000
},
{
"epoch": 12.72,
"learning_rate": 7.284946236559141e-06,
"loss": 0.0417,
"step": 26025
},
{
"epoch": 12.73,
"learning_rate": 7.272727272727273e-06,
"loss": 0.0418,
"step": 26050
},
{
"epoch": 12.74,
"learning_rate": 7.260508308895406e-06,
"loss": 0.0328,
"step": 26075
},
{
"epoch": 12.76,
"learning_rate": 7.24828934506354e-06,
"loss": 0.0343,
"step": 26100
},
{
"epoch": 12.77,
"learning_rate": 7.236070381231672e-06,
"loss": 0.0349,
"step": 26125
},
{
"epoch": 12.78,
"learning_rate": 7.223851417399805e-06,
"loss": 0.0364,
"step": 26150
},
{
"epoch": 12.79,
"learning_rate": 7.2116324535679386e-06,
"loss": 0.0358,
"step": 26175
},
{
"epoch": 12.81,
"learning_rate": 7.199413489736071e-06,
"loss": 0.0399,
"step": 26200
},
{
"epoch": 12.82,
"learning_rate": 7.187194525904204e-06,
"loss": 0.0376,
"step": 26225
},
{
"epoch": 12.83,
"learning_rate": 7.1749755620723375e-06,
"loss": 0.0396,
"step": 26250
},
{
"epoch": 12.84,
"learning_rate": 7.16275659824047e-06,
"loss": 0.0377,
"step": 26275
},
{
"epoch": 12.85,
"learning_rate": 7.150537634408603e-06,
"loss": 0.0274,
"step": 26300
},
{
"epoch": 12.87,
"learning_rate": 7.138318670576736e-06,
"loss": 0.0409,
"step": 26325
},
{
"epoch": 12.88,
"learning_rate": 7.126099706744869e-06,
"loss": 0.0409,
"step": 26350
},
{
"epoch": 12.89,
"learning_rate": 7.113880742913002e-06,
"loss": 0.0351,
"step": 26375
},
{
"epoch": 12.9,
"learning_rate": 7.101661779081135e-06,
"loss": 0.0372,
"step": 26400
},
{
"epoch": 12.92,
"learning_rate": 7.089442815249268e-06,
"loss": 0.0394,
"step": 26425
},
{
"epoch": 12.93,
"learning_rate": 7.077223851417401e-06,
"loss": 0.0409,
"step": 26450
},
{
"epoch": 12.94,
"learning_rate": 7.065004887585533e-06,
"loss": 0.0328,
"step": 26475
},
{
"epoch": 12.95,
"learning_rate": 7.0527859237536665e-06,
"loss": 0.0348,
"step": 26500
},
{
"epoch": 12.96,
"learning_rate": 7.0405669599218e-06,
"loss": 0.0355,
"step": 26525
},
{
"epoch": 12.98,
"learning_rate": 7.028347996089932e-06,
"loss": 0.043,
"step": 26550
},
{
"epoch": 12.99,
"learning_rate": 7.0161290322580654e-06,
"loss": 0.0468,
"step": 26575
},
{
"epoch": 13.0,
"learning_rate": 7.003910068426199e-06,
"loss": 0.0427,
"step": 26600
},
{
"epoch": 13.01,
"learning_rate": 6.991691104594331e-06,
"loss": 0.0343,
"step": 26625
},
{
"epoch": 13.03,
"learning_rate": 6.979472140762464e-06,
"loss": 0.0318,
"step": 26650
},
{
"epoch": 13.04,
"learning_rate": 6.9672531769305976e-06,
"loss": 0.0302,
"step": 26675
},
{
"epoch": 13.05,
"learning_rate": 6.95503421309873e-06,
"loss": 0.032,
"step": 26700
},
{
"epoch": 13.06,
"learning_rate": 6.942815249266863e-06,
"loss": 0.0327,
"step": 26725
},
{
"epoch": 13.07,
"learning_rate": 6.9305962854349965e-06,
"loss": 0.0335,
"step": 26750
},
{
"epoch": 13.09,
"learning_rate": 6.918377321603129e-06,
"loss": 0.0319,
"step": 26775
},
{
"epoch": 13.1,
"learning_rate": 6.906158357771262e-06,
"loss": 0.036,
"step": 26800
},
{
"epoch": 13.11,
"learning_rate": 6.893939393939395e-06,
"loss": 0.0346,
"step": 26825
},
{
"epoch": 13.12,
"learning_rate": 6.881720430107528e-06,
"loss": 0.0309,
"step": 26850
},
{
"epoch": 13.14,
"learning_rate": 6.86950146627566e-06,
"loss": 0.0306,
"step": 26875
},
{
"epoch": 13.15,
"learning_rate": 6.8572825024437926e-06,
"loss": 0.0311,
"step": 26900
},
{
"epoch": 13.16,
"learning_rate": 6.845063538611926e-06,
"loss": 0.0334,
"step": 26925
},
{
"epoch": 13.17,
"learning_rate": 6.832844574780059e-06,
"loss": 0.0331,
"step": 26950
},
{
"epoch": 13.18,
"learning_rate": 6.8206256109481915e-06,
"loss": 0.0408,
"step": 26975
},
{
"epoch": 13.2,
"learning_rate": 6.808406647116325e-06,
"loss": 0.0338,
"step": 27000
},
{
"epoch": 13.21,
"learning_rate": 6.796187683284458e-06,
"loss": 0.0267,
"step": 27025
},
{
"epoch": 13.22,
"learning_rate": 6.78396871945259e-06,
"loss": 0.0303,
"step": 27050
},
{
"epoch": 13.23,
"learning_rate": 6.771749755620724e-06,
"loss": 0.0434,
"step": 27075
},
{
"epoch": 13.25,
"learning_rate": 6.759530791788856e-06,
"loss": 0.029,
"step": 27100
},
{
"epoch": 13.26,
"learning_rate": 6.747311827956989e-06,
"loss": 0.032,
"step": 27125
},
{
"epoch": 13.27,
"learning_rate": 6.7350928641251225e-06,
"loss": 0.0319,
"step": 27150
},
{
"epoch": 13.28,
"learning_rate": 6.722873900293255e-06,
"loss": 0.0304,
"step": 27175
},
{
"epoch": 13.29,
"learning_rate": 6.710654936461388e-06,
"loss": 0.0315,
"step": 27200
},
{
"epoch": 13.31,
"learning_rate": 6.698435972629521e-06,
"loss": 0.0414,
"step": 27225
},
{
"epoch": 13.32,
"learning_rate": 6.686217008797654e-06,
"loss": 0.0367,
"step": 27250
},
{
"epoch": 13.33,
"learning_rate": 6.673998044965787e-06,
"loss": 0.0297,
"step": 27275
},
{
"epoch": 13.34,
"learning_rate": 6.66177908113392e-06,
"loss": 0.0301,
"step": 27300
},
{
"epoch": 13.36,
"learning_rate": 6.649560117302053e-06,
"loss": 0.033,
"step": 27325
},
{
"epoch": 13.37,
"learning_rate": 6.637341153470186e-06,
"loss": 0.0324,
"step": 27350
},
{
"epoch": 13.38,
"learning_rate": 6.625122189638319e-06,
"loss": 0.0251,
"step": 27375
},
{
"epoch": 13.39,
"learning_rate": 6.612903225806452e-06,
"loss": 0.035,
"step": 27400
},
{
"epoch": 13.4,
"learning_rate": 6.600684261974585e-06,
"loss": 0.0311,
"step": 27425
},
{
"epoch": 13.42,
"learning_rate": 6.588465298142718e-06,
"loss": 0.0347,
"step": 27450
},
{
"epoch": 13.43,
"learning_rate": 6.5762463343108505e-06,
"loss": 0.0305,
"step": 27475
},
{
"epoch": 13.44,
"learning_rate": 6.564027370478984e-06,
"loss": 0.0298,
"step": 27500
},
{
"epoch": 13.45,
"learning_rate": 6.551808406647117e-06,
"loss": 0.0313,
"step": 27525
},
{
"epoch": 13.47,
"learning_rate": 6.539589442815249e-06,
"loss": 0.0454,
"step": 27550
},
{
"epoch": 13.48,
"learning_rate": 6.527370478983383e-06,
"loss": 0.029,
"step": 27575
},
{
"epoch": 13.49,
"learning_rate": 6.515151515151516e-06,
"loss": 0.0318,
"step": 27600
},
{
"epoch": 13.5,
"learning_rate": 6.502932551319648e-06,
"loss": 0.0381,
"step": 27625
},
{
"epoch": 13.51,
"learning_rate": 6.4907135874877815e-06,
"loss": 0.03,
"step": 27650
},
{
"epoch": 13.53,
"learning_rate": 6.478494623655914e-06,
"loss": 0.0414,
"step": 27675
},
{
"epoch": 13.54,
"learning_rate": 6.466275659824047e-06,
"loss": 0.035,
"step": 27700
},
{
"epoch": 13.55,
"learning_rate": 6.45405669599218e-06,
"loss": 0.0411,
"step": 27725
},
{
"epoch": 13.56,
"learning_rate": 6.441837732160313e-06,
"loss": 0.0394,
"step": 27750
},
{
"epoch": 13.58,
"learning_rate": 6.429618768328446e-06,
"loss": 0.0287,
"step": 27775
},
{
"epoch": 13.59,
"learning_rate": 6.417399804496579e-06,
"loss": 0.029,
"step": 27800
},
{
"epoch": 13.6,
"learning_rate": 6.405180840664712e-06,
"loss": 0.0396,
"step": 27825
},
{
"epoch": 13.61,
"learning_rate": 6.392961876832845e-06,
"loss": 0.0378,
"step": 27850
},
{
"epoch": 13.62,
"learning_rate": 6.380742913000978e-06,
"loss": 0.0365,
"step": 27875
},
{
"epoch": 13.64,
"learning_rate": 6.368523949169111e-06,
"loss": 0.0385,
"step": 27900
},
{
"epoch": 13.65,
"learning_rate": 6.356304985337244e-06,
"loss": 0.04,
"step": 27925
},
{
"epoch": 13.66,
"learning_rate": 6.344086021505377e-06,
"loss": 0.0302,
"step": 27950
},
{
"epoch": 13.67,
"learning_rate": 6.332355816226784e-06,
"loss": 0.0293,
"step": 27975
},
{
"epoch": 13.69,
"learning_rate": 6.3201368523949165e-06,
"loss": 0.0422,
"step": 28000
},
{
"epoch": 13.7,
"learning_rate": 6.30791788856305e-06,
"loss": 0.035,
"step": 28025
},
{
"epoch": 13.71,
"learning_rate": 6.295698924731183e-06,
"loss": 0.0323,
"step": 28050
},
{
"epoch": 13.72,
"learning_rate": 6.283479960899315e-06,
"loss": 0.0275,
"step": 28075
},
{
"epoch": 13.73,
"learning_rate": 6.271260997067449e-06,
"loss": 0.0374,
"step": 28100
},
{
"epoch": 13.75,
"learning_rate": 6.259042033235582e-06,
"loss": 0.034,
"step": 28125
},
{
"epoch": 13.76,
"learning_rate": 6.246823069403714e-06,
"loss": 0.0326,
"step": 28150
},
{
"epoch": 13.77,
"learning_rate": 6.2346041055718476e-06,
"loss": 0.0338,
"step": 28175
},
{
"epoch": 13.78,
"learning_rate": 6.222385141739981e-06,
"loss": 0.031,
"step": 28200
},
{
"epoch": 13.8,
"learning_rate": 6.210166177908113e-06,
"loss": 0.0345,
"step": 28225
},
{
"epoch": 13.81,
"learning_rate": 6.1979472140762465e-06,
"loss": 0.0375,
"step": 28250
},
{
"epoch": 13.82,
"learning_rate": 6.18572825024438e-06,
"loss": 0.0296,
"step": 28275
},
{
"epoch": 13.83,
"learning_rate": 6.173509286412512e-06,
"loss": 0.0329,
"step": 28300
},
{
"epoch": 13.84,
"learning_rate": 6.161290322580645e-06,
"loss": 0.0301,
"step": 28325
},
{
"epoch": 13.86,
"learning_rate": 6.149071358748779e-06,
"loss": 0.034,
"step": 28350
},
{
"epoch": 13.87,
"learning_rate": 6.136852394916911e-06,
"loss": 0.034,
"step": 28375
},
{
"epoch": 13.88,
"learning_rate": 6.124633431085044e-06,
"loss": 0.0303,
"step": 28400
},
{
"epoch": 13.89,
"learning_rate": 6.1124144672531775e-06,
"loss": 0.0304,
"step": 28425
},
{
"epoch": 13.91,
"learning_rate": 6.10019550342131e-06,
"loss": 0.0281,
"step": 28450
},
{
"epoch": 13.92,
"learning_rate": 6.087976539589443e-06,
"loss": 0.0342,
"step": 28475
},
{
"epoch": 13.93,
"learning_rate": 6.0757575757575755e-06,
"loss": 0.0438,
"step": 28500
},
{
"epoch": 13.94,
"learning_rate": 6.063538611925709e-06,
"loss": 0.0241,
"step": 28525
},
{
"epoch": 13.95,
"learning_rate": 6.051319648093842e-06,
"loss": 0.0268,
"step": 28550
},
{
"epoch": 13.97,
"learning_rate": 6.0391006842619744e-06,
"loss": 0.0462,
"step": 28575
},
{
"epoch": 13.98,
"learning_rate": 6.026881720430108e-06,
"loss": 0.0292,
"step": 28600
},
{
"epoch": 13.99,
"learning_rate": 6.014662756598241e-06,
"loss": 0.0365,
"step": 28625
},
{
"epoch": 14.0,
"learning_rate": 6.002443792766373e-06,
"loss": 0.0397,
"step": 28650
},
{
"epoch": 14.02,
"learning_rate": 5.990224828934507e-06,
"loss": 0.0296,
"step": 28675
},
{
"epoch": 14.03,
"learning_rate": 5.97800586510264e-06,
"loss": 0.0288,
"step": 28700
},
{
"epoch": 14.04,
"learning_rate": 5.965786901270772e-06,
"loss": 0.0268,
"step": 28725
},
{
"epoch": 14.05,
"learning_rate": 5.9535679374389055e-06,
"loss": 0.0244,
"step": 28750
},
{
"epoch": 14.06,
"learning_rate": 5.941348973607039e-06,
"loss": 0.0288,
"step": 28775
},
{
"epoch": 14.08,
"learning_rate": 5.929130009775171e-06,
"loss": 0.0301,
"step": 28800
},
{
"epoch": 14.09,
"learning_rate": 5.916911045943304e-06,
"loss": 0.0244,
"step": 28825
},
{
"epoch": 14.1,
"learning_rate": 5.904692082111438e-06,
"loss": 0.0289,
"step": 28850
},
{
"epoch": 14.11,
"learning_rate": 5.89247311827957e-06,
"loss": 0.0195,
"step": 28875
},
{
"epoch": 14.13,
"learning_rate": 5.880254154447703e-06,
"loss": 0.0257,
"step": 28900
},
{
"epoch": 14.14,
"learning_rate": 5.8680351906158365e-06,
"loss": 0.0256,
"step": 28925
},
{
"epoch": 14.15,
"learning_rate": 5.855816226783969e-06,
"loss": 0.0317,
"step": 28950
},
{
"epoch": 14.16,
"learning_rate": 5.843597262952102e-06,
"loss": 0.0346,
"step": 28975
},
{
"epoch": 14.17,
"learning_rate": 5.831378299120235e-06,
"loss": 0.0306,
"step": 29000
},
{
"epoch": 14.19,
"learning_rate": 5.819159335288368e-06,
"loss": 0.0262,
"step": 29025
},
{
"epoch": 14.2,
"learning_rate": 5.806940371456501e-06,
"loss": 0.0255,
"step": 29050
},
{
"epoch": 14.21,
"learning_rate": 5.7947214076246335e-06,
"loss": 0.027,
"step": 29075
},
{
"epoch": 14.22,
"learning_rate": 5.782502443792767e-06,
"loss": 0.0419,
"step": 29100
},
{
"epoch": 14.24,
"learning_rate": 5.7702834799609e-06,
"loss": 0.0211,
"step": 29125
},
{
"epoch": 14.25,
"learning_rate": 5.758064516129032e-06,
"loss": 0.0332,
"step": 29150
},
{
"epoch": 14.26,
"learning_rate": 5.745845552297166e-06,
"loss": 0.0282,
"step": 29175
},
{
"epoch": 14.27,
"learning_rate": 5.733626588465299e-06,
"loss": 0.0274,
"step": 29200
},
{
"epoch": 14.28,
"learning_rate": 5.721407624633431e-06,
"loss": 0.0271,
"step": 29225
},
{
"epoch": 14.3,
"learning_rate": 5.7091886608015645e-06,
"loss": 0.0276,
"step": 29250
},
{
"epoch": 14.31,
"learning_rate": 5.696969696969698e-06,
"loss": 0.0331,
"step": 29275
},
{
"epoch": 14.32,
"learning_rate": 5.68475073313783e-06,
"loss": 0.0321,
"step": 29300
},
{
"epoch": 14.33,
"learning_rate": 5.672531769305963e-06,
"loss": 0.0253,
"step": 29325
},
{
"epoch": 14.35,
"learning_rate": 5.660312805474097e-06,
"loss": 0.0265,
"step": 29350
},
{
"epoch": 14.36,
"learning_rate": 5.648093841642229e-06,
"loss": 0.0305,
"step": 29375
},
{
"epoch": 14.37,
"learning_rate": 5.635874877810362e-06,
"loss": 0.0266,
"step": 29400
},
{
"epoch": 14.38,
"learning_rate": 5.6236559139784955e-06,
"loss": 0.0336,
"step": 29425
},
{
"epoch": 14.39,
"learning_rate": 5.611436950146628e-06,
"loss": 0.0267,
"step": 29450
},
{
"epoch": 14.41,
"learning_rate": 5.599217986314761e-06,
"loss": 0.0252,
"step": 29475
},
{
"epoch": 14.42,
"learning_rate": 5.586999022482894e-06,
"loss": 0.0246,
"step": 29500
},
{
"epoch": 14.43,
"learning_rate": 5.574780058651027e-06,
"loss": 0.0302,
"step": 29525
},
{
"epoch": 14.44,
"learning_rate": 5.56256109481916e-06,
"loss": 0.0255,
"step": 29550
},
{
"epoch": 14.46,
"learning_rate": 5.550342130987293e-06,
"loss": 0.0313,
"step": 29575
},
{
"epoch": 14.47,
"learning_rate": 5.538123167155426e-06,
"loss": 0.0289,
"step": 29600
},
{
"epoch": 14.48,
"learning_rate": 5.525904203323559e-06,
"loss": 0.0266,
"step": 29625
},
{
"epoch": 14.49,
"learning_rate": 5.513685239491691e-06,
"loss": 0.0326,
"step": 29650
},
{
"epoch": 14.5,
"learning_rate": 5.501466275659825e-06,
"loss": 0.0298,
"step": 29675
},
{
"epoch": 14.52,
"learning_rate": 5.489247311827958e-06,
"loss": 0.0218,
"step": 29700
},
{
"epoch": 14.53,
"learning_rate": 5.47702834799609e-06,
"loss": 0.0281,
"step": 29725
},
{
"epoch": 14.54,
"learning_rate": 5.4648093841642235e-06,
"loss": 0.0258,
"step": 29750
},
{
"epoch": 14.55,
"learning_rate": 5.452590420332357e-06,
"loss": 0.0259,
"step": 29775
},
{
"epoch": 14.57,
"learning_rate": 5.440371456500489e-06,
"loss": 0.0255,
"step": 29800
},
{
"epoch": 14.58,
"learning_rate": 5.428152492668622e-06,
"loss": 0.0321,
"step": 29825
},
{
"epoch": 14.59,
"learning_rate": 5.415933528836756e-06,
"loss": 0.0337,
"step": 29850
},
{
"epoch": 14.6,
"learning_rate": 5.403714565004888e-06,
"loss": 0.032,
"step": 29875
},
{
"epoch": 14.61,
"learning_rate": 5.391495601173021e-06,
"loss": 0.0374,
"step": 29900
},
{
"epoch": 14.63,
"learning_rate": 5.3792766373411545e-06,
"loss": 0.0285,
"step": 29925
},
{
"epoch": 14.64,
"learning_rate": 5.367057673509287e-06,
"loss": 0.0283,
"step": 29950
},
{
"epoch": 14.65,
"learning_rate": 5.35483870967742e-06,
"loss": 0.0249,
"step": 29975
},
{
"epoch": 14.66,
"learning_rate": 5.3426197458455534e-06,
"loss": 0.0305,
"step": 30000
},
{
"epoch": 14.67,
"learning_rate": 5.330400782013686e-06,
"loss": 0.035,
"step": 30025
},
{
"epoch": 14.69,
"learning_rate": 5.318181818181819e-06,
"loss": 0.0392,
"step": 30050
},
{
"epoch": 14.7,
"learning_rate": 5.305962854349952e-06,
"loss": 0.0269,
"step": 30075
},
{
"epoch": 14.71,
"learning_rate": 5.294232649071359e-06,
"loss": 0.0258,
"step": 30100
},
{
"epoch": 14.72,
"learning_rate": 5.282013685239492e-06,
"loss": 0.0358,
"step": 30125
},
{
"epoch": 14.74,
"learning_rate": 5.269794721407625e-06,
"loss": 0.0332,
"step": 30150
},
{
"epoch": 14.75,
"learning_rate": 5.257575757575758e-06,
"loss": 0.0273,
"step": 30175
},
{
"epoch": 14.76,
"learning_rate": 5.245356793743891e-06,
"loss": 0.0275,
"step": 30200
},
{
"epoch": 14.77,
"learning_rate": 5.233137829912024e-06,
"loss": 0.0282,
"step": 30225
},
{
"epoch": 14.78,
"learning_rate": 5.220918866080157e-06,
"loss": 0.0232,
"step": 30250
},
{
"epoch": 14.8,
"learning_rate": 5.2086999022482896e-06,
"loss": 0.0323,
"step": 30275
},
{
"epoch": 14.81,
"learning_rate": 5.196480938416423e-06,
"loss": 0.0354,
"step": 30300
},
{
"epoch": 14.82,
"learning_rate": 5.184261974584556e-06,
"loss": 0.0288,
"step": 30325
},
{
"epoch": 14.83,
"learning_rate": 5.1720430107526885e-06,
"loss": 0.032,
"step": 30350
},
{
"epoch": 14.85,
"learning_rate": 5.159824046920822e-06,
"loss": 0.0333,
"step": 30375
},
{
"epoch": 14.86,
"learning_rate": 5.147605083088955e-06,
"loss": 0.0299,
"step": 30400
},
{
"epoch": 14.87,
"learning_rate": 5.135386119257087e-06,
"loss": 0.0395,
"step": 30425
},
{
"epoch": 14.88,
"learning_rate": 5.123167155425221e-06,
"loss": 0.0292,
"step": 30450
},
{
"epoch": 14.89,
"learning_rate": 5.110948191593354e-06,
"loss": 0.0283,
"step": 30475
},
{
"epoch": 14.91,
"learning_rate": 5.098729227761486e-06,
"loss": 0.0315,
"step": 30500
},
{
"epoch": 14.92,
"learning_rate": 5.0865102639296195e-06,
"loss": 0.028,
"step": 30525
},
{
"epoch": 14.93,
"learning_rate": 5.074291300097752e-06,
"loss": 0.0343,
"step": 30550
},
{
"epoch": 14.94,
"learning_rate": 5.062072336265885e-06,
"loss": 0.0243,
"step": 30575
},
{
"epoch": 14.96,
"learning_rate": 5.049853372434018e-06,
"loss": 0.0299,
"step": 30600
},
{
"epoch": 14.97,
"learning_rate": 5.037634408602151e-06,
"loss": 0.0295,
"step": 30625
},
{
"epoch": 14.98,
"learning_rate": 5.025415444770284e-06,
"loss": 0.0278,
"step": 30650
},
{
"epoch": 14.99,
"learning_rate": 5.013196480938417e-06,
"loss": 0.038,
"step": 30675
},
{
"epoch": 15.0,
"learning_rate": 5.00097751710655e-06,
"loss": 0.038,
"step": 30700
},
{
"epoch": 15.02,
"learning_rate": 4.988758553274683e-06,
"loss": 0.0213,
"step": 30725
},
{
"epoch": 15.03,
"learning_rate": 4.976539589442816e-06,
"loss": 0.0222,
"step": 30750
},
{
"epoch": 15.04,
"learning_rate": 4.9643206256109486e-06,
"loss": 0.0252,
"step": 30775
},
{
"epoch": 15.05,
"learning_rate": 4.952101661779082e-06,
"loss": 0.026,
"step": 30800
},
{
"epoch": 15.07,
"learning_rate": 4.939882697947215e-06,
"loss": 0.0261,
"step": 30825
},
{
"epoch": 15.08,
"learning_rate": 4.9276637341153475e-06,
"loss": 0.0296,
"step": 30850
},
{
"epoch": 15.09,
"learning_rate": 4.91544477028348e-06,
"loss": 0.0262,
"step": 30875
},
{
"epoch": 15.1,
"learning_rate": 4.903225806451613e-06,
"loss": 0.0173,
"step": 30900
},
{
"epoch": 15.11,
"learning_rate": 4.891006842619746e-06,
"loss": 0.0303,
"step": 30925
},
{
"epoch": 15.13,
"learning_rate": 4.878787878787879e-06,
"loss": 0.0275,
"step": 30950
},
{
"epoch": 15.14,
"learning_rate": 4.866568914956012e-06,
"loss": 0.0257,
"step": 30975
},
{
"epoch": 15.15,
"learning_rate": 4.854349951124145e-06,
"loss": 0.0221,
"step": 31000
},
{
"epoch": 15.16,
"learning_rate": 4.842130987292278e-06,
"loss": 0.0224,
"step": 31025
},
{
"epoch": 15.18,
"learning_rate": 4.829912023460411e-06,
"loss": 0.0215,
"step": 31050
},
{
"epoch": 15.19,
"learning_rate": 4.817693059628543e-06,
"loss": 0.0303,
"step": 31075
},
{
"epoch": 15.2,
"learning_rate": 4.8054740957966766e-06,
"loss": 0.0222,
"step": 31100
},
{
"epoch": 15.21,
"learning_rate": 4.79325513196481e-06,
"loss": 0.0268,
"step": 31125
},
{
"epoch": 15.22,
"learning_rate": 4.781036168132942e-06,
"loss": 0.0276,
"step": 31150
},
{
"epoch": 15.24,
"learning_rate": 4.7688172043010755e-06,
"loss": 0.0245,
"step": 31175
},
{
"epoch": 15.25,
"learning_rate": 4.757086999022483e-06,
"loss": 0.0185,
"step": 31200
},
{
"epoch": 15.26,
"learning_rate": 4.744868035190617e-06,
"loss": 0.0214,
"step": 31225
},
{
"epoch": 15.27,
"learning_rate": 4.732649071358749e-06,
"loss": 0.0236,
"step": 31250
},
{
"epoch": 15.29,
"learning_rate": 4.720430107526882e-06,
"loss": 0.0289,
"step": 31275
},
{
"epoch": 15.3,
"learning_rate": 4.7082111436950155e-06,
"loss": 0.0335,
"step": 31300
},
{
"epoch": 15.31,
"learning_rate": 4.695992179863148e-06,
"loss": 0.0229,
"step": 31325
},
{
"epoch": 15.32,
"learning_rate": 4.683773216031281e-06,
"loss": 0.0237,
"step": 31350
},
{
"epoch": 15.33,
"learning_rate": 4.6715542521994135e-06,
"loss": 0.0186,
"step": 31375
},
{
"epoch": 15.35,
"learning_rate": 4.659335288367547e-06,
"loss": 0.0219,
"step": 31400
},
{
"epoch": 15.36,
"learning_rate": 4.64711632453568e-06,
"loss": 0.0292,
"step": 31425
},
{
"epoch": 15.37,
"learning_rate": 4.634897360703812e-06,
"loss": 0.0261,
"step": 31450
},
{
"epoch": 15.38,
"learning_rate": 4.622678396871946e-06,
"loss": 0.0301,
"step": 31475
},
{
"epoch": 15.4,
"learning_rate": 4.610459433040079e-06,
"loss": 0.0243,
"step": 31500
},
{
"epoch": 15.41,
"learning_rate": 4.598240469208211e-06,
"loss": 0.034,
"step": 31525
},
{
"epoch": 15.42,
"learning_rate": 4.5860215053763446e-06,
"loss": 0.0253,
"step": 31550
},
{
"epoch": 15.43,
"learning_rate": 4.573802541544478e-06,
"loss": 0.0265,
"step": 31575
},
{
"epoch": 15.44,
"learning_rate": 4.56158357771261e-06,
"loss": 0.0178,
"step": 31600
},
{
"epoch": 15.46,
"learning_rate": 4.5493646138807435e-06,
"loss": 0.0208,
"step": 31625
},
{
"epoch": 15.47,
"learning_rate": 4.537145650048877e-06,
"loss": 0.0264,
"step": 31650
},
{
"epoch": 15.48,
"learning_rate": 4.524926686217009e-06,
"loss": 0.0266,
"step": 31675
},
{
"epoch": 15.49,
"learning_rate": 4.512707722385142e-06,
"loss": 0.0267,
"step": 31700
},
{
"epoch": 15.51,
"learning_rate": 4.500488758553276e-06,
"loss": 0.0218,
"step": 31725
},
{
"epoch": 15.52,
"learning_rate": 4.488269794721408e-06,
"loss": 0.0252,
"step": 31750
},
{
"epoch": 15.53,
"learning_rate": 4.476050830889541e-06,
"loss": 0.023,
"step": 31775
},
{
"epoch": 15.54,
"learning_rate": 4.4638318670576745e-06,
"loss": 0.0216,
"step": 31800
},
{
"epoch": 15.55,
"learning_rate": 4.451612903225807e-06,
"loss": 0.0278,
"step": 31825
},
{
"epoch": 15.57,
"learning_rate": 4.43939393939394e-06,
"loss": 0.0224,
"step": 31850
},
{
"epoch": 15.58,
"learning_rate": 4.4271749755620725e-06,
"loss": 0.0302,
"step": 31875
},
{
"epoch": 15.59,
"learning_rate": 4.414956011730206e-06,
"loss": 0.0213,
"step": 31900
},
{
"epoch": 15.6,
"learning_rate": 4.402737047898338e-06,
"loss": 0.0243,
"step": 31925
},
{
"epoch": 15.62,
"learning_rate": 4.3905180840664714e-06,
"loss": 0.0263,
"step": 31950
},
{
"epoch": 15.63,
"learning_rate": 4.378299120234604e-06,
"loss": 0.018,
"step": 31975
},
{
"epoch": 15.64,
"learning_rate": 4.366080156402737e-06,
"loss": 0.0155,
"step": 32000
},
{
"epoch": 15.65,
"learning_rate": 4.35386119257087e-06,
"loss": 0.0368,
"step": 32025
},
{
"epoch": 15.66,
"learning_rate": 4.341642228739003e-06,
"loss": 0.0271,
"step": 32050
},
{
"epoch": 15.68,
"learning_rate": 4.329423264907136e-06,
"loss": 0.0309,
"step": 32075
},
{
"epoch": 15.69,
"learning_rate": 4.317204301075269e-06,
"loss": 0.0252,
"step": 32100
},
{
"epoch": 15.7,
"learning_rate": 4.304985337243402e-06,
"loss": 0.0353,
"step": 32125
},
{
"epoch": 15.71,
"learning_rate": 4.292766373411535e-06,
"loss": 0.0246,
"step": 32150
},
{
"epoch": 15.73,
"learning_rate": 4.280547409579668e-06,
"loss": 0.0315,
"step": 32175
},
{
"epoch": 15.74,
"learning_rate": 4.2683284457478005e-06,
"loss": 0.0209,
"step": 32200
},
{
"epoch": 15.75,
"learning_rate": 4.256109481915934e-06,
"loss": 0.0209,
"step": 32225
},
{
"epoch": 15.76,
"learning_rate": 4.243890518084067e-06,
"loss": 0.0264,
"step": 32250
},
{
"epoch": 15.77,
"learning_rate": 4.231671554252199e-06,
"loss": 0.0232,
"step": 32275
},
{
"epoch": 15.79,
"learning_rate": 4.219452590420333e-06,
"loss": 0.0259,
"step": 32300
},
{
"epoch": 15.8,
"learning_rate": 4.207233626588466e-06,
"loss": 0.0284,
"step": 32325
},
{
"epoch": 15.81,
"learning_rate": 4.195014662756598e-06,
"loss": 0.0319,
"step": 32350
},
{
"epoch": 15.82,
"learning_rate": 4.1827956989247316e-06,
"loss": 0.0208,
"step": 32375
},
{
"epoch": 15.84,
"learning_rate": 4.170576735092865e-06,
"loss": 0.0265,
"step": 32400
},
{
"epoch": 15.85,
"learning_rate": 4.158357771260997e-06,
"loss": 0.0241,
"step": 32425
},
{
"epoch": 15.86,
"learning_rate": 4.1461388074291305e-06,
"loss": 0.023,
"step": 32450
},
{
"epoch": 15.87,
"learning_rate": 4.133919843597264e-06,
"loss": 0.0263,
"step": 32475
},
{
"epoch": 15.88,
"learning_rate": 4.121700879765396e-06,
"loss": 0.0181,
"step": 32500
},
{
"epoch": 15.9,
"learning_rate": 4.109481915933529e-06,
"loss": 0.0224,
"step": 32525
},
{
"epoch": 15.91,
"learning_rate": 4.097262952101662e-06,
"loss": 0.0259,
"step": 32550
},
{
"epoch": 15.92,
"learning_rate": 4.085043988269795e-06,
"loss": 0.0242,
"step": 32575
},
{
"epoch": 15.93,
"learning_rate": 4.072825024437928e-06,
"loss": 0.0293,
"step": 32600
},
{
"epoch": 15.95,
"learning_rate": 4.060606060606061e-06,
"loss": 0.0252,
"step": 32625
},
{
"epoch": 15.96,
"learning_rate": 4.048387096774194e-06,
"loss": 0.0234,
"step": 32650
},
{
"epoch": 15.97,
"learning_rate": 4.036168132942327e-06,
"loss": 0.0224,
"step": 32675
},
{
"epoch": 15.98,
"learning_rate": 4.0239491691104595e-06,
"loss": 0.025,
"step": 32700
},
{
"epoch": 15.99,
"learning_rate": 4.011730205278593e-06,
"loss": 0.0226,
"step": 32725
},
{
"epoch": 16.01,
"learning_rate": 3.999511241446726e-06,
"loss": 0.0239,
"step": 32750
},
{
"epoch": 16.02,
"learning_rate": 3.9872922776148584e-06,
"loss": 0.0246,
"step": 32775
},
{
"epoch": 16.03,
"learning_rate": 3.975073313782992e-06,
"loss": 0.0157,
"step": 32800
},
{
"epoch": 16.04,
"learning_rate": 3.962854349951125e-06,
"loss": 0.022,
"step": 32825
},
{
"epoch": 16.06,
"learning_rate": 3.950635386119257e-06,
"loss": 0.0236,
"step": 32850
},
{
"epoch": 16.07,
"learning_rate": 3.9384164222873906e-06,
"loss": 0.023,
"step": 32875
},
{
"epoch": 16.08,
"learning_rate": 3.926197458455524e-06,
"loss": 0.034,
"step": 32900
},
{
"epoch": 16.09,
"learning_rate": 3.913978494623656e-06,
"loss": 0.0249,
"step": 32925
},
{
"epoch": 16.1,
"learning_rate": 3.9017595307917895e-06,
"loss": 0.0224,
"step": 32950
},
{
"epoch": 16.12,
"learning_rate": 3.889540566959923e-06,
"loss": 0.0261,
"step": 32975
},
{
"epoch": 16.13,
"learning_rate": 3.877321603128055e-06,
"loss": 0.0153,
"step": 33000
},
{
"epoch": 16.14,
"learning_rate": 3.865102639296188e-06,
"loss": 0.0267,
"step": 33025
},
{
"epoch": 16.15,
"learning_rate": 3.852883675464321e-06,
"loss": 0.0158,
"step": 33050
},
{
"epoch": 16.17,
"learning_rate": 3.840664711632454e-06,
"loss": 0.02,
"step": 33075
},
{
"epoch": 16.18,
"learning_rate": 3.828445747800587e-06,
"loss": 0.0227,
"step": 33100
},
{
"epoch": 16.19,
"learning_rate": 3.81622678396872e-06,
"loss": 0.0202,
"step": 33125
},
{
"epoch": 16.2,
"learning_rate": 3.8040078201368525e-06,
"loss": 0.024,
"step": 33150
},
{
"epoch": 16.21,
"learning_rate": 3.7917888563049853e-06,
"loss": 0.0255,
"step": 33175
},
{
"epoch": 16.23,
"learning_rate": 3.7795698924731186e-06,
"loss": 0.0202,
"step": 33200
},
{
"epoch": 16.24,
"learning_rate": 3.7673509286412514e-06,
"loss": 0.0299,
"step": 33225
},
{
"epoch": 16.25,
"learning_rate": 3.755131964809384e-06,
"loss": 0.0266,
"step": 33250
},
{
"epoch": 16.26,
"learning_rate": 3.742913000977517e-06,
"loss": 0.0232,
"step": 33275
},
{
"epoch": 16.28,
"learning_rate": 3.7306940371456503e-06,
"loss": 0.0193,
"step": 33300
},
{
"epoch": 16.29,
"learning_rate": 3.718475073313783e-06,
"loss": 0.0192,
"step": 33325
},
{
"epoch": 16.3,
"learning_rate": 3.706256109481916e-06,
"loss": 0.0195,
"step": 33350
},
{
"epoch": 16.31,
"learning_rate": 3.694037145650049e-06,
"loss": 0.0182,
"step": 33375
},
{
"epoch": 16.32,
"learning_rate": 3.6823069403714566e-06,
"loss": 0.0261,
"step": 33400
},
{
"epoch": 16.34,
"learning_rate": 3.67008797653959e-06,
"loss": 0.0219,
"step": 33425
},
{
"epoch": 16.35,
"learning_rate": 3.6578690127077227e-06,
"loss": 0.0293,
"step": 33450
},
{
"epoch": 16.36,
"learning_rate": 3.6456500488758555e-06,
"loss": 0.0318,
"step": 33475
},
{
"epoch": 16.37,
"learning_rate": 3.6334310850439888e-06,
"loss": 0.0198,
"step": 33500
},
{
"epoch": 16.39,
"learning_rate": 3.6212121212121216e-06,
"loss": 0.0257,
"step": 33525
},
{
"epoch": 16.4,
"learning_rate": 3.6089931573802544e-06,
"loss": 0.0192,
"step": 33550
},
{
"epoch": 16.41,
"learning_rate": 3.5967741935483872e-06,
"loss": 0.0204,
"step": 33575
},
{
"epoch": 16.42,
"learning_rate": 3.5845552297165205e-06,
"loss": 0.0221,
"step": 33600
},
{
"epoch": 16.43,
"learning_rate": 3.5723362658846533e-06,
"loss": 0.0229,
"step": 33625
},
{
"epoch": 16.45,
"learning_rate": 3.560117302052786e-06,
"loss": 0.0234,
"step": 33650
},
{
"epoch": 16.46,
"learning_rate": 3.5478983382209194e-06,
"loss": 0.0233,
"step": 33675
},
{
"epoch": 16.47,
"learning_rate": 3.535679374389052e-06,
"loss": 0.0248,
"step": 33700
},
{
"epoch": 16.48,
"learning_rate": 3.523460410557185e-06,
"loss": 0.0251,
"step": 33725
},
{
"epoch": 16.5,
"learning_rate": 3.5112414467253183e-06,
"loss": 0.0297,
"step": 33750
},
{
"epoch": 16.51,
"learning_rate": 3.499022482893451e-06,
"loss": 0.0256,
"step": 33775
},
{
"epoch": 16.52,
"learning_rate": 3.486803519061584e-06,
"loss": 0.0194,
"step": 33800
},
{
"epoch": 16.53,
"learning_rate": 3.4745845552297167e-06,
"loss": 0.026,
"step": 33825
},
{
"epoch": 16.54,
"learning_rate": 3.46236559139785e-06,
"loss": 0.0165,
"step": 33850
},
{
"epoch": 16.56,
"learning_rate": 3.450146627565983e-06,
"loss": 0.021,
"step": 33875
},
{
"epoch": 16.57,
"learning_rate": 3.4379276637341156e-06,
"loss": 0.0226,
"step": 33900
},
{
"epoch": 16.58,
"learning_rate": 3.425708699902249e-06,
"loss": 0.0273,
"step": 33925
},
{
"epoch": 16.59,
"learning_rate": 3.4134897360703817e-06,
"loss": 0.0209,
"step": 33950
},
{
"epoch": 16.61,
"learning_rate": 3.4012707722385145e-06,
"loss": 0.0228,
"step": 33975
},
{
"epoch": 16.62,
"learning_rate": 3.3890518084066478e-06,
"loss": 0.026,
"step": 34000
},
{
"epoch": 16.63,
"learning_rate": 3.3768328445747806e-06,
"loss": 0.0192,
"step": 34025
},
{
"epoch": 16.64,
"learning_rate": 3.3646138807429134e-06,
"loss": 0.0226,
"step": 34050
},
{
"epoch": 16.65,
"learning_rate": 3.3523949169110463e-06,
"loss": 0.021,
"step": 34075
},
{
"epoch": 16.67,
"learning_rate": 3.3401759530791795e-06,
"loss": 0.0185,
"step": 34100
},
{
"epoch": 16.68,
"learning_rate": 3.3279569892473123e-06,
"loss": 0.0279,
"step": 34125
},
{
"epoch": 16.69,
"learning_rate": 3.315738025415445e-06,
"loss": 0.0306,
"step": 34150
},
{
"epoch": 16.7,
"learning_rate": 3.3035190615835775e-06,
"loss": 0.033,
"step": 34175
},
{
"epoch": 16.72,
"learning_rate": 3.291300097751711e-06,
"loss": 0.0196,
"step": 34200
},
{
"epoch": 16.73,
"learning_rate": 3.2790811339198436e-06,
"loss": 0.0293,
"step": 34225
},
{
"epoch": 16.74,
"learning_rate": 3.2668621700879764e-06,
"loss": 0.0224,
"step": 34250
},
{
"epoch": 16.75,
"learning_rate": 3.2546432062561097e-06,
"loss": 0.0271,
"step": 34275
},
{
"epoch": 16.76,
"learning_rate": 3.2424242424242425e-06,
"loss": 0.0323,
"step": 34300
},
{
"epoch": 16.78,
"learning_rate": 3.2302052785923753e-06,
"loss": 0.0242,
"step": 34325
},
{
"epoch": 16.79,
"learning_rate": 3.2179863147605086e-06,
"loss": 0.0176,
"step": 34350
},
{
"epoch": 16.8,
"learning_rate": 3.2057673509286414e-06,
"loss": 0.0229,
"step": 34375
},
{
"epoch": 16.81,
"learning_rate": 3.1935483870967742e-06,
"loss": 0.022,
"step": 34400
},
{
"epoch": 16.83,
"learning_rate": 3.181329423264907e-06,
"loss": 0.0181,
"step": 34425
},
{
"epoch": 16.84,
"learning_rate": 3.1691104594330403e-06,
"loss": 0.0219,
"step": 34450
},
{
"epoch": 16.85,
"learning_rate": 3.156891495601173e-06,
"loss": 0.0192,
"step": 34475
},
{
"epoch": 16.86,
"learning_rate": 3.144672531769306e-06,
"loss": 0.0228,
"step": 34500
},
{
"epoch": 16.87,
"learning_rate": 3.132453567937439e-06,
"loss": 0.026,
"step": 34525
},
{
"epoch": 16.89,
"learning_rate": 3.120234604105572e-06,
"loss": 0.0213,
"step": 34550
},
{
"epoch": 16.9,
"learning_rate": 3.108015640273705e-06,
"loss": 0.0219,
"step": 34575
},
{
"epoch": 16.91,
"learning_rate": 3.095796676441838e-06,
"loss": 0.0241,
"step": 34600
},
{
"epoch": 16.92,
"learning_rate": 3.083577712609971e-06,
"loss": 0.0218,
"step": 34625
},
{
"epoch": 16.94,
"learning_rate": 3.0713587487781037e-06,
"loss": 0.0175,
"step": 34650
},
{
"epoch": 16.95,
"learning_rate": 3.0591397849462366e-06,
"loss": 0.0182,
"step": 34675
},
{
"epoch": 16.96,
"learning_rate": 3.04692082111437e-06,
"loss": 0.0235,
"step": 34700
},
{
"epoch": 16.97,
"learning_rate": 3.0347018572825026e-06,
"loss": 0.0176,
"step": 34725
},
{
"epoch": 16.98,
"learning_rate": 3.0224828934506355e-06,
"loss": 0.0187,
"step": 34750
},
{
"epoch": 17.0,
"learning_rate": 3.0102639296187687e-06,
"loss": 0.0206,
"step": 34775
},
{
"epoch": 17.01,
"learning_rate": 2.9980449657869015e-06,
"loss": 0.0201,
"step": 34800
},
{
"epoch": 17.02,
"learning_rate": 2.9858260019550344e-06,
"loss": 0.0172,
"step": 34825
},
{
"epoch": 17.03,
"learning_rate": 2.9736070381231676e-06,
"loss": 0.0219,
"step": 34850
},
{
"epoch": 17.05,
"learning_rate": 2.9613880742913004e-06,
"loss": 0.0164,
"step": 34875
},
{
"epoch": 17.06,
"learning_rate": 2.9491691104594332e-06,
"loss": 0.0212,
"step": 34900
},
{
"epoch": 17.07,
"learning_rate": 2.936950146627566e-06,
"loss": 0.0187,
"step": 34925
},
{
"epoch": 17.08,
"learning_rate": 2.9247311827956993e-06,
"loss": 0.0248,
"step": 34950
},
{
"epoch": 17.09,
"learning_rate": 2.912512218963832e-06,
"loss": 0.0203,
"step": 34975
},
{
"epoch": 17.11,
"learning_rate": 2.900293255131965e-06,
"loss": 0.0255,
"step": 35000
},
{
"epoch": 17.12,
"learning_rate": 2.8880742913000982e-06,
"loss": 0.0222,
"step": 35025
},
{
"epoch": 17.13,
"learning_rate": 2.875855327468231e-06,
"loss": 0.0234,
"step": 35050
},
{
"epoch": 17.14,
"learning_rate": 2.863636363636364e-06,
"loss": 0.0234,
"step": 35075
},
{
"epoch": 17.16,
"learning_rate": 2.851417399804497e-06,
"loss": 0.0174,
"step": 35100
},
{
"epoch": 17.17,
"learning_rate": 2.83919843597263e-06,
"loss": 0.0167,
"step": 35125
},
{
"epoch": 17.18,
"learning_rate": 2.8269794721407628e-06,
"loss": 0.0182,
"step": 35150
},
{
"epoch": 17.19,
"learning_rate": 2.814760508308896e-06,
"loss": 0.0146,
"step": 35175
},
{
"epoch": 17.2,
"learning_rate": 2.802541544477029e-06,
"loss": 0.0199,
"step": 35200
},
{
"epoch": 17.22,
"learning_rate": 2.7903225806451617e-06,
"loss": 0.021,
"step": 35225
},
{
"epoch": 17.23,
"learning_rate": 2.7781036168132945e-06,
"loss": 0.0173,
"step": 35250
},
{
"epoch": 17.24,
"learning_rate": 2.7658846529814277e-06,
"loss": 0.0164,
"step": 35275
},
{
"epoch": 17.25,
"learning_rate": 2.7536656891495605e-06,
"loss": 0.0172,
"step": 35300
},
{
"epoch": 17.27,
"learning_rate": 2.7414467253176934e-06,
"loss": 0.0202,
"step": 35325
},
{
"epoch": 17.28,
"learning_rate": 2.7292277614858266e-06,
"loss": 0.019,
"step": 35350
},
{
"epoch": 17.29,
"learning_rate": 2.7170087976539594e-06,
"loss": 0.0211,
"step": 35375
},
{
"epoch": 17.3,
"learning_rate": 2.7047898338220923e-06,
"loss": 0.0238,
"step": 35400
},
{
"epoch": 17.31,
"learning_rate": 2.6925708699902255e-06,
"loss": 0.0187,
"step": 35425
},
{
"epoch": 17.33,
"learning_rate": 2.680351906158358e-06,
"loss": 0.0218,
"step": 35450
},
{
"epoch": 17.34,
"learning_rate": 2.6681329423264907e-06,
"loss": 0.0138,
"step": 35475
},
{
"epoch": 17.35,
"learning_rate": 2.6559139784946236e-06,
"loss": 0.0146,
"step": 35500
},
{
"epoch": 17.36,
"learning_rate": 2.6436950146627564e-06,
"loss": 0.0183,
"step": 35525
},
{
"epoch": 17.38,
"learning_rate": 2.6314760508308896e-06,
"loss": 0.0126,
"step": 35550
},
{
"epoch": 17.39,
"learning_rate": 2.6192570869990225e-06,
"loss": 0.0127,
"step": 35575
},
{
"epoch": 17.4,
"learning_rate": 2.6070381231671553e-06,
"loss": 0.0194,
"step": 35600
},
{
"epoch": 17.41,
"learning_rate": 2.5948191593352885e-06,
"loss": 0.0206,
"step": 35625
},
{
"epoch": 17.42,
"learning_rate": 2.5826001955034214e-06,
"loss": 0.0194,
"step": 35650
},
{
"epoch": 17.44,
"learning_rate": 2.570381231671554e-06,
"loss": 0.0186,
"step": 35675
},
{
"epoch": 17.45,
"learning_rate": 2.5581622678396874e-06,
"loss": 0.031,
"step": 35700
},
{
"epoch": 17.46,
"learning_rate": 2.5459433040078202e-06,
"loss": 0.0235,
"step": 35725
},
{
"epoch": 17.47,
"learning_rate": 2.533724340175953e-06,
"loss": 0.02,
"step": 35750
},
{
"epoch": 17.49,
"learning_rate": 2.521505376344086e-06,
"loss": 0.0157,
"step": 35775
},
{
"epoch": 17.5,
"learning_rate": 2.509286412512219e-06,
"loss": 0.018,
"step": 35800
},
{
"epoch": 17.51,
"learning_rate": 2.497067448680352e-06,
"loss": 0.0209,
"step": 35825
},
{
"epoch": 17.52,
"learning_rate": 2.4848484848484848e-06,
"loss": 0.0201,
"step": 35850
},
{
"epoch": 17.53,
"learning_rate": 2.472629521016618e-06,
"loss": 0.0174,
"step": 35875
},
{
"epoch": 17.55,
"learning_rate": 2.460410557184751e-06,
"loss": 0.0223,
"step": 35900
},
{
"epoch": 17.56,
"learning_rate": 2.4481915933528837e-06,
"loss": 0.0249,
"step": 35925
},
{
"epoch": 17.57,
"learning_rate": 2.435972629521017e-06,
"loss": 0.0181,
"step": 35950
},
{
"epoch": 17.58,
"learning_rate": 2.4237536656891498e-06,
"loss": 0.0239,
"step": 35975
},
{
"epoch": 17.6,
"learning_rate": 2.4115347018572826e-06,
"loss": 0.0204,
"step": 36000
},
{
"epoch": 17.61,
"learning_rate": 2.399315738025416e-06,
"loss": 0.028,
"step": 36025
},
{
"epoch": 17.62,
"learning_rate": 2.3870967741935486e-06,
"loss": 0.0179,
"step": 36050
},
{
"epoch": 17.63,
"learning_rate": 2.3748778103616815e-06,
"loss": 0.0211,
"step": 36075
},
{
"epoch": 17.64,
"learning_rate": 2.3626588465298143e-06,
"loss": 0.016,
"step": 36100
},
{
"epoch": 17.66,
"learning_rate": 2.3504398826979475e-06,
"loss": 0.0202,
"step": 36125
},
{
"epoch": 17.67,
"learning_rate": 2.3382209188660804e-06,
"loss": 0.0211,
"step": 36150
},
{
"epoch": 17.68,
"learning_rate": 2.326001955034213e-06,
"loss": 0.0198,
"step": 36175
},
{
"epoch": 17.69,
"learning_rate": 2.3137829912023464e-06,
"loss": 0.0241,
"step": 36200
},
{
"epoch": 17.71,
"learning_rate": 2.3015640273704793e-06,
"loss": 0.0189,
"step": 36225
},
{
"epoch": 17.72,
"learning_rate": 2.289345063538612e-06,
"loss": 0.0179,
"step": 36250
},
{
"epoch": 17.73,
"learning_rate": 2.2771260997067453e-06,
"loss": 0.0252,
"step": 36275
},
{
"epoch": 17.74,
"learning_rate": 2.264907135874878e-06,
"loss": 0.019,
"step": 36300
},
{
"epoch": 17.75,
"learning_rate": 2.252688172043011e-06,
"loss": 0.019,
"step": 36325
},
{
"epoch": 17.77,
"learning_rate": 2.240469208211144e-06,
"loss": 0.0178,
"step": 36350
},
{
"epoch": 17.78,
"learning_rate": 2.228250244379277e-06,
"loss": 0.0174,
"step": 36375
},
{
"epoch": 17.79,
"learning_rate": 2.2160312805474095e-06,
"loss": 0.0172,
"step": 36400
},
{
"epoch": 17.8,
"learning_rate": 2.2038123167155427e-06,
"loss": 0.0157,
"step": 36425
},
{
"epoch": 17.82,
"learning_rate": 2.1915933528836755e-06,
"loss": 0.0212,
"step": 36450
},
{
"epoch": 17.83,
"learning_rate": 2.1793743890518083e-06,
"loss": 0.0204,
"step": 36475
},
{
"epoch": 17.84,
"learning_rate": 2.1671554252199416e-06,
"loss": 0.0279,
"step": 36500
},
{
"epoch": 17.85,
"learning_rate": 2.1549364613880744e-06,
"loss": 0.0197,
"step": 36525
},
{
"epoch": 17.86,
"learning_rate": 2.1427174975562072e-06,
"loss": 0.0188,
"step": 36550
},
{
"epoch": 17.88,
"learning_rate": 2.1304985337243405e-06,
"loss": 0.0241,
"step": 36575
},
{
"epoch": 17.89,
"learning_rate": 2.1182795698924733e-06,
"loss": 0.0207,
"step": 36600
},
{
"epoch": 17.9,
"learning_rate": 2.106060606060606e-06,
"loss": 0.0158,
"step": 36625
},
{
"epoch": 17.91,
"learning_rate": 2.093841642228739e-06,
"loss": 0.0214,
"step": 36650
},
{
"epoch": 17.93,
"learning_rate": 2.081622678396872e-06,
"loss": 0.0178,
"step": 36675
},
{
"epoch": 17.94,
"learning_rate": 2.069403714565005e-06,
"loss": 0.0242,
"step": 36700
},
{
"epoch": 17.95,
"learning_rate": 2.057184750733138e-06,
"loss": 0.0229,
"step": 36725
},
{
"epoch": 17.96,
"learning_rate": 2.044965786901271e-06,
"loss": 0.0168,
"step": 36750
},
{
"epoch": 17.97,
"learning_rate": 2.032746823069404e-06,
"loss": 0.0209,
"step": 36775
},
{
"epoch": 17.99,
"learning_rate": 2.0205278592375367e-06,
"loss": 0.021,
"step": 36800
},
{
"epoch": 18.0,
"learning_rate": 2.00830889540567e-06,
"loss": 0.0219,
"step": 36825
},
{
"epoch": 18.01,
"learning_rate": 1.996089931573803e-06,
"loss": 0.0174,
"step": 36850
},
{
"epoch": 18.02,
"learning_rate": 1.9838709677419356e-06,
"loss": 0.0169,
"step": 36875
},
{
"epoch": 18.04,
"learning_rate": 1.9716520039100685e-06,
"loss": 0.0171,
"step": 36900
},
{
"epoch": 18.05,
"learning_rate": 1.9594330400782017e-06,
"loss": 0.0203,
"step": 36925
},
{
"epoch": 18.06,
"learning_rate": 1.9472140762463345e-06,
"loss": 0.0165,
"step": 36950
},
{
"epoch": 18.07,
"learning_rate": 1.9349951124144674e-06,
"loss": 0.012,
"step": 36975
},
{
"epoch": 18.08,
"learning_rate": 1.9227761485826006e-06,
"loss": 0.0178,
"step": 37000
},
{
"epoch": 18.1,
"learning_rate": 1.9105571847507334e-06,
"loss": 0.0165,
"step": 37025
},
{
"epoch": 18.11,
"learning_rate": 1.898338220918866e-06,
"loss": 0.0157,
"step": 37050
},
{
"epoch": 18.12,
"learning_rate": 1.886119257086999e-06,
"loss": 0.0202,
"step": 37075
},
{
"epoch": 18.13,
"learning_rate": 1.873900293255132e-06,
"loss": 0.0149,
"step": 37100
},
{
"epoch": 18.15,
"learning_rate": 1.861681329423265e-06,
"loss": 0.0185,
"step": 37125
},
{
"epoch": 18.16,
"learning_rate": 1.849462365591398e-06,
"loss": 0.0226,
"step": 37150
},
{
"epoch": 18.17,
"learning_rate": 1.8372434017595308e-06,
"loss": 0.0152,
"step": 37175
},
{
"epoch": 18.18,
"learning_rate": 1.8250244379276638e-06,
"loss": 0.0126,
"step": 37200
},
{
"epoch": 18.19,
"learning_rate": 1.8128054740957969e-06,
"loss": 0.0211,
"step": 37225
},
{
"epoch": 18.21,
"learning_rate": 1.8005865102639297e-06,
"loss": 0.017,
"step": 37250
},
{
"epoch": 18.22,
"learning_rate": 1.7883675464320627e-06,
"loss": 0.0217,
"step": 37275
},
{
"epoch": 18.23,
"learning_rate": 1.7761485826001956e-06,
"loss": 0.0195,
"step": 37300
},
{
"epoch": 18.24,
"learning_rate": 1.7639296187683286e-06,
"loss": 0.015,
"step": 37325
},
{
"epoch": 18.26,
"learning_rate": 1.7517106549364616e-06,
"loss": 0.016,
"step": 37350
},
{
"epoch": 18.27,
"learning_rate": 1.7394916911045944e-06,
"loss": 0.019,
"step": 37375
},
{
"epoch": 18.28,
"learning_rate": 1.7272727272727275e-06,
"loss": 0.0206,
"step": 37400
},
{
"epoch": 18.29,
"learning_rate": 1.715542521994135e-06,
"loss": 0.0207,
"step": 37425
},
{
"epoch": 18.3,
"learning_rate": 1.7033235581622678e-06,
"loss": 0.0232,
"step": 37450
},
{
"epoch": 18.32,
"learning_rate": 1.6911045943304008e-06,
"loss": 0.0165,
"step": 37475
},
{
"epoch": 18.33,
"learning_rate": 1.6788856304985338e-06,
"loss": 0.0152,
"step": 37500
},
{
"epoch": 18.34,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.0144,
"step": 37525
},
{
"epoch": 18.35,
"learning_rate": 1.6544477028347997e-06,
"loss": 0.0149,
"step": 37550
},
{
"epoch": 18.37,
"learning_rate": 1.6422287390029325e-06,
"loss": 0.0158,
"step": 37575
},
{
"epoch": 18.38,
"learning_rate": 1.6300097751710656e-06,
"loss": 0.0144,
"step": 37600
},
{
"epoch": 18.39,
"learning_rate": 1.6177908113391986e-06,
"loss": 0.0224,
"step": 37625
},
{
"epoch": 18.4,
"learning_rate": 1.6055718475073314e-06,
"loss": 0.0198,
"step": 37650
},
{
"epoch": 18.41,
"learning_rate": 1.5933528836754645e-06,
"loss": 0.0116,
"step": 37675
},
{
"epoch": 18.43,
"learning_rate": 1.5811339198435973e-06,
"loss": 0.0169,
"step": 37700
},
{
"epoch": 18.44,
"learning_rate": 1.5689149560117303e-06,
"loss": 0.0183,
"step": 37725
},
{
"epoch": 18.45,
"learning_rate": 1.5566959921798633e-06,
"loss": 0.0183,
"step": 37750
},
{
"epoch": 18.46,
"learning_rate": 1.5444770283479962e-06,
"loss": 0.0264,
"step": 37775
},
{
"epoch": 18.48,
"learning_rate": 1.5322580645161292e-06,
"loss": 0.0174,
"step": 37800
},
{
"epoch": 18.49,
"learning_rate": 1.5200391006842622e-06,
"loss": 0.0193,
"step": 37825
},
{
"epoch": 18.5,
"learning_rate": 1.507820136852395e-06,
"loss": 0.0169,
"step": 37850
},
{
"epoch": 18.51,
"learning_rate": 1.495601173020528e-06,
"loss": 0.0188,
"step": 37875
},
{
"epoch": 18.52,
"learning_rate": 1.483382209188661e-06,
"loss": 0.0185,
"step": 37900
},
{
"epoch": 18.54,
"learning_rate": 1.471163245356794e-06,
"loss": 0.0167,
"step": 37925
},
{
"epoch": 18.55,
"learning_rate": 1.458944281524927e-06,
"loss": 0.0199,
"step": 37950
},
{
"epoch": 18.56,
"learning_rate": 1.4467253176930598e-06,
"loss": 0.02,
"step": 37975
},
{
"epoch": 18.57,
"learning_rate": 1.4345063538611929e-06,
"loss": 0.0184,
"step": 38000
},
{
"epoch": 18.59,
"learning_rate": 1.4222873900293257e-06,
"loss": 0.0182,
"step": 38025
},
{
"epoch": 18.6,
"learning_rate": 1.4100684261974587e-06,
"loss": 0.0142,
"step": 38050
},
{
"epoch": 18.61,
"learning_rate": 1.3978494623655913e-06,
"loss": 0.0195,
"step": 38075
},
{
"epoch": 18.62,
"learning_rate": 1.3856304985337244e-06,
"loss": 0.0112,
"step": 38100
},
{
"epoch": 18.63,
"learning_rate": 1.3734115347018572e-06,
"loss": 0.0183,
"step": 38125
},
{
"epoch": 18.65,
"learning_rate": 1.3611925708699902e-06,
"loss": 0.0187,
"step": 38150
},
{
"epoch": 18.66,
"learning_rate": 1.3489736070381233e-06,
"loss": 0.0149,
"step": 38175
},
{
"epoch": 18.67,
"learning_rate": 1.336754643206256e-06,
"loss": 0.0212,
"step": 38200
},
{
"epoch": 18.68,
"learning_rate": 1.3245356793743891e-06,
"loss": 0.0166,
"step": 38225
},
{
"epoch": 18.7,
"learning_rate": 1.3123167155425222e-06,
"loss": 0.0141,
"step": 38250
},
{
"epoch": 18.71,
"learning_rate": 1.300097751710655e-06,
"loss": 0.0147,
"step": 38275
},
{
"epoch": 18.72,
"learning_rate": 1.287878787878788e-06,
"loss": 0.0214,
"step": 38300
},
{
"epoch": 18.73,
"learning_rate": 1.2756598240469208e-06,
"loss": 0.0193,
"step": 38325
},
{
"epoch": 18.74,
"learning_rate": 1.2634408602150539e-06,
"loss": 0.0189,
"step": 38350
},
{
"epoch": 18.76,
"learning_rate": 1.251221896383187e-06,
"loss": 0.0193,
"step": 38375
},
{
"epoch": 18.77,
"learning_rate": 1.2390029325513197e-06,
"loss": 0.0208,
"step": 38400
},
{
"epoch": 18.78,
"learning_rate": 1.2267839687194528e-06,
"loss": 0.0179,
"step": 38425
},
{
"epoch": 18.79,
"learning_rate": 1.2145650048875856e-06,
"loss": 0.0149,
"step": 38450
},
{
"epoch": 18.8,
"learning_rate": 1.2023460410557186e-06,
"loss": 0.0181,
"step": 38475
},
{
"epoch": 18.82,
"learning_rate": 1.1901270772238517e-06,
"loss": 0.0233,
"step": 38500
},
{
"epoch": 18.83,
"learning_rate": 1.1779081133919845e-06,
"loss": 0.0138,
"step": 38525
},
{
"epoch": 18.84,
"learning_rate": 1.1656891495601173e-06,
"loss": 0.0209,
"step": 38550
},
{
"epoch": 18.85,
"learning_rate": 1.1534701857282503e-06,
"loss": 0.0222,
"step": 38575
},
{
"epoch": 18.87,
"learning_rate": 1.1412512218963832e-06,
"loss": 0.0217,
"step": 38600
},
{
"epoch": 18.88,
"learning_rate": 1.1290322580645162e-06,
"loss": 0.0217,
"step": 38625
},
{
"epoch": 18.89,
"learning_rate": 1.1168132942326492e-06,
"loss": 0.0175,
"step": 38650
},
{
"epoch": 18.9,
"learning_rate": 1.104594330400782e-06,
"loss": 0.0215,
"step": 38675
},
{
"epoch": 18.91,
"learning_rate": 1.092375366568915e-06,
"loss": 0.0189,
"step": 38700
},
{
"epoch": 18.93,
"learning_rate": 1.080156402737048e-06,
"loss": 0.0179,
"step": 38725
},
{
"epoch": 18.94,
"learning_rate": 1.067937438905181e-06,
"loss": 0.0128,
"step": 38750
},
{
"epoch": 18.95,
"learning_rate": 1.055718475073314e-06,
"loss": 0.0214,
"step": 38775
},
{
"epoch": 18.96,
"learning_rate": 1.0434995112414468e-06,
"loss": 0.0192,
"step": 38800
},
{
"epoch": 18.98,
"learning_rate": 1.0312805474095798e-06,
"loss": 0.016,
"step": 38825
},
{
"epoch": 18.99,
"learning_rate": 1.0190615835777127e-06,
"loss": 0.0184,
"step": 38850
},
{
"epoch": 19.0,
"learning_rate": 1.0068426197458455e-06,
"loss": 0.0146,
"step": 38875
},
{
"epoch": 19.01,
"learning_rate": 9.946236559139785e-07,
"loss": 0.0122,
"step": 38900
},
{
"epoch": 19.02,
"learning_rate": 9.824046920821116e-07,
"loss": 0.0165,
"step": 38925
},
{
"epoch": 19.04,
"learning_rate": 9.701857282502444e-07,
"loss": 0.0165,
"step": 38950
},
{
"epoch": 19.05,
"learning_rate": 9.579667644183774e-07,
"loss": 0.0188,
"step": 38975
},
{
"epoch": 19.06,
"learning_rate": 9.457478005865104e-07,
"loss": 0.0208,
"step": 39000
},
{
"epoch": 19.07,
"learning_rate": 9.335288367546433e-07,
"loss": 0.0188,
"step": 39025
},
{
"epoch": 19.09,
"learning_rate": 9.213098729227762e-07,
"loss": 0.0133,
"step": 39050
},
{
"epoch": 19.1,
"learning_rate": 9.090909090909091e-07,
"loss": 0.0088,
"step": 39075
},
{
"epoch": 19.11,
"learning_rate": 8.968719452590422e-07,
"loss": 0.0206,
"step": 39100
},
{
"epoch": 19.12,
"learning_rate": 8.846529814271751e-07,
"loss": 0.0149,
"step": 39125
},
{
"epoch": 19.13,
"learning_rate": 8.72434017595308e-07,
"loss": 0.0158,
"step": 39150
},
{
"epoch": 19.15,
"learning_rate": 8.60215053763441e-07,
"loss": 0.0162,
"step": 39175
},
{
"epoch": 19.16,
"learning_rate": 8.479960899315738e-07,
"loss": 0.0144,
"step": 39200
},
{
"epoch": 19.17,
"learning_rate": 8.357771260997067e-07,
"loss": 0.0171,
"step": 39225
},
{
"epoch": 19.18,
"learning_rate": 8.235581622678398e-07,
"loss": 0.0192,
"step": 39250
},
{
"epoch": 19.2,
"learning_rate": 8.113391984359727e-07,
"loss": 0.0156,
"step": 39275
},
{
"epoch": 19.21,
"learning_rate": 7.991202346041056e-07,
"loss": 0.017,
"step": 39300
},
{
"epoch": 19.22,
"learning_rate": 7.869012707722385e-07,
"loss": 0.0192,
"step": 39325
},
{
"epoch": 19.23,
"learning_rate": 7.746823069403715e-07,
"loss": 0.021,
"step": 39350
},
{
"epoch": 19.24,
"learning_rate": 7.624633431085045e-07,
"loss": 0.0211,
"step": 39375
},
{
"epoch": 19.26,
"learning_rate": 7.502443792766374e-07,
"loss": 0.0158,
"step": 39400
},
{
"epoch": 19.27,
"learning_rate": 7.38514173998045e-07,
"loss": 0.0166,
"step": 39425
},
{
"epoch": 19.28,
"learning_rate": 7.262952101661779e-07,
"loss": 0.0116,
"step": 39450
},
{
"epoch": 19.29,
"learning_rate": 7.14076246334311e-07,
"loss": 0.0179,
"step": 39475
},
{
"epoch": 19.31,
"learning_rate": 7.018572825024439e-07,
"loss": 0.0195,
"step": 39500
},
{
"epoch": 19.32,
"learning_rate": 6.896383186705768e-07,
"loss": 0.0198,
"step": 39525
},
{
"epoch": 19.33,
"learning_rate": 6.774193548387098e-07,
"loss": 0.0191,
"step": 39550
},
{
"epoch": 19.34,
"learning_rate": 6.652003910068426e-07,
"loss": 0.0181,
"step": 39575
},
{
"epoch": 19.35,
"learning_rate": 6.529814271749755e-07,
"loss": 0.0167,
"step": 39600
},
{
"epoch": 19.37,
"learning_rate": 6.407624633431086e-07,
"loss": 0.0167,
"step": 39625
},
{
"epoch": 19.38,
"learning_rate": 6.285434995112415e-07,
"loss": 0.0111,
"step": 39650
},
{
"epoch": 19.39,
"learning_rate": 6.163245356793744e-07,
"loss": 0.0152,
"step": 39675
},
{
"epoch": 19.4,
"learning_rate": 6.041055718475073e-07,
"loss": 0.0189,
"step": 39700
},
{
"epoch": 19.42,
"learning_rate": 5.92375366568915e-07,
"loss": 0.0158,
"step": 39725
},
{
"epoch": 19.43,
"learning_rate": 5.801564027370479e-07,
"loss": 0.0207,
"step": 39750
},
{
"epoch": 19.44,
"learning_rate": 5.679374389051809e-07,
"loss": 0.0189,
"step": 39775
},
{
"epoch": 19.45,
"learning_rate": 5.557184750733138e-07,
"loss": 0.0172,
"step": 39800
},
{
"epoch": 19.46,
"learning_rate": 5.434995112414468e-07,
"loss": 0.0173,
"step": 39825
},
{
"epoch": 19.48,
"learning_rate": 5.312805474095798e-07,
"loss": 0.0135,
"step": 39850
},
{
"epoch": 19.49,
"learning_rate": 5.190615835777126e-07,
"loss": 0.0126,
"step": 39875
},
{
"epoch": 19.5,
"learning_rate": 5.068426197458456e-07,
"loss": 0.0183,
"step": 39900
},
{
"epoch": 19.51,
"learning_rate": 4.946236559139786e-07,
"loss": 0.0121,
"step": 39925
},
{
"epoch": 19.53,
"learning_rate": 4.824046920821115e-07,
"loss": 0.0164,
"step": 39950
},
{
"epoch": 19.54,
"learning_rate": 4.701857282502444e-07,
"loss": 0.0227,
"step": 39975
},
{
"epoch": 19.55,
"learning_rate": 4.579667644183774e-07,
"loss": 0.0172,
"step": 40000
},
{
"epoch": 19.56,
"learning_rate": 4.4574780058651033e-07,
"loss": 0.0156,
"step": 40025
},
{
"epoch": 19.57,
"learning_rate": 4.335288367546432e-07,
"loss": 0.0212,
"step": 40050
},
{
"epoch": 19.59,
"learning_rate": 4.213098729227762e-07,
"loss": 0.0138,
"step": 40075
},
{
"epoch": 19.6,
"learning_rate": 4.090909090909091e-07,
"loss": 0.0179,
"step": 40100
},
{
"epoch": 19.61,
"learning_rate": 3.968719452590421e-07,
"loss": 0.0121,
"step": 40125
},
{
"epoch": 19.62,
"learning_rate": 3.8465298142717503e-07,
"loss": 0.0175,
"step": 40150
},
{
"epoch": 19.64,
"learning_rate": 3.7243401759530796e-07,
"loss": 0.0194,
"step": 40175
},
{
"epoch": 19.65,
"learning_rate": 3.602150537634409e-07,
"loss": 0.0119,
"step": 40200
},
{
"epoch": 19.66,
"learning_rate": 3.479960899315738e-07,
"loss": 0.0121,
"step": 40225
},
{
"epoch": 19.67,
"learning_rate": 3.3577712609970675e-07,
"loss": 0.0207,
"step": 40250
},
{
"epoch": 19.68,
"learning_rate": 3.2355816226783973e-07,
"loss": 0.0118,
"step": 40275
},
{
"epoch": 19.7,
"learning_rate": 3.1133919843597266e-07,
"loss": 0.0142,
"step": 40300
},
{
"epoch": 19.71,
"learning_rate": 2.991202346041056e-07,
"loss": 0.0228,
"step": 40325
},
{
"epoch": 19.72,
"learning_rate": 2.869012707722385e-07,
"loss": 0.0185,
"step": 40350
},
{
"epoch": 19.73,
"learning_rate": 2.746823069403715e-07,
"loss": 0.017,
"step": 40375
},
{
"epoch": 19.75,
"learning_rate": 2.6246334310850443e-07,
"loss": 0.0229,
"step": 40400
},
{
"epoch": 19.76,
"learning_rate": 2.5024437927663736e-07,
"loss": 0.0141,
"step": 40425
},
{
"epoch": 19.77,
"learning_rate": 2.3802541544477032e-07,
"loss": 0.0139,
"step": 40450
},
{
"epoch": 19.78,
"learning_rate": 2.2580645161290322e-07,
"loss": 0.0188,
"step": 40475
},
{
"epoch": 19.79,
"learning_rate": 2.1358748778103618e-07,
"loss": 0.0168,
"step": 40500
},
{
"epoch": 19.81,
"learning_rate": 2.0136852394916913e-07,
"loss": 0.0158,
"step": 40525
},
{
"epoch": 19.82,
"learning_rate": 1.8914956011730206e-07,
"loss": 0.0168,
"step": 40550
},
{
"epoch": 19.83,
"learning_rate": 1.7693059628543502e-07,
"loss": 0.0183,
"step": 40575
},
{
"epoch": 19.84,
"learning_rate": 1.6471163245356795e-07,
"loss": 0.019,
"step": 40600
},
{
"epoch": 19.86,
"learning_rate": 1.524926686217009e-07,
"loss": 0.0162,
"step": 40625
},
{
"epoch": 19.87,
"learning_rate": 1.4027370478983383e-07,
"loss": 0.0227,
"step": 40650
},
{
"epoch": 19.88,
"learning_rate": 1.280547409579668e-07,
"loss": 0.0169,
"step": 40675
},
{
"epoch": 19.89,
"learning_rate": 1.1583577712609972e-07,
"loss": 0.0106,
"step": 40700
},
{
"epoch": 19.9,
"learning_rate": 1.0361681329423266e-07,
"loss": 0.0175,
"step": 40725
},
{
"epoch": 19.92,
"learning_rate": 9.13978494623656e-08,
"loss": 0.0197,
"step": 40750
},
{
"epoch": 19.93,
"learning_rate": 7.917888563049853e-08,
"loss": 0.0176,
"step": 40775
},
{
"epoch": 19.94,
"learning_rate": 6.695992179863148e-08,
"loss": 0.0144,
"step": 40800
},
{
"epoch": 19.95,
"learning_rate": 5.4740957966764426e-08,
"loss": 0.0128,
"step": 40825
},
{
"epoch": 19.97,
"learning_rate": 4.252199413489737e-08,
"loss": 0.0136,
"step": 40850
},
{
"epoch": 19.98,
"learning_rate": 3.0303030303030305e-08,
"loss": 0.011,
"step": 40875
},
{
"epoch": 19.99,
"learning_rate": 1.8084066471163248e-08,
"loss": 0.0185,
"step": 40900
},
{
"epoch": 20.0,
"step": 40920,
"total_flos": 5.166258268431053e+17,
"train_loss": 0.12400934287872191,
"train_runtime": 16044.2941,
"train_samples_per_second": 489.522,
"train_steps_per_second": 2.55
}
],
"logging_steps": 25,
"max_steps": 40920,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 5.166258268431053e+17,
"trial_name": null,
"trial_params": null
}