bloom-1b7-evalita / trainer_state.json
basilepp19's picture
Upload 8 files
cb12c89
raw
history blame
313 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"global_step": 25688,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.9992214263469324e-05,
"loss": 3.6476,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 1.9984428526938652e-05,
"loss": 0.3029,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 1.9976642790407974e-05,
"loss": 0.4554,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 1.9968857053877296e-05,
"loss": 0.3546,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 1.9961071317346625e-05,
"loss": 0.4369,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 1.9953285580815947e-05,
"loss": 0.4187,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 1.9945499844285272e-05,
"loss": 0.3758,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 1.9937714107754594e-05,
"loss": 0.442,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 1.992992837122392e-05,
"loss": 0.3408,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 1.9922142634693244e-05,
"loss": 0.2933,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 1.9914356898162566e-05,
"loss": 0.5627,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 1.990657116163189e-05,
"loss": 0.406,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 1.9898785425101217e-05,
"loss": 0.3029,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 1.989099968857054e-05,
"loss": 0.3583,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 1.9883213952039864e-05,
"loss": 0.4022,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 1.987542821550919e-05,
"loss": 0.3508,
"step": 160
},
{
"epoch": 0.01,
"learning_rate": 1.986764247897851e-05,
"loss": 0.3816,
"step": 170
},
{
"epoch": 0.01,
"learning_rate": 1.9859856742447837e-05,
"loss": 0.422,
"step": 180
},
{
"epoch": 0.01,
"learning_rate": 1.9852071005917162e-05,
"loss": 0.4494,
"step": 190
},
{
"epoch": 0.02,
"learning_rate": 1.9844285269386484e-05,
"loss": 0.3588,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 1.983649953285581e-05,
"loss": 0.4859,
"step": 210
},
{
"epoch": 0.02,
"learning_rate": 1.9828713796325135e-05,
"loss": 0.4338,
"step": 220
},
{
"epoch": 0.02,
"learning_rate": 1.9820928059794456e-05,
"loss": 0.4002,
"step": 230
},
{
"epoch": 0.02,
"learning_rate": 1.9813142323263782e-05,
"loss": 0.4142,
"step": 240
},
{
"epoch": 0.02,
"learning_rate": 1.9805356586733107e-05,
"loss": 0.369,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 1.979757085020243e-05,
"loss": 0.2912,
"step": 260
},
{
"epoch": 0.02,
"learning_rate": 1.9789785113671754e-05,
"loss": 0.4347,
"step": 270
},
{
"epoch": 0.02,
"learning_rate": 1.978199937714108e-05,
"loss": 0.3762,
"step": 280
},
{
"epoch": 0.02,
"learning_rate": 1.97742136406104e-05,
"loss": 0.2991,
"step": 290
},
{
"epoch": 0.02,
"learning_rate": 1.9766427904079727e-05,
"loss": 0.355,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 1.9758642167549052e-05,
"loss": 0.3871,
"step": 310
},
{
"epoch": 0.02,
"learning_rate": 1.9750856431018377e-05,
"loss": 0.3469,
"step": 320
},
{
"epoch": 0.03,
"learning_rate": 1.97430706944877e-05,
"loss": 0.3732,
"step": 330
},
{
"epoch": 0.03,
"learning_rate": 1.9735284957957025e-05,
"loss": 0.3906,
"step": 340
},
{
"epoch": 0.03,
"learning_rate": 1.972749922142635e-05,
"loss": 0.3773,
"step": 350
},
{
"epoch": 0.03,
"learning_rate": 1.9719713484895672e-05,
"loss": 0.4073,
"step": 360
},
{
"epoch": 0.03,
"learning_rate": 1.9711927748364997e-05,
"loss": 0.3961,
"step": 370
},
{
"epoch": 0.03,
"learning_rate": 1.9704142011834322e-05,
"loss": 0.4076,
"step": 380
},
{
"epoch": 0.03,
"learning_rate": 1.9696356275303644e-05,
"loss": 0.4342,
"step": 390
},
{
"epoch": 0.03,
"learning_rate": 1.968857053877297e-05,
"loss": 0.4247,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 1.9680784802242295e-05,
"loss": 0.4857,
"step": 410
},
{
"epoch": 0.03,
"learning_rate": 1.9672999065711617e-05,
"loss": 0.3507,
"step": 420
},
{
"epoch": 0.03,
"learning_rate": 1.9665213329180942e-05,
"loss": 0.447,
"step": 430
},
{
"epoch": 0.03,
"learning_rate": 1.9657427592650267e-05,
"loss": 0.4799,
"step": 440
},
{
"epoch": 0.04,
"learning_rate": 1.964964185611959e-05,
"loss": 0.4324,
"step": 450
},
{
"epoch": 0.04,
"learning_rate": 1.9641856119588915e-05,
"loss": 0.3724,
"step": 460
},
{
"epoch": 0.04,
"learning_rate": 1.963407038305824e-05,
"loss": 0.3797,
"step": 470
},
{
"epoch": 0.04,
"learning_rate": 1.9626284646527562e-05,
"loss": 0.342,
"step": 480
},
{
"epoch": 0.04,
"learning_rate": 1.9618498909996887e-05,
"loss": 0.3696,
"step": 490
},
{
"epoch": 0.04,
"learning_rate": 1.9610713173466212e-05,
"loss": 0.3531,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 1.9602927436935534e-05,
"loss": 0.3707,
"step": 510
},
{
"epoch": 0.04,
"learning_rate": 1.959514170040486e-05,
"loss": 0.3054,
"step": 520
},
{
"epoch": 0.04,
"learning_rate": 1.9587355963874185e-05,
"loss": 0.333,
"step": 530
},
{
"epoch": 0.04,
"learning_rate": 1.9579570227343507e-05,
"loss": 0.4072,
"step": 540
},
{
"epoch": 0.04,
"learning_rate": 1.9571784490812832e-05,
"loss": 0.3363,
"step": 550
},
{
"epoch": 0.04,
"learning_rate": 1.9563998754282157e-05,
"loss": 0.3723,
"step": 560
},
{
"epoch": 0.04,
"learning_rate": 1.9556213017751483e-05,
"loss": 0.4651,
"step": 570
},
{
"epoch": 0.05,
"learning_rate": 1.9548427281220805e-05,
"loss": 0.3766,
"step": 580
},
{
"epoch": 0.05,
"learning_rate": 1.954064154469013e-05,
"loss": 0.3247,
"step": 590
},
{
"epoch": 0.05,
"learning_rate": 1.9532855808159455e-05,
"loss": 0.4209,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 1.9525070071628777e-05,
"loss": 0.4038,
"step": 610
},
{
"epoch": 0.05,
"learning_rate": 1.9517284335098102e-05,
"loss": 0.305,
"step": 620
},
{
"epoch": 0.05,
"learning_rate": 1.9509498598567428e-05,
"loss": 0.4737,
"step": 630
},
{
"epoch": 0.05,
"learning_rate": 1.950171286203675e-05,
"loss": 0.3816,
"step": 640
},
{
"epoch": 0.05,
"learning_rate": 1.9493927125506075e-05,
"loss": 0.3406,
"step": 650
},
{
"epoch": 0.05,
"learning_rate": 1.94861413889754e-05,
"loss": 0.4612,
"step": 660
},
{
"epoch": 0.05,
"learning_rate": 1.9478355652444722e-05,
"loss": 0.396,
"step": 670
},
{
"epoch": 0.05,
"learning_rate": 1.9470569915914047e-05,
"loss": 0.2885,
"step": 680
},
{
"epoch": 0.05,
"learning_rate": 1.9462784179383373e-05,
"loss": 0.372,
"step": 690
},
{
"epoch": 0.05,
"learning_rate": 1.9454998442852695e-05,
"loss": 0.3646,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 1.944721270632202e-05,
"loss": 0.3517,
"step": 710
},
{
"epoch": 0.06,
"learning_rate": 1.9439426969791345e-05,
"loss": 0.3244,
"step": 720
},
{
"epoch": 0.06,
"learning_rate": 1.9431641233260667e-05,
"loss": 0.3107,
"step": 730
},
{
"epoch": 0.06,
"learning_rate": 1.9423855496729992e-05,
"loss": 0.4312,
"step": 740
},
{
"epoch": 0.06,
"learning_rate": 1.9416069760199318e-05,
"loss": 0.3501,
"step": 750
},
{
"epoch": 0.06,
"learning_rate": 1.940828402366864e-05,
"loss": 0.3394,
"step": 760
},
{
"epoch": 0.06,
"learning_rate": 1.9400498287137965e-05,
"loss": 0.2721,
"step": 770
},
{
"epoch": 0.06,
"learning_rate": 1.939271255060729e-05,
"loss": 0.3824,
"step": 780
},
{
"epoch": 0.06,
"learning_rate": 1.9384926814076612e-05,
"loss": 0.3357,
"step": 790
},
{
"epoch": 0.06,
"learning_rate": 1.9377141077545937e-05,
"loss": 0.5051,
"step": 800
},
{
"epoch": 0.06,
"learning_rate": 1.9369355341015263e-05,
"loss": 0.3467,
"step": 810
},
{
"epoch": 0.06,
"learning_rate": 1.9361569604484588e-05,
"loss": 0.3079,
"step": 820
},
{
"epoch": 0.06,
"learning_rate": 1.935378386795391e-05,
"loss": 0.3362,
"step": 830
},
{
"epoch": 0.07,
"learning_rate": 1.9345998131423235e-05,
"loss": 0.3535,
"step": 840
},
{
"epoch": 0.07,
"learning_rate": 1.933821239489256e-05,
"loss": 0.3338,
"step": 850
},
{
"epoch": 0.07,
"learning_rate": 1.9330426658361883e-05,
"loss": 0.3705,
"step": 860
},
{
"epoch": 0.07,
"learning_rate": 1.9322640921831204e-05,
"loss": 0.3507,
"step": 870
},
{
"epoch": 0.07,
"learning_rate": 1.9314855185300533e-05,
"loss": 0.2796,
"step": 880
},
{
"epoch": 0.07,
"learning_rate": 1.9307069448769855e-05,
"loss": 0.3828,
"step": 890
},
{
"epoch": 0.07,
"learning_rate": 1.9299283712239177e-05,
"loss": 0.3091,
"step": 900
},
{
"epoch": 0.07,
"learning_rate": 1.9291497975708506e-05,
"loss": 0.294,
"step": 910
},
{
"epoch": 0.07,
"learning_rate": 1.9283712239177828e-05,
"loss": 0.3353,
"step": 920
},
{
"epoch": 0.07,
"learning_rate": 1.927592650264715e-05,
"loss": 0.4199,
"step": 930
},
{
"epoch": 0.07,
"learning_rate": 1.9268140766116478e-05,
"loss": 0.4175,
"step": 940
},
{
"epoch": 0.07,
"learning_rate": 1.92603550295858e-05,
"loss": 0.396,
"step": 950
},
{
"epoch": 0.07,
"learning_rate": 1.9252569293055125e-05,
"loss": 0.3726,
"step": 960
},
{
"epoch": 0.08,
"learning_rate": 1.924478355652445e-05,
"loss": 0.4031,
"step": 970
},
{
"epoch": 0.08,
"learning_rate": 1.9236997819993773e-05,
"loss": 0.3575,
"step": 980
},
{
"epoch": 0.08,
"learning_rate": 1.9229212083463098e-05,
"loss": 0.3118,
"step": 990
},
{
"epoch": 0.08,
"learning_rate": 1.922142634693242e-05,
"loss": 0.272,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 1.9213640610401745e-05,
"loss": 0.3698,
"step": 1010
},
{
"epoch": 0.08,
"learning_rate": 1.920585487387107e-05,
"loss": 0.3232,
"step": 1020
},
{
"epoch": 0.08,
"learning_rate": 1.9198069137340392e-05,
"loss": 0.4195,
"step": 1030
},
{
"epoch": 0.08,
"learning_rate": 1.9190283400809718e-05,
"loss": 0.377,
"step": 1040
},
{
"epoch": 0.08,
"learning_rate": 1.9182497664279043e-05,
"loss": 0.371,
"step": 1050
},
{
"epoch": 0.08,
"learning_rate": 1.9174711927748365e-05,
"loss": 0.3674,
"step": 1060
},
{
"epoch": 0.08,
"learning_rate": 1.9166926191217693e-05,
"loss": 0.3503,
"step": 1070
},
{
"epoch": 0.08,
"learning_rate": 1.9159140454687015e-05,
"loss": 0.3805,
"step": 1080
},
{
"epoch": 0.08,
"learning_rate": 1.9151354718156337e-05,
"loss": 0.3474,
"step": 1090
},
{
"epoch": 0.09,
"learning_rate": 1.9143568981625663e-05,
"loss": 0.3868,
"step": 1100
},
{
"epoch": 0.09,
"learning_rate": 1.9135783245094988e-05,
"loss": 0.3652,
"step": 1110
},
{
"epoch": 0.09,
"learning_rate": 1.912799750856431e-05,
"loss": 0.3521,
"step": 1120
},
{
"epoch": 0.09,
"learning_rate": 1.9120211772033635e-05,
"loss": 0.3004,
"step": 1130
},
{
"epoch": 0.09,
"learning_rate": 1.911242603550296e-05,
"loss": 0.3605,
"step": 1140
},
{
"epoch": 0.09,
"learning_rate": 1.9104640298972282e-05,
"loss": 0.2845,
"step": 1150
},
{
"epoch": 0.09,
"learning_rate": 1.9096854562441608e-05,
"loss": 0.3038,
"step": 1160
},
{
"epoch": 0.09,
"learning_rate": 1.9089068825910933e-05,
"loss": 0.3712,
"step": 1170
},
{
"epoch": 0.09,
"learning_rate": 1.9081283089380255e-05,
"loss": 0.303,
"step": 1180
},
{
"epoch": 0.09,
"learning_rate": 1.907349735284958e-05,
"loss": 0.2412,
"step": 1190
},
{
"epoch": 0.09,
"learning_rate": 1.9065711616318905e-05,
"loss": 0.3342,
"step": 1200
},
{
"epoch": 0.09,
"learning_rate": 1.905792587978823e-05,
"loss": 0.2995,
"step": 1210
},
{
"epoch": 0.09,
"learning_rate": 1.9050140143257553e-05,
"loss": 0.4164,
"step": 1220
},
{
"epoch": 0.1,
"learning_rate": 1.9042354406726878e-05,
"loss": 0.3316,
"step": 1230
},
{
"epoch": 0.1,
"learning_rate": 1.9034568670196203e-05,
"loss": 0.3575,
"step": 1240
},
{
"epoch": 0.1,
"learning_rate": 1.9026782933665525e-05,
"loss": 0.3399,
"step": 1250
},
{
"epoch": 0.1,
"learning_rate": 1.901899719713485e-05,
"loss": 0.3008,
"step": 1260
},
{
"epoch": 0.1,
"learning_rate": 1.9011211460604176e-05,
"loss": 0.3817,
"step": 1270
},
{
"epoch": 0.1,
"learning_rate": 1.9003425724073498e-05,
"loss": 0.2931,
"step": 1280
},
{
"epoch": 0.1,
"learning_rate": 1.8995639987542823e-05,
"loss": 0.3154,
"step": 1290
},
{
"epoch": 0.1,
"learning_rate": 1.8987854251012148e-05,
"loss": 0.3168,
"step": 1300
},
{
"epoch": 0.1,
"learning_rate": 1.898006851448147e-05,
"loss": 0.2592,
"step": 1310
},
{
"epoch": 0.1,
"learning_rate": 1.8972282777950795e-05,
"loss": 0.3804,
"step": 1320
},
{
"epoch": 0.1,
"learning_rate": 1.896449704142012e-05,
"loss": 0.3438,
"step": 1330
},
{
"epoch": 0.1,
"learning_rate": 1.8956711304889443e-05,
"loss": 0.3112,
"step": 1340
},
{
"epoch": 0.11,
"learning_rate": 1.8948925568358768e-05,
"loss": 0.3406,
"step": 1350
},
{
"epoch": 0.11,
"learning_rate": 1.8941139831828093e-05,
"loss": 0.4156,
"step": 1360
},
{
"epoch": 0.11,
"learning_rate": 1.8933354095297415e-05,
"loss": 0.3699,
"step": 1370
},
{
"epoch": 0.11,
"learning_rate": 1.892556835876674e-05,
"loss": 0.3685,
"step": 1380
},
{
"epoch": 0.11,
"learning_rate": 1.8917782622236066e-05,
"loss": 0.3602,
"step": 1390
},
{
"epoch": 0.11,
"learning_rate": 1.8909996885705388e-05,
"loss": 0.3602,
"step": 1400
},
{
"epoch": 0.11,
"learning_rate": 1.8902211149174713e-05,
"loss": 0.399,
"step": 1410
},
{
"epoch": 0.11,
"learning_rate": 1.8894425412644038e-05,
"loss": 0.3202,
"step": 1420
},
{
"epoch": 0.11,
"learning_rate": 1.888663967611336e-05,
"loss": 0.4622,
"step": 1430
},
{
"epoch": 0.11,
"learning_rate": 1.8878853939582685e-05,
"loss": 0.3126,
"step": 1440
},
{
"epoch": 0.11,
"learning_rate": 1.887106820305201e-05,
"loss": 0.3729,
"step": 1450
},
{
"epoch": 0.11,
"learning_rate": 1.8863282466521336e-05,
"loss": 0.345,
"step": 1460
},
{
"epoch": 0.11,
"learning_rate": 1.8855496729990658e-05,
"loss": 0.3146,
"step": 1470
},
{
"epoch": 0.12,
"learning_rate": 1.8847710993459983e-05,
"loss": 0.3615,
"step": 1480
},
{
"epoch": 0.12,
"learning_rate": 1.883992525692931e-05,
"loss": 0.3023,
"step": 1490
},
{
"epoch": 0.12,
"learning_rate": 1.883213952039863e-05,
"loss": 0.3833,
"step": 1500
},
{
"epoch": 0.12,
"learning_rate": 1.8824353783867956e-05,
"loss": 0.3573,
"step": 1510
},
{
"epoch": 0.12,
"learning_rate": 1.881656804733728e-05,
"loss": 0.2883,
"step": 1520
},
{
"epoch": 0.12,
"learning_rate": 1.8808782310806603e-05,
"loss": 0.4392,
"step": 1530
},
{
"epoch": 0.12,
"learning_rate": 1.880099657427593e-05,
"loss": 0.3942,
"step": 1540
},
{
"epoch": 0.12,
"learning_rate": 1.8793210837745254e-05,
"loss": 0.3247,
"step": 1550
},
{
"epoch": 0.12,
"learning_rate": 1.8785425101214576e-05,
"loss": 0.378,
"step": 1560
},
{
"epoch": 0.12,
"learning_rate": 1.87776393646839e-05,
"loss": 0.3076,
"step": 1570
},
{
"epoch": 0.12,
"learning_rate": 1.8769853628153226e-05,
"loss": 0.3093,
"step": 1580
},
{
"epoch": 0.12,
"learning_rate": 1.8762067891622548e-05,
"loss": 0.2823,
"step": 1590
},
{
"epoch": 0.12,
"learning_rate": 1.8754282155091873e-05,
"loss": 0.2736,
"step": 1600
},
{
"epoch": 0.13,
"learning_rate": 1.87464964185612e-05,
"loss": 0.2913,
"step": 1610
},
{
"epoch": 0.13,
"learning_rate": 1.873871068203052e-05,
"loss": 0.3343,
"step": 1620
},
{
"epoch": 0.13,
"learning_rate": 1.8730924945499846e-05,
"loss": 0.3567,
"step": 1630
},
{
"epoch": 0.13,
"learning_rate": 1.872313920896917e-05,
"loss": 0.3122,
"step": 1640
},
{
"epoch": 0.13,
"learning_rate": 1.8715353472438493e-05,
"loss": 0.2749,
"step": 1650
},
{
"epoch": 0.13,
"learning_rate": 1.870756773590782e-05,
"loss": 0.3274,
"step": 1660
},
{
"epoch": 0.13,
"learning_rate": 1.8699781999377144e-05,
"loss": 0.4423,
"step": 1670
},
{
"epoch": 0.13,
"learning_rate": 1.8691996262846466e-05,
"loss": 0.2746,
"step": 1680
},
{
"epoch": 0.13,
"learning_rate": 1.868421052631579e-05,
"loss": 0.3538,
"step": 1690
},
{
"epoch": 0.13,
"learning_rate": 1.8676424789785116e-05,
"loss": 0.3878,
"step": 1700
},
{
"epoch": 0.13,
"learning_rate": 1.866863905325444e-05,
"loss": 0.3795,
"step": 1710
},
{
"epoch": 0.13,
"learning_rate": 1.8660853316723763e-05,
"loss": 0.3302,
"step": 1720
},
{
"epoch": 0.13,
"learning_rate": 1.865306758019309e-05,
"loss": 0.3231,
"step": 1730
},
{
"epoch": 0.14,
"learning_rate": 1.8645281843662414e-05,
"loss": 0.3356,
"step": 1740
},
{
"epoch": 0.14,
"learning_rate": 1.8637496107131736e-05,
"loss": 0.3151,
"step": 1750
},
{
"epoch": 0.14,
"learning_rate": 1.862971037060106e-05,
"loss": 0.3759,
"step": 1760
},
{
"epoch": 0.14,
"learning_rate": 1.8621924634070386e-05,
"loss": 0.3534,
"step": 1770
},
{
"epoch": 0.14,
"learning_rate": 1.861413889753971e-05,
"loss": 0.303,
"step": 1780
},
{
"epoch": 0.14,
"learning_rate": 1.860635316100903e-05,
"loss": 0.2945,
"step": 1790
},
{
"epoch": 0.14,
"learning_rate": 1.859856742447836e-05,
"loss": 0.349,
"step": 1800
},
{
"epoch": 0.14,
"learning_rate": 1.859078168794768e-05,
"loss": 0.3398,
"step": 1810
},
{
"epoch": 0.14,
"learning_rate": 1.8582995951417006e-05,
"loss": 0.2744,
"step": 1820
},
{
"epoch": 0.14,
"learning_rate": 1.857521021488633e-05,
"loss": 0.3276,
"step": 1830
},
{
"epoch": 0.14,
"learning_rate": 1.8567424478355653e-05,
"loss": 0.2523,
"step": 1840
},
{
"epoch": 0.14,
"learning_rate": 1.855963874182498e-05,
"loss": 0.3286,
"step": 1850
},
{
"epoch": 0.14,
"learning_rate": 1.8551853005294304e-05,
"loss": 0.3119,
"step": 1860
},
{
"epoch": 0.15,
"learning_rate": 1.8544067268763626e-05,
"loss": 0.3293,
"step": 1870
},
{
"epoch": 0.15,
"learning_rate": 1.853628153223295e-05,
"loss": 0.3766,
"step": 1880
},
{
"epoch": 0.15,
"learning_rate": 1.8528495795702277e-05,
"loss": 0.2765,
"step": 1890
},
{
"epoch": 0.15,
"learning_rate": 1.85207100591716e-05,
"loss": 0.4001,
"step": 1900
},
{
"epoch": 0.15,
"learning_rate": 1.8512924322640924e-05,
"loss": 0.3359,
"step": 1910
},
{
"epoch": 0.15,
"learning_rate": 1.8505138586110246e-05,
"loss": 0.3543,
"step": 1920
},
{
"epoch": 0.15,
"learning_rate": 1.849735284957957e-05,
"loss": 0.3326,
"step": 1930
},
{
"epoch": 0.15,
"learning_rate": 1.8489567113048896e-05,
"loss": 0.2847,
"step": 1940
},
{
"epoch": 0.15,
"learning_rate": 1.8481781376518218e-05,
"loss": 0.3548,
"step": 1950
},
{
"epoch": 0.15,
"learning_rate": 1.8473995639987547e-05,
"loss": 0.3486,
"step": 1960
},
{
"epoch": 0.15,
"learning_rate": 1.846620990345687e-05,
"loss": 0.3246,
"step": 1970
},
{
"epoch": 0.15,
"learning_rate": 1.845842416692619e-05,
"loss": 0.4146,
"step": 1980
},
{
"epoch": 0.15,
"learning_rate": 1.845063843039552e-05,
"loss": 0.3075,
"step": 1990
},
{
"epoch": 0.16,
"learning_rate": 1.844285269386484e-05,
"loss": 0.3088,
"step": 2000
},
{
"epoch": 0.16,
"learning_rate": 1.8435066957334163e-05,
"loss": 0.3123,
"step": 2010
},
{
"epoch": 0.16,
"learning_rate": 1.842728122080349e-05,
"loss": 0.3547,
"step": 2020
},
{
"epoch": 0.16,
"learning_rate": 1.8419495484272814e-05,
"loss": 0.2637,
"step": 2030
},
{
"epoch": 0.16,
"learning_rate": 1.8411709747742136e-05,
"loss": 0.3146,
"step": 2040
},
{
"epoch": 0.16,
"learning_rate": 1.840392401121146e-05,
"loss": 0.2766,
"step": 2050
},
{
"epoch": 0.16,
"learning_rate": 1.8396138274680786e-05,
"loss": 0.3732,
"step": 2060
},
{
"epoch": 0.16,
"learning_rate": 1.838835253815011e-05,
"loss": 0.263,
"step": 2070
},
{
"epoch": 0.16,
"learning_rate": 1.8380566801619433e-05,
"loss": 0.3383,
"step": 2080
},
{
"epoch": 0.16,
"learning_rate": 1.837278106508876e-05,
"loss": 0.3495,
"step": 2090
},
{
"epoch": 0.16,
"learning_rate": 1.8364995328558084e-05,
"loss": 0.3986,
"step": 2100
},
{
"epoch": 0.16,
"learning_rate": 1.8357209592027406e-05,
"loss": 0.3264,
"step": 2110
},
{
"epoch": 0.17,
"learning_rate": 1.834942385549673e-05,
"loss": 0.3782,
"step": 2120
},
{
"epoch": 0.17,
"learning_rate": 1.8341638118966057e-05,
"loss": 0.3523,
"step": 2130
},
{
"epoch": 0.17,
"learning_rate": 1.833385238243538e-05,
"loss": 0.2724,
"step": 2140
},
{
"epoch": 0.17,
"learning_rate": 1.8326066645904704e-05,
"loss": 0.3245,
"step": 2150
},
{
"epoch": 0.17,
"learning_rate": 1.831828090937403e-05,
"loss": 0.259,
"step": 2160
},
{
"epoch": 0.17,
"learning_rate": 1.831049517284335e-05,
"loss": 0.4271,
"step": 2170
},
{
"epoch": 0.17,
"learning_rate": 1.8302709436312676e-05,
"loss": 0.2523,
"step": 2180
},
{
"epoch": 0.17,
"learning_rate": 1.8294923699782e-05,
"loss": 0.267,
"step": 2190
},
{
"epoch": 0.17,
"learning_rate": 1.8287137963251324e-05,
"loss": 0.3008,
"step": 2200
},
{
"epoch": 0.17,
"learning_rate": 1.827935222672065e-05,
"loss": 0.3555,
"step": 2210
},
{
"epoch": 0.17,
"learning_rate": 1.8271566490189974e-05,
"loss": 0.3355,
"step": 2220
},
{
"epoch": 0.17,
"learning_rate": 1.8263780753659296e-05,
"loss": 0.2799,
"step": 2230
},
{
"epoch": 0.17,
"learning_rate": 1.825599501712862e-05,
"loss": 0.3444,
"step": 2240
},
{
"epoch": 0.18,
"learning_rate": 1.8248209280597947e-05,
"loss": 0.2557,
"step": 2250
},
{
"epoch": 0.18,
"learning_rate": 1.824042354406727e-05,
"loss": 0.316,
"step": 2260
},
{
"epoch": 0.18,
"learning_rate": 1.8232637807536594e-05,
"loss": 0.2644,
"step": 2270
},
{
"epoch": 0.18,
"learning_rate": 1.822485207100592e-05,
"loss": 0.3134,
"step": 2280
},
{
"epoch": 0.18,
"learning_rate": 1.821706633447524e-05,
"loss": 0.243,
"step": 2290
},
{
"epoch": 0.18,
"learning_rate": 1.8209280597944566e-05,
"loss": 0.4135,
"step": 2300
},
{
"epoch": 0.18,
"learning_rate": 1.820149486141389e-05,
"loss": 0.2781,
"step": 2310
},
{
"epoch": 0.18,
"learning_rate": 1.8193709124883217e-05,
"loss": 0.2512,
"step": 2320
},
{
"epoch": 0.18,
"learning_rate": 1.818592338835254e-05,
"loss": 0.2923,
"step": 2330
},
{
"epoch": 0.18,
"learning_rate": 1.8178137651821864e-05,
"loss": 0.3151,
"step": 2340
},
{
"epoch": 0.18,
"learning_rate": 1.817035191529119e-05,
"loss": 0.2766,
"step": 2350
},
{
"epoch": 0.18,
"learning_rate": 1.816256617876051e-05,
"loss": 0.3372,
"step": 2360
},
{
"epoch": 0.18,
"learning_rate": 1.8154780442229837e-05,
"loss": 0.3924,
"step": 2370
},
{
"epoch": 0.19,
"learning_rate": 1.8146994705699162e-05,
"loss": 0.2954,
"step": 2380
},
{
"epoch": 0.19,
"learning_rate": 1.8139208969168484e-05,
"loss": 0.2981,
"step": 2390
},
{
"epoch": 0.19,
"learning_rate": 1.813142323263781e-05,
"loss": 0.3011,
"step": 2400
},
{
"epoch": 0.19,
"learning_rate": 1.8123637496107134e-05,
"loss": 0.2724,
"step": 2410
},
{
"epoch": 0.19,
"learning_rate": 1.8115851759576456e-05,
"loss": 0.3791,
"step": 2420
},
{
"epoch": 0.19,
"learning_rate": 1.8108066023045782e-05,
"loss": 0.3037,
"step": 2430
},
{
"epoch": 0.19,
"learning_rate": 1.8100280286515107e-05,
"loss": 0.2986,
"step": 2440
},
{
"epoch": 0.19,
"learning_rate": 1.809249454998443e-05,
"loss": 0.3418,
"step": 2450
},
{
"epoch": 0.19,
"learning_rate": 1.8084708813453754e-05,
"loss": 0.3293,
"step": 2460
},
{
"epoch": 0.19,
"learning_rate": 1.807692307692308e-05,
"loss": 0.3038,
"step": 2470
},
{
"epoch": 0.19,
"learning_rate": 1.80691373403924e-05,
"loss": 0.3099,
"step": 2480
},
{
"epoch": 0.19,
"learning_rate": 1.8061351603861727e-05,
"loss": 0.3628,
"step": 2490
},
{
"epoch": 0.19,
"learning_rate": 1.8053565867331052e-05,
"loss": 0.3277,
"step": 2500
},
{
"epoch": 0.2,
"learning_rate": 1.8045780130800374e-05,
"loss": 0.2927,
"step": 2510
},
{
"epoch": 0.2,
"learning_rate": 1.80379943942697e-05,
"loss": 0.2666,
"step": 2520
},
{
"epoch": 0.2,
"learning_rate": 1.8030208657739025e-05,
"loss": 0.4419,
"step": 2530
},
{
"epoch": 0.2,
"learning_rate": 1.8022422921208346e-05,
"loss": 0.3165,
"step": 2540
},
{
"epoch": 0.2,
"learning_rate": 1.8014637184677672e-05,
"loss": 0.3474,
"step": 2550
},
{
"epoch": 0.2,
"learning_rate": 1.8006851448146997e-05,
"loss": 0.3313,
"step": 2560
},
{
"epoch": 0.2,
"learning_rate": 1.7999065711616322e-05,
"loss": 0.4063,
"step": 2570
},
{
"epoch": 0.2,
"learning_rate": 1.7991279975085644e-05,
"loss": 0.3396,
"step": 2580
},
{
"epoch": 0.2,
"learning_rate": 1.798349423855497e-05,
"loss": 0.2792,
"step": 2590
},
{
"epoch": 0.2,
"learning_rate": 1.7975708502024295e-05,
"loss": 0.3595,
"step": 2600
},
{
"epoch": 0.2,
"learning_rate": 1.7967922765493617e-05,
"loss": 0.2986,
"step": 2610
},
{
"epoch": 0.2,
"learning_rate": 1.7960137028962942e-05,
"loss": 0.3442,
"step": 2620
},
{
"epoch": 0.2,
"learning_rate": 1.7952351292432267e-05,
"loss": 0.3373,
"step": 2630
},
{
"epoch": 0.21,
"learning_rate": 1.794456555590159e-05,
"loss": 0.273,
"step": 2640
},
{
"epoch": 0.21,
"learning_rate": 1.7936779819370915e-05,
"loss": 0.3987,
"step": 2650
},
{
"epoch": 0.21,
"learning_rate": 1.792899408284024e-05,
"loss": 0.3173,
"step": 2660
},
{
"epoch": 0.21,
"learning_rate": 1.7921208346309562e-05,
"loss": 0.3384,
"step": 2670
},
{
"epoch": 0.21,
"learning_rate": 1.7913422609778887e-05,
"loss": 0.376,
"step": 2680
},
{
"epoch": 0.21,
"learning_rate": 1.7905636873248212e-05,
"loss": 0.2247,
"step": 2690
},
{
"epoch": 0.21,
"learning_rate": 1.7897851136717534e-05,
"loss": 0.326,
"step": 2700
},
{
"epoch": 0.21,
"learning_rate": 1.789006540018686e-05,
"loss": 0.2287,
"step": 2710
},
{
"epoch": 0.21,
"learning_rate": 1.7882279663656185e-05,
"loss": 0.3976,
"step": 2720
},
{
"epoch": 0.21,
"learning_rate": 1.7874493927125507e-05,
"loss": 0.2625,
"step": 2730
},
{
"epoch": 0.21,
"learning_rate": 1.7866708190594832e-05,
"loss": 0.3386,
"step": 2740
},
{
"epoch": 0.21,
"learning_rate": 1.7858922454064157e-05,
"loss": 0.3251,
"step": 2750
},
{
"epoch": 0.21,
"learning_rate": 1.785113671753348e-05,
"loss": 0.3312,
"step": 2760
},
{
"epoch": 0.22,
"learning_rate": 1.7843350981002805e-05,
"loss": 0.3089,
"step": 2770
},
{
"epoch": 0.22,
"learning_rate": 1.783556524447213e-05,
"loss": 0.3043,
"step": 2780
},
{
"epoch": 0.22,
"learning_rate": 1.7827779507941452e-05,
"loss": 0.3096,
"step": 2790
},
{
"epoch": 0.22,
"learning_rate": 1.7819993771410777e-05,
"loss": 0.282,
"step": 2800
},
{
"epoch": 0.22,
"learning_rate": 1.7812208034880102e-05,
"loss": 0.3608,
"step": 2810
},
{
"epoch": 0.22,
"learning_rate": 1.7804422298349428e-05,
"loss": 0.3096,
"step": 2820
},
{
"epoch": 0.22,
"learning_rate": 1.779663656181875e-05,
"loss": 0.3158,
"step": 2830
},
{
"epoch": 0.22,
"learning_rate": 1.778885082528807e-05,
"loss": 0.3508,
"step": 2840
},
{
"epoch": 0.22,
"learning_rate": 1.77810650887574e-05,
"loss": 0.2838,
"step": 2850
},
{
"epoch": 0.22,
"learning_rate": 1.7773279352226722e-05,
"loss": 0.2463,
"step": 2860
},
{
"epoch": 0.22,
"learning_rate": 1.7765493615696044e-05,
"loss": 0.4101,
"step": 2870
},
{
"epoch": 0.22,
"learning_rate": 1.7757707879165373e-05,
"loss": 0.3024,
"step": 2880
},
{
"epoch": 0.23,
"learning_rate": 1.7749922142634695e-05,
"loss": 0.3887,
"step": 2890
},
{
"epoch": 0.23,
"learning_rate": 1.7742136406104017e-05,
"loss": 0.3184,
"step": 2900
},
{
"epoch": 0.23,
"learning_rate": 1.7734350669573345e-05,
"loss": 0.3213,
"step": 2910
},
{
"epoch": 0.23,
"learning_rate": 1.7726564933042667e-05,
"loss": 0.3363,
"step": 2920
},
{
"epoch": 0.23,
"learning_rate": 1.771877919651199e-05,
"loss": 0.3635,
"step": 2930
},
{
"epoch": 0.23,
"learning_rate": 1.7710993459981314e-05,
"loss": 0.2902,
"step": 2940
},
{
"epoch": 0.23,
"learning_rate": 1.770320772345064e-05,
"loss": 0.3068,
"step": 2950
},
{
"epoch": 0.23,
"learning_rate": 1.7695421986919965e-05,
"loss": 0.2164,
"step": 2960
},
{
"epoch": 0.23,
"learning_rate": 1.7687636250389287e-05,
"loss": 0.3048,
"step": 2970
},
{
"epoch": 0.23,
"learning_rate": 1.7679850513858612e-05,
"loss": 0.3578,
"step": 2980
},
{
"epoch": 0.23,
"learning_rate": 1.7672064777327937e-05,
"loss": 0.3142,
"step": 2990
},
{
"epoch": 0.23,
"learning_rate": 1.766427904079726e-05,
"loss": 0.3153,
"step": 3000
},
{
"epoch": 0.23,
"learning_rate": 1.7656493304266585e-05,
"loss": 0.254,
"step": 3010
},
{
"epoch": 0.24,
"learning_rate": 1.764870756773591e-05,
"loss": 0.3416,
"step": 3020
},
{
"epoch": 0.24,
"learning_rate": 1.7640921831205232e-05,
"loss": 0.3015,
"step": 3030
},
{
"epoch": 0.24,
"learning_rate": 1.7633136094674557e-05,
"loss": 0.306,
"step": 3040
},
{
"epoch": 0.24,
"learning_rate": 1.7625350358143882e-05,
"loss": 0.3475,
"step": 3050
},
{
"epoch": 0.24,
"learning_rate": 1.7617564621613204e-05,
"loss": 0.2974,
"step": 3060
},
{
"epoch": 0.24,
"learning_rate": 1.760977888508253e-05,
"loss": 0.3741,
"step": 3070
},
{
"epoch": 0.24,
"learning_rate": 1.7601993148551855e-05,
"loss": 0.3203,
"step": 3080
},
{
"epoch": 0.24,
"learning_rate": 1.7594207412021177e-05,
"loss": 0.3682,
"step": 3090
},
{
"epoch": 0.24,
"learning_rate": 1.7586421675490502e-05,
"loss": 0.3191,
"step": 3100
},
{
"epoch": 0.24,
"learning_rate": 1.7578635938959828e-05,
"loss": 0.2515,
"step": 3110
},
{
"epoch": 0.24,
"learning_rate": 1.757085020242915e-05,
"loss": 0.394,
"step": 3120
},
{
"epoch": 0.24,
"learning_rate": 1.7563064465898475e-05,
"loss": 0.317,
"step": 3130
},
{
"epoch": 0.24,
"learning_rate": 1.75552787293678e-05,
"loss": 0.2698,
"step": 3140
},
{
"epoch": 0.25,
"learning_rate": 1.7547492992837122e-05,
"loss": 0.3299,
"step": 3150
},
{
"epoch": 0.25,
"learning_rate": 1.7539707256306447e-05,
"loss": 0.3183,
"step": 3160
},
{
"epoch": 0.25,
"learning_rate": 1.7531921519775773e-05,
"loss": 0.3598,
"step": 3170
},
{
"epoch": 0.25,
"learning_rate": 1.7524135783245094e-05,
"loss": 0.3637,
"step": 3180
},
{
"epoch": 0.25,
"learning_rate": 1.751635004671442e-05,
"loss": 0.2586,
"step": 3190
},
{
"epoch": 0.25,
"learning_rate": 1.7508564310183745e-05,
"loss": 0.3151,
"step": 3200
},
{
"epoch": 0.25,
"learning_rate": 1.750077857365307e-05,
"loss": 0.2587,
"step": 3210
},
{
"epoch": 0.25,
"learning_rate": 1.7492992837122392e-05,
"loss": 0.342,
"step": 3220
},
{
"epoch": 0.25,
"learning_rate": 1.7485207100591718e-05,
"loss": 0.305,
"step": 3230
},
{
"epoch": 0.25,
"learning_rate": 1.7477421364061043e-05,
"loss": 0.3429,
"step": 3240
},
{
"epoch": 0.25,
"learning_rate": 1.7469635627530365e-05,
"loss": 0.2381,
"step": 3250
},
{
"epoch": 0.25,
"learning_rate": 1.746184989099969e-05,
"loss": 0.2777,
"step": 3260
},
{
"epoch": 0.25,
"learning_rate": 1.7454064154469015e-05,
"loss": 0.3232,
"step": 3270
},
{
"epoch": 0.26,
"learning_rate": 1.7446278417938337e-05,
"loss": 0.2442,
"step": 3280
},
{
"epoch": 0.26,
"learning_rate": 1.7438492681407663e-05,
"loss": 0.2881,
"step": 3290
},
{
"epoch": 0.26,
"learning_rate": 1.7430706944876988e-05,
"loss": 0.299,
"step": 3300
},
{
"epoch": 0.26,
"learning_rate": 1.742292120834631e-05,
"loss": 0.2971,
"step": 3310
},
{
"epoch": 0.26,
"learning_rate": 1.7415135471815635e-05,
"loss": 0.2872,
"step": 3320
},
{
"epoch": 0.26,
"learning_rate": 1.740734973528496e-05,
"loss": 0.2707,
"step": 3330
},
{
"epoch": 0.26,
"learning_rate": 1.7399563998754282e-05,
"loss": 0.3922,
"step": 3340
},
{
"epoch": 0.26,
"learning_rate": 1.7391778262223608e-05,
"loss": 0.371,
"step": 3350
},
{
"epoch": 0.26,
"learning_rate": 1.7383992525692933e-05,
"loss": 0.2891,
"step": 3360
},
{
"epoch": 0.26,
"learning_rate": 1.7376206789162255e-05,
"loss": 0.3711,
"step": 3370
},
{
"epoch": 0.26,
"learning_rate": 1.736842105263158e-05,
"loss": 0.2751,
"step": 3380
},
{
"epoch": 0.26,
"learning_rate": 1.7360635316100905e-05,
"loss": 0.3483,
"step": 3390
},
{
"epoch": 0.26,
"learning_rate": 1.7352849579570227e-05,
"loss": 0.3248,
"step": 3400
},
{
"epoch": 0.27,
"learning_rate": 1.7345063843039553e-05,
"loss": 0.2927,
"step": 3410
},
{
"epoch": 0.27,
"learning_rate": 1.7337278106508878e-05,
"loss": 0.3026,
"step": 3420
},
{
"epoch": 0.27,
"learning_rate": 1.73294923699782e-05,
"loss": 0.2577,
"step": 3430
},
{
"epoch": 0.27,
"learning_rate": 1.7321706633447525e-05,
"loss": 0.2536,
"step": 3440
},
{
"epoch": 0.27,
"learning_rate": 1.731392089691685e-05,
"loss": 0.3338,
"step": 3450
},
{
"epoch": 0.27,
"learning_rate": 1.7306135160386176e-05,
"loss": 0.3278,
"step": 3460
},
{
"epoch": 0.27,
"learning_rate": 1.7298349423855498e-05,
"loss": 0.3179,
"step": 3470
},
{
"epoch": 0.27,
"learning_rate": 1.7290563687324823e-05,
"loss": 0.3449,
"step": 3480
},
{
"epoch": 0.27,
"learning_rate": 1.7282777950794148e-05,
"loss": 0.3133,
"step": 3490
},
{
"epoch": 0.27,
"learning_rate": 1.727499221426347e-05,
"loss": 0.3097,
"step": 3500
},
{
"epoch": 0.27,
"learning_rate": 1.7267206477732795e-05,
"loss": 0.3252,
"step": 3510
},
{
"epoch": 0.27,
"learning_rate": 1.725942074120212e-05,
"loss": 0.3041,
"step": 3520
},
{
"epoch": 0.27,
"learning_rate": 1.7251635004671443e-05,
"loss": 0.2786,
"step": 3530
},
{
"epoch": 0.28,
"learning_rate": 1.7243849268140768e-05,
"loss": 0.2478,
"step": 3540
},
{
"epoch": 0.28,
"learning_rate": 1.7236063531610093e-05,
"loss": 0.2706,
"step": 3550
},
{
"epoch": 0.28,
"learning_rate": 1.7228277795079415e-05,
"loss": 0.3272,
"step": 3560
},
{
"epoch": 0.28,
"learning_rate": 1.722049205854874e-05,
"loss": 0.3018,
"step": 3570
},
{
"epoch": 0.28,
"learning_rate": 1.7212706322018066e-05,
"loss": 0.2802,
"step": 3580
},
{
"epoch": 0.28,
"learning_rate": 1.7204920585487388e-05,
"loss": 0.274,
"step": 3590
},
{
"epoch": 0.28,
"learning_rate": 1.7197134848956713e-05,
"loss": 0.3024,
"step": 3600
},
{
"epoch": 0.28,
"learning_rate": 1.7189349112426038e-05,
"loss": 0.3475,
"step": 3610
},
{
"epoch": 0.28,
"learning_rate": 1.718156337589536e-05,
"loss": 0.2996,
"step": 3620
},
{
"epoch": 0.28,
"learning_rate": 1.7173777639364685e-05,
"loss": 0.2765,
"step": 3630
},
{
"epoch": 0.28,
"learning_rate": 1.716599190283401e-05,
"loss": 0.2422,
"step": 3640
},
{
"epoch": 0.28,
"learning_rate": 1.7158206166303333e-05,
"loss": 0.3942,
"step": 3650
},
{
"epoch": 0.28,
"learning_rate": 1.7150420429772658e-05,
"loss": 0.2361,
"step": 3660
},
{
"epoch": 0.29,
"learning_rate": 1.7142634693241983e-05,
"loss": 0.3354,
"step": 3670
},
{
"epoch": 0.29,
"learning_rate": 1.7134848956711305e-05,
"loss": 0.2544,
"step": 3680
},
{
"epoch": 0.29,
"learning_rate": 1.712706322018063e-05,
"loss": 0.3437,
"step": 3690
},
{
"epoch": 0.29,
"learning_rate": 1.7119277483649956e-05,
"loss": 0.2315,
"step": 3700
},
{
"epoch": 0.29,
"learning_rate": 1.711149174711928e-05,
"loss": 0.296,
"step": 3710
},
{
"epoch": 0.29,
"learning_rate": 1.7103706010588603e-05,
"loss": 0.2921,
"step": 3720
},
{
"epoch": 0.29,
"learning_rate": 1.709592027405793e-05,
"loss": 0.33,
"step": 3730
},
{
"epoch": 0.29,
"learning_rate": 1.7088134537527254e-05,
"loss": 0.2921,
"step": 3740
},
{
"epoch": 0.29,
"learning_rate": 1.7080348800996576e-05,
"loss": 0.3615,
"step": 3750
},
{
"epoch": 0.29,
"learning_rate": 1.7072563064465897e-05,
"loss": 0.3131,
"step": 3760
},
{
"epoch": 0.29,
"learning_rate": 1.7064777327935226e-05,
"loss": 0.372,
"step": 3770
},
{
"epoch": 0.29,
"learning_rate": 1.7056991591404548e-05,
"loss": 0.2867,
"step": 3780
},
{
"epoch": 0.3,
"learning_rate": 1.704920585487387e-05,
"loss": 0.3098,
"step": 3790
},
{
"epoch": 0.3,
"learning_rate": 1.70414201183432e-05,
"loss": 0.2592,
"step": 3800
},
{
"epoch": 0.3,
"learning_rate": 1.703363438181252e-05,
"loss": 0.3017,
"step": 3810
},
{
"epoch": 0.3,
"learning_rate": 1.7025848645281846e-05,
"loss": 0.3378,
"step": 3820
},
{
"epoch": 0.3,
"learning_rate": 1.701806290875117e-05,
"loss": 0.2605,
"step": 3830
},
{
"epoch": 0.3,
"learning_rate": 1.7010277172220493e-05,
"loss": 0.2793,
"step": 3840
},
{
"epoch": 0.3,
"learning_rate": 1.700249143568982e-05,
"loss": 0.2787,
"step": 3850
},
{
"epoch": 0.3,
"learning_rate": 1.699470569915914e-05,
"loss": 0.3085,
"step": 3860
},
{
"epoch": 0.3,
"learning_rate": 1.6986919962628466e-05,
"loss": 0.2703,
"step": 3870
},
{
"epoch": 0.3,
"learning_rate": 1.697913422609779e-05,
"loss": 0.3528,
"step": 3880
},
{
"epoch": 0.3,
"learning_rate": 1.6971348489567113e-05,
"loss": 0.2685,
"step": 3890
},
{
"epoch": 0.3,
"learning_rate": 1.6963562753036438e-05,
"loss": 0.3184,
"step": 3900
},
{
"epoch": 0.3,
"learning_rate": 1.6955777016505763e-05,
"loss": 0.2761,
"step": 3910
},
{
"epoch": 0.31,
"learning_rate": 1.6947991279975085e-05,
"loss": 0.2261,
"step": 3920
},
{
"epoch": 0.31,
"learning_rate": 1.694020554344441e-05,
"loss": 0.3712,
"step": 3930
},
{
"epoch": 0.31,
"learning_rate": 1.6932419806913736e-05,
"loss": 0.2567,
"step": 3940
},
{
"epoch": 0.31,
"learning_rate": 1.6924634070383058e-05,
"loss": 0.2936,
"step": 3950
},
{
"epoch": 0.31,
"learning_rate": 1.6916848333852386e-05,
"loss": 0.2667,
"step": 3960
},
{
"epoch": 0.31,
"learning_rate": 1.690906259732171e-05,
"loss": 0.35,
"step": 3970
},
{
"epoch": 0.31,
"learning_rate": 1.690127686079103e-05,
"loss": 0.2335,
"step": 3980
},
{
"epoch": 0.31,
"learning_rate": 1.6893491124260356e-05,
"loss": 0.2586,
"step": 3990
},
{
"epoch": 0.31,
"learning_rate": 1.688570538772968e-05,
"loss": 0.3377,
"step": 4000
},
{
"epoch": 0.31,
"learning_rate": 1.6877919651199003e-05,
"loss": 0.2969,
"step": 4010
},
{
"epoch": 0.31,
"learning_rate": 1.6870133914668328e-05,
"loss": 0.2893,
"step": 4020
},
{
"epoch": 0.31,
"learning_rate": 1.6862348178137653e-05,
"loss": 0.3195,
"step": 4030
},
{
"epoch": 0.31,
"learning_rate": 1.6854562441606975e-05,
"loss": 0.3279,
"step": 4040
},
{
"epoch": 0.32,
"learning_rate": 1.68467767050763e-05,
"loss": 0.2271,
"step": 4050
},
{
"epoch": 0.32,
"learning_rate": 1.6838990968545626e-05,
"loss": 0.3524,
"step": 4060
},
{
"epoch": 0.32,
"learning_rate": 1.683120523201495e-05,
"loss": 0.2838,
"step": 4070
},
{
"epoch": 0.32,
"learning_rate": 1.6823419495484273e-05,
"loss": 0.364,
"step": 4080
},
{
"epoch": 0.32,
"learning_rate": 1.68156337589536e-05,
"loss": 0.2356,
"step": 4090
},
{
"epoch": 0.32,
"learning_rate": 1.6807848022422924e-05,
"loss": 0.33,
"step": 4100
},
{
"epoch": 0.32,
"learning_rate": 1.6800062285892246e-05,
"loss": 0.3097,
"step": 4110
},
{
"epoch": 0.32,
"learning_rate": 1.679227654936157e-05,
"loss": 0.3582,
"step": 4120
},
{
"epoch": 0.32,
"learning_rate": 1.6784490812830896e-05,
"loss": 0.2895,
"step": 4130
},
{
"epoch": 0.32,
"learning_rate": 1.6776705076300218e-05,
"loss": 0.2226,
"step": 4140
},
{
"epoch": 0.32,
"learning_rate": 1.6768919339769543e-05,
"loss": 0.255,
"step": 4150
},
{
"epoch": 0.32,
"learning_rate": 1.676113360323887e-05,
"loss": 0.2919,
"step": 4160
},
{
"epoch": 0.32,
"learning_rate": 1.675334786670819e-05,
"loss": 0.3014,
"step": 4170
},
{
"epoch": 0.33,
"learning_rate": 1.6745562130177516e-05,
"loss": 0.3285,
"step": 4180
},
{
"epoch": 0.33,
"learning_rate": 1.673777639364684e-05,
"loss": 0.3023,
"step": 4190
},
{
"epoch": 0.33,
"learning_rate": 1.6729990657116163e-05,
"loss": 0.2802,
"step": 4200
},
{
"epoch": 0.33,
"learning_rate": 1.672220492058549e-05,
"loss": 0.2365,
"step": 4210
},
{
"epoch": 0.33,
"learning_rate": 1.6714419184054814e-05,
"loss": 0.2338,
"step": 4220
},
{
"epoch": 0.33,
"learning_rate": 1.6706633447524136e-05,
"loss": 0.3102,
"step": 4230
},
{
"epoch": 0.33,
"learning_rate": 1.669884771099346e-05,
"loss": 0.3002,
"step": 4240
},
{
"epoch": 0.33,
"learning_rate": 1.6691061974462786e-05,
"loss": 0.2361,
"step": 4250
},
{
"epoch": 0.33,
"learning_rate": 1.6683276237932108e-05,
"loss": 0.2159,
"step": 4260
},
{
"epoch": 0.33,
"learning_rate": 1.6675490501401433e-05,
"loss": 0.2764,
"step": 4270
},
{
"epoch": 0.33,
"learning_rate": 1.666770476487076e-05,
"loss": 0.2654,
"step": 4280
},
{
"epoch": 0.33,
"learning_rate": 1.665991902834008e-05,
"loss": 0.2997,
"step": 4290
},
{
"epoch": 0.33,
"learning_rate": 1.6652133291809406e-05,
"loss": 0.257,
"step": 4300
},
{
"epoch": 0.34,
"learning_rate": 1.664434755527873e-05,
"loss": 0.3513,
"step": 4310
},
{
"epoch": 0.34,
"learning_rate": 1.6636561818748057e-05,
"loss": 0.2621,
"step": 4320
},
{
"epoch": 0.34,
"learning_rate": 1.662877608221738e-05,
"loss": 0.2865,
"step": 4330
},
{
"epoch": 0.34,
"learning_rate": 1.6620990345686704e-05,
"loss": 0.3538,
"step": 4340
},
{
"epoch": 0.34,
"learning_rate": 1.661320460915603e-05,
"loss": 0.2507,
"step": 4350
},
{
"epoch": 0.34,
"learning_rate": 1.660541887262535e-05,
"loss": 0.2413,
"step": 4360
},
{
"epoch": 0.34,
"learning_rate": 1.6597633136094676e-05,
"loss": 0.2306,
"step": 4370
},
{
"epoch": 0.34,
"learning_rate": 1.6589847399564e-05,
"loss": 0.3176,
"step": 4380
},
{
"epoch": 0.34,
"learning_rate": 1.6582061663033324e-05,
"loss": 0.2894,
"step": 4390
},
{
"epoch": 0.34,
"learning_rate": 1.657427592650265e-05,
"loss": 0.2636,
"step": 4400
},
{
"epoch": 0.34,
"learning_rate": 1.6566490189971974e-05,
"loss": 0.4195,
"step": 4410
},
{
"epoch": 0.34,
"learning_rate": 1.6558704453441296e-05,
"loss": 0.3316,
"step": 4420
},
{
"epoch": 0.34,
"learning_rate": 1.655091871691062e-05,
"loss": 0.2798,
"step": 4430
},
{
"epoch": 0.35,
"learning_rate": 1.6543132980379947e-05,
"loss": 0.3231,
"step": 4440
},
{
"epoch": 0.35,
"learning_rate": 1.653534724384927e-05,
"loss": 0.2856,
"step": 4450
},
{
"epoch": 0.35,
"learning_rate": 1.6527561507318594e-05,
"loss": 0.444,
"step": 4460
},
{
"epoch": 0.35,
"learning_rate": 1.651977577078792e-05,
"loss": 0.2894,
"step": 4470
},
{
"epoch": 0.35,
"learning_rate": 1.651199003425724e-05,
"loss": 0.2602,
"step": 4480
},
{
"epoch": 0.35,
"learning_rate": 1.6504204297726566e-05,
"loss": 0.2079,
"step": 4490
},
{
"epoch": 0.35,
"learning_rate": 1.649641856119589e-05,
"loss": 0.3006,
"step": 4500
},
{
"epoch": 0.35,
"learning_rate": 1.6488632824665214e-05,
"loss": 0.2701,
"step": 4510
},
{
"epoch": 0.35,
"learning_rate": 1.648084708813454e-05,
"loss": 0.2697,
"step": 4520
},
{
"epoch": 0.35,
"learning_rate": 1.6473061351603864e-05,
"loss": 0.3324,
"step": 4530
},
{
"epoch": 0.35,
"learning_rate": 1.6465275615073186e-05,
"loss": 0.3033,
"step": 4540
},
{
"epoch": 0.35,
"learning_rate": 1.645748987854251e-05,
"loss": 0.3216,
"step": 4550
},
{
"epoch": 0.36,
"learning_rate": 1.6449704142011837e-05,
"loss": 0.2848,
"step": 4560
},
{
"epoch": 0.36,
"learning_rate": 1.6441918405481162e-05,
"loss": 0.2466,
"step": 4570
},
{
"epoch": 0.36,
"learning_rate": 1.6434132668950484e-05,
"loss": 0.3451,
"step": 4580
},
{
"epoch": 0.36,
"learning_rate": 1.642634693241981e-05,
"loss": 0.3165,
"step": 4590
},
{
"epoch": 0.36,
"learning_rate": 1.6418561195889134e-05,
"loss": 0.3146,
"step": 4600
},
{
"epoch": 0.36,
"learning_rate": 1.6410775459358456e-05,
"loss": 0.1862,
"step": 4610
},
{
"epoch": 0.36,
"learning_rate": 1.640298972282778e-05,
"loss": 0.255,
"step": 4620
},
{
"epoch": 0.36,
"learning_rate": 1.6395203986297107e-05,
"loss": 0.3015,
"step": 4630
},
{
"epoch": 0.36,
"learning_rate": 1.638741824976643e-05,
"loss": 0.2665,
"step": 4640
},
{
"epoch": 0.36,
"learning_rate": 1.6379632513235754e-05,
"loss": 0.234,
"step": 4650
},
{
"epoch": 0.36,
"learning_rate": 1.637184677670508e-05,
"loss": 0.2748,
"step": 4660
},
{
"epoch": 0.36,
"learning_rate": 1.63640610401744e-05,
"loss": 0.2846,
"step": 4670
},
{
"epoch": 0.36,
"learning_rate": 1.6356275303643723e-05,
"loss": 0.2349,
"step": 4680
},
{
"epoch": 0.37,
"learning_rate": 1.6348489567113052e-05,
"loss": 0.2729,
"step": 4690
},
{
"epoch": 0.37,
"learning_rate": 1.6340703830582374e-05,
"loss": 0.1976,
"step": 4700
},
{
"epoch": 0.37,
"learning_rate": 1.63329180940517e-05,
"loss": 0.3199,
"step": 4710
},
{
"epoch": 0.37,
"learning_rate": 1.6325132357521025e-05,
"loss": 0.4013,
"step": 4720
},
{
"epoch": 0.37,
"learning_rate": 1.6317346620990346e-05,
"loss": 0.2417,
"step": 4730
},
{
"epoch": 0.37,
"learning_rate": 1.6309560884459672e-05,
"loss": 0.2593,
"step": 4740
},
{
"epoch": 0.37,
"learning_rate": 1.6301775147928997e-05,
"loss": 0.2663,
"step": 4750
},
{
"epoch": 0.37,
"learning_rate": 1.629398941139832e-05,
"loss": 0.2613,
"step": 4760
},
{
"epoch": 0.37,
"learning_rate": 1.6286203674867644e-05,
"loss": 0.3266,
"step": 4770
},
{
"epoch": 0.37,
"learning_rate": 1.6278417938336966e-05,
"loss": 0.3107,
"step": 4780
},
{
"epoch": 0.37,
"learning_rate": 1.627063220180629e-05,
"loss": 0.278,
"step": 4790
},
{
"epoch": 0.37,
"learning_rate": 1.6262846465275617e-05,
"loss": 0.3524,
"step": 4800
},
{
"epoch": 0.37,
"learning_rate": 1.625506072874494e-05,
"loss": 0.2858,
"step": 4810
},
{
"epoch": 0.38,
"learning_rate": 1.6247274992214267e-05,
"loss": 0.292,
"step": 4820
},
{
"epoch": 0.38,
"learning_rate": 1.623948925568359e-05,
"loss": 0.2983,
"step": 4830
},
{
"epoch": 0.38,
"learning_rate": 1.623170351915291e-05,
"loss": 0.2949,
"step": 4840
},
{
"epoch": 0.38,
"learning_rate": 1.622391778262224e-05,
"loss": 0.3657,
"step": 4850
},
{
"epoch": 0.38,
"learning_rate": 1.6216132046091562e-05,
"loss": 0.3057,
"step": 4860
},
{
"epoch": 0.38,
"learning_rate": 1.6208346309560884e-05,
"loss": 0.2418,
"step": 4870
},
{
"epoch": 0.38,
"learning_rate": 1.6200560573030212e-05,
"loss": 0.3026,
"step": 4880
},
{
"epoch": 0.38,
"learning_rate": 1.6192774836499534e-05,
"loss": 0.2972,
"step": 4890
},
{
"epoch": 0.38,
"learning_rate": 1.6184989099968856e-05,
"loss": 0.2747,
"step": 4900
},
{
"epoch": 0.38,
"learning_rate": 1.617720336343818e-05,
"loss": 0.2987,
"step": 4910
},
{
"epoch": 0.38,
"learning_rate": 1.6169417626907507e-05,
"loss": 0.3031,
"step": 4920
},
{
"epoch": 0.38,
"learning_rate": 1.616163189037683e-05,
"loss": 0.3612,
"step": 4930
},
{
"epoch": 0.38,
"learning_rate": 1.6153846153846154e-05,
"loss": 0.2909,
"step": 4940
},
{
"epoch": 0.39,
"learning_rate": 1.614606041731548e-05,
"loss": 0.2469,
"step": 4950
},
{
"epoch": 0.39,
"learning_rate": 1.6138274680784805e-05,
"loss": 0.2322,
"step": 4960
},
{
"epoch": 0.39,
"learning_rate": 1.6130488944254127e-05,
"loss": 0.3777,
"step": 4970
},
{
"epoch": 0.39,
"learning_rate": 1.6122703207723452e-05,
"loss": 0.2769,
"step": 4980
},
{
"epoch": 0.39,
"learning_rate": 1.6114917471192777e-05,
"loss": 0.3218,
"step": 4990
},
{
"epoch": 0.39,
"learning_rate": 1.61071317346621e-05,
"loss": 0.3272,
"step": 5000
},
{
"epoch": 0.39,
"learning_rate": 1.6099345998131424e-05,
"loss": 0.3187,
"step": 5010
},
{
"epoch": 0.39,
"learning_rate": 1.609156026160075e-05,
"loss": 0.228,
"step": 5020
},
{
"epoch": 0.39,
"learning_rate": 1.608377452507007e-05,
"loss": 0.2892,
"step": 5030
},
{
"epoch": 0.39,
"learning_rate": 1.6075988788539397e-05,
"loss": 0.3135,
"step": 5040
},
{
"epoch": 0.39,
"learning_rate": 1.6068203052008722e-05,
"loss": 0.2711,
"step": 5050
},
{
"epoch": 0.39,
"learning_rate": 1.6060417315478044e-05,
"loss": 0.2548,
"step": 5060
},
{
"epoch": 0.39,
"learning_rate": 1.605263157894737e-05,
"loss": 0.256,
"step": 5070
},
{
"epoch": 0.4,
"learning_rate": 1.6044845842416695e-05,
"loss": 0.3005,
"step": 5080
},
{
"epoch": 0.4,
"learning_rate": 1.6037060105886017e-05,
"loss": 0.3019,
"step": 5090
},
{
"epoch": 0.4,
"learning_rate": 1.6029274369355342e-05,
"loss": 0.2936,
"step": 5100
},
{
"epoch": 0.4,
"learning_rate": 1.6021488632824667e-05,
"loss": 0.3083,
"step": 5110
},
{
"epoch": 0.4,
"learning_rate": 1.601370289629399e-05,
"loss": 0.2696,
"step": 5120
},
{
"epoch": 0.4,
"learning_rate": 1.6005917159763314e-05,
"loss": 0.3029,
"step": 5130
},
{
"epoch": 0.4,
"learning_rate": 1.599813142323264e-05,
"loss": 0.2439,
"step": 5140
},
{
"epoch": 0.4,
"learning_rate": 1.599034568670196e-05,
"loss": 0.2714,
"step": 5150
},
{
"epoch": 0.4,
"learning_rate": 1.5982559950171287e-05,
"loss": 0.3455,
"step": 5160
},
{
"epoch": 0.4,
"learning_rate": 1.5974774213640612e-05,
"loss": 0.273,
"step": 5170
},
{
"epoch": 0.4,
"learning_rate": 1.5966988477109934e-05,
"loss": 0.216,
"step": 5180
},
{
"epoch": 0.4,
"learning_rate": 1.595920274057926e-05,
"loss": 0.3535,
"step": 5190
},
{
"epoch": 0.4,
"learning_rate": 1.5951417004048585e-05,
"loss": 0.2712,
"step": 5200
},
{
"epoch": 0.41,
"learning_rate": 1.594363126751791e-05,
"loss": 0.4113,
"step": 5210
},
{
"epoch": 0.41,
"learning_rate": 1.5935845530987232e-05,
"loss": 0.2541,
"step": 5220
},
{
"epoch": 0.41,
"learning_rate": 1.5928059794456557e-05,
"loss": 0.2662,
"step": 5230
},
{
"epoch": 0.41,
"learning_rate": 1.5920274057925882e-05,
"loss": 0.2713,
"step": 5240
},
{
"epoch": 0.41,
"learning_rate": 1.5912488321395204e-05,
"loss": 0.252,
"step": 5250
},
{
"epoch": 0.41,
"learning_rate": 1.590470258486453e-05,
"loss": 0.3018,
"step": 5260
},
{
"epoch": 0.41,
"learning_rate": 1.5896916848333855e-05,
"loss": 0.2971,
"step": 5270
},
{
"epoch": 0.41,
"learning_rate": 1.5889131111803177e-05,
"loss": 0.2898,
"step": 5280
},
{
"epoch": 0.41,
"learning_rate": 1.5881345375272502e-05,
"loss": 0.3558,
"step": 5290
},
{
"epoch": 0.41,
"learning_rate": 1.5873559638741827e-05,
"loss": 0.3142,
"step": 5300
},
{
"epoch": 0.41,
"learning_rate": 1.586577390221115e-05,
"loss": 0.308,
"step": 5310
},
{
"epoch": 0.41,
"learning_rate": 1.5857988165680475e-05,
"loss": 0.2551,
"step": 5320
},
{
"epoch": 0.41,
"learning_rate": 1.58502024291498e-05,
"loss": 0.3259,
"step": 5330
},
{
"epoch": 0.42,
"learning_rate": 1.5842416692619122e-05,
"loss": 0.3212,
"step": 5340
},
{
"epoch": 0.42,
"learning_rate": 1.5834630956088447e-05,
"loss": 0.3083,
"step": 5350
},
{
"epoch": 0.42,
"learning_rate": 1.5826845219557773e-05,
"loss": 0.2442,
"step": 5360
},
{
"epoch": 0.42,
"learning_rate": 1.5819059483027094e-05,
"loss": 0.2715,
"step": 5370
},
{
"epoch": 0.42,
"learning_rate": 1.581127374649642e-05,
"loss": 0.314,
"step": 5380
},
{
"epoch": 0.42,
"learning_rate": 1.5803488009965745e-05,
"loss": 0.3391,
"step": 5390
},
{
"epoch": 0.42,
"learning_rate": 1.5795702273435067e-05,
"loss": 0.3402,
"step": 5400
},
{
"epoch": 0.42,
"learning_rate": 1.5787916536904392e-05,
"loss": 0.2857,
"step": 5410
},
{
"epoch": 0.42,
"learning_rate": 1.5780130800373718e-05,
"loss": 0.2099,
"step": 5420
},
{
"epoch": 0.42,
"learning_rate": 1.577234506384304e-05,
"loss": 0.2864,
"step": 5430
},
{
"epoch": 0.42,
"learning_rate": 1.5764559327312365e-05,
"loss": 0.3086,
"step": 5440
},
{
"epoch": 0.42,
"learning_rate": 1.575677359078169e-05,
"loss": 0.2425,
"step": 5450
},
{
"epoch": 0.43,
"learning_rate": 1.5748987854251015e-05,
"loss": 0.3098,
"step": 5460
},
{
"epoch": 0.43,
"learning_rate": 1.5741202117720337e-05,
"loss": 0.2194,
"step": 5470
},
{
"epoch": 0.43,
"learning_rate": 1.5733416381189663e-05,
"loss": 0.3205,
"step": 5480
},
{
"epoch": 0.43,
"learning_rate": 1.5725630644658988e-05,
"loss": 0.3202,
"step": 5490
},
{
"epoch": 0.43,
"learning_rate": 1.571784490812831e-05,
"loss": 0.3467,
"step": 5500
},
{
"epoch": 0.43,
"learning_rate": 1.5710059171597635e-05,
"loss": 0.2815,
"step": 5510
},
{
"epoch": 0.43,
"learning_rate": 1.570227343506696e-05,
"loss": 0.2683,
"step": 5520
},
{
"epoch": 0.43,
"learning_rate": 1.5694487698536282e-05,
"loss": 0.3093,
"step": 5530
},
{
"epoch": 0.43,
"learning_rate": 1.5686701962005608e-05,
"loss": 0.2464,
"step": 5540
},
{
"epoch": 0.43,
"learning_rate": 1.5678916225474933e-05,
"loss": 0.3131,
"step": 5550
},
{
"epoch": 0.43,
"learning_rate": 1.5671130488944255e-05,
"loss": 0.262,
"step": 5560
},
{
"epoch": 0.43,
"learning_rate": 1.566334475241358e-05,
"loss": 0.329,
"step": 5570
},
{
"epoch": 0.43,
"learning_rate": 1.5655559015882905e-05,
"loss": 0.3237,
"step": 5580
},
{
"epoch": 0.44,
"learning_rate": 1.5647773279352227e-05,
"loss": 0.2645,
"step": 5590
},
{
"epoch": 0.44,
"learning_rate": 1.5639987542821553e-05,
"loss": 0.3115,
"step": 5600
},
{
"epoch": 0.44,
"learning_rate": 1.5632201806290878e-05,
"loss": 0.2472,
"step": 5610
},
{
"epoch": 0.44,
"learning_rate": 1.56244160697602e-05,
"loss": 0.2549,
"step": 5620
},
{
"epoch": 0.44,
"learning_rate": 1.5616630333229525e-05,
"loss": 0.3139,
"step": 5630
},
{
"epoch": 0.44,
"learning_rate": 1.560884459669885e-05,
"loss": 0.2782,
"step": 5640
},
{
"epoch": 0.44,
"learning_rate": 1.5601058860168172e-05,
"loss": 0.2747,
"step": 5650
},
{
"epoch": 0.44,
"learning_rate": 1.5593273123637498e-05,
"loss": 0.2519,
"step": 5660
},
{
"epoch": 0.44,
"learning_rate": 1.5585487387106823e-05,
"loss": 0.3232,
"step": 5670
},
{
"epoch": 0.44,
"learning_rate": 1.5577701650576145e-05,
"loss": 0.281,
"step": 5680
},
{
"epoch": 0.44,
"learning_rate": 1.556991591404547e-05,
"loss": 0.2814,
"step": 5690
},
{
"epoch": 0.44,
"learning_rate": 1.5562130177514792e-05,
"loss": 0.3394,
"step": 5700
},
{
"epoch": 0.44,
"learning_rate": 1.555434444098412e-05,
"loss": 0.257,
"step": 5710
},
{
"epoch": 0.45,
"learning_rate": 1.5546558704453443e-05,
"loss": 0.2698,
"step": 5720
},
{
"epoch": 0.45,
"learning_rate": 1.5538772967922765e-05,
"loss": 0.3376,
"step": 5730
},
{
"epoch": 0.45,
"learning_rate": 1.5530987231392093e-05,
"loss": 0.2416,
"step": 5740
},
{
"epoch": 0.45,
"learning_rate": 1.5523201494861415e-05,
"loss": 0.3062,
"step": 5750
},
{
"epoch": 0.45,
"learning_rate": 1.5515415758330737e-05,
"loss": 0.264,
"step": 5760
},
{
"epoch": 0.45,
"learning_rate": 1.5507630021800066e-05,
"loss": 0.2184,
"step": 5770
},
{
"epoch": 0.45,
"learning_rate": 1.5499844285269388e-05,
"loss": 0.2969,
"step": 5780
},
{
"epoch": 0.45,
"learning_rate": 1.549205854873871e-05,
"loss": 0.2512,
"step": 5790
},
{
"epoch": 0.45,
"learning_rate": 1.5484272812208038e-05,
"loss": 0.2614,
"step": 5800
},
{
"epoch": 0.45,
"learning_rate": 1.547648707567736e-05,
"loss": 0.2285,
"step": 5810
},
{
"epoch": 0.45,
"learning_rate": 1.5468701339146685e-05,
"loss": 0.254,
"step": 5820
},
{
"epoch": 0.45,
"learning_rate": 1.5460915602616007e-05,
"loss": 0.2227,
"step": 5830
},
{
"epoch": 0.45,
"learning_rate": 1.5453129866085333e-05,
"loss": 0.2734,
"step": 5840
},
{
"epoch": 0.46,
"learning_rate": 1.5445344129554658e-05,
"loss": 0.2874,
"step": 5850
},
{
"epoch": 0.46,
"learning_rate": 1.543755839302398e-05,
"loss": 0.2869,
"step": 5860
},
{
"epoch": 0.46,
"learning_rate": 1.5429772656493305e-05,
"loss": 0.2445,
"step": 5870
},
{
"epoch": 0.46,
"learning_rate": 1.542198691996263e-05,
"loss": 0.331,
"step": 5880
},
{
"epoch": 0.46,
"learning_rate": 1.5414201183431952e-05,
"loss": 0.2387,
"step": 5890
},
{
"epoch": 0.46,
"learning_rate": 1.5406415446901278e-05,
"loss": 0.2279,
"step": 5900
},
{
"epoch": 0.46,
"learning_rate": 1.5398629710370603e-05,
"loss": 0.2091,
"step": 5910
},
{
"epoch": 0.46,
"learning_rate": 1.5390843973839925e-05,
"loss": 0.2894,
"step": 5920
},
{
"epoch": 0.46,
"learning_rate": 1.538305823730925e-05,
"loss": 0.3092,
"step": 5930
},
{
"epoch": 0.46,
"learning_rate": 1.5375272500778576e-05,
"loss": 0.2876,
"step": 5940
},
{
"epoch": 0.46,
"learning_rate": 1.5367486764247897e-05,
"loss": 0.2665,
"step": 5950
},
{
"epoch": 0.46,
"learning_rate": 1.5359701027717223e-05,
"loss": 0.2591,
"step": 5960
},
{
"epoch": 0.46,
"learning_rate": 1.5351915291186548e-05,
"loss": 0.3199,
"step": 5970
},
{
"epoch": 0.47,
"learning_rate": 1.534412955465587e-05,
"loss": 0.2275,
"step": 5980
},
{
"epoch": 0.47,
"learning_rate": 1.5336343818125195e-05,
"loss": 0.2238,
"step": 5990
},
{
"epoch": 0.47,
"learning_rate": 1.532855808159452e-05,
"loss": 0.2585,
"step": 6000
},
{
"epoch": 0.47,
"learning_rate": 1.5320772345063842e-05,
"loss": 0.241,
"step": 6010
},
{
"epoch": 0.47,
"learning_rate": 1.5312986608533168e-05,
"loss": 0.2876,
"step": 6020
},
{
"epoch": 0.47,
"learning_rate": 1.5305200872002493e-05,
"loss": 0.2868,
"step": 6030
},
{
"epoch": 0.47,
"learning_rate": 1.5297415135471815e-05,
"loss": 0.2299,
"step": 6040
},
{
"epoch": 0.47,
"learning_rate": 1.528962939894114e-05,
"loss": 0.3401,
"step": 6050
},
{
"epoch": 0.47,
"learning_rate": 1.5281843662410466e-05,
"loss": 0.2593,
"step": 6060
},
{
"epoch": 0.47,
"learning_rate": 1.527405792587979e-05,
"loss": 0.2513,
"step": 6070
},
{
"epoch": 0.47,
"learning_rate": 1.5266272189349113e-05,
"loss": 0.3075,
"step": 6080
},
{
"epoch": 0.47,
"learning_rate": 1.5258486452818438e-05,
"loss": 0.3274,
"step": 6090
},
{
"epoch": 0.47,
"learning_rate": 1.5250700716287763e-05,
"loss": 0.2581,
"step": 6100
},
{
"epoch": 0.48,
"learning_rate": 1.5242914979757087e-05,
"loss": 0.2692,
"step": 6110
},
{
"epoch": 0.48,
"learning_rate": 1.523512924322641e-05,
"loss": 0.3244,
"step": 6120
},
{
"epoch": 0.48,
"learning_rate": 1.5227343506695736e-05,
"loss": 0.2415,
"step": 6130
},
{
"epoch": 0.48,
"learning_rate": 1.521955777016506e-05,
"loss": 0.2424,
"step": 6140
},
{
"epoch": 0.48,
"learning_rate": 1.5211772033634381e-05,
"loss": 0.2477,
"step": 6150
},
{
"epoch": 0.48,
"learning_rate": 1.5203986297103708e-05,
"loss": 0.2988,
"step": 6160
},
{
"epoch": 0.48,
"learning_rate": 1.5196200560573032e-05,
"loss": 0.2726,
"step": 6170
},
{
"epoch": 0.48,
"learning_rate": 1.5188414824042354e-05,
"loss": 0.3212,
"step": 6180
},
{
"epoch": 0.48,
"learning_rate": 1.5180629087511681e-05,
"loss": 0.2562,
"step": 6190
},
{
"epoch": 0.48,
"learning_rate": 1.5172843350981003e-05,
"loss": 0.2075,
"step": 6200
},
{
"epoch": 0.48,
"learning_rate": 1.516505761445033e-05,
"loss": 0.3107,
"step": 6210
},
{
"epoch": 0.48,
"learning_rate": 1.5157271877919653e-05,
"loss": 0.267,
"step": 6220
},
{
"epoch": 0.49,
"learning_rate": 1.5149486141388975e-05,
"loss": 0.2532,
"step": 6230
},
{
"epoch": 0.49,
"learning_rate": 1.5141700404858302e-05,
"loss": 0.2778,
"step": 6240
},
{
"epoch": 0.49,
"learning_rate": 1.5133914668327624e-05,
"loss": 0.3576,
"step": 6250
},
{
"epoch": 0.49,
"learning_rate": 1.5126128931796948e-05,
"loss": 0.2662,
"step": 6260
},
{
"epoch": 0.49,
"learning_rate": 1.5118343195266275e-05,
"loss": 0.2563,
"step": 6270
},
{
"epoch": 0.49,
"learning_rate": 1.5110557458735597e-05,
"loss": 0.293,
"step": 6280
},
{
"epoch": 0.49,
"learning_rate": 1.510277172220492e-05,
"loss": 0.279,
"step": 6290
},
{
"epoch": 0.49,
"learning_rate": 1.5094985985674246e-05,
"loss": 0.2837,
"step": 6300
},
{
"epoch": 0.49,
"learning_rate": 1.508720024914357e-05,
"loss": 0.2893,
"step": 6310
},
{
"epoch": 0.49,
"learning_rate": 1.5079414512612896e-05,
"loss": 0.3511,
"step": 6320
},
{
"epoch": 0.49,
"learning_rate": 1.5071628776082218e-05,
"loss": 0.378,
"step": 6330
},
{
"epoch": 0.49,
"learning_rate": 1.5063843039551542e-05,
"loss": 0.3159,
"step": 6340
},
{
"epoch": 0.49,
"learning_rate": 1.5056057303020867e-05,
"loss": 0.2108,
"step": 6350
},
{
"epoch": 0.5,
"learning_rate": 1.504827156649019e-05,
"loss": 0.2183,
"step": 6360
},
{
"epoch": 0.5,
"learning_rate": 1.5040485829959514e-05,
"loss": 0.2395,
"step": 6370
},
{
"epoch": 0.5,
"learning_rate": 1.503270009342884e-05,
"loss": 0.3082,
"step": 6380
},
{
"epoch": 0.5,
"learning_rate": 1.5024914356898163e-05,
"loss": 0.2743,
"step": 6390
},
{
"epoch": 0.5,
"learning_rate": 1.5017128620367487e-05,
"loss": 0.3109,
"step": 6400
},
{
"epoch": 0.5,
"learning_rate": 1.5009342883836812e-05,
"loss": 0.2441,
"step": 6410
},
{
"epoch": 0.5,
"learning_rate": 1.5001557147306136e-05,
"loss": 0.2637,
"step": 6420
},
{
"epoch": 0.5,
"learning_rate": 1.499377141077546e-05,
"loss": 0.2905,
"step": 6430
},
{
"epoch": 0.5,
"learning_rate": 1.4985985674244785e-05,
"loss": 0.2632,
"step": 6440
},
{
"epoch": 0.5,
"learning_rate": 1.4978199937714108e-05,
"loss": 0.217,
"step": 6450
},
{
"epoch": 0.5,
"learning_rate": 1.4970414201183433e-05,
"loss": 0.2873,
"step": 6460
},
{
"epoch": 0.5,
"learning_rate": 1.4962628464652757e-05,
"loss": 0.279,
"step": 6470
},
{
"epoch": 0.5,
"learning_rate": 1.495484272812208e-05,
"loss": 0.2589,
"step": 6480
},
{
"epoch": 0.51,
"learning_rate": 1.4947056991591406e-05,
"loss": 0.3188,
"step": 6490
},
{
"epoch": 0.51,
"learning_rate": 1.493927125506073e-05,
"loss": 0.2329,
"step": 6500
},
{
"epoch": 0.51,
"learning_rate": 1.4931485518530053e-05,
"loss": 0.2875,
"step": 6510
},
{
"epoch": 0.51,
"learning_rate": 1.4923699781999378e-05,
"loss": 0.2437,
"step": 6520
},
{
"epoch": 0.51,
"learning_rate": 1.4915914045468702e-05,
"loss": 0.2798,
"step": 6530
},
{
"epoch": 0.51,
"learning_rate": 1.4908128308938026e-05,
"loss": 0.2659,
"step": 6540
},
{
"epoch": 0.51,
"learning_rate": 1.4900342572407351e-05,
"loss": 0.2605,
"step": 6550
},
{
"epoch": 0.51,
"learning_rate": 1.4892556835876675e-05,
"loss": 0.3059,
"step": 6560
},
{
"epoch": 0.51,
"learning_rate": 1.4884771099346e-05,
"loss": 0.1881,
"step": 6570
},
{
"epoch": 0.51,
"learning_rate": 1.4876985362815324e-05,
"loss": 0.268,
"step": 6580
},
{
"epoch": 0.51,
"learning_rate": 1.4869199626284647e-05,
"loss": 0.2027,
"step": 6590
},
{
"epoch": 0.51,
"learning_rate": 1.4861413889753972e-05,
"loss": 0.2267,
"step": 6600
},
{
"epoch": 0.51,
"learning_rate": 1.4853628153223296e-05,
"loss": 0.2685,
"step": 6610
},
{
"epoch": 0.52,
"learning_rate": 1.484584241669262e-05,
"loss": 0.2352,
"step": 6620
},
{
"epoch": 0.52,
"learning_rate": 1.4838056680161945e-05,
"loss": 0.2105,
"step": 6630
},
{
"epoch": 0.52,
"learning_rate": 1.4830270943631269e-05,
"loss": 0.2554,
"step": 6640
},
{
"epoch": 0.52,
"learning_rate": 1.4822485207100592e-05,
"loss": 0.3014,
"step": 6650
},
{
"epoch": 0.52,
"learning_rate": 1.4814699470569917e-05,
"loss": 0.2456,
"step": 6660
},
{
"epoch": 0.52,
"learning_rate": 1.4806913734039241e-05,
"loss": 0.2588,
"step": 6670
},
{
"epoch": 0.52,
"learning_rate": 1.4799127997508565e-05,
"loss": 0.233,
"step": 6680
},
{
"epoch": 0.52,
"learning_rate": 1.479134226097789e-05,
"loss": 0.2118,
"step": 6690
},
{
"epoch": 0.52,
"learning_rate": 1.4783556524447214e-05,
"loss": 0.3248,
"step": 6700
},
{
"epoch": 0.52,
"learning_rate": 1.4775770787916539e-05,
"loss": 0.2562,
"step": 6710
},
{
"epoch": 0.52,
"learning_rate": 1.4767985051385862e-05,
"loss": 0.3039,
"step": 6720
},
{
"epoch": 0.52,
"learning_rate": 1.4760199314855186e-05,
"loss": 0.2707,
"step": 6730
},
{
"epoch": 0.52,
"learning_rate": 1.4752413578324511e-05,
"loss": 0.2062,
"step": 6740
},
{
"epoch": 0.53,
"learning_rate": 1.4744627841793835e-05,
"loss": 0.2664,
"step": 6750
},
{
"epoch": 0.53,
"learning_rate": 1.4736842105263159e-05,
"loss": 0.2372,
"step": 6760
},
{
"epoch": 0.53,
"learning_rate": 1.4729056368732484e-05,
"loss": 0.3098,
"step": 6770
},
{
"epoch": 0.53,
"learning_rate": 1.4721270632201807e-05,
"loss": 0.2494,
"step": 6780
},
{
"epoch": 0.53,
"learning_rate": 1.4713484895671131e-05,
"loss": 0.3575,
"step": 6790
},
{
"epoch": 0.53,
"learning_rate": 1.4705699159140456e-05,
"loss": 0.2152,
"step": 6800
},
{
"epoch": 0.53,
"learning_rate": 1.469791342260978e-05,
"loss": 0.2933,
"step": 6810
},
{
"epoch": 0.53,
"learning_rate": 1.4690127686079105e-05,
"loss": 0.2765,
"step": 6820
},
{
"epoch": 0.53,
"learning_rate": 1.4682341949548429e-05,
"loss": 0.2998,
"step": 6830
},
{
"epoch": 0.53,
"learning_rate": 1.4674556213017752e-05,
"loss": 0.2421,
"step": 6840
},
{
"epoch": 0.53,
"learning_rate": 1.4666770476487078e-05,
"loss": 0.2567,
"step": 6850
},
{
"epoch": 0.53,
"learning_rate": 1.4658984739956401e-05,
"loss": 0.2217,
"step": 6860
},
{
"epoch": 0.53,
"learning_rate": 1.4651199003425725e-05,
"loss": 0.2064,
"step": 6870
},
{
"epoch": 0.54,
"learning_rate": 1.464341326689505e-05,
"loss": 0.2982,
"step": 6880
},
{
"epoch": 0.54,
"learning_rate": 1.4635627530364374e-05,
"loss": 0.2313,
"step": 6890
},
{
"epoch": 0.54,
"learning_rate": 1.4627841793833698e-05,
"loss": 0.3445,
"step": 6900
},
{
"epoch": 0.54,
"learning_rate": 1.4620056057303023e-05,
"loss": 0.3044,
"step": 6910
},
{
"epoch": 0.54,
"learning_rate": 1.4612270320772346e-05,
"loss": 0.2654,
"step": 6920
},
{
"epoch": 0.54,
"learning_rate": 1.460448458424167e-05,
"loss": 0.2996,
"step": 6930
},
{
"epoch": 0.54,
"learning_rate": 1.4596698847710995e-05,
"loss": 0.2435,
"step": 6940
},
{
"epoch": 0.54,
"learning_rate": 1.4588913111180319e-05,
"loss": 0.348,
"step": 6950
},
{
"epoch": 0.54,
"learning_rate": 1.4581127374649644e-05,
"loss": 0.2329,
"step": 6960
},
{
"epoch": 0.54,
"learning_rate": 1.4573341638118968e-05,
"loss": 0.2526,
"step": 6970
},
{
"epoch": 0.54,
"learning_rate": 1.4565555901588291e-05,
"loss": 0.1841,
"step": 6980
},
{
"epoch": 0.54,
"learning_rate": 1.4557770165057617e-05,
"loss": 0.2716,
"step": 6990
},
{
"epoch": 0.55,
"learning_rate": 1.454998442852694e-05,
"loss": 0.2528,
"step": 7000
},
{
"epoch": 0.55,
"learning_rate": 1.4542198691996264e-05,
"loss": 0.2268,
"step": 7010
},
{
"epoch": 0.55,
"learning_rate": 1.453441295546559e-05,
"loss": 0.2281,
"step": 7020
},
{
"epoch": 0.55,
"learning_rate": 1.4526627218934913e-05,
"loss": 0.2526,
"step": 7030
},
{
"epoch": 0.55,
"learning_rate": 1.4518841482404236e-05,
"loss": 0.3006,
"step": 7040
},
{
"epoch": 0.55,
"learning_rate": 1.4511055745873562e-05,
"loss": 0.3228,
"step": 7050
},
{
"epoch": 0.55,
"learning_rate": 1.4503270009342885e-05,
"loss": 0.2666,
"step": 7060
},
{
"epoch": 0.55,
"learning_rate": 1.449548427281221e-05,
"loss": 0.2406,
"step": 7070
},
{
"epoch": 0.55,
"learning_rate": 1.4487698536281534e-05,
"loss": 0.2733,
"step": 7080
},
{
"epoch": 0.55,
"learning_rate": 1.4479912799750858e-05,
"loss": 0.2883,
"step": 7090
},
{
"epoch": 0.55,
"learning_rate": 1.4472127063220183e-05,
"loss": 0.2873,
"step": 7100
},
{
"epoch": 0.55,
"learning_rate": 1.4464341326689507e-05,
"loss": 0.2031,
"step": 7110
},
{
"epoch": 0.55,
"learning_rate": 1.4456555590158829e-05,
"loss": 0.2494,
"step": 7120
},
{
"epoch": 0.56,
"learning_rate": 1.4448769853628156e-05,
"loss": 0.2951,
"step": 7130
},
{
"epoch": 0.56,
"learning_rate": 1.444098411709748e-05,
"loss": 0.3056,
"step": 7140
},
{
"epoch": 0.56,
"learning_rate": 1.4433198380566801e-05,
"loss": 0.3215,
"step": 7150
},
{
"epoch": 0.56,
"learning_rate": 1.4425412644036128e-05,
"loss": 0.2782,
"step": 7160
},
{
"epoch": 0.56,
"learning_rate": 1.441762690750545e-05,
"loss": 0.28,
"step": 7170
},
{
"epoch": 0.56,
"learning_rate": 1.4409841170974774e-05,
"loss": 0.2594,
"step": 7180
},
{
"epoch": 0.56,
"learning_rate": 1.44020554344441e-05,
"loss": 0.2574,
"step": 7190
},
{
"epoch": 0.56,
"learning_rate": 1.4394269697913423e-05,
"loss": 0.2778,
"step": 7200
},
{
"epoch": 0.56,
"learning_rate": 1.438648396138275e-05,
"loss": 0.3185,
"step": 7210
},
{
"epoch": 0.56,
"learning_rate": 1.4378698224852072e-05,
"loss": 0.2715,
"step": 7220
},
{
"epoch": 0.56,
"learning_rate": 1.4370912488321395e-05,
"loss": 0.2555,
"step": 7230
},
{
"epoch": 0.56,
"learning_rate": 1.4363126751790722e-05,
"loss": 0.2159,
"step": 7240
},
{
"epoch": 0.56,
"learning_rate": 1.4355341015260044e-05,
"loss": 0.3302,
"step": 7250
},
{
"epoch": 0.57,
"learning_rate": 1.4347555278729368e-05,
"loss": 0.1962,
"step": 7260
},
{
"epoch": 0.57,
"learning_rate": 1.4339769542198693e-05,
"loss": 0.223,
"step": 7270
},
{
"epoch": 0.57,
"learning_rate": 1.4331983805668017e-05,
"loss": 0.2492,
"step": 7280
},
{
"epoch": 0.57,
"learning_rate": 1.432419806913734e-05,
"loss": 0.2742,
"step": 7290
},
{
"epoch": 0.57,
"learning_rate": 1.4316412332606665e-05,
"loss": 0.2691,
"step": 7300
},
{
"epoch": 0.57,
"learning_rate": 1.4308626596075989e-05,
"loss": 0.2728,
"step": 7310
},
{
"epoch": 0.57,
"learning_rate": 1.4300840859545316e-05,
"loss": 0.2205,
"step": 7320
},
{
"epoch": 0.57,
"learning_rate": 1.4293055123014638e-05,
"loss": 0.2697,
"step": 7330
},
{
"epoch": 0.57,
"learning_rate": 1.4285269386483962e-05,
"loss": 0.2413,
"step": 7340
},
{
"epoch": 0.57,
"learning_rate": 1.4277483649953287e-05,
"loss": 0.2632,
"step": 7350
},
{
"epoch": 0.57,
"learning_rate": 1.426969791342261e-05,
"loss": 0.2231,
"step": 7360
},
{
"epoch": 0.57,
"learning_rate": 1.4261912176891934e-05,
"loss": 0.2651,
"step": 7370
},
{
"epoch": 0.57,
"learning_rate": 1.425412644036126e-05,
"loss": 0.3421,
"step": 7380
},
{
"epoch": 0.58,
"learning_rate": 1.4246340703830583e-05,
"loss": 0.2889,
"step": 7390
},
{
"epoch": 0.58,
"learning_rate": 1.4238554967299907e-05,
"loss": 0.2374,
"step": 7400
},
{
"epoch": 0.58,
"learning_rate": 1.4230769230769232e-05,
"loss": 0.3111,
"step": 7410
},
{
"epoch": 0.58,
"learning_rate": 1.4222983494238555e-05,
"loss": 0.2267,
"step": 7420
},
{
"epoch": 0.58,
"learning_rate": 1.4215197757707879e-05,
"loss": 0.3142,
"step": 7430
},
{
"epoch": 0.58,
"learning_rate": 1.4207412021177204e-05,
"loss": 0.2473,
"step": 7440
},
{
"epoch": 0.58,
"learning_rate": 1.4199626284646528e-05,
"loss": 0.3039,
"step": 7450
},
{
"epoch": 0.58,
"learning_rate": 1.4191840548115853e-05,
"loss": 0.2859,
"step": 7460
},
{
"epoch": 0.58,
"learning_rate": 1.4184054811585177e-05,
"loss": 0.1975,
"step": 7470
},
{
"epoch": 0.58,
"learning_rate": 1.41762690750545e-05,
"loss": 0.247,
"step": 7480
},
{
"epoch": 0.58,
"learning_rate": 1.4168483338523826e-05,
"loss": 0.1781,
"step": 7490
},
{
"epoch": 0.58,
"learning_rate": 1.416069760199315e-05,
"loss": 0.2485,
"step": 7500
},
{
"epoch": 0.58,
"learning_rate": 1.4152911865462473e-05,
"loss": 0.3146,
"step": 7510
},
{
"epoch": 0.59,
"learning_rate": 1.4145126128931798e-05,
"loss": 0.2644,
"step": 7520
},
{
"epoch": 0.59,
"learning_rate": 1.4137340392401122e-05,
"loss": 0.2583,
"step": 7530
},
{
"epoch": 0.59,
"learning_rate": 1.4129554655870446e-05,
"loss": 0.2273,
"step": 7540
},
{
"epoch": 0.59,
"learning_rate": 1.412176891933977e-05,
"loss": 0.2436,
"step": 7550
},
{
"epoch": 0.59,
"learning_rate": 1.4113983182809094e-05,
"loss": 0.2849,
"step": 7560
},
{
"epoch": 0.59,
"learning_rate": 1.410619744627842e-05,
"loss": 0.2887,
"step": 7570
},
{
"epoch": 0.59,
"learning_rate": 1.4098411709747743e-05,
"loss": 0.2867,
"step": 7580
},
{
"epoch": 0.59,
"learning_rate": 1.4090625973217067e-05,
"loss": 0.2833,
"step": 7590
},
{
"epoch": 0.59,
"learning_rate": 1.4082840236686392e-05,
"loss": 0.2558,
"step": 7600
},
{
"epoch": 0.59,
"learning_rate": 1.4075054500155716e-05,
"loss": 0.2488,
"step": 7610
},
{
"epoch": 0.59,
"learning_rate": 1.406726876362504e-05,
"loss": 0.2157,
"step": 7620
},
{
"epoch": 0.59,
"learning_rate": 1.4059483027094365e-05,
"loss": 0.2722,
"step": 7630
},
{
"epoch": 0.59,
"learning_rate": 1.4051697290563688e-05,
"loss": 0.2399,
"step": 7640
},
{
"epoch": 0.6,
"learning_rate": 1.4043911554033012e-05,
"loss": 0.254,
"step": 7650
},
{
"epoch": 0.6,
"learning_rate": 1.4036125817502337e-05,
"loss": 0.2834,
"step": 7660
},
{
"epoch": 0.6,
"learning_rate": 1.4028340080971661e-05,
"loss": 0.2653,
"step": 7670
},
{
"epoch": 0.6,
"learning_rate": 1.4020554344440984e-05,
"loss": 0.3492,
"step": 7680
},
{
"epoch": 0.6,
"learning_rate": 1.401276860791031e-05,
"loss": 0.2323,
"step": 7690
},
{
"epoch": 0.6,
"learning_rate": 1.4004982871379633e-05,
"loss": 0.2731,
"step": 7700
},
{
"epoch": 0.6,
"learning_rate": 1.3997197134848959e-05,
"loss": 0.2051,
"step": 7710
},
{
"epoch": 0.6,
"learning_rate": 1.3989411398318282e-05,
"loss": 0.2671,
"step": 7720
},
{
"epoch": 0.6,
"learning_rate": 1.3981625661787606e-05,
"loss": 0.2759,
"step": 7730
},
{
"epoch": 0.6,
"learning_rate": 1.3973839925256931e-05,
"loss": 0.2552,
"step": 7740
},
{
"epoch": 0.6,
"learning_rate": 1.3966054188726255e-05,
"loss": 0.2265,
"step": 7750
},
{
"epoch": 0.6,
"learning_rate": 1.3958268452195578e-05,
"loss": 0.2216,
"step": 7760
},
{
"epoch": 0.6,
"learning_rate": 1.3950482715664904e-05,
"loss": 0.2281,
"step": 7770
},
{
"epoch": 0.61,
"learning_rate": 1.3942696979134227e-05,
"loss": 0.1875,
"step": 7780
},
{
"epoch": 0.61,
"learning_rate": 1.3934911242603551e-05,
"loss": 0.2821,
"step": 7790
},
{
"epoch": 0.61,
"learning_rate": 1.3927125506072876e-05,
"loss": 0.2133,
"step": 7800
},
{
"epoch": 0.61,
"learning_rate": 1.39193397695422e-05,
"loss": 0.2294,
"step": 7810
},
{
"epoch": 0.61,
"learning_rate": 1.3911554033011525e-05,
"loss": 0.2857,
"step": 7820
},
{
"epoch": 0.61,
"learning_rate": 1.3903768296480849e-05,
"loss": 0.2717,
"step": 7830
},
{
"epoch": 0.61,
"learning_rate": 1.3895982559950172e-05,
"loss": 0.2796,
"step": 7840
},
{
"epoch": 0.61,
"learning_rate": 1.3888196823419498e-05,
"loss": 0.268,
"step": 7850
},
{
"epoch": 0.61,
"learning_rate": 1.3880411086888821e-05,
"loss": 0.2628,
"step": 7860
},
{
"epoch": 0.61,
"learning_rate": 1.3872625350358145e-05,
"loss": 0.2389,
"step": 7870
},
{
"epoch": 0.61,
"learning_rate": 1.386483961382747e-05,
"loss": 0.2932,
"step": 7880
},
{
"epoch": 0.61,
"learning_rate": 1.3857053877296794e-05,
"loss": 0.2538,
"step": 7890
},
{
"epoch": 0.62,
"learning_rate": 1.3849268140766117e-05,
"loss": 0.2351,
"step": 7900
},
{
"epoch": 0.62,
"learning_rate": 1.3841482404235443e-05,
"loss": 0.2099,
"step": 7910
},
{
"epoch": 0.62,
"learning_rate": 1.3833696667704766e-05,
"loss": 0.1891,
"step": 7920
},
{
"epoch": 0.62,
"learning_rate": 1.382591093117409e-05,
"loss": 0.2529,
"step": 7930
},
{
"epoch": 0.62,
"learning_rate": 1.3818125194643415e-05,
"loss": 0.2546,
"step": 7940
},
{
"epoch": 0.62,
"learning_rate": 1.3810339458112739e-05,
"loss": 0.2655,
"step": 7950
},
{
"epoch": 0.62,
"learning_rate": 1.3802553721582064e-05,
"loss": 0.243,
"step": 7960
},
{
"epoch": 0.62,
"learning_rate": 1.3794767985051388e-05,
"loss": 0.2579,
"step": 7970
},
{
"epoch": 0.62,
"learning_rate": 1.3786982248520711e-05,
"loss": 0.2958,
"step": 7980
},
{
"epoch": 0.62,
"learning_rate": 1.3779196511990037e-05,
"loss": 0.2424,
"step": 7990
},
{
"epoch": 0.62,
"learning_rate": 1.377141077545936e-05,
"loss": 0.2509,
"step": 8000
},
{
"epoch": 0.62,
"learning_rate": 1.3763625038928684e-05,
"loss": 0.2683,
"step": 8010
},
{
"epoch": 0.62,
"learning_rate": 1.3755839302398009e-05,
"loss": 0.1821,
"step": 8020
},
{
"epoch": 0.63,
"learning_rate": 1.3748053565867333e-05,
"loss": 0.2214,
"step": 8030
},
{
"epoch": 0.63,
"learning_rate": 1.3740267829336655e-05,
"loss": 0.258,
"step": 8040
},
{
"epoch": 0.63,
"learning_rate": 1.3732482092805982e-05,
"loss": 0.2458,
"step": 8050
},
{
"epoch": 0.63,
"learning_rate": 1.3724696356275305e-05,
"loss": 0.2039,
"step": 8060
},
{
"epoch": 0.63,
"learning_rate": 1.371691061974463e-05,
"loss": 0.2078,
"step": 8070
},
{
"epoch": 0.63,
"learning_rate": 1.3709124883213954e-05,
"loss": 0.2607,
"step": 8080
},
{
"epoch": 0.63,
"learning_rate": 1.3701339146683276e-05,
"loss": 0.2988,
"step": 8090
},
{
"epoch": 0.63,
"learning_rate": 1.3693553410152603e-05,
"loss": 0.2406,
"step": 8100
},
{
"epoch": 0.63,
"learning_rate": 1.3685767673621927e-05,
"loss": 0.2635,
"step": 8110
},
{
"epoch": 0.63,
"learning_rate": 1.3677981937091248e-05,
"loss": 0.2343,
"step": 8120
},
{
"epoch": 0.63,
"learning_rate": 1.3670196200560575e-05,
"loss": 0.2251,
"step": 8130
},
{
"epoch": 0.63,
"learning_rate": 1.3662410464029897e-05,
"loss": 0.2247,
"step": 8140
},
{
"epoch": 0.63,
"learning_rate": 1.3654624727499221e-05,
"loss": 0.2332,
"step": 8150
},
{
"epoch": 0.64,
"learning_rate": 1.3646838990968548e-05,
"loss": 0.2405,
"step": 8160
},
{
"epoch": 0.64,
"learning_rate": 1.363905325443787e-05,
"loss": 0.3167,
"step": 8170
},
{
"epoch": 0.64,
"learning_rate": 1.3631267517907194e-05,
"loss": 0.293,
"step": 8180
},
{
"epoch": 0.64,
"learning_rate": 1.362348178137652e-05,
"loss": 0.2745,
"step": 8190
},
{
"epoch": 0.64,
"learning_rate": 1.3615696044845842e-05,
"loss": 0.201,
"step": 8200
},
{
"epoch": 0.64,
"learning_rate": 1.360791030831517e-05,
"loss": 0.2546,
"step": 8210
},
{
"epoch": 0.64,
"learning_rate": 1.3600124571784491e-05,
"loss": 0.219,
"step": 8220
},
{
"epoch": 0.64,
"learning_rate": 1.3592338835253815e-05,
"loss": 0.2208,
"step": 8230
},
{
"epoch": 0.64,
"learning_rate": 1.3584553098723142e-05,
"loss": 0.2707,
"step": 8240
},
{
"epoch": 0.64,
"learning_rate": 1.3576767362192464e-05,
"loss": 0.3258,
"step": 8250
},
{
"epoch": 0.64,
"learning_rate": 1.3568981625661787e-05,
"loss": 0.2519,
"step": 8260
},
{
"epoch": 0.64,
"learning_rate": 1.3561195889131113e-05,
"loss": 0.236,
"step": 8270
},
{
"epoch": 0.64,
"learning_rate": 1.3553410152600436e-05,
"loss": 0.2893,
"step": 8280
},
{
"epoch": 0.65,
"learning_rate": 1.354562441606976e-05,
"loss": 0.2164,
"step": 8290
},
{
"epoch": 0.65,
"learning_rate": 1.3537838679539085e-05,
"loss": 0.2337,
"step": 8300
},
{
"epoch": 0.65,
"learning_rate": 1.3530052943008409e-05,
"loss": 0.2531,
"step": 8310
},
{
"epoch": 0.65,
"learning_rate": 1.3522267206477734e-05,
"loss": 0.2735,
"step": 8320
},
{
"epoch": 0.65,
"learning_rate": 1.3514481469947058e-05,
"loss": 0.2704,
"step": 8330
},
{
"epoch": 0.65,
"learning_rate": 1.3506695733416381e-05,
"loss": 0.2442,
"step": 8340
},
{
"epoch": 0.65,
"learning_rate": 1.3498909996885707e-05,
"loss": 0.2818,
"step": 8350
},
{
"epoch": 0.65,
"learning_rate": 1.349112426035503e-05,
"loss": 0.2394,
"step": 8360
},
{
"epoch": 0.65,
"learning_rate": 1.3483338523824354e-05,
"loss": 0.2207,
"step": 8370
},
{
"epoch": 0.65,
"learning_rate": 1.347555278729368e-05,
"loss": 0.2092,
"step": 8380
},
{
"epoch": 0.65,
"learning_rate": 1.3467767050763003e-05,
"loss": 0.2016,
"step": 8390
},
{
"epoch": 0.65,
"learning_rate": 1.3459981314232326e-05,
"loss": 0.2382,
"step": 8400
},
{
"epoch": 0.65,
"learning_rate": 1.3452195577701652e-05,
"loss": 0.2259,
"step": 8410
},
{
"epoch": 0.66,
"learning_rate": 1.3444409841170975e-05,
"loss": 0.2597,
"step": 8420
},
{
"epoch": 0.66,
"learning_rate": 1.3436624104640299e-05,
"loss": 0.1858,
"step": 8430
},
{
"epoch": 0.66,
"learning_rate": 1.3428838368109624e-05,
"loss": 0.2511,
"step": 8440
},
{
"epoch": 0.66,
"learning_rate": 1.3421052631578948e-05,
"loss": 0.2191,
"step": 8450
},
{
"epoch": 0.66,
"learning_rate": 1.3413266895048273e-05,
"loss": 0.259,
"step": 8460
},
{
"epoch": 0.66,
"learning_rate": 1.3405481158517597e-05,
"loss": 0.2004,
"step": 8470
},
{
"epoch": 0.66,
"learning_rate": 1.339769542198692e-05,
"loss": 0.2483,
"step": 8480
},
{
"epoch": 0.66,
"learning_rate": 1.3389909685456246e-05,
"loss": 0.214,
"step": 8490
},
{
"epoch": 0.66,
"learning_rate": 1.338212394892557e-05,
"loss": 0.2958,
"step": 8500
},
{
"epoch": 0.66,
"learning_rate": 1.3374338212394893e-05,
"loss": 0.3244,
"step": 8510
},
{
"epoch": 0.66,
"learning_rate": 1.3366552475864218e-05,
"loss": 0.2065,
"step": 8520
},
{
"epoch": 0.66,
"learning_rate": 1.3358766739333542e-05,
"loss": 0.249,
"step": 8530
},
{
"epoch": 0.66,
"learning_rate": 1.3350981002802865e-05,
"loss": 0.2386,
"step": 8540
},
{
"epoch": 0.67,
"learning_rate": 1.334319526627219e-05,
"loss": 0.1867,
"step": 8550
},
{
"epoch": 0.67,
"learning_rate": 1.3335409529741514e-05,
"loss": 0.2001,
"step": 8560
},
{
"epoch": 0.67,
"learning_rate": 1.332762379321084e-05,
"loss": 0.2559,
"step": 8570
},
{
"epoch": 0.67,
"learning_rate": 1.3319838056680163e-05,
"loss": 0.2514,
"step": 8580
},
{
"epoch": 0.67,
"learning_rate": 1.3312052320149487e-05,
"loss": 0.2386,
"step": 8590
},
{
"epoch": 0.67,
"learning_rate": 1.3304266583618812e-05,
"loss": 0.3099,
"step": 8600
},
{
"epoch": 0.67,
"learning_rate": 1.3296480847088136e-05,
"loss": 0.1941,
"step": 8610
},
{
"epoch": 0.67,
"learning_rate": 1.328869511055746e-05,
"loss": 0.1931,
"step": 8620
},
{
"epoch": 0.67,
"learning_rate": 1.3280909374026785e-05,
"loss": 0.2676,
"step": 8630
},
{
"epoch": 0.67,
"learning_rate": 1.3273123637496108e-05,
"loss": 0.2689,
"step": 8640
},
{
"epoch": 0.67,
"learning_rate": 1.3265337900965432e-05,
"loss": 0.2499,
"step": 8650
},
{
"epoch": 0.67,
"learning_rate": 1.3257552164434757e-05,
"loss": 0.1597,
"step": 8660
},
{
"epoch": 0.68,
"learning_rate": 1.324976642790408e-05,
"loss": 0.2385,
"step": 8670
},
{
"epoch": 0.68,
"learning_rate": 1.3241980691373404e-05,
"loss": 0.3206,
"step": 8680
},
{
"epoch": 0.68,
"learning_rate": 1.323419495484273e-05,
"loss": 0.2182,
"step": 8690
},
{
"epoch": 0.68,
"learning_rate": 1.3226409218312053e-05,
"loss": 0.2932,
"step": 8700
},
{
"epoch": 0.68,
"learning_rate": 1.3218623481781378e-05,
"loss": 0.1684,
"step": 8710
},
{
"epoch": 0.68,
"learning_rate": 1.3210837745250702e-05,
"loss": 0.2822,
"step": 8720
},
{
"epoch": 0.68,
"learning_rate": 1.3203052008720026e-05,
"loss": 0.2572,
"step": 8730
},
{
"epoch": 0.68,
"learning_rate": 1.3195266272189351e-05,
"loss": 0.2549,
"step": 8740
},
{
"epoch": 0.68,
"learning_rate": 1.3187480535658675e-05,
"loss": 0.2183,
"step": 8750
},
{
"epoch": 0.68,
"learning_rate": 1.3179694799127998e-05,
"loss": 0.3112,
"step": 8760
},
{
"epoch": 0.68,
"learning_rate": 1.3171909062597323e-05,
"loss": 0.2417,
"step": 8770
},
{
"epoch": 0.68,
"learning_rate": 1.3164123326066647e-05,
"loss": 0.2393,
"step": 8780
},
{
"epoch": 0.68,
"learning_rate": 1.315633758953597e-05,
"loss": 0.2481,
"step": 8790
},
{
"epoch": 0.69,
"learning_rate": 1.3148551853005296e-05,
"loss": 0.2245,
"step": 8800
},
{
"epoch": 0.69,
"learning_rate": 1.314076611647462e-05,
"loss": 0.2221,
"step": 8810
},
{
"epoch": 0.69,
"learning_rate": 1.3132980379943945e-05,
"loss": 0.2406,
"step": 8820
},
{
"epoch": 0.69,
"learning_rate": 1.3125194643413269e-05,
"loss": 0.3024,
"step": 8830
},
{
"epoch": 0.69,
"learning_rate": 1.3117408906882592e-05,
"loss": 0.2108,
"step": 8840
},
{
"epoch": 0.69,
"learning_rate": 1.3109623170351917e-05,
"loss": 0.281,
"step": 8850
},
{
"epoch": 0.69,
"learning_rate": 1.3101837433821241e-05,
"loss": 0.2789,
"step": 8860
},
{
"epoch": 0.69,
"learning_rate": 1.3094051697290565e-05,
"loss": 0.1919,
"step": 8870
},
{
"epoch": 0.69,
"learning_rate": 1.308626596075989e-05,
"loss": 0.3249,
"step": 8880
},
{
"epoch": 0.69,
"learning_rate": 1.3078480224229214e-05,
"loss": 0.2109,
"step": 8890
},
{
"epoch": 0.69,
"learning_rate": 1.3070694487698537e-05,
"loss": 0.3476,
"step": 8900
},
{
"epoch": 0.69,
"learning_rate": 1.3062908751167862e-05,
"loss": 0.2504,
"step": 8910
},
{
"epoch": 0.69,
"learning_rate": 1.3055123014637186e-05,
"loss": 0.2318,
"step": 8920
},
{
"epoch": 0.7,
"learning_rate": 1.304733727810651e-05,
"loss": 0.196,
"step": 8930
},
{
"epoch": 0.7,
"learning_rate": 1.3039551541575835e-05,
"loss": 0.2634,
"step": 8940
},
{
"epoch": 0.7,
"learning_rate": 1.3031765805045159e-05,
"loss": 0.2615,
"step": 8950
},
{
"epoch": 0.7,
"learning_rate": 1.3023980068514484e-05,
"loss": 0.1575,
"step": 8960
},
{
"epoch": 0.7,
"learning_rate": 1.3016194331983807e-05,
"loss": 0.2671,
"step": 8970
},
{
"epoch": 0.7,
"learning_rate": 1.3008408595453131e-05,
"loss": 0.2605,
"step": 8980
},
{
"epoch": 0.7,
"learning_rate": 1.3000622858922456e-05,
"loss": 0.2248,
"step": 8990
},
{
"epoch": 0.7,
"learning_rate": 1.299283712239178e-05,
"loss": 0.3365,
"step": 9000
},
{
"epoch": 0.7,
"learning_rate": 1.2985051385861102e-05,
"loss": 0.1897,
"step": 9010
},
{
"epoch": 0.7,
"learning_rate": 1.2977265649330429e-05,
"loss": 0.1951,
"step": 9020
},
{
"epoch": 0.7,
"learning_rate": 1.2969479912799752e-05,
"loss": 0.2807,
"step": 9030
},
{
"epoch": 0.7,
"learning_rate": 1.2961694176269074e-05,
"loss": 0.2551,
"step": 9040
},
{
"epoch": 0.7,
"learning_rate": 1.2953908439738401e-05,
"loss": 0.1983,
"step": 9050
},
{
"epoch": 0.71,
"learning_rate": 1.2946122703207723e-05,
"loss": 0.2754,
"step": 9060
},
{
"epoch": 0.71,
"learning_rate": 1.293833696667705e-05,
"loss": 0.2593,
"step": 9070
},
{
"epoch": 0.71,
"learning_rate": 1.2930551230146374e-05,
"loss": 0.1994,
"step": 9080
},
{
"epoch": 0.71,
"learning_rate": 1.2922765493615696e-05,
"loss": 0.2162,
"step": 9090
},
{
"epoch": 0.71,
"learning_rate": 1.2914979757085023e-05,
"loss": 0.2505,
"step": 9100
},
{
"epoch": 0.71,
"learning_rate": 1.2907194020554346e-05,
"loss": 0.2122,
"step": 9110
},
{
"epoch": 0.71,
"learning_rate": 1.2899408284023668e-05,
"loss": 0.2839,
"step": 9120
},
{
"epoch": 0.71,
"learning_rate": 1.2891622547492995e-05,
"loss": 0.2256,
"step": 9130
},
{
"epoch": 0.71,
"learning_rate": 1.2883836810962317e-05,
"loss": 0.2733,
"step": 9140
},
{
"epoch": 0.71,
"learning_rate": 1.287605107443164e-05,
"loss": 0.2434,
"step": 9150
},
{
"epoch": 0.71,
"learning_rate": 1.2868265337900968e-05,
"loss": 0.2805,
"step": 9160
},
{
"epoch": 0.71,
"learning_rate": 1.286047960137029e-05,
"loss": 0.235,
"step": 9170
},
{
"epoch": 0.71,
"learning_rate": 1.2852693864839613e-05,
"loss": 0.3376,
"step": 9180
},
{
"epoch": 0.72,
"learning_rate": 1.2844908128308939e-05,
"loss": 0.2328,
"step": 9190
},
{
"epoch": 0.72,
"learning_rate": 1.2837122391778262e-05,
"loss": 0.2354,
"step": 9200
},
{
"epoch": 0.72,
"learning_rate": 1.282933665524759e-05,
"loss": 0.1856,
"step": 9210
},
{
"epoch": 0.72,
"learning_rate": 1.2821550918716911e-05,
"loss": 0.2262,
"step": 9220
},
{
"epoch": 0.72,
"learning_rate": 1.2813765182186235e-05,
"loss": 0.2725,
"step": 9230
},
{
"epoch": 0.72,
"learning_rate": 1.280597944565556e-05,
"loss": 0.2022,
"step": 9240
},
{
"epoch": 0.72,
"learning_rate": 1.2798193709124884e-05,
"loss": 0.2515,
"step": 9250
},
{
"epoch": 0.72,
"learning_rate": 1.2790407972594207e-05,
"loss": 0.2786,
"step": 9260
},
{
"epoch": 0.72,
"learning_rate": 1.2782622236063533e-05,
"loss": 0.1973,
"step": 9270
},
{
"epoch": 0.72,
"learning_rate": 1.2774836499532856e-05,
"loss": 0.275,
"step": 9280
},
{
"epoch": 0.72,
"learning_rate": 1.276705076300218e-05,
"loss": 0.1834,
"step": 9290
},
{
"epoch": 0.72,
"learning_rate": 1.2759265026471505e-05,
"loss": 0.2149,
"step": 9300
},
{
"epoch": 0.72,
"learning_rate": 1.2751479289940829e-05,
"loss": 0.2387,
"step": 9310
},
{
"epoch": 0.73,
"learning_rate": 1.2743693553410154e-05,
"loss": 0.195,
"step": 9320
},
{
"epoch": 0.73,
"learning_rate": 1.2735907816879478e-05,
"loss": 0.2405,
"step": 9330
},
{
"epoch": 0.73,
"learning_rate": 1.2728122080348801e-05,
"loss": 0.2592,
"step": 9340
},
{
"epoch": 0.73,
"learning_rate": 1.2720336343818126e-05,
"loss": 0.3315,
"step": 9350
},
{
"epoch": 0.73,
"learning_rate": 1.271255060728745e-05,
"loss": 0.1888,
"step": 9360
},
{
"epoch": 0.73,
"learning_rate": 1.2704764870756774e-05,
"loss": 0.2336,
"step": 9370
},
{
"epoch": 0.73,
"learning_rate": 1.2696979134226099e-05,
"loss": 0.1942,
"step": 9380
},
{
"epoch": 0.73,
"learning_rate": 1.2689193397695423e-05,
"loss": 0.1785,
"step": 9390
},
{
"epoch": 0.73,
"learning_rate": 1.2681407661164746e-05,
"loss": 0.3373,
"step": 9400
},
{
"epoch": 0.73,
"learning_rate": 1.2673621924634071e-05,
"loss": 0.2567,
"step": 9410
},
{
"epoch": 0.73,
"learning_rate": 1.2665836188103395e-05,
"loss": 0.1999,
"step": 9420
},
{
"epoch": 0.73,
"learning_rate": 1.2658050451572719e-05,
"loss": 0.2082,
"step": 9430
},
{
"epoch": 0.73,
"learning_rate": 1.2650264715042044e-05,
"loss": 0.2549,
"step": 9440
},
{
"epoch": 0.74,
"learning_rate": 1.2642478978511368e-05,
"loss": 0.2606,
"step": 9450
},
{
"epoch": 0.74,
"learning_rate": 1.2634693241980693e-05,
"loss": 0.2784,
"step": 9460
},
{
"epoch": 0.74,
"learning_rate": 1.2626907505450017e-05,
"loss": 0.2298,
"step": 9470
},
{
"epoch": 0.74,
"learning_rate": 1.261912176891934e-05,
"loss": 0.2609,
"step": 9480
},
{
"epoch": 0.74,
"learning_rate": 1.2611336032388665e-05,
"loss": 0.229,
"step": 9490
},
{
"epoch": 0.74,
"learning_rate": 1.2603550295857989e-05,
"loss": 0.1955,
"step": 9500
},
{
"epoch": 0.74,
"learning_rate": 1.2595764559327313e-05,
"loss": 0.2197,
"step": 9510
},
{
"epoch": 0.74,
"learning_rate": 1.2587978822796638e-05,
"loss": 0.2211,
"step": 9520
},
{
"epoch": 0.74,
"learning_rate": 1.2580193086265962e-05,
"loss": 0.2508,
"step": 9530
},
{
"epoch": 0.74,
"learning_rate": 1.2572407349735285e-05,
"loss": 0.2029,
"step": 9540
},
{
"epoch": 0.74,
"learning_rate": 1.256462161320461e-05,
"loss": 0.2052,
"step": 9550
},
{
"epoch": 0.74,
"learning_rate": 1.2556835876673934e-05,
"loss": 0.1875,
"step": 9560
},
{
"epoch": 0.75,
"learning_rate": 1.254905014014326e-05,
"loss": 0.2552,
"step": 9570
},
{
"epoch": 0.75,
"learning_rate": 1.2541264403612583e-05,
"loss": 0.2349,
"step": 9580
},
{
"epoch": 0.75,
"learning_rate": 1.2533478667081907e-05,
"loss": 0.2103,
"step": 9590
},
{
"epoch": 0.75,
"learning_rate": 1.2525692930551232e-05,
"loss": 0.2292,
"step": 9600
},
{
"epoch": 0.75,
"learning_rate": 1.2517907194020555e-05,
"loss": 0.2763,
"step": 9610
},
{
"epoch": 0.75,
"learning_rate": 1.2510121457489879e-05,
"loss": 0.186,
"step": 9620
},
{
"epoch": 0.75,
"learning_rate": 1.2502335720959204e-05,
"loss": 0.1946,
"step": 9630
},
{
"epoch": 0.75,
"learning_rate": 1.2494549984428528e-05,
"loss": 0.2341,
"step": 9640
},
{
"epoch": 0.75,
"learning_rate": 1.2486764247897852e-05,
"loss": 0.2072,
"step": 9650
},
{
"epoch": 0.75,
"learning_rate": 1.2478978511367177e-05,
"loss": 0.2128,
"step": 9660
},
{
"epoch": 0.75,
"learning_rate": 1.24711927748365e-05,
"loss": 0.2115,
"step": 9670
},
{
"epoch": 0.75,
"learning_rate": 1.2463407038305824e-05,
"loss": 0.2096,
"step": 9680
},
{
"epoch": 0.75,
"learning_rate": 1.245562130177515e-05,
"loss": 0.2418,
"step": 9690
},
{
"epoch": 0.76,
"learning_rate": 1.2447835565244473e-05,
"loss": 0.2884,
"step": 9700
},
{
"epoch": 0.76,
"learning_rate": 1.2440049828713798e-05,
"loss": 0.3362,
"step": 9710
},
{
"epoch": 0.76,
"learning_rate": 1.2432264092183122e-05,
"loss": 0.1847,
"step": 9720
},
{
"epoch": 0.76,
"learning_rate": 1.2424478355652445e-05,
"loss": 0.2206,
"step": 9730
},
{
"epoch": 0.76,
"learning_rate": 1.241669261912177e-05,
"loss": 0.2059,
"step": 9740
},
{
"epoch": 0.76,
"learning_rate": 1.2408906882591094e-05,
"loss": 0.2209,
"step": 9750
},
{
"epoch": 0.76,
"learning_rate": 1.2401121146060418e-05,
"loss": 0.2191,
"step": 9760
},
{
"epoch": 0.76,
"learning_rate": 1.2393335409529743e-05,
"loss": 0.2439,
"step": 9770
},
{
"epoch": 0.76,
"learning_rate": 1.2385549672999067e-05,
"loss": 0.2058,
"step": 9780
},
{
"epoch": 0.76,
"learning_rate": 1.237776393646839e-05,
"loss": 0.1416,
"step": 9790
},
{
"epoch": 0.76,
"learning_rate": 1.2369978199937716e-05,
"loss": 0.2964,
"step": 9800
},
{
"epoch": 0.76,
"learning_rate": 1.236219246340704e-05,
"loss": 0.2656,
"step": 9810
},
{
"epoch": 0.76,
"learning_rate": 1.2354406726876365e-05,
"loss": 0.2732,
"step": 9820
},
{
"epoch": 0.77,
"learning_rate": 1.2346620990345688e-05,
"loss": 0.2273,
"step": 9830
},
{
"epoch": 0.77,
"learning_rate": 1.2338835253815012e-05,
"loss": 0.245,
"step": 9840
},
{
"epoch": 0.77,
"learning_rate": 1.2331049517284337e-05,
"loss": 0.1903,
"step": 9850
},
{
"epoch": 0.77,
"learning_rate": 1.232326378075366e-05,
"loss": 0.219,
"step": 9860
},
{
"epoch": 0.77,
"learning_rate": 1.2315478044222984e-05,
"loss": 0.2203,
"step": 9870
},
{
"epoch": 0.77,
"learning_rate": 1.230769230769231e-05,
"loss": 0.3253,
"step": 9880
},
{
"epoch": 0.77,
"learning_rate": 1.2299906571161633e-05,
"loss": 0.1897,
"step": 9890
},
{
"epoch": 0.77,
"learning_rate": 1.2292120834630957e-05,
"loss": 0.3111,
"step": 9900
},
{
"epoch": 0.77,
"learning_rate": 1.2284335098100282e-05,
"loss": 0.2446,
"step": 9910
},
{
"epoch": 0.77,
"learning_rate": 1.2276549361569606e-05,
"loss": 0.2078,
"step": 9920
},
{
"epoch": 0.77,
"learning_rate": 1.2268763625038928e-05,
"loss": 0.1927,
"step": 9930
},
{
"epoch": 0.77,
"learning_rate": 1.2260977888508255e-05,
"loss": 0.2107,
"step": 9940
},
{
"epoch": 0.77,
"learning_rate": 1.2253192151977578e-05,
"loss": 0.1688,
"step": 9950
},
{
"epoch": 0.78,
"learning_rate": 1.2245406415446904e-05,
"loss": 0.2085,
"step": 9960
},
{
"epoch": 0.78,
"learning_rate": 1.2237620678916227e-05,
"loss": 0.1623,
"step": 9970
},
{
"epoch": 0.78,
"learning_rate": 1.222983494238555e-05,
"loss": 0.2193,
"step": 9980
},
{
"epoch": 0.78,
"learning_rate": 1.2222049205854876e-05,
"loss": 0.2748,
"step": 9990
},
{
"epoch": 0.78,
"learning_rate": 1.22142634693242e-05,
"loss": 0.2054,
"step": 10000
},
{
"epoch": 0.78,
"learning_rate": 1.2206477732793522e-05,
"loss": 0.1815,
"step": 10010
},
{
"epoch": 0.78,
"learning_rate": 1.2198691996262849e-05,
"loss": 0.2605,
"step": 10020
},
{
"epoch": 0.78,
"learning_rate": 1.2190906259732172e-05,
"loss": 0.2719,
"step": 10030
},
{
"epoch": 0.78,
"learning_rate": 1.2183120523201494e-05,
"loss": 0.2284,
"step": 10040
},
{
"epoch": 0.78,
"learning_rate": 1.2175334786670821e-05,
"loss": 0.2895,
"step": 10050
},
{
"epoch": 0.78,
"learning_rate": 1.2167549050140143e-05,
"loss": 0.2278,
"step": 10060
},
{
"epoch": 0.78,
"learning_rate": 1.215976331360947e-05,
"loss": 0.2383,
"step": 10070
},
{
"epoch": 0.78,
"learning_rate": 1.2151977577078794e-05,
"loss": 0.2191,
"step": 10080
},
{
"epoch": 0.79,
"learning_rate": 1.2144191840548116e-05,
"loss": 0.1865,
"step": 10090
},
{
"epoch": 0.79,
"learning_rate": 1.2136406104017443e-05,
"loss": 0.3054,
"step": 10100
},
{
"epoch": 0.79,
"learning_rate": 1.2128620367486765e-05,
"loss": 0.245,
"step": 10110
},
{
"epoch": 0.79,
"learning_rate": 1.2120834630956088e-05,
"loss": 0.2497,
"step": 10120
},
{
"epoch": 0.79,
"learning_rate": 1.2113048894425415e-05,
"loss": 0.2181,
"step": 10130
},
{
"epoch": 0.79,
"learning_rate": 1.2105263157894737e-05,
"loss": 0.1896,
"step": 10140
},
{
"epoch": 0.79,
"learning_rate": 1.209747742136406e-05,
"loss": 0.2177,
"step": 10150
},
{
"epoch": 0.79,
"learning_rate": 1.2089691684833386e-05,
"loss": 0.2672,
"step": 10160
},
{
"epoch": 0.79,
"learning_rate": 1.208190594830271e-05,
"loss": 0.2449,
"step": 10170
},
{
"epoch": 0.79,
"learning_rate": 1.2074120211772033e-05,
"loss": 0.2086,
"step": 10180
},
{
"epoch": 0.79,
"learning_rate": 1.2066334475241358e-05,
"loss": 0.2144,
"step": 10190
},
{
"epoch": 0.79,
"learning_rate": 1.2058548738710682e-05,
"loss": 0.212,
"step": 10200
},
{
"epoch": 0.79,
"learning_rate": 1.2050763002180007e-05,
"loss": 0.3234,
"step": 10210
},
{
"epoch": 0.8,
"learning_rate": 1.2042977265649331e-05,
"loss": 0.23,
"step": 10220
},
{
"epoch": 0.8,
"learning_rate": 1.2035191529118655e-05,
"loss": 0.2608,
"step": 10230
},
{
"epoch": 0.8,
"learning_rate": 1.202740579258798e-05,
"loss": 0.2053,
"step": 10240
},
{
"epoch": 0.8,
"learning_rate": 1.2019620056057303e-05,
"loss": 0.1606,
"step": 10250
},
{
"epoch": 0.8,
"learning_rate": 1.2011834319526627e-05,
"loss": 0.2766,
"step": 10260
},
{
"epoch": 0.8,
"learning_rate": 1.2004048582995952e-05,
"loss": 0.2145,
"step": 10270
},
{
"epoch": 0.8,
"learning_rate": 1.1996262846465276e-05,
"loss": 0.2709,
"step": 10280
},
{
"epoch": 0.8,
"learning_rate": 1.19884771099346e-05,
"loss": 0.2038,
"step": 10290
},
{
"epoch": 0.8,
"learning_rate": 1.1980691373403925e-05,
"loss": 0.2132,
"step": 10300
},
{
"epoch": 0.8,
"learning_rate": 1.1972905636873248e-05,
"loss": 0.2122,
"step": 10310
},
{
"epoch": 0.8,
"learning_rate": 1.1965119900342574e-05,
"loss": 0.2436,
"step": 10320
},
{
"epoch": 0.8,
"learning_rate": 1.1957334163811897e-05,
"loss": 0.2474,
"step": 10330
},
{
"epoch": 0.81,
"learning_rate": 1.1949548427281221e-05,
"loss": 0.2912,
"step": 10340
},
{
"epoch": 0.81,
"learning_rate": 1.1941762690750546e-05,
"loss": 0.1962,
"step": 10350
},
{
"epoch": 0.81,
"learning_rate": 1.193397695421987e-05,
"loss": 0.1368,
"step": 10360
},
{
"epoch": 0.81,
"learning_rate": 1.1926191217689194e-05,
"loss": 0.2864,
"step": 10370
},
{
"epoch": 0.81,
"learning_rate": 1.1918405481158519e-05,
"loss": 0.152,
"step": 10380
},
{
"epoch": 0.81,
"learning_rate": 1.1910619744627842e-05,
"loss": 0.1998,
"step": 10390
},
{
"epoch": 0.81,
"learning_rate": 1.1902834008097166e-05,
"loss": 0.256,
"step": 10400
},
{
"epoch": 0.81,
"learning_rate": 1.1895048271566491e-05,
"loss": 0.1882,
"step": 10410
},
{
"epoch": 0.81,
"learning_rate": 1.1887262535035815e-05,
"loss": 0.1613,
"step": 10420
},
{
"epoch": 0.81,
"learning_rate": 1.1879476798505139e-05,
"loss": 0.257,
"step": 10430
},
{
"epoch": 0.81,
"learning_rate": 1.1871691061974464e-05,
"loss": 0.253,
"step": 10440
},
{
"epoch": 0.81,
"learning_rate": 1.1863905325443787e-05,
"loss": 0.2488,
"step": 10450
},
{
"epoch": 0.81,
"learning_rate": 1.1856119588913113e-05,
"loss": 0.2032,
"step": 10460
},
{
"epoch": 0.82,
"learning_rate": 1.1848333852382436e-05,
"loss": 0.1911,
"step": 10470
},
{
"epoch": 0.82,
"learning_rate": 1.184054811585176e-05,
"loss": 0.1919,
"step": 10480
},
{
"epoch": 0.82,
"learning_rate": 1.1832762379321085e-05,
"loss": 0.2179,
"step": 10490
},
{
"epoch": 0.82,
"learning_rate": 1.1824976642790409e-05,
"loss": 0.1749,
"step": 10500
},
{
"epoch": 0.82,
"learning_rate": 1.1817190906259732e-05,
"loss": 0.2601,
"step": 10510
},
{
"epoch": 0.82,
"learning_rate": 1.1809405169729058e-05,
"loss": 0.1821,
"step": 10520
},
{
"epoch": 0.82,
"learning_rate": 1.1801619433198381e-05,
"loss": 0.1973,
"step": 10530
},
{
"epoch": 0.82,
"learning_rate": 1.1793833696667705e-05,
"loss": 0.2062,
"step": 10540
},
{
"epoch": 0.82,
"learning_rate": 1.178604796013703e-05,
"loss": 0.2057,
"step": 10550
},
{
"epoch": 0.82,
"learning_rate": 1.1778262223606354e-05,
"loss": 0.1592,
"step": 10560
},
{
"epoch": 0.82,
"learning_rate": 1.177047648707568e-05,
"loss": 0.2175,
"step": 10570
},
{
"epoch": 0.82,
"learning_rate": 1.1762690750545003e-05,
"loss": 0.1738,
"step": 10580
},
{
"epoch": 0.82,
"learning_rate": 1.1754905014014326e-05,
"loss": 0.2378,
"step": 10590
},
{
"epoch": 0.83,
"learning_rate": 1.1747119277483652e-05,
"loss": 0.1743,
"step": 10600
},
{
"epoch": 0.83,
"learning_rate": 1.1739333540952975e-05,
"loss": 0.2354,
"step": 10610
},
{
"epoch": 0.83,
"learning_rate": 1.1731547804422299e-05,
"loss": 0.1741,
"step": 10620
},
{
"epoch": 0.83,
"learning_rate": 1.1723762067891624e-05,
"loss": 0.1684,
"step": 10630
},
{
"epoch": 0.83,
"learning_rate": 1.1715976331360948e-05,
"loss": 0.2199,
"step": 10640
},
{
"epoch": 0.83,
"learning_rate": 1.1708190594830271e-05,
"loss": 0.2124,
"step": 10650
},
{
"epoch": 0.83,
"learning_rate": 1.1700404858299597e-05,
"loss": 0.2659,
"step": 10660
},
{
"epoch": 0.83,
"learning_rate": 1.169261912176892e-05,
"loss": 0.2294,
"step": 10670
},
{
"epoch": 0.83,
"learning_rate": 1.1684833385238244e-05,
"loss": 0.1339,
"step": 10680
},
{
"epoch": 0.83,
"learning_rate": 1.167704764870757e-05,
"loss": 0.2392,
"step": 10690
},
{
"epoch": 0.83,
"learning_rate": 1.1669261912176893e-05,
"loss": 0.1526,
"step": 10700
},
{
"epoch": 0.83,
"learning_rate": 1.1661476175646218e-05,
"loss": 0.1836,
"step": 10710
},
{
"epoch": 0.83,
"learning_rate": 1.1653690439115542e-05,
"loss": 0.2258,
"step": 10720
},
{
"epoch": 0.84,
"learning_rate": 1.1645904702584865e-05,
"loss": 0.1856,
"step": 10730
},
{
"epoch": 0.84,
"learning_rate": 1.163811896605419e-05,
"loss": 0.2287,
"step": 10740
},
{
"epoch": 0.84,
"learning_rate": 1.1630333229523514e-05,
"loss": 0.2017,
"step": 10750
},
{
"epoch": 0.84,
"learning_rate": 1.1622547492992838e-05,
"loss": 0.1682,
"step": 10760
},
{
"epoch": 0.84,
"learning_rate": 1.1614761756462163e-05,
"loss": 0.2564,
"step": 10770
},
{
"epoch": 0.84,
"learning_rate": 1.1606976019931487e-05,
"loss": 0.1868,
"step": 10780
},
{
"epoch": 0.84,
"learning_rate": 1.159919028340081e-05,
"loss": 0.1721,
"step": 10790
},
{
"epoch": 0.84,
"learning_rate": 1.1591404546870136e-05,
"loss": 0.2094,
"step": 10800
},
{
"epoch": 0.84,
"learning_rate": 1.158361881033946e-05,
"loss": 0.2318,
"step": 10810
},
{
"epoch": 0.84,
"learning_rate": 1.1575833073808785e-05,
"loss": 0.2198,
"step": 10820
},
{
"epoch": 0.84,
"learning_rate": 1.1568047337278108e-05,
"loss": 0.2162,
"step": 10830
},
{
"epoch": 0.84,
"learning_rate": 1.1560261600747432e-05,
"loss": 0.2424,
"step": 10840
},
{
"epoch": 0.84,
"learning_rate": 1.1552475864216757e-05,
"loss": 0.2163,
"step": 10850
},
{
"epoch": 0.85,
"learning_rate": 1.154469012768608e-05,
"loss": 0.1945,
"step": 10860
},
{
"epoch": 0.85,
"learning_rate": 1.1536904391155404e-05,
"loss": 0.1453,
"step": 10870
},
{
"epoch": 0.85,
"learning_rate": 1.152911865462473e-05,
"loss": 0.2658,
"step": 10880
},
{
"epoch": 0.85,
"learning_rate": 1.1521332918094053e-05,
"loss": 0.1744,
"step": 10890
},
{
"epoch": 0.85,
"learning_rate": 1.1513547181563375e-05,
"loss": 0.1922,
"step": 10900
},
{
"epoch": 0.85,
"learning_rate": 1.1505761445032702e-05,
"loss": 0.2594,
"step": 10910
},
{
"epoch": 0.85,
"learning_rate": 1.1497975708502026e-05,
"loss": 0.2348,
"step": 10920
},
{
"epoch": 0.85,
"learning_rate": 1.1490189971971348e-05,
"loss": 0.2265,
"step": 10930
},
{
"epoch": 0.85,
"learning_rate": 1.1482404235440675e-05,
"loss": 0.231,
"step": 10940
},
{
"epoch": 0.85,
"learning_rate": 1.1474618498909998e-05,
"loss": 0.2046,
"step": 10950
},
{
"epoch": 0.85,
"learning_rate": 1.1466832762379323e-05,
"loss": 0.2101,
"step": 10960
},
{
"epoch": 0.85,
"learning_rate": 1.1459047025848647e-05,
"loss": 0.224,
"step": 10970
},
{
"epoch": 0.85,
"learning_rate": 1.1451261289317969e-05,
"loss": 0.1564,
"step": 10980
},
{
"epoch": 0.86,
"learning_rate": 1.1443475552787296e-05,
"loss": 0.2457,
"step": 10990
},
{
"epoch": 0.86,
"learning_rate": 1.143568981625662e-05,
"loss": 0.2481,
"step": 11000
},
{
"epoch": 0.86,
"learning_rate": 1.1427904079725942e-05,
"loss": 0.2572,
"step": 11010
},
{
"epoch": 0.86,
"learning_rate": 1.1420118343195268e-05,
"loss": 0.1696,
"step": 11020
},
{
"epoch": 0.86,
"learning_rate": 1.141233260666459e-05,
"loss": 0.247,
"step": 11030
},
{
"epoch": 0.86,
"learning_rate": 1.1404546870133914e-05,
"loss": 0.2109,
"step": 11040
},
{
"epoch": 0.86,
"learning_rate": 1.1396761133603241e-05,
"loss": 0.1912,
"step": 11050
},
{
"epoch": 0.86,
"learning_rate": 1.1388975397072563e-05,
"loss": 0.2224,
"step": 11060
},
{
"epoch": 0.86,
"learning_rate": 1.138118966054189e-05,
"loss": 0.1924,
"step": 11070
},
{
"epoch": 0.86,
"learning_rate": 1.1373403924011212e-05,
"loss": 0.2314,
"step": 11080
},
{
"epoch": 0.86,
"learning_rate": 1.1365618187480535e-05,
"loss": 0.256,
"step": 11090
},
{
"epoch": 0.86,
"learning_rate": 1.1357832450949862e-05,
"loss": 0.2141,
"step": 11100
},
{
"epoch": 0.86,
"learning_rate": 1.1350046714419184e-05,
"loss": 0.1559,
"step": 11110
},
{
"epoch": 0.87,
"learning_rate": 1.1342260977888508e-05,
"loss": 0.1879,
"step": 11120
},
{
"epoch": 0.87,
"learning_rate": 1.1334475241357833e-05,
"loss": 0.1849,
"step": 11130
},
{
"epoch": 0.87,
"learning_rate": 1.1326689504827157e-05,
"loss": 0.1812,
"step": 11140
},
{
"epoch": 0.87,
"learning_rate": 1.131890376829648e-05,
"loss": 0.1766,
"step": 11150
},
{
"epoch": 0.87,
"learning_rate": 1.1311118031765806e-05,
"loss": 0.1857,
"step": 11160
},
{
"epoch": 0.87,
"learning_rate": 1.130333229523513e-05,
"loss": 0.1891,
"step": 11170
},
{
"epoch": 0.87,
"learning_rate": 1.1295546558704453e-05,
"loss": 0.2172,
"step": 11180
},
{
"epoch": 0.87,
"learning_rate": 1.1287760822173778e-05,
"loss": 0.2251,
"step": 11190
},
{
"epoch": 0.87,
"learning_rate": 1.1279975085643102e-05,
"loss": 0.2082,
"step": 11200
},
{
"epoch": 0.87,
"learning_rate": 1.1272189349112427e-05,
"loss": 0.1885,
"step": 11210
},
{
"epoch": 0.87,
"learning_rate": 1.126440361258175e-05,
"loss": 0.2178,
"step": 11220
},
{
"epoch": 0.87,
"learning_rate": 1.1256617876051074e-05,
"loss": 0.1842,
"step": 11230
},
{
"epoch": 0.88,
"learning_rate": 1.12488321395204e-05,
"loss": 0.221,
"step": 11240
},
{
"epoch": 0.88,
"learning_rate": 1.1241046402989723e-05,
"loss": 0.2308,
"step": 11250
},
{
"epoch": 0.88,
"learning_rate": 1.1233260666459047e-05,
"loss": 0.178,
"step": 11260
},
{
"epoch": 0.88,
"learning_rate": 1.1225474929928372e-05,
"loss": 0.266,
"step": 11270
},
{
"epoch": 0.88,
"learning_rate": 1.1217689193397696e-05,
"loss": 0.2185,
"step": 11280
},
{
"epoch": 0.88,
"learning_rate": 1.120990345686702e-05,
"loss": 0.1754,
"step": 11290
},
{
"epoch": 0.88,
"learning_rate": 1.1202117720336345e-05,
"loss": 0.181,
"step": 11300
},
{
"epoch": 0.88,
"learning_rate": 1.1194331983805668e-05,
"loss": 0.2556,
"step": 11310
},
{
"epoch": 0.88,
"learning_rate": 1.1186546247274994e-05,
"loss": 0.1735,
"step": 11320
},
{
"epoch": 0.88,
"learning_rate": 1.1178760510744317e-05,
"loss": 0.1829,
"step": 11330
},
{
"epoch": 0.88,
"learning_rate": 1.117097477421364e-05,
"loss": 0.2738,
"step": 11340
},
{
"epoch": 0.88,
"learning_rate": 1.1163189037682966e-05,
"loss": 0.2314,
"step": 11350
},
{
"epoch": 0.88,
"learning_rate": 1.115540330115229e-05,
"loss": 0.2023,
"step": 11360
},
{
"epoch": 0.89,
"learning_rate": 1.1147617564621613e-05,
"loss": 0.2612,
"step": 11370
},
{
"epoch": 0.89,
"learning_rate": 1.1139831828090939e-05,
"loss": 0.298,
"step": 11380
},
{
"epoch": 0.89,
"learning_rate": 1.1132046091560262e-05,
"loss": 0.2385,
"step": 11390
},
{
"epoch": 0.89,
"learning_rate": 1.1124260355029586e-05,
"loss": 0.2713,
"step": 11400
},
{
"epoch": 0.89,
"learning_rate": 1.1116474618498911e-05,
"loss": 0.1887,
"step": 11410
},
{
"epoch": 0.89,
"learning_rate": 1.1108688881968235e-05,
"loss": 0.2056,
"step": 11420
},
{
"epoch": 0.89,
"learning_rate": 1.1100903145437558e-05,
"loss": 0.2475,
"step": 11430
},
{
"epoch": 0.89,
"learning_rate": 1.1093117408906884e-05,
"loss": 0.1947,
"step": 11440
},
{
"epoch": 0.89,
"learning_rate": 1.1085331672376207e-05,
"loss": 0.2345,
"step": 11450
},
{
"epoch": 0.89,
"learning_rate": 1.1077545935845533e-05,
"loss": 0.2265,
"step": 11460
},
{
"epoch": 0.89,
"learning_rate": 1.1069760199314856e-05,
"loss": 0.2264,
"step": 11470
},
{
"epoch": 0.89,
"learning_rate": 1.106197446278418e-05,
"loss": 0.1302,
"step": 11480
},
{
"epoch": 0.89,
"learning_rate": 1.1054188726253505e-05,
"loss": 0.2255,
"step": 11490
},
{
"epoch": 0.9,
"learning_rate": 1.1046402989722829e-05,
"loss": 0.1996,
"step": 11500
},
{
"epoch": 0.9,
"learning_rate": 1.1038617253192152e-05,
"loss": 0.1856,
"step": 11510
},
{
"epoch": 0.9,
"learning_rate": 1.1030831516661478e-05,
"loss": 0.2494,
"step": 11520
},
{
"epoch": 0.9,
"learning_rate": 1.1023045780130801e-05,
"loss": 0.1817,
"step": 11530
},
{
"epoch": 0.9,
"learning_rate": 1.1015260043600125e-05,
"loss": 0.2428,
"step": 11540
},
{
"epoch": 0.9,
"learning_rate": 1.100747430706945e-05,
"loss": 0.1772,
"step": 11550
},
{
"epoch": 0.9,
"learning_rate": 1.0999688570538774e-05,
"loss": 0.1547,
"step": 11560
},
{
"epoch": 0.9,
"learning_rate": 1.0991902834008099e-05,
"loss": 0.1982,
"step": 11570
},
{
"epoch": 0.9,
"learning_rate": 1.0984117097477423e-05,
"loss": 0.1974,
"step": 11580
},
{
"epoch": 0.9,
"learning_rate": 1.0976331360946746e-05,
"loss": 0.2009,
"step": 11590
},
{
"epoch": 0.9,
"learning_rate": 1.0968545624416071e-05,
"loss": 0.1843,
"step": 11600
},
{
"epoch": 0.9,
"learning_rate": 1.0960759887885395e-05,
"loss": 0.2159,
"step": 11610
},
{
"epoch": 0.9,
"learning_rate": 1.0952974151354719e-05,
"loss": 0.2529,
"step": 11620
},
{
"epoch": 0.91,
"learning_rate": 1.0945188414824044e-05,
"loss": 0.1819,
"step": 11630
},
{
"epoch": 0.91,
"learning_rate": 1.0937402678293368e-05,
"loss": 0.1801,
"step": 11640
},
{
"epoch": 0.91,
"learning_rate": 1.0929616941762691e-05,
"loss": 0.1975,
"step": 11650
},
{
"epoch": 0.91,
"learning_rate": 1.0921831205232017e-05,
"loss": 0.2526,
"step": 11660
},
{
"epoch": 0.91,
"learning_rate": 1.091404546870134e-05,
"loss": 0.1886,
"step": 11670
},
{
"epoch": 0.91,
"learning_rate": 1.0906259732170664e-05,
"loss": 0.2286,
"step": 11680
},
{
"epoch": 0.91,
"learning_rate": 1.0898473995639989e-05,
"loss": 0.1451,
"step": 11690
},
{
"epoch": 0.91,
"learning_rate": 1.0890688259109313e-05,
"loss": 0.2952,
"step": 11700
},
{
"epoch": 0.91,
"learning_rate": 1.0882902522578638e-05,
"loss": 0.2533,
"step": 11710
},
{
"epoch": 0.91,
"learning_rate": 1.0875116786047962e-05,
"loss": 0.2076,
"step": 11720
},
{
"epoch": 0.91,
"learning_rate": 1.0867331049517285e-05,
"loss": 0.1807,
"step": 11730
},
{
"epoch": 0.91,
"learning_rate": 1.085954531298661e-05,
"loss": 0.2167,
"step": 11740
},
{
"epoch": 0.91,
"learning_rate": 1.0851759576455934e-05,
"loss": 0.2054,
"step": 11750
},
{
"epoch": 0.92,
"learning_rate": 1.0843973839925258e-05,
"loss": 0.2179,
"step": 11760
},
{
"epoch": 0.92,
"learning_rate": 1.0836188103394583e-05,
"loss": 0.2269,
"step": 11770
},
{
"epoch": 0.92,
"learning_rate": 1.0828402366863907e-05,
"loss": 0.166,
"step": 11780
},
{
"epoch": 0.92,
"learning_rate": 1.082061663033323e-05,
"loss": 0.2171,
"step": 11790
},
{
"epoch": 0.92,
"learning_rate": 1.0812830893802555e-05,
"loss": 0.1731,
"step": 11800
},
{
"epoch": 0.92,
"learning_rate": 1.0805045157271879e-05,
"loss": 0.2032,
"step": 11810
},
{
"epoch": 0.92,
"learning_rate": 1.0797259420741204e-05,
"loss": 0.1816,
"step": 11820
},
{
"epoch": 0.92,
"learning_rate": 1.0789473684210528e-05,
"loss": 0.1449,
"step": 11830
},
{
"epoch": 0.92,
"learning_rate": 1.0781687947679852e-05,
"loss": 0.275,
"step": 11840
},
{
"epoch": 0.92,
"learning_rate": 1.0773902211149177e-05,
"loss": 0.2051,
"step": 11850
},
{
"epoch": 0.92,
"learning_rate": 1.07661164746185e-05,
"loss": 0.2313,
"step": 11860
},
{
"epoch": 0.92,
"learning_rate": 1.0758330738087824e-05,
"loss": 0.218,
"step": 11870
},
{
"epoch": 0.92,
"learning_rate": 1.075054500155715e-05,
"loss": 0.2417,
"step": 11880
},
{
"epoch": 0.93,
"learning_rate": 1.0742759265026473e-05,
"loss": 0.2445,
"step": 11890
},
{
"epoch": 0.93,
"learning_rate": 1.0734973528495795e-05,
"loss": 0.1577,
"step": 11900
},
{
"epoch": 0.93,
"learning_rate": 1.0727187791965122e-05,
"loss": 0.1642,
"step": 11910
},
{
"epoch": 0.93,
"learning_rate": 1.0719402055434445e-05,
"loss": 0.1732,
"step": 11920
},
{
"epoch": 0.93,
"learning_rate": 1.0711616318903767e-05,
"loss": 0.2135,
"step": 11930
},
{
"epoch": 0.93,
"learning_rate": 1.0703830582373094e-05,
"loss": 0.2901,
"step": 11940
},
{
"epoch": 0.93,
"learning_rate": 1.0696044845842416e-05,
"loss": 0.205,
"step": 11950
},
{
"epoch": 0.93,
"learning_rate": 1.0688259109311743e-05,
"loss": 0.1833,
"step": 11960
},
{
"epoch": 0.93,
"learning_rate": 1.0680473372781067e-05,
"loss": 0.1664,
"step": 11970
},
{
"epoch": 0.93,
"learning_rate": 1.0672687636250389e-05,
"loss": 0.1935,
"step": 11980
},
{
"epoch": 0.93,
"learning_rate": 1.0664901899719716e-05,
"loss": 0.2205,
"step": 11990
},
{
"epoch": 0.93,
"learning_rate": 1.0657116163189038e-05,
"loss": 0.1953,
"step": 12000
},
{
"epoch": 0.94,
"learning_rate": 1.0649330426658361e-05,
"loss": 0.1638,
"step": 12010
},
{
"epoch": 0.94,
"learning_rate": 1.0641544690127688e-05,
"loss": 0.2432,
"step": 12020
},
{
"epoch": 0.94,
"learning_rate": 1.063375895359701e-05,
"loss": 0.1405,
"step": 12030
},
{
"epoch": 0.94,
"learning_rate": 1.0625973217066334e-05,
"loss": 0.1838,
"step": 12040
},
{
"epoch": 0.94,
"learning_rate": 1.0618187480535659e-05,
"loss": 0.2223,
"step": 12050
},
{
"epoch": 0.94,
"learning_rate": 1.0610401744004983e-05,
"loss": 0.1746,
"step": 12060
},
{
"epoch": 0.94,
"learning_rate": 1.060261600747431e-05,
"loss": 0.2268,
"step": 12070
},
{
"epoch": 0.94,
"learning_rate": 1.0594830270943632e-05,
"loss": 0.1739,
"step": 12080
},
{
"epoch": 0.94,
"learning_rate": 1.0587044534412955e-05,
"loss": 0.1762,
"step": 12090
},
{
"epoch": 0.94,
"learning_rate": 1.057925879788228e-05,
"loss": 0.2067,
"step": 12100
},
{
"epoch": 0.94,
"learning_rate": 1.0571473061351604e-05,
"loss": 0.1499,
"step": 12110
},
{
"epoch": 0.94,
"learning_rate": 1.0563687324820928e-05,
"loss": 0.1828,
"step": 12120
},
{
"epoch": 0.94,
"learning_rate": 1.0555901588290253e-05,
"loss": 0.1995,
"step": 12130
},
{
"epoch": 0.95,
"learning_rate": 1.0548115851759577e-05,
"loss": 0.2062,
"step": 12140
},
{
"epoch": 0.95,
"learning_rate": 1.05403301152289e-05,
"loss": 0.2472,
"step": 12150
},
{
"epoch": 0.95,
"learning_rate": 1.0532544378698226e-05,
"loss": 0.2155,
"step": 12160
},
{
"epoch": 0.95,
"learning_rate": 1.052475864216755e-05,
"loss": 0.2805,
"step": 12170
},
{
"epoch": 0.95,
"learning_rate": 1.0516972905636873e-05,
"loss": 0.1677,
"step": 12180
},
{
"epoch": 0.95,
"learning_rate": 1.0509187169106198e-05,
"loss": 0.1805,
"step": 12190
},
{
"epoch": 0.95,
"learning_rate": 1.0501401432575522e-05,
"loss": 0.2232,
"step": 12200
},
{
"epoch": 0.95,
"learning_rate": 1.0493615696044847e-05,
"loss": 0.2086,
"step": 12210
},
{
"epoch": 0.95,
"learning_rate": 1.048582995951417e-05,
"loss": 0.1905,
"step": 12220
},
{
"epoch": 0.95,
"learning_rate": 1.0478044222983494e-05,
"loss": 0.2523,
"step": 12230
},
{
"epoch": 0.95,
"learning_rate": 1.047025848645282e-05,
"loss": 0.1813,
"step": 12240
},
{
"epoch": 0.95,
"learning_rate": 1.0462472749922143e-05,
"loss": 0.1759,
"step": 12250
},
{
"epoch": 0.95,
"learning_rate": 1.0454687013391467e-05,
"loss": 0.1958,
"step": 12260
},
{
"epoch": 0.96,
"learning_rate": 1.0446901276860792e-05,
"loss": 0.143,
"step": 12270
},
{
"epoch": 0.96,
"learning_rate": 1.0439115540330116e-05,
"loss": 0.2086,
"step": 12280
},
{
"epoch": 0.96,
"learning_rate": 1.043132980379944e-05,
"loss": 0.2147,
"step": 12290
},
{
"epoch": 0.96,
"learning_rate": 1.0423544067268765e-05,
"loss": 0.2334,
"step": 12300
},
{
"epoch": 0.96,
"learning_rate": 1.0415758330738088e-05,
"loss": 0.1718,
"step": 12310
},
{
"epoch": 0.96,
"learning_rate": 1.0407972594207413e-05,
"loss": 0.2122,
"step": 12320
},
{
"epoch": 0.96,
"learning_rate": 1.0400186857676737e-05,
"loss": 0.2273,
"step": 12330
},
{
"epoch": 0.96,
"learning_rate": 1.039240112114606e-05,
"loss": 0.1885,
"step": 12340
},
{
"epoch": 0.96,
"learning_rate": 1.0384615384615386e-05,
"loss": 0.217,
"step": 12350
},
{
"epoch": 0.96,
"learning_rate": 1.037682964808471e-05,
"loss": 0.2304,
"step": 12360
},
{
"epoch": 0.96,
"learning_rate": 1.0369043911554033e-05,
"loss": 0.178,
"step": 12370
},
{
"epoch": 0.96,
"learning_rate": 1.0361258175023358e-05,
"loss": 0.1575,
"step": 12380
},
{
"epoch": 0.96,
"learning_rate": 1.0353472438492682e-05,
"loss": 0.227,
"step": 12390
},
{
"epoch": 0.97,
"learning_rate": 1.0345686701962006e-05,
"loss": 0.2079,
"step": 12400
},
{
"epoch": 0.97,
"learning_rate": 1.0337900965431331e-05,
"loss": 0.1606,
"step": 12410
},
{
"epoch": 0.97,
"learning_rate": 1.0330115228900655e-05,
"loss": 0.2617,
"step": 12420
},
{
"epoch": 0.97,
"learning_rate": 1.0322329492369978e-05,
"loss": 0.1823,
"step": 12430
},
{
"epoch": 0.97,
"learning_rate": 1.0314543755839303e-05,
"loss": 0.1926,
"step": 12440
},
{
"epoch": 0.97,
"learning_rate": 1.0306758019308627e-05,
"loss": 0.183,
"step": 12450
},
{
"epoch": 0.97,
"learning_rate": 1.0298972282777952e-05,
"loss": 0.1285,
"step": 12460
},
{
"epoch": 0.97,
"learning_rate": 1.0291186546247276e-05,
"loss": 0.1676,
"step": 12470
},
{
"epoch": 0.97,
"learning_rate": 1.02834008097166e-05,
"loss": 0.2193,
"step": 12480
},
{
"epoch": 0.97,
"learning_rate": 1.0275615073185925e-05,
"loss": 0.1855,
"step": 12490
},
{
"epoch": 0.97,
"learning_rate": 1.0267829336655248e-05,
"loss": 0.1543,
"step": 12500
},
{
"epoch": 0.97,
"learning_rate": 1.0260043600124572e-05,
"loss": 0.1837,
"step": 12510
},
{
"epoch": 0.97,
"learning_rate": 1.0252257863593897e-05,
"loss": 0.2156,
"step": 12520
},
{
"epoch": 0.98,
"learning_rate": 1.0244472127063221e-05,
"loss": 0.2071,
"step": 12530
},
{
"epoch": 0.98,
"learning_rate": 1.0236686390532545e-05,
"loss": 0.2326,
"step": 12540
},
{
"epoch": 0.98,
"learning_rate": 1.022890065400187e-05,
"loss": 0.1803,
"step": 12550
},
{
"epoch": 0.98,
"learning_rate": 1.0221114917471193e-05,
"loss": 0.206,
"step": 12560
},
{
"epoch": 0.98,
"learning_rate": 1.0213329180940519e-05,
"loss": 0.2131,
"step": 12570
},
{
"epoch": 0.98,
"learning_rate": 1.0205543444409842e-05,
"loss": 0.2522,
"step": 12580
},
{
"epoch": 0.98,
"learning_rate": 1.0197757707879166e-05,
"loss": 0.1994,
"step": 12590
},
{
"epoch": 0.98,
"learning_rate": 1.0189971971348491e-05,
"loss": 0.1764,
"step": 12600
},
{
"epoch": 0.98,
"learning_rate": 1.0182186234817815e-05,
"loss": 0.1961,
"step": 12610
},
{
"epoch": 0.98,
"learning_rate": 1.0174400498287139e-05,
"loss": 0.2062,
"step": 12620
},
{
"epoch": 0.98,
"learning_rate": 1.0166614761756464e-05,
"loss": 0.1866,
"step": 12630
},
{
"epoch": 0.98,
"learning_rate": 1.0158829025225787e-05,
"loss": 0.2196,
"step": 12640
},
{
"epoch": 0.98,
"learning_rate": 1.0151043288695111e-05,
"loss": 0.1873,
"step": 12650
},
{
"epoch": 0.99,
"learning_rate": 1.0143257552164436e-05,
"loss": 0.1491,
"step": 12660
},
{
"epoch": 0.99,
"learning_rate": 1.013547181563376e-05,
"loss": 0.2145,
"step": 12670
},
{
"epoch": 0.99,
"learning_rate": 1.0127686079103084e-05,
"loss": 0.2338,
"step": 12680
},
{
"epoch": 0.99,
"learning_rate": 1.0119900342572409e-05,
"loss": 0.1597,
"step": 12690
},
{
"epoch": 0.99,
"learning_rate": 1.0112114606041732e-05,
"loss": 0.1908,
"step": 12700
},
{
"epoch": 0.99,
"learning_rate": 1.0104328869511058e-05,
"loss": 0.1368,
"step": 12710
},
{
"epoch": 0.99,
"learning_rate": 1.0096543132980381e-05,
"loss": 0.2238,
"step": 12720
},
{
"epoch": 0.99,
"learning_rate": 1.0088757396449705e-05,
"loss": 0.1706,
"step": 12730
},
{
"epoch": 0.99,
"learning_rate": 1.008097165991903e-05,
"loss": 0.1616,
"step": 12740
},
{
"epoch": 0.99,
"learning_rate": 1.0073185923388354e-05,
"loss": 0.1904,
"step": 12750
},
{
"epoch": 0.99,
"learning_rate": 1.0065400186857677e-05,
"loss": 0.2075,
"step": 12760
},
{
"epoch": 0.99,
"learning_rate": 1.0057614450327003e-05,
"loss": 0.1753,
"step": 12770
},
{
"epoch": 1.0,
"learning_rate": 1.0049828713796326e-05,
"loss": 0.2173,
"step": 12780
},
{
"epoch": 1.0,
"learning_rate": 1.004204297726565e-05,
"loss": 0.1809,
"step": 12790
},
{
"epoch": 1.0,
"learning_rate": 1.0034257240734975e-05,
"loss": 0.1903,
"step": 12800
},
{
"epoch": 1.0,
"learning_rate": 1.0026471504204299e-05,
"loss": 0.1887,
"step": 12810
},
{
"epoch": 1.0,
"learning_rate": 1.0018685767673624e-05,
"loss": 0.203,
"step": 12820
},
{
"epoch": 1.0,
"learning_rate": 1.0010900031142948e-05,
"loss": 0.2308,
"step": 12830
},
{
"epoch": 1.0,
"learning_rate": 1.0003114294612271e-05,
"loss": 0.1726,
"step": 12840
},
{
"epoch": 1.0,
"learning_rate": 9.995328558081595e-06,
"loss": 0.1402,
"step": 12850
},
{
"epoch": 1.0,
"learning_rate": 9.98754282155092e-06,
"loss": 0.0868,
"step": 12860
},
{
"epoch": 1.0,
"learning_rate": 9.979757085020244e-06,
"loss": 0.1362,
"step": 12870
},
{
"epoch": 1.0,
"learning_rate": 9.971971348489567e-06,
"loss": 0.0793,
"step": 12880
},
{
"epoch": 1.0,
"learning_rate": 9.964185611958893e-06,
"loss": 0.1042,
"step": 12890
},
{
"epoch": 1.0,
"learning_rate": 9.956399875428216e-06,
"loss": 0.0904,
"step": 12900
},
{
"epoch": 1.01,
"learning_rate": 9.948614138897542e-06,
"loss": 0.1634,
"step": 12910
},
{
"epoch": 1.01,
"learning_rate": 9.940828402366864e-06,
"loss": 0.1025,
"step": 12920
},
{
"epoch": 1.01,
"learning_rate": 9.933042665836189e-06,
"loss": 0.1222,
"step": 12930
},
{
"epoch": 1.01,
"learning_rate": 9.925256929305514e-06,
"loss": 0.1175,
"step": 12940
},
{
"epoch": 1.01,
"learning_rate": 9.917471192774836e-06,
"loss": 0.084,
"step": 12950
},
{
"epoch": 1.01,
"learning_rate": 9.909685456244161e-06,
"loss": 0.0964,
"step": 12960
},
{
"epoch": 1.01,
"learning_rate": 9.901899719713485e-06,
"loss": 0.0858,
"step": 12970
},
{
"epoch": 1.01,
"learning_rate": 9.89411398318281e-06,
"loss": 0.117,
"step": 12980
},
{
"epoch": 1.01,
"learning_rate": 9.886328246652134e-06,
"loss": 0.089,
"step": 12990
},
{
"epoch": 1.01,
"learning_rate": 9.878542510121458e-06,
"loss": 0.0906,
"step": 13000
},
{
"epoch": 1.01,
"learning_rate": 9.870756773590783e-06,
"loss": 0.0955,
"step": 13010
},
{
"epoch": 1.01,
"learning_rate": 9.862971037060106e-06,
"loss": 0.0976,
"step": 13020
},
{
"epoch": 1.01,
"learning_rate": 9.85518530052943e-06,
"loss": 0.1087,
"step": 13030
},
{
"epoch": 1.02,
"learning_rate": 9.847399563998755e-06,
"loss": 0.1139,
"step": 13040
},
{
"epoch": 1.02,
"learning_rate": 9.839613827468079e-06,
"loss": 0.1174,
"step": 13050
},
{
"epoch": 1.02,
"learning_rate": 9.831828090937403e-06,
"loss": 0.1103,
"step": 13060
},
{
"epoch": 1.02,
"learning_rate": 9.824042354406728e-06,
"loss": 0.148,
"step": 13070
},
{
"epoch": 1.02,
"learning_rate": 9.816256617876051e-06,
"loss": 0.0997,
"step": 13080
},
{
"epoch": 1.02,
"learning_rate": 9.808470881345377e-06,
"loss": 0.0874,
"step": 13090
},
{
"epoch": 1.02,
"learning_rate": 9.8006851448147e-06,
"loss": 0.1347,
"step": 13100
},
{
"epoch": 1.02,
"learning_rate": 9.792899408284024e-06,
"loss": 0.1064,
"step": 13110
},
{
"epoch": 1.02,
"learning_rate": 9.78511367175335e-06,
"loss": 0.09,
"step": 13120
},
{
"epoch": 1.02,
"learning_rate": 9.777327935222673e-06,
"loss": 0.1006,
"step": 13130
},
{
"epoch": 1.02,
"learning_rate": 9.769542198691996e-06,
"loss": 0.0901,
"step": 13140
},
{
"epoch": 1.02,
"learning_rate": 9.761756462161322e-06,
"loss": 0.0818,
"step": 13150
},
{
"epoch": 1.02,
"learning_rate": 9.753970725630645e-06,
"loss": 0.1102,
"step": 13160
},
{
"epoch": 1.03,
"learning_rate": 9.746184989099969e-06,
"loss": 0.1036,
"step": 13170
},
{
"epoch": 1.03,
"learning_rate": 9.738399252569294e-06,
"loss": 0.0712,
"step": 13180
},
{
"epoch": 1.03,
"learning_rate": 9.730613516038618e-06,
"loss": 0.1214,
"step": 13190
},
{
"epoch": 1.03,
"learning_rate": 9.722827779507941e-06,
"loss": 0.0889,
"step": 13200
},
{
"epoch": 1.03,
"learning_rate": 9.715042042977267e-06,
"loss": 0.1578,
"step": 13210
},
{
"epoch": 1.03,
"learning_rate": 9.70725630644659e-06,
"loss": 0.1033,
"step": 13220
},
{
"epoch": 1.03,
"learning_rate": 9.699470569915916e-06,
"loss": 0.0833,
"step": 13230
},
{
"epoch": 1.03,
"learning_rate": 9.69168483338524e-06,
"loss": 0.0876,
"step": 13240
},
{
"epoch": 1.03,
"learning_rate": 9.683899096854563e-06,
"loss": 0.0916,
"step": 13250
},
{
"epoch": 1.03,
"learning_rate": 9.676113360323888e-06,
"loss": 0.0962,
"step": 13260
},
{
"epoch": 1.03,
"learning_rate": 9.668327623793212e-06,
"loss": 0.075,
"step": 13270
},
{
"epoch": 1.03,
"learning_rate": 9.660541887262535e-06,
"loss": 0.1243,
"step": 13280
},
{
"epoch": 1.03,
"learning_rate": 9.65275615073186e-06,
"loss": 0.1184,
"step": 13290
},
{
"epoch": 1.04,
"learning_rate": 9.644970414201184e-06,
"loss": 0.0915,
"step": 13300
},
{
"epoch": 1.04,
"learning_rate": 9.637184677670508e-06,
"loss": 0.0818,
"step": 13310
},
{
"epoch": 1.04,
"learning_rate": 9.629398941139833e-06,
"loss": 0.0907,
"step": 13320
},
{
"epoch": 1.04,
"learning_rate": 9.621613204609157e-06,
"loss": 0.1174,
"step": 13330
},
{
"epoch": 1.04,
"learning_rate": 9.613827468078482e-06,
"loss": 0.0875,
"step": 13340
},
{
"epoch": 1.04,
"learning_rate": 9.606041731547806e-06,
"loss": 0.1012,
"step": 13350
},
{
"epoch": 1.04,
"learning_rate": 9.59825599501713e-06,
"loss": 0.1012,
"step": 13360
},
{
"epoch": 1.04,
"learning_rate": 9.590470258486455e-06,
"loss": 0.1122,
"step": 13370
},
{
"epoch": 1.04,
"learning_rate": 9.582684521955777e-06,
"loss": 0.1086,
"step": 13380
},
{
"epoch": 1.04,
"learning_rate": 9.574898785425102e-06,
"loss": 0.1006,
"step": 13390
},
{
"epoch": 1.04,
"learning_rate": 9.567113048894427e-06,
"loss": 0.0776,
"step": 13400
},
{
"epoch": 1.04,
"learning_rate": 9.55932731236375e-06,
"loss": 0.0899,
"step": 13410
},
{
"epoch": 1.04,
"learning_rate": 9.551541575833074e-06,
"loss": 0.1003,
"step": 13420
},
{
"epoch": 1.05,
"learning_rate": 9.543755839302398e-06,
"loss": 0.0925,
"step": 13430
},
{
"epoch": 1.05,
"learning_rate": 9.535970102771723e-06,
"loss": 0.1014,
"step": 13440
},
{
"epoch": 1.05,
"learning_rate": 9.528184366241047e-06,
"loss": 0.098,
"step": 13450
},
{
"epoch": 1.05,
"learning_rate": 9.52039862971037e-06,
"loss": 0.1273,
"step": 13460
},
{
"epoch": 1.05,
"learning_rate": 9.512612893179696e-06,
"loss": 0.101,
"step": 13470
},
{
"epoch": 1.05,
"learning_rate": 9.50482715664902e-06,
"loss": 0.0918,
"step": 13480
},
{
"epoch": 1.05,
"learning_rate": 9.497041420118343e-06,
"loss": 0.1491,
"step": 13490
},
{
"epoch": 1.05,
"learning_rate": 9.489255683587668e-06,
"loss": 0.096,
"step": 13500
},
{
"epoch": 1.05,
"learning_rate": 9.481469947056992e-06,
"loss": 0.1107,
"step": 13510
},
{
"epoch": 1.05,
"learning_rate": 9.473684210526315e-06,
"loss": 0.0973,
"step": 13520
},
{
"epoch": 1.05,
"learning_rate": 9.46589847399564e-06,
"loss": 0.1056,
"step": 13530
},
{
"epoch": 1.05,
"learning_rate": 9.458112737464964e-06,
"loss": 0.1128,
"step": 13540
},
{
"epoch": 1.05,
"learning_rate": 9.45032700093429e-06,
"loss": 0.1049,
"step": 13550
},
{
"epoch": 1.06,
"learning_rate": 9.442541264403613e-06,
"loss": 0.1248,
"step": 13560
},
{
"epoch": 1.06,
"learning_rate": 9.434755527872937e-06,
"loss": 0.1062,
"step": 13570
},
{
"epoch": 1.06,
"learning_rate": 9.426969791342262e-06,
"loss": 0.1301,
"step": 13580
},
{
"epoch": 1.06,
"learning_rate": 9.419184054811586e-06,
"loss": 0.0943,
"step": 13590
},
{
"epoch": 1.06,
"learning_rate": 9.41139831828091e-06,
"loss": 0.099,
"step": 13600
},
{
"epoch": 1.06,
"learning_rate": 9.403612581750235e-06,
"loss": 0.0961,
"step": 13610
},
{
"epoch": 1.06,
"learning_rate": 9.395826845219558e-06,
"loss": 0.0944,
"step": 13620
},
{
"epoch": 1.06,
"learning_rate": 9.388041108688882e-06,
"loss": 0.0837,
"step": 13630
},
{
"epoch": 1.06,
"learning_rate": 9.380255372158207e-06,
"loss": 0.117,
"step": 13640
},
{
"epoch": 1.06,
"learning_rate": 9.37246963562753e-06,
"loss": 0.1119,
"step": 13650
},
{
"epoch": 1.06,
"learning_rate": 9.364683899096856e-06,
"loss": 0.1023,
"step": 13660
},
{
"epoch": 1.06,
"learning_rate": 9.35689816256618e-06,
"loss": 0.1201,
"step": 13670
},
{
"epoch": 1.07,
"learning_rate": 9.349112426035503e-06,
"loss": 0.0881,
"step": 13680
},
{
"epoch": 1.07,
"learning_rate": 9.341326689504829e-06,
"loss": 0.1116,
"step": 13690
},
{
"epoch": 1.07,
"learning_rate": 9.333540952974152e-06,
"loss": 0.1148,
"step": 13700
},
{
"epoch": 1.07,
"learning_rate": 9.325755216443476e-06,
"loss": 0.1727,
"step": 13710
},
{
"epoch": 1.07,
"learning_rate": 9.317969479912801e-06,
"loss": 0.0856,
"step": 13720
},
{
"epoch": 1.07,
"learning_rate": 9.310183743382125e-06,
"loss": 0.109,
"step": 13730
},
{
"epoch": 1.07,
"learning_rate": 9.302398006851448e-06,
"loss": 0.1113,
"step": 13740
},
{
"epoch": 1.07,
"learning_rate": 9.294612270320774e-06,
"loss": 0.1035,
"step": 13750
},
{
"epoch": 1.07,
"learning_rate": 9.286826533790097e-06,
"loss": 0.1176,
"step": 13760
},
{
"epoch": 1.07,
"learning_rate": 9.279040797259421e-06,
"loss": 0.1036,
"step": 13770
},
{
"epoch": 1.07,
"learning_rate": 9.271255060728746e-06,
"loss": 0.0757,
"step": 13780
},
{
"epoch": 1.07,
"learning_rate": 9.26346932419807e-06,
"loss": 0.0792,
"step": 13790
},
{
"epoch": 1.07,
"learning_rate": 9.255683587667395e-06,
"loss": 0.0927,
"step": 13800
},
{
"epoch": 1.08,
"learning_rate": 9.247897851136719e-06,
"loss": 0.0981,
"step": 13810
},
{
"epoch": 1.08,
"learning_rate": 9.240112114606042e-06,
"loss": 0.0994,
"step": 13820
},
{
"epoch": 1.08,
"learning_rate": 9.232326378075368e-06,
"loss": 0.1099,
"step": 13830
},
{
"epoch": 1.08,
"learning_rate": 9.224540641544691e-06,
"loss": 0.0999,
"step": 13840
},
{
"epoch": 1.08,
"learning_rate": 9.216754905014015e-06,
"loss": 0.0755,
"step": 13850
},
{
"epoch": 1.08,
"learning_rate": 9.20896916848334e-06,
"loss": 0.1006,
"step": 13860
},
{
"epoch": 1.08,
"learning_rate": 9.201183431952664e-06,
"loss": 0.0706,
"step": 13870
},
{
"epoch": 1.08,
"learning_rate": 9.193397695421987e-06,
"loss": 0.0903,
"step": 13880
},
{
"epoch": 1.08,
"learning_rate": 9.185611958891311e-06,
"loss": 0.1197,
"step": 13890
},
{
"epoch": 1.08,
"learning_rate": 9.177826222360636e-06,
"loss": 0.1052,
"step": 13900
},
{
"epoch": 1.08,
"learning_rate": 9.170040485829962e-06,
"loss": 0.0887,
"step": 13910
},
{
"epoch": 1.08,
"learning_rate": 9.162254749299283e-06,
"loss": 0.1081,
"step": 13920
},
{
"epoch": 1.08,
"learning_rate": 9.154469012768609e-06,
"loss": 0.1074,
"step": 13930
},
{
"epoch": 1.09,
"learning_rate": 9.146683276237934e-06,
"loss": 0.072,
"step": 13940
},
{
"epoch": 1.09,
"learning_rate": 9.138897539707256e-06,
"loss": 0.1217,
"step": 13950
},
{
"epoch": 1.09,
"learning_rate": 9.131111803176581e-06,
"loss": 0.0872,
"step": 13960
},
{
"epoch": 1.09,
"learning_rate": 9.123326066645905e-06,
"loss": 0.0789,
"step": 13970
},
{
"epoch": 1.09,
"learning_rate": 9.11554033011523e-06,
"loss": 0.0576,
"step": 13980
},
{
"epoch": 1.09,
"learning_rate": 9.107754593584554e-06,
"loss": 0.0967,
"step": 13990
},
{
"epoch": 1.09,
"learning_rate": 9.099968857053877e-06,
"loss": 0.0982,
"step": 14000
},
{
"epoch": 1.09,
"learning_rate": 9.092183120523203e-06,
"loss": 0.1194,
"step": 14010
},
{
"epoch": 1.09,
"learning_rate": 9.084397383992526e-06,
"loss": 0.0902,
"step": 14020
},
{
"epoch": 1.09,
"learning_rate": 9.07661164746185e-06,
"loss": 0.0964,
"step": 14030
},
{
"epoch": 1.09,
"learning_rate": 9.068825910931175e-06,
"loss": 0.1205,
"step": 14040
},
{
"epoch": 1.09,
"learning_rate": 9.061040174400499e-06,
"loss": 0.1087,
"step": 14050
},
{
"epoch": 1.09,
"learning_rate": 9.053254437869822e-06,
"loss": 0.1053,
"step": 14060
},
{
"epoch": 1.1,
"learning_rate": 9.045468701339148e-06,
"loss": 0.1332,
"step": 14070
},
{
"epoch": 1.1,
"learning_rate": 9.037682964808471e-06,
"loss": 0.1147,
"step": 14080
},
{
"epoch": 1.1,
"learning_rate": 9.029897228277797e-06,
"loss": 0.1198,
"step": 14090
},
{
"epoch": 1.1,
"learning_rate": 9.02211149174712e-06,
"loss": 0.0964,
"step": 14100
},
{
"epoch": 1.1,
"learning_rate": 9.014325755216444e-06,
"loss": 0.0887,
"step": 14110
},
{
"epoch": 1.1,
"learning_rate": 9.006540018685769e-06,
"loss": 0.0665,
"step": 14120
},
{
"epoch": 1.1,
"learning_rate": 8.998754282155093e-06,
"loss": 0.1118,
"step": 14130
},
{
"epoch": 1.1,
"learning_rate": 8.990968545624416e-06,
"loss": 0.1073,
"step": 14140
},
{
"epoch": 1.1,
"learning_rate": 8.983182809093742e-06,
"loss": 0.1199,
"step": 14150
},
{
"epoch": 1.1,
"learning_rate": 8.975397072563065e-06,
"loss": 0.1028,
"step": 14160
},
{
"epoch": 1.1,
"learning_rate": 8.967611336032389e-06,
"loss": 0.0827,
"step": 14170
},
{
"epoch": 1.1,
"learning_rate": 8.959825599501714e-06,
"loss": 0.0956,
"step": 14180
},
{
"epoch": 1.1,
"learning_rate": 8.952039862971038e-06,
"loss": 0.0765,
"step": 14190
},
{
"epoch": 1.11,
"learning_rate": 8.944254126440361e-06,
"loss": 0.0938,
"step": 14200
},
{
"epoch": 1.11,
"learning_rate": 8.936468389909687e-06,
"loss": 0.0877,
"step": 14210
},
{
"epoch": 1.11,
"learning_rate": 8.92868265337901e-06,
"loss": 0.0644,
"step": 14220
},
{
"epoch": 1.11,
"learning_rate": 8.920896916848336e-06,
"loss": 0.1061,
"step": 14230
},
{
"epoch": 1.11,
"learning_rate": 8.913111180317659e-06,
"loss": 0.0974,
"step": 14240
},
{
"epoch": 1.11,
"learning_rate": 8.905325443786983e-06,
"loss": 0.096,
"step": 14250
},
{
"epoch": 1.11,
"learning_rate": 8.897539707256308e-06,
"loss": 0.0791,
"step": 14260
},
{
"epoch": 1.11,
"learning_rate": 8.889753970725632e-06,
"loss": 0.0857,
"step": 14270
},
{
"epoch": 1.11,
"learning_rate": 8.881968234194955e-06,
"loss": 0.1289,
"step": 14280
},
{
"epoch": 1.11,
"learning_rate": 8.87418249766428e-06,
"loss": 0.0815,
"step": 14290
},
{
"epoch": 1.11,
"learning_rate": 8.866396761133604e-06,
"loss": 0.0872,
"step": 14300
},
{
"epoch": 1.11,
"learning_rate": 8.858611024602928e-06,
"loss": 0.1081,
"step": 14310
},
{
"epoch": 1.11,
"learning_rate": 8.850825288072253e-06,
"loss": 0.0825,
"step": 14320
},
{
"epoch": 1.12,
"learning_rate": 8.843039551541577e-06,
"loss": 0.0949,
"step": 14330
},
{
"epoch": 1.12,
"learning_rate": 8.835253815010902e-06,
"loss": 0.1213,
"step": 14340
},
{
"epoch": 1.12,
"learning_rate": 8.827468078480224e-06,
"loss": 0.0725,
"step": 14350
},
{
"epoch": 1.12,
"learning_rate": 8.81968234194955e-06,
"loss": 0.0984,
"step": 14360
},
{
"epoch": 1.12,
"learning_rate": 8.811896605418874e-06,
"loss": 0.1204,
"step": 14370
},
{
"epoch": 1.12,
"learning_rate": 8.804110868888196e-06,
"loss": 0.1016,
"step": 14380
},
{
"epoch": 1.12,
"learning_rate": 8.796325132357522e-06,
"loss": 0.0758,
"step": 14390
},
{
"epoch": 1.12,
"learning_rate": 8.788539395826847e-06,
"loss": 0.0979,
"step": 14400
},
{
"epoch": 1.12,
"learning_rate": 8.78075365929617e-06,
"loss": 0.0934,
"step": 14410
},
{
"epoch": 1.12,
"learning_rate": 8.772967922765494e-06,
"loss": 0.1046,
"step": 14420
},
{
"epoch": 1.12,
"learning_rate": 8.765182186234818e-06,
"loss": 0.0962,
"step": 14430
},
{
"epoch": 1.12,
"learning_rate": 8.757396449704143e-06,
"loss": 0.1194,
"step": 14440
},
{
"epoch": 1.13,
"learning_rate": 8.749610713173467e-06,
"loss": 0.1216,
"step": 14450
},
{
"epoch": 1.13,
"learning_rate": 8.74182497664279e-06,
"loss": 0.1329,
"step": 14460
},
{
"epoch": 1.13,
"learning_rate": 8.734039240112116e-06,
"loss": 0.0969,
"step": 14470
},
{
"epoch": 1.13,
"learning_rate": 8.72625350358144e-06,
"loss": 0.0835,
"step": 14480
},
{
"epoch": 1.13,
"learning_rate": 8.718467767050763e-06,
"loss": 0.1152,
"step": 14490
},
{
"epoch": 1.13,
"learning_rate": 8.710682030520088e-06,
"loss": 0.0784,
"step": 14500
},
{
"epoch": 1.13,
"learning_rate": 8.702896293989412e-06,
"loss": 0.1241,
"step": 14510
},
{
"epoch": 1.13,
"learning_rate": 8.695110557458735e-06,
"loss": 0.1151,
"step": 14520
},
{
"epoch": 1.13,
"learning_rate": 8.68732482092806e-06,
"loss": 0.0946,
"step": 14530
},
{
"epoch": 1.13,
"learning_rate": 8.679539084397384e-06,
"loss": 0.1031,
"step": 14540
},
{
"epoch": 1.13,
"learning_rate": 8.67175334786671e-06,
"loss": 0.1101,
"step": 14550
},
{
"epoch": 1.13,
"learning_rate": 8.663967611336033e-06,
"loss": 0.0964,
"step": 14560
},
{
"epoch": 1.13,
"learning_rate": 8.656181874805357e-06,
"loss": 0.106,
"step": 14570
},
{
"epoch": 1.14,
"learning_rate": 8.648396138274682e-06,
"loss": 0.0775,
"step": 14580
},
{
"epoch": 1.14,
"learning_rate": 8.640610401744006e-06,
"loss": 0.0869,
"step": 14590
},
{
"epoch": 1.14,
"learning_rate": 8.63282466521333e-06,
"loss": 0.096,
"step": 14600
},
{
"epoch": 1.14,
"learning_rate": 8.625038928682655e-06,
"loss": 0.0728,
"step": 14610
},
{
"epoch": 1.14,
"learning_rate": 8.617253192151978e-06,
"loss": 0.0959,
"step": 14620
},
{
"epoch": 1.14,
"learning_rate": 8.609467455621302e-06,
"loss": 0.0824,
"step": 14630
},
{
"epoch": 1.14,
"learning_rate": 8.601681719090627e-06,
"loss": 0.0838,
"step": 14640
},
{
"epoch": 1.14,
"learning_rate": 8.59389598255995e-06,
"loss": 0.1209,
"step": 14650
},
{
"epoch": 1.14,
"learning_rate": 8.586110246029276e-06,
"loss": 0.1263,
"step": 14660
},
{
"epoch": 1.14,
"learning_rate": 8.5783245094986e-06,
"loss": 0.0975,
"step": 14670
},
{
"epoch": 1.14,
"learning_rate": 8.570538772967923e-06,
"loss": 0.0879,
"step": 14680
},
{
"epoch": 1.14,
"learning_rate": 8.562753036437248e-06,
"loss": 0.0806,
"step": 14690
},
{
"epoch": 1.14,
"learning_rate": 8.554967299906572e-06,
"loss": 0.0818,
"step": 14700
},
{
"epoch": 1.15,
"learning_rate": 8.547181563375896e-06,
"loss": 0.1075,
"step": 14710
},
{
"epoch": 1.15,
"learning_rate": 8.539395826845221e-06,
"loss": 0.1059,
"step": 14720
},
{
"epoch": 1.15,
"learning_rate": 8.531610090314545e-06,
"loss": 0.0742,
"step": 14730
},
{
"epoch": 1.15,
"learning_rate": 8.523824353783868e-06,
"loss": 0.1018,
"step": 14740
},
{
"epoch": 1.15,
"learning_rate": 8.516038617253193e-06,
"loss": 0.1284,
"step": 14750
},
{
"epoch": 1.15,
"learning_rate": 8.508252880722517e-06,
"loss": 0.0703,
"step": 14760
},
{
"epoch": 1.15,
"learning_rate": 8.50046714419184e-06,
"loss": 0.1002,
"step": 14770
},
{
"epoch": 1.15,
"learning_rate": 8.492681407661166e-06,
"loss": 0.0786,
"step": 14780
},
{
"epoch": 1.15,
"learning_rate": 8.48489567113049e-06,
"loss": 0.0997,
"step": 14790
},
{
"epoch": 1.15,
"learning_rate": 8.477109934599815e-06,
"loss": 0.0951,
"step": 14800
},
{
"epoch": 1.15,
"learning_rate": 8.469324198069137e-06,
"loss": 0.0839,
"step": 14810
},
{
"epoch": 1.15,
"learning_rate": 8.461538461538462e-06,
"loss": 0.0955,
"step": 14820
},
{
"epoch": 1.15,
"learning_rate": 8.453752725007787e-06,
"loss": 0.11,
"step": 14830
},
{
"epoch": 1.16,
"learning_rate": 8.445966988477111e-06,
"loss": 0.137,
"step": 14840
},
{
"epoch": 1.16,
"learning_rate": 8.438181251946435e-06,
"loss": 0.0971,
"step": 14850
},
{
"epoch": 1.16,
"learning_rate": 8.43039551541576e-06,
"loss": 0.0825,
"step": 14860
},
{
"epoch": 1.16,
"learning_rate": 8.422609778885084e-06,
"loss": 0.0767,
"step": 14870
},
{
"epoch": 1.16,
"learning_rate": 8.414824042354407e-06,
"loss": 0.0661,
"step": 14880
},
{
"epoch": 1.16,
"learning_rate": 8.40703830582373e-06,
"loss": 0.1102,
"step": 14890
},
{
"epoch": 1.16,
"learning_rate": 8.399252569293056e-06,
"loss": 0.1012,
"step": 14900
},
{
"epoch": 1.16,
"learning_rate": 8.391466832762381e-06,
"loss": 0.1119,
"step": 14910
},
{
"epoch": 1.16,
"learning_rate": 8.383681096231703e-06,
"loss": 0.1084,
"step": 14920
},
{
"epoch": 1.16,
"learning_rate": 8.375895359701029e-06,
"loss": 0.1127,
"step": 14930
},
{
"epoch": 1.16,
"learning_rate": 8.368109623170352e-06,
"loss": 0.0896,
"step": 14940
},
{
"epoch": 1.16,
"learning_rate": 8.360323886639676e-06,
"loss": 0.132,
"step": 14950
},
{
"epoch": 1.16,
"learning_rate": 8.352538150109001e-06,
"loss": 0.1157,
"step": 14960
},
{
"epoch": 1.17,
"learning_rate": 8.344752413578325e-06,
"loss": 0.079,
"step": 14970
},
{
"epoch": 1.17,
"learning_rate": 8.33696667704765e-06,
"loss": 0.0762,
"step": 14980
},
{
"epoch": 1.17,
"learning_rate": 8.329180940516974e-06,
"loss": 0.116,
"step": 14990
},
{
"epoch": 1.17,
"learning_rate": 8.321395203986297e-06,
"loss": 0.1088,
"step": 15000
},
{
"epoch": 1.17,
"learning_rate": 8.313609467455622e-06,
"loss": 0.081,
"step": 15010
},
{
"epoch": 1.17,
"learning_rate": 8.305823730924946e-06,
"loss": 0.1092,
"step": 15020
},
{
"epoch": 1.17,
"learning_rate": 8.29803799439427e-06,
"loss": 0.0923,
"step": 15030
},
{
"epoch": 1.17,
"learning_rate": 8.290252257863595e-06,
"loss": 0.0755,
"step": 15040
},
{
"epoch": 1.17,
"learning_rate": 8.282466521332919e-06,
"loss": 0.0827,
"step": 15050
},
{
"epoch": 1.17,
"learning_rate": 8.274680784802242e-06,
"loss": 0.0851,
"step": 15060
},
{
"epoch": 1.17,
"learning_rate": 8.266895048271567e-06,
"loss": 0.1001,
"step": 15070
},
{
"epoch": 1.17,
"learning_rate": 8.259109311740891e-06,
"loss": 0.0813,
"step": 15080
},
{
"epoch": 1.17,
"learning_rate": 8.251323575210216e-06,
"loss": 0.0875,
"step": 15090
},
{
"epoch": 1.18,
"learning_rate": 8.24353783867954e-06,
"loss": 0.0938,
"step": 15100
},
{
"epoch": 1.18,
"learning_rate": 8.235752102148864e-06,
"loss": 0.0932,
"step": 15110
},
{
"epoch": 1.18,
"learning_rate": 8.227966365618189e-06,
"loss": 0.1039,
"step": 15120
},
{
"epoch": 1.18,
"learning_rate": 8.220180629087512e-06,
"loss": 0.1193,
"step": 15130
},
{
"epoch": 1.18,
"learning_rate": 8.212394892556836e-06,
"loss": 0.0989,
"step": 15140
},
{
"epoch": 1.18,
"learning_rate": 8.204609156026161e-06,
"loss": 0.1035,
"step": 15150
},
{
"epoch": 1.18,
"learning_rate": 8.196823419495485e-06,
"loss": 0.1034,
"step": 15160
},
{
"epoch": 1.18,
"learning_rate": 8.189037682964809e-06,
"loss": 0.0728,
"step": 15170
},
{
"epoch": 1.18,
"learning_rate": 8.181251946434134e-06,
"loss": 0.0792,
"step": 15180
},
{
"epoch": 1.18,
"learning_rate": 8.173466209903458e-06,
"loss": 0.0954,
"step": 15190
},
{
"epoch": 1.18,
"learning_rate": 8.165680473372781e-06,
"loss": 0.0944,
"step": 15200
},
{
"epoch": 1.18,
"learning_rate": 8.157894736842106e-06,
"loss": 0.1044,
"step": 15210
},
{
"epoch": 1.18,
"learning_rate": 8.15010900031143e-06,
"loss": 0.0921,
"step": 15220
},
{
"epoch": 1.19,
"learning_rate": 8.142323263780755e-06,
"loss": 0.107,
"step": 15230
},
{
"epoch": 1.19,
"learning_rate": 8.134537527250079e-06,
"loss": 0.0823,
"step": 15240
},
{
"epoch": 1.19,
"learning_rate": 8.126751790719403e-06,
"loss": 0.0961,
"step": 15250
},
{
"epoch": 1.19,
"learning_rate": 8.118966054188728e-06,
"loss": 0.0944,
"step": 15260
},
{
"epoch": 1.19,
"learning_rate": 8.11118031765805e-06,
"loss": 0.1194,
"step": 15270
},
{
"epoch": 1.19,
"learning_rate": 8.103394581127375e-06,
"loss": 0.0987,
"step": 15280
},
{
"epoch": 1.19,
"learning_rate": 8.0956088445967e-06,
"loss": 0.1077,
"step": 15290
},
{
"epoch": 1.19,
"learning_rate": 8.087823108066024e-06,
"loss": 0.0814,
"step": 15300
},
{
"epoch": 1.19,
"learning_rate": 8.080037371535348e-06,
"loss": 0.0841,
"step": 15310
},
{
"epoch": 1.19,
"learning_rate": 8.072251635004673e-06,
"loss": 0.0788,
"step": 15320
},
{
"epoch": 1.19,
"learning_rate": 8.064465898473996e-06,
"loss": 0.099,
"step": 15330
},
{
"epoch": 1.19,
"learning_rate": 8.056680161943322e-06,
"loss": 0.0757,
"step": 15340
},
{
"epoch": 1.2,
"learning_rate": 8.048894425412644e-06,
"loss": 0.0975,
"step": 15350
},
{
"epoch": 1.2,
"learning_rate": 8.041108688881969e-06,
"loss": 0.1129,
"step": 15360
},
{
"epoch": 1.2,
"learning_rate": 8.033322952351294e-06,
"loss": 0.0953,
"step": 15370
},
{
"epoch": 1.2,
"learning_rate": 8.025537215820616e-06,
"loss": 0.0825,
"step": 15380
},
{
"epoch": 1.2,
"learning_rate": 8.017751479289941e-06,
"loss": 0.1078,
"step": 15390
},
{
"epoch": 1.2,
"learning_rate": 8.009965742759265e-06,
"loss": 0.1187,
"step": 15400
},
{
"epoch": 1.2,
"learning_rate": 8.00218000622859e-06,
"loss": 0.0976,
"step": 15410
},
{
"epoch": 1.2,
"learning_rate": 7.994394269697914e-06,
"loss": 0.0899,
"step": 15420
},
{
"epoch": 1.2,
"learning_rate": 7.986608533167238e-06,
"loss": 0.0967,
"step": 15430
},
{
"epoch": 1.2,
"learning_rate": 7.978822796636563e-06,
"loss": 0.0962,
"step": 15440
},
{
"epoch": 1.2,
"learning_rate": 7.971037060105886e-06,
"loss": 0.0989,
"step": 15450
},
{
"epoch": 1.2,
"learning_rate": 7.96325132357521e-06,
"loss": 0.1049,
"step": 15460
},
{
"epoch": 1.2,
"learning_rate": 7.955465587044535e-06,
"loss": 0.0862,
"step": 15470
},
{
"epoch": 1.21,
"learning_rate": 7.947679850513859e-06,
"loss": 0.0736,
"step": 15480
},
{
"epoch": 1.21,
"learning_rate": 7.939894113983183e-06,
"loss": 0.0788,
"step": 15490
},
{
"epoch": 1.21,
"learning_rate": 7.932108377452508e-06,
"loss": 0.1044,
"step": 15500
},
{
"epoch": 1.21,
"learning_rate": 7.924322640921832e-06,
"loss": 0.0755,
"step": 15510
},
{
"epoch": 1.21,
"learning_rate": 7.916536904391155e-06,
"loss": 0.102,
"step": 15520
},
{
"epoch": 1.21,
"learning_rate": 7.90875116786048e-06,
"loss": 0.0904,
"step": 15530
},
{
"epoch": 1.21,
"learning_rate": 7.900965431329804e-06,
"loss": 0.0756,
"step": 15540
},
{
"epoch": 1.21,
"learning_rate": 7.89317969479913e-06,
"loss": 0.133,
"step": 15550
},
{
"epoch": 1.21,
"learning_rate": 7.885393958268453e-06,
"loss": 0.0754,
"step": 15560
},
{
"epoch": 1.21,
"learning_rate": 7.877608221737777e-06,
"loss": 0.0811,
"step": 15570
},
{
"epoch": 1.21,
"learning_rate": 7.869822485207102e-06,
"loss": 0.0692,
"step": 15580
},
{
"epoch": 1.21,
"learning_rate": 7.862036748676425e-06,
"loss": 0.0918,
"step": 15590
},
{
"epoch": 1.21,
"learning_rate": 7.854251012145749e-06,
"loss": 0.0908,
"step": 15600
},
{
"epoch": 1.22,
"learning_rate": 7.846465275615074e-06,
"loss": 0.1203,
"step": 15610
},
{
"epoch": 1.22,
"learning_rate": 7.838679539084398e-06,
"loss": 0.1036,
"step": 15620
},
{
"epoch": 1.22,
"learning_rate": 7.830893802553722e-06,
"loss": 0.089,
"step": 15630
},
{
"epoch": 1.22,
"learning_rate": 7.823108066023047e-06,
"loss": 0.0854,
"step": 15640
},
{
"epoch": 1.22,
"learning_rate": 7.81532232949237e-06,
"loss": 0.0985,
"step": 15650
},
{
"epoch": 1.22,
"learning_rate": 7.807536592961696e-06,
"loss": 0.1006,
"step": 15660
},
{
"epoch": 1.22,
"learning_rate": 7.79975085643102e-06,
"loss": 0.0798,
"step": 15670
},
{
"epoch": 1.22,
"learning_rate": 7.791965119900343e-06,
"loss": 0.0875,
"step": 15680
},
{
"epoch": 1.22,
"learning_rate": 7.784179383369668e-06,
"loss": 0.0901,
"step": 15690
},
{
"epoch": 1.22,
"learning_rate": 7.776393646838992e-06,
"loss": 0.0961,
"step": 15700
},
{
"epoch": 1.22,
"learning_rate": 7.768607910308315e-06,
"loss": 0.0895,
"step": 15710
},
{
"epoch": 1.22,
"learning_rate": 7.76082217377764e-06,
"loss": 0.1057,
"step": 15720
},
{
"epoch": 1.22,
"learning_rate": 7.753036437246964e-06,
"loss": 0.088,
"step": 15730
},
{
"epoch": 1.23,
"learning_rate": 7.745250700716288e-06,
"loss": 0.0826,
"step": 15740
},
{
"epoch": 1.23,
"learning_rate": 7.737464964185613e-06,
"loss": 0.0609,
"step": 15750
},
{
"epoch": 1.23,
"learning_rate": 7.729679227654937e-06,
"loss": 0.1069,
"step": 15760
},
{
"epoch": 1.23,
"learning_rate": 7.72189349112426e-06,
"loss": 0.0939,
"step": 15770
},
{
"epoch": 1.23,
"learning_rate": 7.714107754593586e-06,
"loss": 0.0867,
"step": 15780
},
{
"epoch": 1.23,
"learning_rate": 7.70632201806291e-06,
"loss": 0.0757,
"step": 15790
},
{
"epoch": 1.23,
"learning_rate": 7.698536281532235e-06,
"loss": 0.1131,
"step": 15800
},
{
"epoch": 1.23,
"learning_rate": 7.690750545001557e-06,
"loss": 0.086,
"step": 15810
},
{
"epoch": 1.23,
"learning_rate": 7.682964808470882e-06,
"loss": 0.0866,
"step": 15820
},
{
"epoch": 1.23,
"learning_rate": 7.675179071940207e-06,
"loss": 0.0794,
"step": 15830
},
{
"epoch": 1.23,
"learning_rate": 7.66739333540953e-06,
"loss": 0.064,
"step": 15840
},
{
"epoch": 1.23,
"learning_rate": 7.659607598878854e-06,
"loss": 0.1027,
"step": 15850
},
{
"epoch": 1.23,
"learning_rate": 7.651821862348178e-06,
"loss": 0.09,
"step": 15860
},
{
"epoch": 1.24,
"learning_rate": 7.644036125817503e-06,
"loss": 0.0792,
"step": 15870
},
{
"epoch": 1.24,
"learning_rate": 7.636250389286827e-06,
"loss": 0.1208,
"step": 15880
},
{
"epoch": 1.24,
"learning_rate": 7.628464652756151e-06,
"loss": 0.0938,
"step": 15890
},
{
"epoch": 1.24,
"learning_rate": 7.620678916225476e-06,
"loss": 0.0949,
"step": 15900
},
{
"epoch": 1.24,
"learning_rate": 7.6128931796948e-06,
"loss": 0.094,
"step": 15910
},
{
"epoch": 1.24,
"learning_rate": 7.605107443164124e-06,
"loss": 0.1024,
"step": 15920
},
{
"epoch": 1.24,
"learning_rate": 7.597321706633448e-06,
"loss": 0.1052,
"step": 15930
},
{
"epoch": 1.24,
"learning_rate": 7.589535970102773e-06,
"loss": 0.1041,
"step": 15940
},
{
"epoch": 1.24,
"learning_rate": 7.581750233572096e-06,
"loss": 0.0907,
"step": 15950
},
{
"epoch": 1.24,
"learning_rate": 7.573964497041421e-06,
"loss": 0.1208,
"step": 15960
},
{
"epoch": 1.24,
"learning_rate": 7.566178760510745e-06,
"loss": 0.0868,
"step": 15970
},
{
"epoch": 1.24,
"learning_rate": 7.55839302398007e-06,
"loss": 0.0865,
"step": 15980
},
{
"epoch": 1.24,
"learning_rate": 7.550607287449393e-06,
"loss": 0.0884,
"step": 15990
},
{
"epoch": 1.25,
"learning_rate": 7.542821550918718e-06,
"loss": 0.066,
"step": 16000
},
{
"epoch": 1.25,
"learning_rate": 7.535035814388042e-06,
"loss": 0.0754,
"step": 16010
},
{
"epoch": 1.25,
"learning_rate": 7.527250077857365e-06,
"loss": 0.0815,
"step": 16020
},
{
"epoch": 1.25,
"learning_rate": 7.5194643413266895e-06,
"loss": 0.0702,
"step": 16030
},
{
"epoch": 1.25,
"learning_rate": 7.511678604796015e-06,
"loss": 0.1028,
"step": 16040
},
{
"epoch": 1.25,
"learning_rate": 7.503892868265339e-06,
"loss": 0.0928,
"step": 16050
},
{
"epoch": 1.25,
"learning_rate": 7.496107131734662e-06,
"loss": 0.1022,
"step": 16060
},
{
"epoch": 1.25,
"learning_rate": 7.4883213952039864e-06,
"loss": 0.0959,
"step": 16070
},
{
"epoch": 1.25,
"learning_rate": 7.480535658673312e-06,
"loss": 0.0732,
"step": 16080
},
{
"epoch": 1.25,
"learning_rate": 7.472749922142636e-06,
"loss": 0.0803,
"step": 16090
},
{
"epoch": 1.25,
"learning_rate": 7.464964185611959e-06,
"loss": 0.0852,
"step": 16100
},
{
"epoch": 1.25,
"learning_rate": 7.457178449081283e-06,
"loss": 0.1012,
"step": 16110
},
{
"epoch": 1.26,
"learning_rate": 7.449392712550608e-06,
"loss": 0.0867,
"step": 16120
},
{
"epoch": 1.26,
"learning_rate": 7.4416069760199315e-06,
"loss": 0.075,
"step": 16130
},
{
"epoch": 1.26,
"learning_rate": 7.433821239489256e-06,
"loss": 0.0927,
"step": 16140
},
{
"epoch": 1.26,
"learning_rate": 7.42603550295858e-06,
"loss": 0.111,
"step": 16150
},
{
"epoch": 1.26,
"learning_rate": 7.418249766427905e-06,
"loss": 0.0784,
"step": 16160
},
{
"epoch": 1.26,
"learning_rate": 7.4104640298972284e-06,
"loss": 0.0985,
"step": 16170
},
{
"epoch": 1.26,
"learning_rate": 7.402678293366553e-06,
"loss": 0.0681,
"step": 16180
},
{
"epoch": 1.26,
"learning_rate": 7.394892556835877e-06,
"loss": 0.1078,
"step": 16190
},
{
"epoch": 1.26,
"learning_rate": 7.387106820305201e-06,
"loss": 0.0726,
"step": 16200
},
{
"epoch": 1.26,
"learning_rate": 7.379321083774525e-06,
"loss": 0.1075,
"step": 16210
},
{
"epoch": 1.26,
"learning_rate": 7.37153534724385e-06,
"loss": 0.0938,
"step": 16220
},
{
"epoch": 1.26,
"learning_rate": 7.363749610713174e-06,
"loss": 0.1013,
"step": 16230
},
{
"epoch": 1.26,
"learning_rate": 7.355963874182498e-06,
"loss": 0.1037,
"step": 16240
},
{
"epoch": 1.27,
"learning_rate": 7.348178137651822e-06,
"loss": 0.0574,
"step": 16250
},
{
"epoch": 1.27,
"learning_rate": 7.340392401121147e-06,
"loss": 0.0783,
"step": 16260
},
{
"epoch": 1.27,
"learning_rate": 7.33260666459047e-06,
"loss": 0.0768,
"step": 16270
},
{
"epoch": 1.27,
"learning_rate": 7.324820928059795e-06,
"loss": 0.0784,
"step": 16280
},
{
"epoch": 1.27,
"learning_rate": 7.317035191529119e-06,
"loss": 0.1078,
"step": 16290
},
{
"epoch": 1.27,
"learning_rate": 7.309249454998444e-06,
"loss": 0.0976,
"step": 16300
},
{
"epoch": 1.27,
"learning_rate": 7.301463718467767e-06,
"loss": 0.0803,
"step": 16310
},
{
"epoch": 1.27,
"learning_rate": 7.293677981937092e-06,
"loss": 0.0937,
"step": 16320
},
{
"epoch": 1.27,
"learning_rate": 7.285892245406416e-06,
"loss": 0.1217,
"step": 16330
},
{
"epoch": 1.27,
"learning_rate": 7.278106508875741e-06,
"loss": 0.0702,
"step": 16340
},
{
"epoch": 1.27,
"learning_rate": 7.270320772345064e-06,
"loss": 0.0911,
"step": 16350
},
{
"epoch": 1.27,
"learning_rate": 7.262535035814389e-06,
"loss": 0.097,
"step": 16360
},
{
"epoch": 1.27,
"learning_rate": 7.254749299283713e-06,
"loss": 0.0741,
"step": 16370
},
{
"epoch": 1.28,
"learning_rate": 7.246963562753037e-06,
"loss": 0.1093,
"step": 16380
},
{
"epoch": 1.28,
"learning_rate": 7.239177826222361e-06,
"loss": 0.0761,
"step": 16390
},
{
"epoch": 1.28,
"learning_rate": 7.231392089691686e-06,
"loss": 0.0818,
"step": 16400
},
{
"epoch": 1.28,
"learning_rate": 7.22360635316101e-06,
"loss": 0.102,
"step": 16410
},
{
"epoch": 1.28,
"learning_rate": 7.215820616630334e-06,
"loss": 0.0706,
"step": 16420
},
{
"epoch": 1.28,
"learning_rate": 7.208034880099658e-06,
"loss": 0.0957,
"step": 16430
},
{
"epoch": 1.28,
"learning_rate": 7.200249143568983e-06,
"loss": 0.1091,
"step": 16440
},
{
"epoch": 1.28,
"learning_rate": 7.192463407038306e-06,
"loss": 0.0743,
"step": 16450
},
{
"epoch": 1.28,
"learning_rate": 7.184677670507631e-06,
"loss": 0.0681,
"step": 16460
},
{
"epoch": 1.28,
"learning_rate": 7.176891933976955e-06,
"loss": 0.0797,
"step": 16470
},
{
"epoch": 1.28,
"learning_rate": 7.16910619744628e-06,
"loss": 0.0789,
"step": 16480
},
{
"epoch": 1.28,
"learning_rate": 7.1613204609156024e-06,
"loss": 0.0784,
"step": 16490
},
{
"epoch": 1.28,
"learning_rate": 7.153534724384928e-06,
"loss": 0.1042,
"step": 16500
},
{
"epoch": 1.29,
"learning_rate": 7.145748987854252e-06,
"loss": 0.0847,
"step": 16510
},
{
"epoch": 1.29,
"learning_rate": 7.137963251323575e-06,
"loss": 0.062,
"step": 16520
},
{
"epoch": 1.29,
"learning_rate": 7.130177514792899e-06,
"loss": 0.1022,
"step": 16530
},
{
"epoch": 1.29,
"learning_rate": 7.122391778262225e-06,
"loss": 0.0677,
"step": 16540
},
{
"epoch": 1.29,
"learning_rate": 7.114606041731549e-06,
"loss": 0.0943,
"step": 16550
},
{
"epoch": 1.29,
"learning_rate": 7.106820305200872e-06,
"loss": 0.0742,
"step": 16560
},
{
"epoch": 1.29,
"learning_rate": 7.099034568670196e-06,
"loss": 0.0905,
"step": 16570
},
{
"epoch": 1.29,
"learning_rate": 7.091248832139521e-06,
"loss": 0.1186,
"step": 16580
},
{
"epoch": 1.29,
"learning_rate": 7.083463095608846e-06,
"loss": 0.0707,
"step": 16590
},
{
"epoch": 1.29,
"learning_rate": 7.075677359078169e-06,
"loss": 0.1026,
"step": 16600
},
{
"epoch": 1.29,
"learning_rate": 7.067891622547493e-06,
"loss": 0.0956,
"step": 16610
},
{
"epoch": 1.29,
"learning_rate": 7.060105886016818e-06,
"loss": 0.0849,
"step": 16620
},
{
"epoch": 1.29,
"learning_rate": 7.052320149486141e-06,
"loss": 0.1157,
"step": 16630
},
{
"epoch": 1.3,
"learning_rate": 7.044534412955466e-06,
"loss": 0.1055,
"step": 16640
},
{
"epoch": 1.3,
"learning_rate": 7.03674867642479e-06,
"loss": 0.0805,
"step": 16650
},
{
"epoch": 1.3,
"learning_rate": 7.028962939894115e-06,
"loss": 0.0785,
"step": 16660
},
{
"epoch": 1.3,
"learning_rate": 7.021177203363438e-06,
"loss": 0.0964,
"step": 16670
},
{
"epoch": 1.3,
"learning_rate": 7.013391466832763e-06,
"loss": 0.0818,
"step": 16680
},
{
"epoch": 1.3,
"learning_rate": 7.005605730302087e-06,
"loss": 0.0897,
"step": 16690
},
{
"epoch": 1.3,
"learning_rate": 6.997819993771411e-06,
"loss": 0.0734,
"step": 16700
},
{
"epoch": 1.3,
"learning_rate": 6.990034257240735e-06,
"loss": 0.0798,
"step": 16710
},
{
"epoch": 1.3,
"learning_rate": 6.98224852071006e-06,
"loss": 0.1029,
"step": 16720
},
{
"epoch": 1.3,
"learning_rate": 6.974462784179384e-06,
"loss": 0.0951,
"step": 16730
},
{
"epoch": 1.3,
"learning_rate": 6.966677047648708e-06,
"loss": 0.0863,
"step": 16740
},
{
"epoch": 1.3,
"learning_rate": 6.958891311118032e-06,
"loss": 0.1014,
"step": 16750
},
{
"epoch": 1.3,
"learning_rate": 6.951105574587357e-06,
"loss": 0.0684,
"step": 16760
},
{
"epoch": 1.31,
"learning_rate": 6.94331983805668e-06,
"loss": 0.0848,
"step": 16770
},
{
"epoch": 1.31,
"learning_rate": 6.935534101526005e-06,
"loss": 0.1228,
"step": 16780
},
{
"epoch": 1.31,
"learning_rate": 6.927748364995329e-06,
"loss": 0.0871,
"step": 16790
},
{
"epoch": 1.31,
"learning_rate": 6.919962628464654e-06,
"loss": 0.0718,
"step": 16800
},
{
"epoch": 1.31,
"learning_rate": 6.912176891933977e-06,
"loss": 0.0939,
"step": 16810
},
{
"epoch": 1.31,
"learning_rate": 6.904391155403302e-06,
"loss": 0.0972,
"step": 16820
},
{
"epoch": 1.31,
"learning_rate": 6.896605418872626e-06,
"loss": 0.0572,
"step": 16830
},
{
"epoch": 1.31,
"learning_rate": 6.888819682341951e-06,
"loss": 0.0753,
"step": 16840
},
{
"epoch": 1.31,
"learning_rate": 6.881033945811274e-06,
"loss": 0.0941,
"step": 16850
},
{
"epoch": 1.31,
"learning_rate": 6.873248209280599e-06,
"loss": 0.1028,
"step": 16860
},
{
"epoch": 1.31,
"learning_rate": 6.865462472749923e-06,
"loss": 0.0869,
"step": 16870
},
{
"epoch": 1.31,
"learning_rate": 6.857676736219247e-06,
"loss": 0.0681,
"step": 16880
},
{
"epoch": 1.32,
"learning_rate": 6.849890999688571e-06,
"loss": 0.0623,
"step": 16890
},
{
"epoch": 1.32,
"learning_rate": 6.842105263157896e-06,
"loss": 0.0851,
"step": 16900
},
{
"epoch": 1.32,
"learning_rate": 6.83431952662722e-06,
"loss": 0.0821,
"step": 16910
},
{
"epoch": 1.32,
"learning_rate": 6.826533790096544e-06,
"loss": 0.0962,
"step": 16920
},
{
"epoch": 1.32,
"learning_rate": 6.818748053565868e-06,
"loss": 0.0576,
"step": 16930
},
{
"epoch": 1.32,
"learning_rate": 6.810962317035193e-06,
"loss": 0.0677,
"step": 16940
},
{
"epoch": 1.32,
"learning_rate": 6.803176580504515e-06,
"loss": 0.0845,
"step": 16950
},
{
"epoch": 1.32,
"learning_rate": 6.795390843973841e-06,
"loss": 0.0849,
"step": 16960
},
{
"epoch": 1.32,
"learning_rate": 6.787605107443165e-06,
"loss": 0.0757,
"step": 16970
},
{
"epoch": 1.32,
"learning_rate": 6.77981937091249e-06,
"loss": 0.0836,
"step": 16980
},
{
"epoch": 1.32,
"learning_rate": 6.772033634381812e-06,
"loss": 0.0912,
"step": 16990
},
{
"epoch": 1.32,
"learning_rate": 6.764247897851138e-06,
"loss": 0.1112,
"step": 17000
},
{
"epoch": 1.32,
"learning_rate": 6.756462161320462e-06,
"loss": 0.0829,
"step": 17010
},
{
"epoch": 1.33,
"learning_rate": 6.748676424789785e-06,
"loss": 0.0699,
"step": 17020
},
{
"epoch": 1.33,
"learning_rate": 6.740890688259109e-06,
"loss": 0.0592,
"step": 17030
},
{
"epoch": 1.33,
"learning_rate": 6.733104951728434e-06,
"loss": 0.091,
"step": 17040
},
{
"epoch": 1.33,
"learning_rate": 6.725319215197759e-06,
"loss": 0.0726,
"step": 17050
},
{
"epoch": 1.33,
"learning_rate": 6.717533478667082e-06,
"loss": 0.0809,
"step": 17060
},
{
"epoch": 1.33,
"learning_rate": 6.709747742136406e-06,
"loss": 0.1029,
"step": 17070
},
{
"epoch": 1.33,
"learning_rate": 6.701962005605731e-06,
"loss": 0.0697,
"step": 17080
},
{
"epoch": 1.33,
"learning_rate": 6.694176269075055e-06,
"loss": 0.1216,
"step": 17090
},
{
"epoch": 1.33,
"learning_rate": 6.686390532544379e-06,
"loss": 0.0974,
"step": 17100
},
{
"epoch": 1.33,
"learning_rate": 6.678604796013703e-06,
"loss": 0.063,
"step": 17110
},
{
"epoch": 1.33,
"learning_rate": 6.670819059483028e-06,
"loss": 0.0926,
"step": 17120
},
{
"epoch": 1.33,
"learning_rate": 6.663033322952351e-06,
"loss": 0.0712,
"step": 17130
},
{
"epoch": 1.33,
"learning_rate": 6.655247586421676e-06,
"loss": 0.076,
"step": 17140
},
{
"epoch": 1.34,
"learning_rate": 6.647461849891e-06,
"loss": 0.0821,
"step": 17150
},
{
"epoch": 1.34,
"learning_rate": 6.639676113360325e-06,
"loss": 0.0927,
"step": 17160
},
{
"epoch": 1.34,
"learning_rate": 6.631890376829648e-06,
"loss": 0.0978,
"step": 17170
},
{
"epoch": 1.34,
"learning_rate": 6.624104640298973e-06,
"loss": 0.0637,
"step": 17180
},
{
"epoch": 1.34,
"learning_rate": 6.616318903768297e-06,
"loss": 0.1203,
"step": 17190
},
{
"epoch": 1.34,
"learning_rate": 6.608533167237621e-06,
"loss": 0.0763,
"step": 17200
},
{
"epoch": 1.34,
"learning_rate": 6.600747430706945e-06,
"loss": 0.0763,
"step": 17210
},
{
"epoch": 1.34,
"learning_rate": 6.59296169417627e-06,
"loss": 0.0827,
"step": 17220
},
{
"epoch": 1.34,
"learning_rate": 6.585175957645594e-06,
"loss": 0.1012,
"step": 17230
},
{
"epoch": 1.34,
"learning_rate": 6.577390221114918e-06,
"loss": 0.0608,
"step": 17240
},
{
"epoch": 1.34,
"learning_rate": 6.569604484584242e-06,
"loss": 0.0909,
"step": 17250
},
{
"epoch": 1.34,
"learning_rate": 6.561818748053567e-06,
"loss": 0.0718,
"step": 17260
},
{
"epoch": 1.34,
"learning_rate": 6.55403301152289e-06,
"loss": 0.0828,
"step": 17270
},
{
"epoch": 1.35,
"learning_rate": 6.546247274992215e-06,
"loss": 0.0703,
"step": 17280
},
{
"epoch": 1.35,
"learning_rate": 6.538461538461539e-06,
"loss": 0.112,
"step": 17290
},
{
"epoch": 1.35,
"learning_rate": 6.530675801930864e-06,
"loss": 0.0572,
"step": 17300
},
{
"epoch": 1.35,
"learning_rate": 6.522890065400187e-06,
"loss": 0.0934,
"step": 17310
},
{
"epoch": 1.35,
"learning_rate": 6.515104328869512e-06,
"loss": 0.0591,
"step": 17320
},
{
"epoch": 1.35,
"learning_rate": 6.507318592338836e-06,
"loss": 0.0934,
"step": 17330
},
{
"epoch": 1.35,
"learning_rate": 6.4995328558081605e-06,
"loss": 0.092,
"step": 17340
},
{
"epoch": 1.35,
"learning_rate": 6.491747119277484e-06,
"loss": 0.0971,
"step": 17350
},
{
"epoch": 1.35,
"learning_rate": 6.483961382746809e-06,
"loss": 0.0846,
"step": 17360
},
{
"epoch": 1.35,
"learning_rate": 6.476175646216133e-06,
"loss": 0.0944,
"step": 17370
},
{
"epoch": 1.35,
"learning_rate": 6.468389909685457e-06,
"loss": 0.0879,
"step": 17380
},
{
"epoch": 1.35,
"learning_rate": 6.460604173154781e-06,
"loss": 0.0787,
"step": 17390
},
{
"epoch": 1.35,
"learning_rate": 6.4528184366241056e-06,
"loss": 0.0859,
"step": 17400
},
{
"epoch": 1.36,
"learning_rate": 6.44503270009343e-06,
"loss": 0.1081,
"step": 17410
},
{
"epoch": 1.36,
"learning_rate": 6.437246963562754e-06,
"loss": 0.0907,
"step": 17420
},
{
"epoch": 1.36,
"learning_rate": 6.429461227032078e-06,
"loss": 0.0704,
"step": 17430
},
{
"epoch": 1.36,
"learning_rate": 6.4216754905014025e-06,
"loss": 0.0748,
"step": 17440
},
{
"epoch": 1.36,
"learning_rate": 6.413889753970725e-06,
"loss": 0.0854,
"step": 17450
},
{
"epoch": 1.36,
"learning_rate": 6.406104017440051e-06,
"loss": 0.1008,
"step": 17460
},
{
"epoch": 1.36,
"learning_rate": 6.398318280909375e-06,
"loss": 0.0992,
"step": 17470
},
{
"epoch": 1.36,
"learning_rate": 6.3905325443786995e-06,
"loss": 0.1136,
"step": 17480
},
{
"epoch": 1.36,
"learning_rate": 6.382746807848022e-06,
"loss": 0.0753,
"step": 17490
},
{
"epoch": 1.36,
"learning_rate": 6.374961071317347e-06,
"loss": 0.0752,
"step": 17500
},
{
"epoch": 1.36,
"learning_rate": 6.367175334786672e-06,
"loss": 0.1051,
"step": 17510
},
{
"epoch": 1.36,
"learning_rate": 6.359389598255995e-06,
"loss": 0.0922,
"step": 17520
},
{
"epoch": 1.36,
"learning_rate": 6.351603861725319e-06,
"loss": 0.1027,
"step": 17530
},
{
"epoch": 1.37,
"learning_rate": 6.343818125194644e-06,
"loss": 0.1113,
"step": 17540
},
{
"epoch": 1.37,
"learning_rate": 6.336032388663968e-06,
"loss": 0.1094,
"step": 17550
},
{
"epoch": 1.37,
"learning_rate": 6.328246652133292e-06,
"loss": 0.0837,
"step": 17560
},
{
"epoch": 1.37,
"learning_rate": 6.320460915602616e-06,
"loss": 0.0749,
"step": 17570
},
{
"epoch": 1.37,
"learning_rate": 6.312675179071941e-06,
"loss": 0.1025,
"step": 17580
},
{
"epoch": 1.37,
"learning_rate": 6.304889442541265e-06,
"loss": 0.0718,
"step": 17590
},
{
"epoch": 1.37,
"learning_rate": 6.297103706010589e-06,
"loss": 0.0668,
"step": 17600
},
{
"epoch": 1.37,
"learning_rate": 6.289317969479913e-06,
"loss": 0.0659,
"step": 17610
},
{
"epoch": 1.37,
"learning_rate": 6.281532232949238e-06,
"loss": 0.096,
"step": 17620
},
{
"epoch": 1.37,
"learning_rate": 6.273746496418561e-06,
"loss": 0.0612,
"step": 17630
},
{
"epoch": 1.37,
"learning_rate": 6.265960759887886e-06,
"loss": 0.0721,
"step": 17640
},
{
"epoch": 1.37,
"learning_rate": 6.25817502335721e-06,
"loss": 0.0804,
"step": 17650
},
{
"epoch": 1.37,
"learning_rate": 6.2503892868265345e-06,
"loss": 0.1048,
"step": 17660
},
{
"epoch": 1.38,
"learning_rate": 6.242603550295858e-06,
"loss": 0.0688,
"step": 17670
},
{
"epoch": 1.38,
"learning_rate": 6.234817813765183e-06,
"loss": 0.072,
"step": 17680
},
{
"epoch": 1.38,
"learning_rate": 6.227032077234507e-06,
"loss": 0.0769,
"step": 17690
},
{
"epoch": 1.38,
"learning_rate": 6.219246340703831e-06,
"loss": 0.0929,
"step": 17700
},
{
"epoch": 1.38,
"learning_rate": 6.211460604173155e-06,
"loss": 0.0908,
"step": 17710
},
{
"epoch": 1.38,
"learning_rate": 6.2036748676424796e-06,
"loss": 0.0712,
"step": 17720
},
{
"epoch": 1.38,
"learning_rate": 6.195889131111804e-06,
"loss": 0.0786,
"step": 17730
},
{
"epoch": 1.38,
"learning_rate": 6.188103394581128e-06,
"loss": 0.1326,
"step": 17740
},
{
"epoch": 1.38,
"learning_rate": 6.180317658050452e-06,
"loss": 0.0685,
"step": 17750
},
{
"epoch": 1.38,
"learning_rate": 6.1725319215197765e-06,
"loss": 0.0841,
"step": 17760
},
{
"epoch": 1.38,
"learning_rate": 6.1647461849891e-06,
"loss": 0.096,
"step": 17770
},
{
"epoch": 1.38,
"learning_rate": 6.156960448458425e-06,
"loss": 0.0683,
"step": 17780
},
{
"epoch": 1.39,
"learning_rate": 6.149174711927749e-06,
"loss": 0.0958,
"step": 17790
},
{
"epoch": 1.39,
"learning_rate": 6.1413889753970735e-06,
"loss": 0.0853,
"step": 17800
},
{
"epoch": 1.39,
"learning_rate": 6.133603238866397e-06,
"loss": 0.0882,
"step": 17810
},
{
"epoch": 1.39,
"learning_rate": 6.1258175023357215e-06,
"loss": 0.0825,
"step": 17820
},
{
"epoch": 1.39,
"learning_rate": 6.118031765805046e-06,
"loss": 0.0723,
"step": 17830
},
{
"epoch": 1.39,
"learning_rate": 6.1102460292743704e-06,
"loss": 0.1124,
"step": 17840
},
{
"epoch": 1.39,
"learning_rate": 6.102460292743694e-06,
"loss": 0.0929,
"step": 17850
},
{
"epoch": 1.39,
"learning_rate": 6.0946745562130185e-06,
"loss": 0.0874,
"step": 17860
},
{
"epoch": 1.39,
"learning_rate": 6.086888819682343e-06,
"loss": 0.073,
"step": 17870
},
{
"epoch": 1.39,
"learning_rate": 6.0791030831516666e-06,
"loss": 0.079,
"step": 17880
},
{
"epoch": 1.39,
"learning_rate": 6.071317346620991e-06,
"loss": 0.0863,
"step": 17890
},
{
"epoch": 1.39,
"learning_rate": 6.0635316100903155e-06,
"loss": 0.1066,
"step": 17900
},
{
"epoch": 1.39,
"learning_rate": 6.05574587355964e-06,
"loss": 0.1092,
"step": 17910
},
{
"epoch": 1.4,
"learning_rate": 6.0479601370289635e-06,
"loss": 0.0614,
"step": 17920
},
{
"epoch": 1.4,
"learning_rate": 6.040174400498288e-06,
"loss": 0.0825,
"step": 17930
},
{
"epoch": 1.4,
"learning_rate": 6.0323886639676124e-06,
"loss": 0.0989,
"step": 17940
},
{
"epoch": 1.4,
"learning_rate": 6.024602927436935e-06,
"loss": 0.0559,
"step": 17950
},
{
"epoch": 1.4,
"learning_rate": 6.01681719090626e-06,
"loss": 0.0648,
"step": 17960
},
{
"epoch": 1.4,
"learning_rate": 6.009031454375585e-06,
"loss": 0.0726,
"step": 17970
},
{
"epoch": 1.4,
"learning_rate": 6.001245717844909e-06,
"loss": 0.0872,
"step": 17980
},
{
"epoch": 1.4,
"learning_rate": 5.993459981314232e-06,
"loss": 0.0937,
"step": 17990
},
{
"epoch": 1.4,
"learning_rate": 5.985674244783557e-06,
"loss": 0.0985,
"step": 18000
},
{
"epoch": 1.4,
"learning_rate": 5.977888508252881e-06,
"loss": 0.0808,
"step": 18010
},
{
"epoch": 1.4,
"learning_rate": 5.970102771722205e-06,
"loss": 0.0798,
"step": 18020
},
{
"epoch": 1.4,
"learning_rate": 5.962317035191529e-06,
"loss": 0.0668,
"step": 18030
},
{
"epoch": 1.4,
"learning_rate": 5.9545312986608536e-06,
"loss": 0.075,
"step": 18040
},
{
"epoch": 1.41,
"learning_rate": 5.946745562130178e-06,
"loss": 0.0632,
"step": 18050
},
{
"epoch": 1.41,
"learning_rate": 5.938959825599502e-06,
"loss": 0.0637,
"step": 18060
},
{
"epoch": 1.41,
"learning_rate": 5.931174089068826e-06,
"loss": 0.0821,
"step": 18070
},
{
"epoch": 1.41,
"learning_rate": 5.9233883525381505e-06,
"loss": 0.0734,
"step": 18080
},
{
"epoch": 1.41,
"learning_rate": 5.915602616007475e-06,
"loss": 0.0938,
"step": 18090
},
{
"epoch": 1.41,
"learning_rate": 5.907816879476799e-06,
"loss": 0.0668,
"step": 18100
},
{
"epoch": 1.41,
"learning_rate": 5.900031142946123e-06,
"loss": 0.0949,
"step": 18110
},
{
"epoch": 1.41,
"learning_rate": 5.8922454064154475e-06,
"loss": 0.0734,
"step": 18120
},
{
"epoch": 1.41,
"learning_rate": 5.884459669884771e-06,
"loss": 0.1135,
"step": 18130
},
{
"epoch": 1.41,
"learning_rate": 5.8766739333540955e-06,
"loss": 0.0805,
"step": 18140
},
{
"epoch": 1.41,
"learning_rate": 5.86888819682342e-06,
"loss": 0.0736,
"step": 18150
},
{
"epoch": 1.41,
"learning_rate": 5.8611024602927444e-06,
"loss": 0.0609,
"step": 18160
},
{
"epoch": 1.41,
"learning_rate": 5.853316723762068e-06,
"loss": 0.0842,
"step": 18170
},
{
"epoch": 1.42,
"learning_rate": 5.8455309872313925e-06,
"loss": 0.0629,
"step": 18180
},
{
"epoch": 1.42,
"learning_rate": 5.837745250700717e-06,
"loss": 0.0642,
"step": 18190
},
{
"epoch": 1.42,
"learning_rate": 5.8299595141700406e-06,
"loss": 0.077,
"step": 18200
},
{
"epoch": 1.42,
"learning_rate": 5.822173777639365e-06,
"loss": 0.069,
"step": 18210
},
{
"epoch": 1.42,
"learning_rate": 5.8143880411086895e-06,
"loss": 0.0733,
"step": 18220
},
{
"epoch": 1.42,
"learning_rate": 5.806602304578014e-06,
"loss": 0.0902,
"step": 18230
},
{
"epoch": 1.42,
"learning_rate": 5.7988165680473375e-06,
"loss": 0.0662,
"step": 18240
},
{
"epoch": 1.42,
"learning_rate": 5.791030831516662e-06,
"loss": 0.0762,
"step": 18250
},
{
"epoch": 1.42,
"learning_rate": 5.7832450949859864e-06,
"loss": 0.0728,
"step": 18260
},
{
"epoch": 1.42,
"learning_rate": 5.77545935845531e-06,
"loss": 0.0933,
"step": 18270
},
{
"epoch": 1.42,
"learning_rate": 5.7676736219246345e-06,
"loss": 0.0765,
"step": 18280
},
{
"epoch": 1.42,
"learning_rate": 5.759887885393959e-06,
"loss": 0.0787,
"step": 18290
},
{
"epoch": 1.42,
"learning_rate": 5.752102148863283e-06,
"loss": 0.0859,
"step": 18300
},
{
"epoch": 1.43,
"learning_rate": 5.744316412332607e-06,
"loss": 0.0879,
"step": 18310
},
{
"epoch": 1.43,
"learning_rate": 5.7365306758019315e-06,
"loss": 0.0838,
"step": 18320
},
{
"epoch": 1.43,
"learning_rate": 5.728744939271256e-06,
"loss": 0.0876,
"step": 18330
},
{
"epoch": 1.43,
"learning_rate": 5.72095920274058e-06,
"loss": 0.0734,
"step": 18340
},
{
"epoch": 1.43,
"learning_rate": 5.713173466209904e-06,
"loss": 0.0803,
"step": 18350
},
{
"epoch": 1.43,
"learning_rate": 5.705387729679228e-06,
"loss": 0.076,
"step": 18360
},
{
"epoch": 1.43,
"learning_rate": 5.697601993148553e-06,
"loss": 0.0562,
"step": 18370
},
{
"epoch": 1.43,
"learning_rate": 5.6898162566178765e-06,
"loss": 0.0943,
"step": 18380
},
{
"epoch": 1.43,
"learning_rate": 5.682030520087201e-06,
"loss": 0.0991,
"step": 18390
},
{
"epoch": 1.43,
"learning_rate": 5.674244783556525e-06,
"loss": 0.0848,
"step": 18400
},
{
"epoch": 1.43,
"learning_rate": 5.66645904702585e-06,
"loss": 0.0821,
"step": 18410
},
{
"epoch": 1.43,
"learning_rate": 5.658673310495173e-06,
"loss": 0.0894,
"step": 18420
},
{
"epoch": 1.43,
"learning_rate": 5.650887573964498e-06,
"loss": 0.0679,
"step": 18430
},
{
"epoch": 1.44,
"learning_rate": 5.643101837433822e-06,
"loss": 0.1185,
"step": 18440
},
{
"epoch": 1.44,
"learning_rate": 5.635316100903145e-06,
"loss": 0.0814,
"step": 18450
},
{
"epoch": 1.44,
"learning_rate": 5.6275303643724695e-06,
"loss": 0.1106,
"step": 18460
},
{
"epoch": 1.44,
"learning_rate": 5.619744627841794e-06,
"loss": 0.1136,
"step": 18470
},
{
"epoch": 1.44,
"learning_rate": 5.611958891311119e-06,
"loss": 0.0627,
"step": 18480
},
{
"epoch": 1.44,
"learning_rate": 5.604173154780442e-06,
"loss": 0.0728,
"step": 18490
},
{
"epoch": 1.44,
"learning_rate": 5.5963874182497665e-06,
"loss": 0.0866,
"step": 18500
},
{
"epoch": 1.44,
"learning_rate": 5.588601681719091e-06,
"loss": 0.0613,
"step": 18510
},
{
"epoch": 1.44,
"learning_rate": 5.5808159451884146e-06,
"loss": 0.0974,
"step": 18520
},
{
"epoch": 1.44,
"learning_rate": 5.573030208657739e-06,
"loss": 0.0717,
"step": 18530
},
{
"epoch": 1.44,
"learning_rate": 5.5652444721270635e-06,
"loss": 0.0602,
"step": 18540
},
{
"epoch": 1.44,
"learning_rate": 5.557458735596388e-06,
"loss": 0.071,
"step": 18550
},
{
"epoch": 1.45,
"learning_rate": 5.5496729990657115e-06,
"loss": 0.0881,
"step": 18560
},
{
"epoch": 1.45,
"learning_rate": 5.541887262535036e-06,
"loss": 0.0925,
"step": 18570
},
{
"epoch": 1.45,
"learning_rate": 5.5341015260043604e-06,
"loss": 0.0794,
"step": 18580
},
{
"epoch": 1.45,
"learning_rate": 5.526315789473685e-06,
"loss": 0.06,
"step": 18590
},
{
"epoch": 1.45,
"learning_rate": 5.5185300529430085e-06,
"loss": 0.0724,
"step": 18600
},
{
"epoch": 1.45,
"learning_rate": 5.510744316412333e-06,
"loss": 0.083,
"step": 18610
},
{
"epoch": 1.45,
"learning_rate": 5.502958579881657e-06,
"loss": 0.0701,
"step": 18620
},
{
"epoch": 1.45,
"learning_rate": 5.495172843350981e-06,
"loss": 0.0612,
"step": 18630
},
{
"epoch": 1.45,
"learning_rate": 5.4873871068203055e-06,
"loss": 0.0599,
"step": 18640
},
{
"epoch": 1.45,
"learning_rate": 5.47960137028963e-06,
"loss": 0.108,
"step": 18650
},
{
"epoch": 1.45,
"learning_rate": 5.471815633758954e-06,
"loss": 0.0808,
"step": 18660
},
{
"epoch": 1.45,
"learning_rate": 5.464029897228278e-06,
"loss": 0.0859,
"step": 18670
},
{
"epoch": 1.45,
"learning_rate": 5.456244160697602e-06,
"loss": 0.075,
"step": 18680
},
{
"epoch": 1.46,
"learning_rate": 5.448458424166927e-06,
"loss": 0.0868,
"step": 18690
},
{
"epoch": 1.46,
"learning_rate": 5.4406726876362505e-06,
"loss": 0.0535,
"step": 18700
},
{
"epoch": 1.46,
"learning_rate": 5.432886951105575e-06,
"loss": 0.0705,
"step": 18710
},
{
"epoch": 1.46,
"learning_rate": 5.425101214574899e-06,
"loss": 0.0977,
"step": 18720
},
{
"epoch": 1.46,
"learning_rate": 5.417315478044224e-06,
"loss": 0.0805,
"step": 18730
},
{
"epoch": 1.46,
"learning_rate": 5.4095297415135474e-06,
"loss": 0.0824,
"step": 18740
},
{
"epoch": 1.46,
"learning_rate": 5.401744004982872e-06,
"loss": 0.0734,
"step": 18750
},
{
"epoch": 1.46,
"learning_rate": 5.393958268452196e-06,
"loss": 0.0822,
"step": 18760
},
{
"epoch": 1.46,
"learning_rate": 5.38617253192152e-06,
"loss": 0.0888,
"step": 18770
},
{
"epoch": 1.46,
"learning_rate": 5.378386795390844e-06,
"loss": 0.0985,
"step": 18780
},
{
"epoch": 1.46,
"learning_rate": 5.370601058860169e-06,
"loss": 0.0934,
"step": 18790
},
{
"epoch": 1.46,
"learning_rate": 5.362815322329493e-06,
"loss": 0.0651,
"step": 18800
},
{
"epoch": 1.46,
"learning_rate": 5.355029585798817e-06,
"loss": 0.0787,
"step": 18810
},
{
"epoch": 1.47,
"learning_rate": 5.347243849268141e-06,
"loss": 0.0637,
"step": 18820
},
{
"epoch": 1.47,
"learning_rate": 5.339458112737466e-06,
"loss": 0.0994,
"step": 18830
},
{
"epoch": 1.47,
"learning_rate": 5.33167237620679e-06,
"loss": 0.1053,
"step": 18840
},
{
"epoch": 1.47,
"learning_rate": 5.323886639676114e-06,
"loss": 0.0963,
"step": 18850
},
{
"epoch": 1.47,
"learning_rate": 5.316100903145438e-06,
"loss": 0.0792,
"step": 18860
},
{
"epoch": 1.47,
"learning_rate": 5.308315166614763e-06,
"loss": 0.0894,
"step": 18870
},
{
"epoch": 1.47,
"learning_rate": 5.3005294300840855e-06,
"loss": 0.0799,
"step": 18880
},
{
"epoch": 1.47,
"learning_rate": 5.292743693553411e-06,
"loss": 0.0885,
"step": 18890
},
{
"epoch": 1.47,
"learning_rate": 5.284957957022735e-06,
"loss": 0.0689,
"step": 18900
},
{
"epoch": 1.47,
"learning_rate": 5.27717222049206e-06,
"loss": 0.0646,
"step": 18910
},
{
"epoch": 1.47,
"learning_rate": 5.2693864839613825e-06,
"loss": 0.0984,
"step": 18920
},
{
"epoch": 1.47,
"learning_rate": 5.261600747430707e-06,
"loss": 0.0806,
"step": 18930
},
{
"epoch": 1.47,
"learning_rate": 5.253815010900032e-06,
"loss": 0.0835,
"step": 18940
},
{
"epoch": 1.48,
"learning_rate": 5.246029274369355e-06,
"loss": 0.0663,
"step": 18950
},
{
"epoch": 1.48,
"learning_rate": 5.2382435378386795e-06,
"loss": 0.0779,
"step": 18960
},
{
"epoch": 1.48,
"learning_rate": 5.230457801308004e-06,
"loss": 0.0765,
"step": 18970
},
{
"epoch": 1.48,
"learning_rate": 5.222672064777329e-06,
"loss": 0.0733,
"step": 18980
},
{
"epoch": 1.48,
"learning_rate": 5.214886328246652e-06,
"loss": 0.0609,
"step": 18990
},
{
"epoch": 1.48,
"learning_rate": 5.207100591715976e-06,
"loss": 0.0666,
"step": 19000
},
{
"epoch": 1.48,
"learning_rate": 5.199314855185301e-06,
"loss": 0.0554,
"step": 19010
},
{
"epoch": 1.48,
"learning_rate": 5.1915291186546245e-06,
"loss": 0.1113,
"step": 19020
},
{
"epoch": 1.48,
"learning_rate": 5.183743382123949e-06,
"loss": 0.0761,
"step": 19030
},
{
"epoch": 1.48,
"learning_rate": 5.175957645593273e-06,
"loss": 0.0847,
"step": 19040
},
{
"epoch": 1.48,
"learning_rate": 5.168171909062598e-06,
"loss": 0.0709,
"step": 19050
},
{
"epoch": 1.48,
"learning_rate": 5.1603861725319214e-06,
"loss": 0.09,
"step": 19060
},
{
"epoch": 1.48,
"learning_rate": 5.152600436001246e-06,
"loss": 0.0829,
"step": 19070
},
{
"epoch": 1.49,
"learning_rate": 5.14481469947057e-06,
"loss": 0.0785,
"step": 19080
},
{
"epoch": 1.49,
"learning_rate": 5.137028962939895e-06,
"loss": 0.083,
"step": 19090
},
{
"epoch": 1.49,
"learning_rate": 5.129243226409218e-06,
"loss": 0.0889,
"step": 19100
},
{
"epoch": 1.49,
"learning_rate": 5.121457489878543e-06,
"loss": 0.0777,
"step": 19110
},
{
"epoch": 1.49,
"learning_rate": 5.113671753347867e-06,
"loss": 0.0712,
"step": 19120
},
{
"epoch": 1.49,
"learning_rate": 5.105886016817191e-06,
"loss": 0.0776,
"step": 19130
},
{
"epoch": 1.49,
"learning_rate": 5.098100280286515e-06,
"loss": 0.082,
"step": 19140
},
{
"epoch": 1.49,
"learning_rate": 5.09031454375584e-06,
"loss": 0.0936,
"step": 19150
},
{
"epoch": 1.49,
"learning_rate": 5.082528807225164e-06,
"loss": 0.0829,
"step": 19160
},
{
"epoch": 1.49,
"learning_rate": 5.074743070694488e-06,
"loss": 0.0602,
"step": 19170
},
{
"epoch": 1.49,
"learning_rate": 5.066957334163812e-06,
"loss": 0.0754,
"step": 19180
},
{
"epoch": 1.49,
"learning_rate": 5.059171597633137e-06,
"loss": 0.0742,
"step": 19190
},
{
"epoch": 1.49,
"learning_rate": 5.05138586110246e-06,
"loss": 0.0701,
"step": 19200
},
{
"epoch": 1.5,
"learning_rate": 5.043600124571785e-06,
"loss": 0.0626,
"step": 19210
},
{
"epoch": 1.5,
"learning_rate": 5.035814388041109e-06,
"loss": 0.0862,
"step": 19220
},
{
"epoch": 1.5,
"learning_rate": 5.028028651510434e-06,
"loss": 0.0889,
"step": 19230
},
{
"epoch": 1.5,
"learning_rate": 5.020242914979757e-06,
"loss": 0.0613,
"step": 19240
},
{
"epoch": 1.5,
"learning_rate": 5.012457178449082e-06,
"loss": 0.0709,
"step": 19250
},
{
"epoch": 1.5,
"learning_rate": 5.004671441918406e-06,
"loss": 0.0782,
"step": 19260
},
{
"epoch": 1.5,
"learning_rate": 4.99688570538773e-06,
"loss": 0.0766,
"step": 19270
},
{
"epoch": 1.5,
"learning_rate": 4.989099968857054e-06,
"loss": 0.0827,
"step": 19280
},
{
"epoch": 1.5,
"learning_rate": 4.981314232326379e-06,
"loss": 0.0721,
"step": 19290
},
{
"epoch": 1.5,
"learning_rate": 4.973528495795702e-06,
"loss": 0.0705,
"step": 19300
},
{
"epoch": 1.5,
"learning_rate": 4.965742759265027e-06,
"loss": 0.0664,
"step": 19310
},
{
"epoch": 1.5,
"learning_rate": 4.957957022734351e-06,
"loss": 0.0729,
"step": 19320
},
{
"epoch": 1.5,
"learning_rate": 4.950171286203676e-06,
"loss": 0.0542,
"step": 19330
},
{
"epoch": 1.51,
"learning_rate": 4.942385549672999e-06,
"loss": 0.0768,
"step": 19340
},
{
"epoch": 1.51,
"learning_rate": 4.934599813142324e-06,
"loss": 0.0725,
"step": 19350
},
{
"epoch": 1.51,
"learning_rate": 4.926814076611648e-06,
"loss": 0.0694,
"step": 19360
},
{
"epoch": 1.51,
"learning_rate": 4.919028340080972e-06,
"loss": 0.0858,
"step": 19370
},
{
"epoch": 1.51,
"learning_rate": 4.911242603550296e-06,
"loss": 0.072,
"step": 19380
},
{
"epoch": 1.51,
"learning_rate": 4.903456867019621e-06,
"loss": 0.0589,
"step": 19390
},
{
"epoch": 1.51,
"learning_rate": 4.895671130488945e-06,
"loss": 0.0718,
"step": 19400
},
{
"epoch": 1.51,
"learning_rate": 4.887885393958269e-06,
"loss": 0.06,
"step": 19410
},
{
"epoch": 1.51,
"learning_rate": 4.880099657427593e-06,
"loss": 0.0718,
"step": 19420
},
{
"epoch": 1.51,
"learning_rate": 4.872313920896917e-06,
"loss": 0.0861,
"step": 19430
},
{
"epoch": 1.51,
"learning_rate": 4.864528184366241e-06,
"loss": 0.1108,
"step": 19440
},
{
"epoch": 1.51,
"learning_rate": 4.856742447835566e-06,
"loss": 0.093,
"step": 19450
},
{
"epoch": 1.52,
"learning_rate": 4.848956711304889e-06,
"loss": 0.0642,
"step": 19460
},
{
"epoch": 1.52,
"learning_rate": 4.841170974774214e-06,
"loss": 0.06,
"step": 19470
},
{
"epoch": 1.52,
"learning_rate": 4.833385238243538e-06,
"loss": 0.0794,
"step": 19480
},
{
"epoch": 1.52,
"learning_rate": 4.825599501712863e-06,
"loss": 0.0722,
"step": 19490
},
{
"epoch": 1.52,
"learning_rate": 4.817813765182186e-06,
"loss": 0.0821,
"step": 19500
},
{
"epoch": 1.52,
"learning_rate": 4.810028028651511e-06,
"loss": 0.0646,
"step": 19510
},
{
"epoch": 1.52,
"learning_rate": 4.802242292120835e-06,
"loss": 0.0491,
"step": 19520
},
{
"epoch": 1.52,
"learning_rate": 4.794456555590159e-06,
"loss": 0.0593,
"step": 19530
},
{
"epoch": 1.52,
"learning_rate": 4.786670819059483e-06,
"loss": 0.0873,
"step": 19540
},
{
"epoch": 1.52,
"learning_rate": 4.778885082528808e-06,
"loss": 0.0623,
"step": 19550
},
{
"epoch": 1.52,
"learning_rate": 4.771099345998132e-06,
"loss": 0.0547,
"step": 19560
},
{
"epoch": 1.52,
"learning_rate": 4.763313609467456e-06,
"loss": 0.0675,
"step": 19570
},
{
"epoch": 1.52,
"learning_rate": 4.75552787293678e-06,
"loss": 0.0766,
"step": 19580
},
{
"epoch": 1.53,
"learning_rate": 4.747742136406105e-06,
"loss": 0.0863,
"step": 19590
},
{
"epoch": 1.53,
"learning_rate": 4.739956399875428e-06,
"loss": 0.0744,
"step": 19600
},
{
"epoch": 1.53,
"learning_rate": 4.732170663344753e-06,
"loss": 0.07,
"step": 19610
},
{
"epoch": 1.53,
"learning_rate": 4.724384926814077e-06,
"loss": 0.0729,
"step": 19620
},
{
"epoch": 1.53,
"learning_rate": 4.716599190283402e-06,
"loss": 0.0503,
"step": 19630
},
{
"epoch": 1.53,
"learning_rate": 4.708813453752725e-06,
"loss": 0.1092,
"step": 19640
},
{
"epoch": 1.53,
"learning_rate": 4.70102771722205e-06,
"loss": 0.0741,
"step": 19650
},
{
"epoch": 1.53,
"learning_rate": 4.693241980691373e-06,
"loss": 0.0693,
"step": 19660
},
{
"epoch": 1.53,
"learning_rate": 4.685456244160699e-06,
"loss": 0.0544,
"step": 19670
},
{
"epoch": 1.53,
"learning_rate": 4.677670507630022e-06,
"loss": 0.097,
"step": 19680
},
{
"epoch": 1.53,
"learning_rate": 4.669884771099346e-06,
"loss": 0.0594,
"step": 19690
},
{
"epoch": 1.53,
"learning_rate": 4.66209903456867e-06,
"loss": 0.0591,
"step": 19700
},
{
"epoch": 1.53,
"learning_rate": 4.654313298037995e-06,
"loss": 0.0565,
"step": 19710
},
{
"epoch": 1.54,
"learning_rate": 4.646527561507319e-06,
"loss": 0.0606,
"step": 19720
},
{
"epoch": 1.54,
"learning_rate": 4.638741824976643e-06,
"loss": 0.108,
"step": 19730
},
{
"epoch": 1.54,
"learning_rate": 4.630956088445967e-06,
"loss": 0.0692,
"step": 19740
},
{
"epoch": 1.54,
"learning_rate": 4.623170351915292e-06,
"loss": 0.065,
"step": 19750
},
{
"epoch": 1.54,
"learning_rate": 4.615384615384616e-06,
"loss": 0.0743,
"step": 19760
},
{
"epoch": 1.54,
"learning_rate": 4.60759887885394e-06,
"loss": 0.0921,
"step": 19770
},
{
"epoch": 1.54,
"learning_rate": 4.599813142323264e-06,
"loss": 0.085,
"step": 19780
},
{
"epoch": 1.54,
"learning_rate": 4.592027405792589e-06,
"loss": 0.0714,
"step": 19790
},
{
"epoch": 1.54,
"learning_rate": 4.584241669261912e-06,
"loss": 0.0638,
"step": 19800
},
{
"epoch": 1.54,
"learning_rate": 4.576455932731237e-06,
"loss": 0.0683,
"step": 19810
},
{
"epoch": 1.54,
"learning_rate": 4.568670196200561e-06,
"loss": 0.062,
"step": 19820
},
{
"epoch": 1.54,
"learning_rate": 4.560884459669886e-06,
"loss": 0.0723,
"step": 19830
},
{
"epoch": 1.54,
"learning_rate": 4.553098723139209e-06,
"loss": 0.0721,
"step": 19840
},
{
"epoch": 1.55,
"learning_rate": 4.545312986608534e-06,
"loss": 0.0679,
"step": 19850
},
{
"epoch": 1.55,
"learning_rate": 4.537527250077858e-06,
"loss": 0.104,
"step": 19860
},
{
"epoch": 1.55,
"learning_rate": 4.529741513547182e-06,
"loss": 0.0858,
"step": 19870
},
{
"epoch": 1.55,
"learning_rate": 4.521955777016506e-06,
"loss": 0.1106,
"step": 19880
},
{
"epoch": 1.55,
"learning_rate": 4.51417004048583e-06,
"loss": 0.0747,
"step": 19890
},
{
"epoch": 1.55,
"learning_rate": 4.506384303955155e-06,
"loss": 0.0624,
"step": 19900
},
{
"epoch": 1.55,
"learning_rate": 4.498598567424479e-06,
"loss": 0.0819,
"step": 19910
},
{
"epoch": 1.55,
"learning_rate": 4.490812830893803e-06,
"loss": 0.0693,
"step": 19920
},
{
"epoch": 1.55,
"learning_rate": 4.483027094363127e-06,
"loss": 0.0673,
"step": 19930
},
{
"epoch": 1.55,
"learning_rate": 4.475241357832451e-06,
"loss": 0.0689,
"step": 19940
},
{
"epoch": 1.55,
"learning_rate": 4.467455621301776e-06,
"loss": 0.0653,
"step": 19950
},
{
"epoch": 1.55,
"learning_rate": 4.459669884771099e-06,
"loss": 0.08,
"step": 19960
},
{
"epoch": 1.55,
"learning_rate": 4.451884148240424e-06,
"loss": 0.0639,
"step": 19970
},
{
"epoch": 1.56,
"learning_rate": 4.444098411709748e-06,
"loss": 0.0538,
"step": 19980
},
{
"epoch": 1.56,
"learning_rate": 4.436312675179073e-06,
"loss": 0.0622,
"step": 19990
},
{
"epoch": 1.56,
"learning_rate": 4.428526938648396e-06,
"loss": 0.0897,
"step": 20000
},
{
"epoch": 1.56,
"learning_rate": 4.420741202117721e-06,
"loss": 0.0705,
"step": 20010
},
{
"epoch": 1.56,
"learning_rate": 4.412955465587045e-06,
"loss": 0.0757,
"step": 20020
},
{
"epoch": 1.56,
"learning_rate": 4.405169729056369e-06,
"loss": 0.0572,
"step": 20030
},
{
"epoch": 1.56,
"learning_rate": 4.397383992525693e-06,
"loss": 0.0789,
"step": 20040
},
{
"epoch": 1.56,
"learning_rate": 4.389598255995018e-06,
"loss": 0.059,
"step": 20050
},
{
"epoch": 1.56,
"learning_rate": 4.381812519464342e-06,
"loss": 0.0831,
"step": 20060
},
{
"epoch": 1.56,
"learning_rate": 4.374026782933666e-06,
"loss": 0.0875,
"step": 20070
},
{
"epoch": 1.56,
"learning_rate": 4.36624104640299e-06,
"loss": 0.0861,
"step": 20080
},
{
"epoch": 1.56,
"learning_rate": 4.358455309872315e-06,
"loss": 0.0651,
"step": 20090
},
{
"epoch": 1.56,
"learning_rate": 4.350669573341639e-06,
"loss": 0.0564,
"step": 20100
},
{
"epoch": 1.57,
"learning_rate": 4.342883836810963e-06,
"loss": 0.0737,
"step": 20110
},
{
"epoch": 1.57,
"learning_rate": 4.335098100280286e-06,
"loss": 0.076,
"step": 20120
},
{
"epoch": 1.57,
"learning_rate": 4.3273123637496116e-06,
"loss": 0.0597,
"step": 20130
},
{
"epoch": 1.57,
"learning_rate": 4.319526627218935e-06,
"loss": 0.1011,
"step": 20140
},
{
"epoch": 1.57,
"learning_rate": 4.31174089068826e-06,
"loss": 0.0547,
"step": 20150
},
{
"epoch": 1.57,
"learning_rate": 4.303955154157583e-06,
"loss": 0.0597,
"step": 20160
},
{
"epoch": 1.57,
"learning_rate": 4.296169417626908e-06,
"loss": 0.0533,
"step": 20170
},
{
"epoch": 1.57,
"learning_rate": 4.288383681096232e-06,
"loss": 0.0823,
"step": 20180
},
{
"epoch": 1.57,
"learning_rate": 4.280597944565556e-06,
"loss": 0.0749,
"step": 20190
},
{
"epoch": 1.57,
"learning_rate": 4.27281220803488e-06,
"loss": 0.0603,
"step": 20200
},
{
"epoch": 1.57,
"learning_rate": 4.265026471504205e-06,
"loss": 0.0547,
"step": 20210
},
{
"epoch": 1.57,
"learning_rate": 4.257240734973529e-06,
"loss": 0.0781,
"step": 20220
},
{
"epoch": 1.58,
"learning_rate": 4.249454998442853e-06,
"loss": 0.0612,
"step": 20230
},
{
"epoch": 1.58,
"learning_rate": 4.241669261912177e-06,
"loss": 0.0592,
"step": 20240
},
{
"epoch": 1.58,
"learning_rate": 4.233883525381502e-06,
"loss": 0.0739,
"step": 20250
},
{
"epoch": 1.58,
"learning_rate": 4.226097788850826e-06,
"loss": 0.074,
"step": 20260
},
{
"epoch": 1.58,
"learning_rate": 4.21831205232015e-06,
"loss": 0.0622,
"step": 20270
},
{
"epoch": 1.58,
"learning_rate": 4.210526315789474e-06,
"loss": 0.0701,
"step": 20280
},
{
"epoch": 1.58,
"learning_rate": 4.2027405792587986e-06,
"loss": 0.0865,
"step": 20290
},
{
"epoch": 1.58,
"learning_rate": 4.194954842728122e-06,
"loss": 0.073,
"step": 20300
},
{
"epoch": 1.58,
"learning_rate": 4.187169106197447e-06,
"loss": 0.07,
"step": 20310
},
{
"epoch": 1.58,
"learning_rate": 4.179383369666771e-06,
"loss": 0.0903,
"step": 20320
},
{
"epoch": 1.58,
"learning_rate": 4.1715976331360955e-06,
"loss": 0.0775,
"step": 20330
},
{
"epoch": 1.58,
"learning_rate": 4.163811896605419e-06,
"loss": 0.0615,
"step": 20340
},
{
"epoch": 1.58,
"learning_rate": 4.156026160074744e-06,
"loss": 0.0602,
"step": 20350
},
{
"epoch": 1.59,
"learning_rate": 4.148240423544068e-06,
"loss": 0.0716,
"step": 20360
},
{
"epoch": 1.59,
"learning_rate": 4.140454687013392e-06,
"loss": 0.0571,
"step": 20370
},
{
"epoch": 1.59,
"learning_rate": 4.132668950482716e-06,
"loss": 0.0653,
"step": 20380
},
{
"epoch": 1.59,
"learning_rate": 4.12488321395204e-06,
"loss": 0.0731,
"step": 20390
},
{
"epoch": 1.59,
"learning_rate": 4.117097477421364e-06,
"loss": 0.0984,
"step": 20400
},
{
"epoch": 1.59,
"learning_rate": 4.109311740890689e-06,
"loss": 0.0659,
"step": 20410
},
{
"epoch": 1.59,
"learning_rate": 4.101526004360013e-06,
"loss": 0.0587,
"step": 20420
},
{
"epoch": 1.59,
"learning_rate": 4.093740267829337e-06,
"loss": 0.0577,
"step": 20430
},
{
"epoch": 1.59,
"learning_rate": 4.085954531298661e-06,
"loss": 0.0841,
"step": 20440
},
{
"epoch": 1.59,
"learning_rate": 4.0781687947679856e-06,
"loss": 0.0606,
"step": 20450
},
{
"epoch": 1.59,
"learning_rate": 4.070383058237309e-06,
"loss": 0.0598,
"step": 20460
},
{
"epoch": 1.59,
"learning_rate": 4.062597321706634e-06,
"loss": 0.066,
"step": 20470
},
{
"epoch": 1.59,
"learning_rate": 4.054811585175958e-06,
"loss": 0.0769,
"step": 20480
},
{
"epoch": 1.6,
"learning_rate": 4.0470258486452825e-06,
"loss": 0.1022,
"step": 20490
},
{
"epoch": 1.6,
"learning_rate": 4.039240112114606e-06,
"loss": 0.0523,
"step": 20500
},
{
"epoch": 1.6,
"learning_rate": 4.031454375583931e-06,
"loss": 0.057,
"step": 20510
},
{
"epoch": 1.6,
"learning_rate": 4.023668639053255e-06,
"loss": 0.0895,
"step": 20520
},
{
"epoch": 1.6,
"learning_rate": 4.015882902522579e-06,
"loss": 0.0652,
"step": 20530
},
{
"epoch": 1.6,
"learning_rate": 4.008097165991903e-06,
"loss": 0.0703,
"step": 20540
},
{
"epoch": 1.6,
"learning_rate": 4.0003114294612276e-06,
"loss": 0.0669,
"step": 20550
},
{
"epoch": 1.6,
"learning_rate": 3.992525692930552e-06,
"loss": 0.0819,
"step": 20560
},
{
"epoch": 1.6,
"learning_rate": 3.984739956399876e-06,
"loss": 0.0562,
"step": 20570
},
{
"epoch": 1.6,
"learning_rate": 3.9769542198692e-06,
"loss": 0.0699,
"step": 20580
},
{
"epoch": 1.6,
"learning_rate": 3.9691684833385245e-06,
"loss": 0.0727,
"step": 20590
},
{
"epoch": 1.6,
"learning_rate": 3.961382746807849e-06,
"loss": 0.0525,
"step": 20600
},
{
"epoch": 1.6,
"learning_rate": 3.9535970102771726e-06,
"loss": 0.1218,
"step": 20610
},
{
"epoch": 1.61,
"learning_rate": 3.945811273746496e-06,
"loss": 0.0535,
"step": 20620
},
{
"epoch": 1.61,
"learning_rate": 3.938025537215821e-06,
"loss": 0.0886,
"step": 20630
},
{
"epoch": 1.61,
"learning_rate": 3.930239800685145e-06,
"loss": 0.0605,
"step": 20640
},
{
"epoch": 1.61,
"learning_rate": 3.9224540641544695e-06,
"loss": 0.0641,
"step": 20650
},
{
"epoch": 1.61,
"learning_rate": 3.914668327623793e-06,
"loss": 0.0815,
"step": 20660
},
{
"epoch": 1.61,
"learning_rate": 3.906882591093118e-06,
"loss": 0.0974,
"step": 20670
},
{
"epoch": 1.61,
"learning_rate": 3.899096854562442e-06,
"loss": 0.0513,
"step": 20680
},
{
"epoch": 1.61,
"learning_rate": 3.891311118031766e-06,
"loss": 0.0919,
"step": 20690
},
{
"epoch": 1.61,
"learning_rate": 3.88352538150109e-06,
"loss": 0.0689,
"step": 20700
},
{
"epoch": 1.61,
"learning_rate": 3.8757396449704146e-06,
"loss": 0.0691,
"step": 20710
},
{
"epoch": 1.61,
"learning_rate": 3.867953908439739e-06,
"loss": 0.0676,
"step": 20720
},
{
"epoch": 1.61,
"learning_rate": 3.860168171909063e-06,
"loss": 0.0675,
"step": 20730
},
{
"epoch": 1.61,
"learning_rate": 3.852382435378387e-06,
"loss": 0.0807,
"step": 20740
},
{
"epoch": 1.62,
"learning_rate": 3.8445966988477115e-06,
"loss": 0.0686,
"step": 20750
},
{
"epoch": 1.62,
"learning_rate": 3.836810962317036e-06,
"loss": 0.0761,
"step": 20760
},
{
"epoch": 1.62,
"learning_rate": 3.8290252257863596e-06,
"loss": 0.0639,
"step": 20770
},
{
"epoch": 1.62,
"learning_rate": 3.821239489255684e-06,
"loss": 0.0606,
"step": 20780
},
{
"epoch": 1.62,
"learning_rate": 3.813453752725008e-06,
"loss": 0.0722,
"step": 20790
},
{
"epoch": 1.62,
"learning_rate": 3.805668016194332e-06,
"loss": 0.0785,
"step": 20800
},
{
"epoch": 1.62,
"learning_rate": 3.7978822796636565e-06,
"loss": 0.0789,
"step": 20810
},
{
"epoch": 1.62,
"learning_rate": 3.7900965431329806e-06,
"loss": 0.0773,
"step": 20820
},
{
"epoch": 1.62,
"learning_rate": 3.782310806602305e-06,
"loss": 0.0637,
"step": 20830
},
{
"epoch": 1.62,
"learning_rate": 3.774525070071629e-06,
"loss": 0.0512,
"step": 20840
},
{
"epoch": 1.62,
"learning_rate": 3.7667393335409535e-06,
"loss": 0.0735,
"step": 20850
},
{
"epoch": 1.62,
"learning_rate": 3.7589535970102775e-06,
"loss": 0.0617,
"step": 20860
},
{
"epoch": 1.62,
"learning_rate": 3.7511678604796016e-06,
"loss": 0.0548,
"step": 20870
},
{
"epoch": 1.63,
"learning_rate": 3.743382123948926e-06,
"loss": 0.0762,
"step": 20880
},
{
"epoch": 1.63,
"learning_rate": 3.73559638741825e-06,
"loss": 0.0502,
"step": 20890
},
{
"epoch": 1.63,
"learning_rate": 3.7278106508875745e-06,
"loss": 0.0757,
"step": 20900
},
{
"epoch": 1.63,
"learning_rate": 3.7200249143568985e-06,
"loss": 0.0631,
"step": 20910
},
{
"epoch": 1.63,
"learning_rate": 3.712239177826223e-06,
"loss": 0.0561,
"step": 20920
},
{
"epoch": 1.63,
"learning_rate": 3.704453441295547e-06,
"loss": 0.0517,
"step": 20930
},
{
"epoch": 1.63,
"learning_rate": 3.6966677047648706e-06,
"loss": 0.048,
"step": 20940
},
{
"epoch": 1.63,
"learning_rate": 3.6888819682341955e-06,
"loss": 0.0684,
"step": 20950
},
{
"epoch": 1.63,
"learning_rate": 3.681096231703519e-06,
"loss": 0.0685,
"step": 20960
},
{
"epoch": 1.63,
"learning_rate": 3.673310495172844e-06,
"loss": 0.0593,
"step": 20970
},
{
"epoch": 1.63,
"learning_rate": 3.6655247586421676e-06,
"loss": 0.0667,
"step": 20980
},
{
"epoch": 1.63,
"learning_rate": 3.6577390221114924e-06,
"loss": 0.068,
"step": 20990
},
{
"epoch": 1.64,
"learning_rate": 3.649953285580816e-06,
"loss": 0.059,
"step": 21000
},
{
"epoch": 1.64,
"learning_rate": 3.6421675490501405e-06,
"loss": 0.0627,
"step": 21010
},
{
"epoch": 1.64,
"learning_rate": 3.6343818125194645e-06,
"loss": 0.0708,
"step": 21020
},
{
"epoch": 1.64,
"learning_rate": 3.6265960759887886e-06,
"loss": 0.0581,
"step": 21030
},
{
"epoch": 1.64,
"learning_rate": 3.618810339458113e-06,
"loss": 0.0885,
"step": 21040
},
{
"epoch": 1.64,
"learning_rate": 3.611024602927437e-06,
"loss": 0.0644,
"step": 21050
},
{
"epoch": 1.64,
"learning_rate": 3.6032388663967615e-06,
"loss": 0.0666,
"step": 21060
},
{
"epoch": 1.64,
"learning_rate": 3.5954531298660855e-06,
"loss": 0.0569,
"step": 21070
},
{
"epoch": 1.64,
"learning_rate": 3.58766739333541e-06,
"loss": 0.0582,
"step": 21080
},
{
"epoch": 1.64,
"learning_rate": 3.579881656804734e-06,
"loss": 0.0694,
"step": 21090
},
{
"epoch": 1.64,
"learning_rate": 3.5720959202740585e-06,
"loss": 0.075,
"step": 21100
},
{
"epoch": 1.64,
"learning_rate": 3.5643101837433825e-06,
"loss": 0.0667,
"step": 21110
},
{
"epoch": 1.64,
"learning_rate": 3.5565244472127065e-06,
"loss": 0.047,
"step": 21120
},
{
"epoch": 1.65,
"learning_rate": 3.548738710682031e-06,
"loss": 0.0575,
"step": 21130
},
{
"epoch": 1.65,
"learning_rate": 3.540952974151355e-06,
"loss": 0.0633,
"step": 21140
},
{
"epoch": 1.65,
"learning_rate": 3.5331672376206794e-06,
"loss": 0.0762,
"step": 21150
},
{
"epoch": 1.65,
"learning_rate": 3.5253815010900035e-06,
"loss": 0.0584,
"step": 21160
},
{
"epoch": 1.65,
"learning_rate": 3.517595764559328e-06,
"loss": 0.0767,
"step": 21170
},
{
"epoch": 1.65,
"learning_rate": 3.509810028028652e-06,
"loss": 0.0565,
"step": 21180
},
{
"epoch": 1.65,
"learning_rate": 3.5020242914979756e-06,
"loss": 0.0597,
"step": 21190
},
{
"epoch": 1.65,
"learning_rate": 3.4942385549673004e-06,
"loss": 0.0615,
"step": 21200
},
{
"epoch": 1.65,
"learning_rate": 3.486452818436624e-06,
"loss": 0.0759,
"step": 21210
},
{
"epoch": 1.65,
"learning_rate": 3.478667081905949e-06,
"loss": 0.0709,
"step": 21220
},
{
"epoch": 1.65,
"learning_rate": 3.4708813453752725e-06,
"loss": 0.0482,
"step": 21230
},
{
"epoch": 1.65,
"learning_rate": 3.463095608844597e-06,
"loss": 0.0536,
"step": 21240
},
{
"epoch": 1.65,
"learning_rate": 3.455309872313921e-06,
"loss": 0.0625,
"step": 21250
},
{
"epoch": 1.66,
"learning_rate": 3.4475241357832455e-06,
"loss": 0.0656,
"step": 21260
},
{
"epoch": 1.66,
"learning_rate": 3.4397383992525695e-06,
"loss": 0.0585,
"step": 21270
},
{
"epoch": 1.66,
"learning_rate": 3.4319526627218935e-06,
"loss": 0.0724,
"step": 21280
},
{
"epoch": 1.66,
"learning_rate": 3.424166926191218e-06,
"loss": 0.0817,
"step": 21290
},
{
"epoch": 1.66,
"learning_rate": 3.416381189660542e-06,
"loss": 0.0846,
"step": 21300
},
{
"epoch": 1.66,
"learning_rate": 3.4085954531298664e-06,
"loss": 0.0793,
"step": 21310
},
{
"epoch": 1.66,
"learning_rate": 3.4008097165991905e-06,
"loss": 0.0558,
"step": 21320
},
{
"epoch": 1.66,
"learning_rate": 3.393023980068515e-06,
"loss": 0.0817,
"step": 21330
},
{
"epoch": 1.66,
"learning_rate": 3.385238243537839e-06,
"loss": 0.0643,
"step": 21340
},
{
"epoch": 1.66,
"learning_rate": 3.3774525070071634e-06,
"loss": 0.0625,
"step": 21350
},
{
"epoch": 1.66,
"learning_rate": 3.3696667704764874e-06,
"loss": 0.052,
"step": 21360
},
{
"epoch": 1.66,
"learning_rate": 3.3618810339458115e-06,
"loss": 0.0811,
"step": 21370
},
{
"epoch": 1.66,
"learning_rate": 3.354095297415136e-06,
"loss": 0.0507,
"step": 21380
},
{
"epoch": 1.67,
"learning_rate": 3.34630956088446e-06,
"loss": 0.0736,
"step": 21390
},
{
"epoch": 1.67,
"learning_rate": 3.3385238243537844e-06,
"loss": 0.0615,
"step": 21400
},
{
"epoch": 1.67,
"learning_rate": 3.3307380878231084e-06,
"loss": 0.0487,
"step": 21410
},
{
"epoch": 1.67,
"learning_rate": 3.322952351292433e-06,
"loss": 0.0606,
"step": 21420
},
{
"epoch": 1.67,
"learning_rate": 3.315166614761757e-06,
"loss": 0.06,
"step": 21430
},
{
"epoch": 1.67,
"learning_rate": 3.3073808782310805e-06,
"loss": 0.0644,
"step": 21440
},
{
"epoch": 1.67,
"learning_rate": 3.2995951417004054e-06,
"loss": 0.0583,
"step": 21450
},
{
"epoch": 1.67,
"learning_rate": 3.291809405169729e-06,
"loss": 0.0763,
"step": 21460
},
{
"epoch": 1.67,
"learning_rate": 3.2840236686390534e-06,
"loss": 0.059,
"step": 21470
},
{
"epoch": 1.67,
"learning_rate": 3.2762379321083775e-06,
"loss": 0.0718,
"step": 21480
},
{
"epoch": 1.67,
"learning_rate": 3.268452195577702e-06,
"loss": 0.0708,
"step": 21490
},
{
"epoch": 1.67,
"learning_rate": 3.260666459047026e-06,
"loss": 0.08,
"step": 21500
},
{
"epoch": 1.67,
"learning_rate": 3.2528807225163504e-06,
"loss": 0.069,
"step": 21510
},
{
"epoch": 1.68,
"learning_rate": 3.2450949859856744e-06,
"loss": 0.0592,
"step": 21520
},
{
"epoch": 1.68,
"learning_rate": 3.2373092494549985e-06,
"loss": 0.0589,
"step": 21530
},
{
"epoch": 1.68,
"learning_rate": 3.229523512924323e-06,
"loss": 0.0703,
"step": 21540
},
{
"epoch": 1.68,
"learning_rate": 3.221737776393647e-06,
"loss": 0.0549,
"step": 21550
},
{
"epoch": 1.68,
"learning_rate": 3.2139520398629714e-06,
"loss": 0.0663,
"step": 21560
},
{
"epoch": 1.68,
"learning_rate": 3.2061663033322954e-06,
"loss": 0.0913,
"step": 21570
},
{
"epoch": 1.68,
"learning_rate": 3.19838056680162e-06,
"loss": 0.0677,
"step": 21580
},
{
"epoch": 1.68,
"learning_rate": 3.190594830270944e-06,
"loss": 0.0821,
"step": 21590
},
{
"epoch": 1.68,
"learning_rate": 3.1828090937402684e-06,
"loss": 0.0569,
"step": 21600
},
{
"epoch": 1.68,
"learning_rate": 3.1750233572095924e-06,
"loss": 0.0686,
"step": 21610
},
{
"epoch": 1.68,
"learning_rate": 3.1672376206789164e-06,
"loss": 0.0636,
"step": 21620
},
{
"epoch": 1.68,
"learning_rate": 3.159451884148241e-06,
"loss": 0.0526,
"step": 21630
},
{
"epoch": 1.68,
"learning_rate": 3.151666147617565e-06,
"loss": 0.0579,
"step": 21640
},
{
"epoch": 1.69,
"learning_rate": 3.1438804110868893e-06,
"loss": 0.0526,
"step": 21650
},
{
"epoch": 1.69,
"learning_rate": 3.1360946745562134e-06,
"loss": 0.0538,
"step": 21660
},
{
"epoch": 1.69,
"learning_rate": 3.128308938025538e-06,
"loss": 0.0636,
"step": 21670
},
{
"epoch": 1.69,
"learning_rate": 3.120523201494862e-06,
"loss": 0.0528,
"step": 21680
},
{
"epoch": 1.69,
"learning_rate": 3.1127374649641855e-06,
"loss": 0.0524,
"step": 21690
},
{
"epoch": 1.69,
"learning_rate": 3.10495172843351e-06,
"loss": 0.0692,
"step": 21700
},
{
"epoch": 1.69,
"learning_rate": 3.097165991902834e-06,
"loss": 0.059,
"step": 21710
},
{
"epoch": 1.69,
"learning_rate": 3.0893802553721584e-06,
"loss": 0.0491,
"step": 21720
},
{
"epoch": 1.69,
"learning_rate": 3.0815945188414824e-06,
"loss": 0.0622,
"step": 21730
},
{
"epoch": 1.69,
"learning_rate": 3.073808782310807e-06,
"loss": 0.0707,
"step": 21740
},
{
"epoch": 1.69,
"learning_rate": 3.066023045780131e-06,
"loss": 0.0717,
"step": 21750
},
{
"epoch": 1.69,
"learning_rate": 3.0582373092494554e-06,
"loss": 0.0597,
"step": 21760
},
{
"epoch": 1.69,
"learning_rate": 3.0504515727187794e-06,
"loss": 0.0566,
"step": 21770
},
{
"epoch": 1.7,
"learning_rate": 3.0426658361881034e-06,
"loss": 0.0611,
"step": 21780
},
{
"epoch": 1.7,
"learning_rate": 3.034880099657428e-06,
"loss": 0.0599,
"step": 21790
},
{
"epoch": 1.7,
"learning_rate": 3.027094363126752e-06,
"loss": 0.0624,
"step": 21800
},
{
"epoch": 1.7,
"learning_rate": 3.0193086265960763e-06,
"loss": 0.0705,
"step": 21810
},
{
"epoch": 1.7,
"learning_rate": 3.0115228900654004e-06,
"loss": 0.0663,
"step": 21820
},
{
"epoch": 1.7,
"learning_rate": 3.003737153534725e-06,
"loss": 0.0571,
"step": 21830
},
{
"epoch": 1.7,
"learning_rate": 2.995951417004049e-06,
"loss": 0.0521,
"step": 21840
},
{
"epoch": 1.7,
"learning_rate": 2.9881656804733733e-06,
"loss": 0.072,
"step": 21850
},
{
"epoch": 1.7,
"learning_rate": 2.9803799439426973e-06,
"loss": 0.0653,
"step": 21860
},
{
"epoch": 1.7,
"learning_rate": 2.9725942074120214e-06,
"loss": 0.0642,
"step": 21870
},
{
"epoch": 1.7,
"learning_rate": 2.964808470881346e-06,
"loss": 0.0493,
"step": 21880
},
{
"epoch": 1.7,
"learning_rate": 2.95702273435067e-06,
"loss": 0.0645,
"step": 21890
},
{
"epoch": 1.71,
"learning_rate": 2.9492369978199943e-06,
"loss": 0.0505,
"step": 21900
},
{
"epoch": 1.71,
"learning_rate": 2.9414512612893183e-06,
"loss": 0.0873,
"step": 21910
},
{
"epoch": 1.71,
"learning_rate": 2.9336655247586428e-06,
"loss": 0.0579,
"step": 21920
},
{
"epoch": 1.71,
"learning_rate": 2.9258797882279664e-06,
"loss": 0.088,
"step": 21930
},
{
"epoch": 1.71,
"learning_rate": 2.9180940516972904e-06,
"loss": 0.0714,
"step": 21940
},
{
"epoch": 1.71,
"learning_rate": 2.910308315166615e-06,
"loss": 0.0774,
"step": 21950
},
{
"epoch": 1.71,
"learning_rate": 2.902522578635939e-06,
"loss": 0.0496,
"step": 21960
},
{
"epoch": 1.71,
"learning_rate": 2.8947368421052634e-06,
"loss": 0.0519,
"step": 21970
},
{
"epoch": 1.71,
"learning_rate": 2.8869511055745874e-06,
"loss": 0.0596,
"step": 21980
},
{
"epoch": 1.71,
"learning_rate": 2.879165369043912e-06,
"loss": 0.0627,
"step": 21990
},
{
"epoch": 1.71,
"learning_rate": 2.871379632513236e-06,
"loss": 0.0714,
"step": 22000
},
{
"epoch": 1.71,
"learning_rate": 2.8635938959825603e-06,
"loss": 0.0571,
"step": 22010
},
{
"epoch": 1.71,
"learning_rate": 2.8558081594518843e-06,
"loss": 0.0609,
"step": 22020
},
{
"epoch": 1.72,
"learning_rate": 2.8480224229212084e-06,
"loss": 0.0658,
"step": 22030
},
{
"epoch": 1.72,
"learning_rate": 2.840236686390533e-06,
"loss": 0.0533,
"step": 22040
},
{
"epoch": 1.72,
"learning_rate": 2.832450949859857e-06,
"loss": 0.0456,
"step": 22050
},
{
"epoch": 1.72,
"learning_rate": 2.8246652133291813e-06,
"loss": 0.06,
"step": 22060
},
{
"epoch": 1.72,
"learning_rate": 2.8168794767985053e-06,
"loss": 0.0509,
"step": 22070
},
{
"epoch": 1.72,
"learning_rate": 2.8090937402678298e-06,
"loss": 0.0561,
"step": 22080
},
{
"epoch": 1.72,
"learning_rate": 2.801308003737154e-06,
"loss": 0.0552,
"step": 22090
},
{
"epoch": 1.72,
"learning_rate": 2.7935222672064783e-06,
"loss": 0.0636,
"step": 22100
},
{
"epoch": 1.72,
"learning_rate": 2.7857365306758023e-06,
"loss": 0.0457,
"step": 22110
},
{
"epoch": 1.72,
"learning_rate": 2.7779507941451263e-06,
"loss": 0.0634,
"step": 22120
},
{
"epoch": 1.72,
"learning_rate": 2.7701650576144508e-06,
"loss": 0.0594,
"step": 22130
},
{
"epoch": 1.72,
"learning_rate": 2.762379321083775e-06,
"loss": 0.0687,
"step": 22140
},
{
"epoch": 1.72,
"learning_rate": 2.7545935845530993e-06,
"loss": 0.0634,
"step": 22150
},
{
"epoch": 1.73,
"learning_rate": 2.746807848022423e-06,
"loss": 0.0563,
"step": 22160
},
{
"epoch": 1.73,
"learning_rate": 2.7390221114917477e-06,
"loss": 0.0745,
"step": 22170
},
{
"epoch": 1.73,
"learning_rate": 2.7312363749610713e-06,
"loss": 0.0735,
"step": 22180
},
{
"epoch": 1.73,
"learning_rate": 2.7234506384303954e-06,
"loss": 0.0593,
"step": 22190
},
{
"epoch": 1.73,
"learning_rate": 2.71566490189972e-06,
"loss": 0.0509,
"step": 22200
},
{
"epoch": 1.73,
"learning_rate": 2.707879165369044e-06,
"loss": 0.0707,
"step": 22210
},
{
"epoch": 1.73,
"learning_rate": 2.7000934288383683e-06,
"loss": 0.0653,
"step": 22220
},
{
"epoch": 1.73,
"learning_rate": 2.6923076923076923e-06,
"loss": 0.0514,
"step": 22230
},
{
"epoch": 1.73,
"learning_rate": 2.6845219557770168e-06,
"loss": 0.0593,
"step": 22240
},
{
"epoch": 1.73,
"learning_rate": 2.676736219246341e-06,
"loss": 0.043,
"step": 22250
},
{
"epoch": 1.73,
"learning_rate": 2.6689504827156653e-06,
"loss": 0.0839,
"step": 22260
},
{
"epoch": 1.73,
"learning_rate": 2.6611647461849893e-06,
"loss": 0.0555,
"step": 22270
},
{
"epoch": 1.73,
"learning_rate": 2.6533790096543133e-06,
"loss": 0.0646,
"step": 22280
},
{
"epoch": 1.74,
"learning_rate": 2.6455932731236378e-06,
"loss": 0.063,
"step": 22290
},
{
"epoch": 1.74,
"learning_rate": 2.637807536592962e-06,
"loss": 0.0461,
"step": 22300
},
{
"epoch": 1.74,
"learning_rate": 2.6300218000622863e-06,
"loss": 0.0611,
"step": 22310
},
{
"epoch": 1.74,
"learning_rate": 2.6222360635316103e-06,
"loss": 0.0546,
"step": 22320
},
{
"epoch": 1.74,
"learning_rate": 2.6144503270009347e-06,
"loss": 0.0607,
"step": 22330
},
{
"epoch": 1.74,
"learning_rate": 2.6066645904702588e-06,
"loss": 0.0552,
"step": 22340
},
{
"epoch": 1.74,
"learning_rate": 2.5988788539395832e-06,
"loss": 0.0628,
"step": 22350
},
{
"epoch": 1.74,
"learning_rate": 2.5910931174089072e-06,
"loss": 0.0504,
"step": 22360
},
{
"epoch": 1.74,
"learning_rate": 2.5833073808782313e-06,
"loss": 0.0537,
"step": 22370
},
{
"epoch": 1.74,
"learning_rate": 2.5755216443475557e-06,
"loss": 0.0548,
"step": 22380
},
{
"epoch": 1.74,
"learning_rate": 2.5677359078168793e-06,
"loss": 0.0621,
"step": 22390
},
{
"epoch": 1.74,
"learning_rate": 2.559950171286204e-06,
"loss": 0.0773,
"step": 22400
},
{
"epoch": 1.74,
"learning_rate": 2.552164434755528e-06,
"loss": 0.0585,
"step": 22410
},
{
"epoch": 1.75,
"learning_rate": 2.5443786982248527e-06,
"loss": 0.0711,
"step": 22420
},
{
"epoch": 1.75,
"learning_rate": 2.5365929616941763e-06,
"loss": 0.0469,
"step": 22430
},
{
"epoch": 1.75,
"learning_rate": 2.5288072251635003e-06,
"loss": 0.0724,
"step": 22440
},
{
"epoch": 1.75,
"learning_rate": 2.5210214886328248e-06,
"loss": 0.0616,
"step": 22450
},
{
"epoch": 1.75,
"learning_rate": 2.513235752102149e-06,
"loss": 0.0506,
"step": 22460
},
{
"epoch": 1.75,
"learning_rate": 2.5054500155714733e-06,
"loss": 0.0653,
"step": 22470
},
{
"epoch": 1.75,
"learning_rate": 2.4976642790407973e-06,
"loss": 0.0543,
"step": 22480
},
{
"epoch": 1.75,
"learning_rate": 2.4898785425101217e-06,
"loss": 0.0484,
"step": 22490
},
{
"epoch": 1.75,
"learning_rate": 2.4820928059794458e-06,
"loss": 0.0549,
"step": 22500
},
{
"epoch": 1.75,
"learning_rate": 2.4743070694487702e-06,
"loss": 0.0575,
"step": 22510
},
{
"epoch": 1.75,
"learning_rate": 2.4665213329180942e-06,
"loss": 0.0619,
"step": 22520
},
{
"epoch": 1.75,
"learning_rate": 2.4587355963874183e-06,
"loss": 0.0482,
"step": 22530
},
{
"epoch": 1.75,
"learning_rate": 2.4509498598567427e-06,
"loss": 0.0511,
"step": 22540
},
{
"epoch": 1.76,
"learning_rate": 2.4431641233260668e-06,
"loss": 0.0605,
"step": 22550
},
{
"epoch": 1.76,
"learning_rate": 2.435378386795391e-06,
"loss": 0.0646,
"step": 22560
},
{
"epoch": 1.76,
"learning_rate": 2.4275926502647152e-06,
"loss": 0.0593,
"step": 22570
},
{
"epoch": 1.76,
"learning_rate": 2.4198069137340393e-06,
"loss": 0.0577,
"step": 22580
},
{
"epoch": 1.76,
"learning_rate": 2.4120211772033637e-06,
"loss": 0.0608,
"step": 22590
},
{
"epoch": 1.76,
"learning_rate": 2.4042354406726877e-06,
"loss": 0.0519,
"step": 22600
},
{
"epoch": 1.76,
"learning_rate": 2.396449704142012e-06,
"loss": 0.049,
"step": 22610
},
{
"epoch": 1.76,
"learning_rate": 2.3886639676113362e-06,
"loss": 0.0477,
"step": 22620
},
{
"epoch": 1.76,
"learning_rate": 2.3808782310806607e-06,
"loss": 0.0564,
"step": 22630
},
{
"epoch": 1.76,
"learning_rate": 2.3730924945499847e-06,
"loss": 0.05,
"step": 22640
},
{
"epoch": 1.76,
"learning_rate": 2.3653067580193087e-06,
"loss": 0.06,
"step": 22650
},
{
"epoch": 1.76,
"learning_rate": 2.3575210214886328e-06,
"loss": 0.0576,
"step": 22660
},
{
"epoch": 1.77,
"learning_rate": 2.3497352849579572e-06,
"loss": 0.072,
"step": 22670
},
{
"epoch": 1.77,
"learning_rate": 2.3419495484272812e-06,
"loss": 0.0507,
"step": 22680
},
{
"epoch": 1.77,
"learning_rate": 2.3341638118966057e-06,
"loss": 0.0557,
"step": 22690
},
{
"epoch": 1.77,
"learning_rate": 2.3263780753659297e-06,
"loss": 0.0553,
"step": 22700
},
{
"epoch": 1.77,
"learning_rate": 2.318592338835254e-06,
"loss": 0.0493,
"step": 22710
},
{
"epoch": 1.77,
"learning_rate": 2.310806602304578e-06,
"loss": 0.0688,
"step": 22720
},
{
"epoch": 1.77,
"learning_rate": 2.3030208657739022e-06,
"loss": 0.0801,
"step": 22730
},
{
"epoch": 1.77,
"learning_rate": 2.2952351292432267e-06,
"loss": 0.0608,
"step": 22740
},
{
"epoch": 1.77,
"learning_rate": 2.2874493927125507e-06,
"loss": 0.0542,
"step": 22750
},
{
"epoch": 1.77,
"learning_rate": 2.279663656181875e-06,
"loss": 0.0521,
"step": 22760
},
{
"epoch": 1.77,
"learning_rate": 2.271877919651199e-06,
"loss": 0.059,
"step": 22770
},
{
"epoch": 1.77,
"learning_rate": 2.2640921831205232e-06,
"loss": 0.0754,
"step": 22780
},
{
"epoch": 1.77,
"learning_rate": 2.2563064465898477e-06,
"loss": 0.0577,
"step": 22790
},
{
"epoch": 1.78,
"learning_rate": 2.2485207100591717e-06,
"loss": 0.0455,
"step": 22800
},
{
"epoch": 1.78,
"learning_rate": 2.240734973528496e-06,
"loss": 0.046,
"step": 22810
},
{
"epoch": 1.78,
"learning_rate": 2.23294923699782e-06,
"loss": 0.0575,
"step": 22820
},
{
"epoch": 1.78,
"learning_rate": 2.2251635004671442e-06,
"loss": 0.0499,
"step": 22830
},
{
"epoch": 1.78,
"learning_rate": 2.2173777639364687e-06,
"loss": 0.0864,
"step": 22840
},
{
"epoch": 1.78,
"learning_rate": 2.2095920274057927e-06,
"loss": 0.056,
"step": 22850
},
{
"epoch": 1.78,
"learning_rate": 2.201806290875117e-06,
"loss": 0.0756,
"step": 22860
},
{
"epoch": 1.78,
"learning_rate": 2.194020554344441e-06,
"loss": 0.049,
"step": 22870
},
{
"epoch": 1.78,
"learning_rate": 2.1862348178137656e-06,
"loss": 0.0531,
"step": 22880
},
{
"epoch": 1.78,
"learning_rate": 2.1784490812830897e-06,
"loss": 0.0495,
"step": 22890
},
{
"epoch": 1.78,
"learning_rate": 2.1706633447524137e-06,
"loss": 0.0526,
"step": 22900
},
{
"epoch": 1.78,
"learning_rate": 2.1628776082217377e-06,
"loss": 0.0588,
"step": 22910
},
{
"epoch": 1.78,
"learning_rate": 2.155091871691062e-06,
"loss": 0.0513,
"step": 22920
},
{
"epoch": 1.79,
"learning_rate": 2.147306135160386e-06,
"loss": 0.0512,
"step": 22930
},
{
"epoch": 1.79,
"learning_rate": 2.1395203986297107e-06,
"loss": 0.0594,
"step": 22940
},
{
"epoch": 1.79,
"learning_rate": 2.1317346620990347e-06,
"loss": 0.0626,
"step": 22950
},
{
"epoch": 1.79,
"learning_rate": 2.123948925568359e-06,
"loss": 0.0845,
"step": 22960
},
{
"epoch": 1.79,
"learning_rate": 2.116163189037683e-06,
"loss": 0.0489,
"step": 22970
},
{
"epoch": 1.79,
"learning_rate": 2.108377452507007e-06,
"loss": 0.0525,
"step": 22980
},
{
"epoch": 1.79,
"learning_rate": 2.1005917159763316e-06,
"loss": 0.0653,
"step": 22990
},
{
"epoch": 1.79,
"learning_rate": 2.0928059794456557e-06,
"loss": 0.0467,
"step": 23000
},
{
"epoch": 1.79,
"learning_rate": 2.0850202429149797e-06,
"loss": 0.0558,
"step": 23010
},
{
"epoch": 1.79,
"learning_rate": 2.077234506384304e-06,
"loss": 0.0544,
"step": 23020
},
{
"epoch": 1.79,
"learning_rate": 2.069448769853628e-06,
"loss": 0.0781,
"step": 23030
},
{
"epoch": 1.79,
"learning_rate": 2.0616630333229526e-06,
"loss": 0.0646,
"step": 23040
},
{
"epoch": 1.79,
"learning_rate": 2.0538772967922767e-06,
"loss": 0.0495,
"step": 23050
},
{
"epoch": 1.8,
"learning_rate": 2.046091560261601e-06,
"loss": 0.0606,
"step": 23060
},
{
"epoch": 1.8,
"learning_rate": 2.038305823730925e-06,
"loss": 0.0693,
"step": 23070
},
{
"epoch": 1.8,
"learning_rate": 2.030520087200249e-06,
"loss": 0.0549,
"step": 23080
},
{
"epoch": 1.8,
"learning_rate": 2.0227343506695736e-06,
"loss": 0.0592,
"step": 23090
},
{
"epoch": 1.8,
"learning_rate": 2.0149486141388977e-06,
"loss": 0.0567,
"step": 23100
},
{
"epoch": 1.8,
"learning_rate": 2.007162877608222e-06,
"loss": 0.0647,
"step": 23110
},
{
"epoch": 1.8,
"learning_rate": 1.999377141077546e-06,
"loss": 0.0504,
"step": 23120
},
{
"epoch": 1.8,
"learning_rate": 1.9915914045468706e-06,
"loss": 0.0566,
"step": 23130
},
{
"epoch": 1.8,
"learning_rate": 1.9838056680161946e-06,
"loss": 0.0621,
"step": 23140
},
{
"epoch": 1.8,
"learning_rate": 1.9760199314855186e-06,
"loss": 0.0541,
"step": 23150
},
{
"epoch": 1.8,
"learning_rate": 1.9682341949548427e-06,
"loss": 0.0555,
"step": 23160
},
{
"epoch": 1.8,
"learning_rate": 1.960448458424167e-06,
"loss": 0.0583,
"step": 23170
},
{
"epoch": 1.8,
"learning_rate": 1.952662721893491e-06,
"loss": 0.047,
"step": 23180
},
{
"epoch": 1.81,
"learning_rate": 1.9448769853628156e-06,
"loss": 0.0502,
"step": 23190
},
{
"epoch": 1.81,
"learning_rate": 1.9370912488321396e-06,
"loss": 0.0535,
"step": 23200
},
{
"epoch": 1.81,
"learning_rate": 1.929305512301464e-06,
"loss": 0.0561,
"step": 23210
},
{
"epoch": 1.81,
"learning_rate": 1.921519775770788e-06,
"loss": 0.0583,
"step": 23220
},
{
"epoch": 1.81,
"learning_rate": 1.913734039240112e-06,
"loss": 0.0512,
"step": 23230
},
{
"epoch": 1.81,
"learning_rate": 1.9059483027094364e-06,
"loss": 0.0499,
"step": 23240
},
{
"epoch": 1.81,
"learning_rate": 1.8981625661787606e-06,
"loss": 0.0464,
"step": 23250
},
{
"epoch": 1.81,
"learning_rate": 1.8903768296480849e-06,
"loss": 0.057,
"step": 23260
},
{
"epoch": 1.81,
"learning_rate": 1.8825910931174091e-06,
"loss": 0.0488,
"step": 23270
},
{
"epoch": 1.81,
"learning_rate": 1.8748053565867333e-06,
"loss": 0.0562,
"step": 23280
},
{
"epoch": 1.81,
"learning_rate": 1.8670196200560576e-06,
"loss": 0.0447,
"step": 23290
},
{
"epoch": 1.81,
"learning_rate": 1.8592338835253818e-06,
"loss": 0.0522,
"step": 23300
},
{
"epoch": 1.81,
"learning_rate": 1.851448146994706e-06,
"loss": 0.0425,
"step": 23310
},
{
"epoch": 1.82,
"learning_rate": 1.8436624104640299e-06,
"loss": 0.0546,
"step": 23320
},
{
"epoch": 1.82,
"learning_rate": 1.8358766739333541e-06,
"loss": 0.0551,
"step": 23330
},
{
"epoch": 1.82,
"learning_rate": 1.8280909374026784e-06,
"loss": 0.058,
"step": 23340
},
{
"epoch": 1.82,
"learning_rate": 1.8203052008720026e-06,
"loss": 0.0714,
"step": 23350
},
{
"epoch": 1.82,
"learning_rate": 1.8125194643413268e-06,
"loss": 0.0617,
"step": 23360
},
{
"epoch": 1.82,
"learning_rate": 1.804733727810651e-06,
"loss": 0.0618,
"step": 23370
},
{
"epoch": 1.82,
"learning_rate": 1.7969479912799753e-06,
"loss": 0.0504,
"step": 23380
},
{
"epoch": 1.82,
"learning_rate": 1.7891622547492996e-06,
"loss": 0.0538,
"step": 23390
},
{
"epoch": 1.82,
"learning_rate": 1.7813765182186236e-06,
"loss": 0.0561,
"step": 23400
},
{
"epoch": 1.82,
"learning_rate": 1.7735907816879478e-06,
"loss": 0.0585,
"step": 23410
},
{
"epoch": 1.82,
"learning_rate": 1.7658050451572719e-06,
"loss": 0.0659,
"step": 23420
},
{
"epoch": 1.82,
"learning_rate": 1.7580193086265961e-06,
"loss": 0.0485,
"step": 23430
},
{
"epoch": 1.82,
"learning_rate": 1.7502335720959204e-06,
"loss": 0.055,
"step": 23440
},
{
"epoch": 1.83,
"learning_rate": 1.7424478355652446e-06,
"loss": 0.0522,
"step": 23450
},
{
"epoch": 1.83,
"learning_rate": 1.7346620990345688e-06,
"loss": 0.0438,
"step": 23460
},
{
"epoch": 1.83,
"learning_rate": 1.726876362503893e-06,
"loss": 0.0501,
"step": 23470
},
{
"epoch": 1.83,
"learning_rate": 1.719090625973217e-06,
"loss": 0.0645,
"step": 23480
},
{
"epoch": 1.83,
"learning_rate": 1.7113048894425413e-06,
"loss": 0.0577,
"step": 23490
},
{
"epoch": 1.83,
"learning_rate": 1.7035191529118656e-06,
"loss": 0.0557,
"step": 23500
},
{
"epoch": 1.83,
"learning_rate": 1.6957334163811898e-06,
"loss": 0.0464,
"step": 23510
},
{
"epoch": 1.83,
"learning_rate": 1.687947679850514e-06,
"loss": 0.0498,
"step": 23520
},
{
"epoch": 1.83,
"learning_rate": 1.6801619433198383e-06,
"loss": 0.0553,
"step": 23530
},
{
"epoch": 1.83,
"learning_rate": 1.6723762067891625e-06,
"loss": 0.0639,
"step": 23540
},
{
"epoch": 1.83,
"learning_rate": 1.6645904702584868e-06,
"loss": 0.0564,
"step": 23550
},
{
"epoch": 1.83,
"learning_rate": 1.656804733727811e-06,
"loss": 0.0635,
"step": 23560
},
{
"epoch": 1.84,
"learning_rate": 1.6490189971971348e-06,
"loss": 0.0623,
"step": 23570
},
{
"epoch": 1.84,
"learning_rate": 1.641233260666459e-06,
"loss": 0.0545,
"step": 23580
},
{
"epoch": 1.84,
"learning_rate": 1.6334475241357833e-06,
"loss": 0.0678,
"step": 23590
},
{
"epoch": 1.84,
"learning_rate": 1.6256617876051076e-06,
"loss": 0.0484,
"step": 23600
},
{
"epoch": 1.84,
"learning_rate": 1.6178760510744318e-06,
"loss": 0.0545,
"step": 23610
},
{
"epoch": 1.84,
"learning_rate": 1.610090314543756e-06,
"loss": 0.0485,
"step": 23620
},
{
"epoch": 1.84,
"learning_rate": 1.6023045780130803e-06,
"loss": 0.0558,
"step": 23630
},
{
"epoch": 1.84,
"learning_rate": 1.5945188414824045e-06,
"loss": 0.0579,
"step": 23640
},
{
"epoch": 1.84,
"learning_rate": 1.5867331049517283e-06,
"loss": 0.0686,
"step": 23650
},
{
"epoch": 1.84,
"learning_rate": 1.5789473684210526e-06,
"loss": 0.045,
"step": 23660
},
{
"epoch": 1.84,
"learning_rate": 1.5711616318903768e-06,
"loss": 0.0663,
"step": 23670
},
{
"epoch": 1.84,
"learning_rate": 1.563375895359701e-06,
"loss": 0.056,
"step": 23680
},
{
"epoch": 1.84,
"learning_rate": 1.5555901588290253e-06,
"loss": 0.0638,
"step": 23690
},
{
"epoch": 1.85,
"learning_rate": 1.5478044222983495e-06,
"loss": 0.056,
"step": 23700
},
{
"epoch": 1.85,
"learning_rate": 1.5400186857676738e-06,
"loss": 0.0558,
"step": 23710
},
{
"epoch": 1.85,
"learning_rate": 1.532232949236998e-06,
"loss": 0.0473,
"step": 23720
},
{
"epoch": 1.85,
"learning_rate": 1.524447212706322e-06,
"loss": 0.0602,
"step": 23730
},
{
"epoch": 1.85,
"learning_rate": 1.5166614761756463e-06,
"loss": 0.0457,
"step": 23740
},
{
"epoch": 1.85,
"learning_rate": 1.5088757396449705e-06,
"loss": 0.0562,
"step": 23750
},
{
"epoch": 1.85,
"learning_rate": 1.5010900031142948e-06,
"loss": 0.0479,
"step": 23760
},
{
"epoch": 1.85,
"learning_rate": 1.493304266583619e-06,
"loss": 0.0469,
"step": 23770
},
{
"epoch": 1.85,
"learning_rate": 1.4855185300529433e-06,
"loss": 0.0498,
"step": 23780
},
{
"epoch": 1.85,
"learning_rate": 1.4777327935222675e-06,
"loss": 0.0484,
"step": 23790
},
{
"epoch": 1.85,
"learning_rate": 1.4699470569915915e-06,
"loss": 0.0557,
"step": 23800
},
{
"epoch": 1.85,
"learning_rate": 1.4621613204609158e-06,
"loss": 0.0488,
"step": 23810
},
{
"epoch": 1.85,
"learning_rate": 1.4543755839302398e-06,
"loss": 0.0634,
"step": 23820
},
{
"epoch": 1.86,
"learning_rate": 1.446589847399564e-06,
"loss": 0.0568,
"step": 23830
},
{
"epoch": 1.86,
"learning_rate": 1.4388041108688883e-06,
"loss": 0.0454,
"step": 23840
},
{
"epoch": 1.86,
"learning_rate": 1.4310183743382125e-06,
"loss": 0.0615,
"step": 23850
},
{
"epoch": 1.86,
"learning_rate": 1.4232326378075368e-06,
"loss": 0.0548,
"step": 23860
},
{
"epoch": 1.86,
"learning_rate": 1.415446901276861e-06,
"loss": 0.052,
"step": 23870
},
{
"epoch": 1.86,
"learning_rate": 1.4076611647461852e-06,
"loss": 0.0526,
"step": 23880
},
{
"epoch": 1.86,
"learning_rate": 1.3998754282155095e-06,
"loss": 0.0594,
"step": 23890
},
{
"epoch": 1.86,
"learning_rate": 1.3920896916848333e-06,
"loss": 0.0643,
"step": 23900
},
{
"epoch": 1.86,
"learning_rate": 1.3843039551541575e-06,
"loss": 0.0797,
"step": 23910
},
{
"epoch": 1.86,
"learning_rate": 1.3765182186234818e-06,
"loss": 0.0487,
"step": 23920
},
{
"epoch": 1.86,
"learning_rate": 1.368732482092806e-06,
"loss": 0.0526,
"step": 23930
},
{
"epoch": 1.86,
"learning_rate": 1.3609467455621303e-06,
"loss": 0.0528,
"step": 23940
},
{
"epoch": 1.86,
"learning_rate": 1.3531610090314545e-06,
"loss": 0.0491,
"step": 23950
},
{
"epoch": 1.87,
"learning_rate": 1.3453752725007787e-06,
"loss": 0.0495,
"step": 23960
},
{
"epoch": 1.87,
"learning_rate": 1.337589535970103e-06,
"loss": 0.0685,
"step": 23970
},
{
"epoch": 1.87,
"learning_rate": 1.329803799439427e-06,
"loss": 0.0624,
"step": 23980
},
{
"epoch": 1.87,
"learning_rate": 1.3220180629087512e-06,
"loss": 0.0419,
"step": 23990
},
{
"epoch": 1.87,
"learning_rate": 1.3142323263780755e-06,
"loss": 0.0822,
"step": 24000
},
{
"epoch": 1.87,
"learning_rate": 1.3064465898473997e-06,
"loss": 0.0649,
"step": 24010
},
{
"epoch": 1.87,
"learning_rate": 1.298660853316724e-06,
"loss": 0.0521,
"step": 24020
},
{
"epoch": 1.87,
"learning_rate": 1.290875116786048e-06,
"loss": 0.0432,
"step": 24030
},
{
"epoch": 1.87,
"learning_rate": 1.2830893802553722e-06,
"loss": 0.0511,
"step": 24040
},
{
"epoch": 1.87,
"learning_rate": 1.2753036437246965e-06,
"loss": 0.0467,
"step": 24050
},
{
"epoch": 1.87,
"learning_rate": 1.2675179071940207e-06,
"loss": 0.0554,
"step": 24060
},
{
"epoch": 1.87,
"learning_rate": 1.2597321706633447e-06,
"loss": 0.0483,
"step": 24070
},
{
"epoch": 1.87,
"learning_rate": 1.251946434132669e-06,
"loss": 0.0533,
"step": 24080
},
{
"epoch": 1.88,
"learning_rate": 1.2441606976019932e-06,
"loss": 0.0481,
"step": 24090
},
{
"epoch": 1.88,
"learning_rate": 1.2363749610713175e-06,
"loss": 0.054,
"step": 24100
},
{
"epoch": 1.88,
"learning_rate": 1.2285892245406417e-06,
"loss": 0.0536,
"step": 24110
},
{
"epoch": 1.88,
"learning_rate": 1.220803488009966e-06,
"loss": 0.0509,
"step": 24120
},
{
"epoch": 1.88,
"learning_rate": 1.21301775147929e-06,
"loss": 0.0462,
"step": 24130
},
{
"epoch": 1.88,
"learning_rate": 1.2052320149486142e-06,
"loss": 0.0478,
"step": 24140
},
{
"epoch": 1.88,
"learning_rate": 1.1974462784179385e-06,
"loss": 0.0488,
"step": 24150
},
{
"epoch": 1.88,
"learning_rate": 1.1896605418872627e-06,
"loss": 0.057,
"step": 24160
},
{
"epoch": 1.88,
"learning_rate": 1.1818748053565867e-06,
"loss": 0.0469,
"step": 24170
},
{
"epoch": 1.88,
"learning_rate": 1.174089068825911e-06,
"loss": 0.054,
"step": 24180
},
{
"epoch": 1.88,
"learning_rate": 1.1663033322952352e-06,
"loss": 0.0527,
"step": 24190
},
{
"epoch": 1.88,
"learning_rate": 1.1585175957645595e-06,
"loss": 0.057,
"step": 24200
},
{
"epoch": 1.88,
"learning_rate": 1.1507318592338835e-06,
"loss": 0.0508,
"step": 24210
},
{
"epoch": 1.89,
"learning_rate": 1.1429461227032077e-06,
"loss": 0.0526,
"step": 24220
},
{
"epoch": 1.89,
"learning_rate": 1.135160386172532e-06,
"loss": 0.0578,
"step": 24230
},
{
"epoch": 1.89,
"learning_rate": 1.1273746496418562e-06,
"loss": 0.0585,
"step": 24240
},
{
"epoch": 1.89,
"learning_rate": 1.1195889131111804e-06,
"loss": 0.0459,
"step": 24250
},
{
"epoch": 1.89,
"learning_rate": 1.1118031765805045e-06,
"loss": 0.05,
"step": 24260
},
{
"epoch": 1.89,
"learning_rate": 1.1040174400498287e-06,
"loss": 0.0441,
"step": 24270
},
{
"epoch": 1.89,
"learning_rate": 1.096231703519153e-06,
"loss": 0.0597,
"step": 24280
},
{
"epoch": 1.89,
"learning_rate": 1.0884459669884772e-06,
"loss": 0.0609,
"step": 24290
},
{
"epoch": 1.89,
"learning_rate": 1.0806602304578014e-06,
"loss": 0.0493,
"step": 24300
},
{
"epoch": 1.89,
"learning_rate": 1.0728744939271257e-06,
"loss": 0.0494,
"step": 24310
},
{
"epoch": 1.89,
"learning_rate": 1.06508875739645e-06,
"loss": 0.0537,
"step": 24320
},
{
"epoch": 1.89,
"learning_rate": 1.0573030208657742e-06,
"loss": 0.0433,
"step": 24330
},
{
"epoch": 1.9,
"learning_rate": 1.0495172843350982e-06,
"loss": 0.052,
"step": 24340
},
{
"epoch": 1.9,
"learning_rate": 1.0417315478044224e-06,
"loss": 0.0463,
"step": 24350
},
{
"epoch": 1.9,
"learning_rate": 1.0339458112737467e-06,
"loss": 0.0498,
"step": 24360
},
{
"epoch": 1.9,
"learning_rate": 1.026160074743071e-06,
"loss": 0.0515,
"step": 24370
},
{
"epoch": 1.9,
"learning_rate": 1.018374338212395e-06,
"loss": 0.0493,
"step": 24380
},
{
"epoch": 1.9,
"learning_rate": 1.0105886016817192e-06,
"loss": 0.0601,
"step": 24390
},
{
"epoch": 1.9,
"learning_rate": 1.0028028651510434e-06,
"loss": 0.0566,
"step": 24400
},
{
"epoch": 1.9,
"learning_rate": 9.950171286203677e-07,
"loss": 0.0496,
"step": 24410
},
{
"epoch": 1.9,
"learning_rate": 9.872313920896917e-07,
"loss": 0.0473,
"step": 24420
},
{
"epoch": 1.9,
"learning_rate": 9.79445655559016e-07,
"loss": 0.0538,
"step": 24430
},
{
"epoch": 1.9,
"learning_rate": 9.716599190283402e-07,
"loss": 0.046,
"step": 24440
},
{
"epoch": 1.9,
"learning_rate": 9.638741824976644e-07,
"loss": 0.05,
"step": 24450
},
{
"epoch": 1.9,
"learning_rate": 9.560884459669884e-07,
"loss": 0.0656,
"step": 24460
},
{
"epoch": 1.91,
"learning_rate": 9.483027094363127e-07,
"loss": 0.0433,
"step": 24470
},
{
"epoch": 1.91,
"learning_rate": 9.405169729056369e-07,
"loss": 0.0466,
"step": 24480
},
{
"epoch": 1.91,
"learning_rate": 9.327312363749612e-07,
"loss": 0.0493,
"step": 24490
},
{
"epoch": 1.91,
"learning_rate": 9.249454998442853e-07,
"loss": 0.0453,
"step": 24500
},
{
"epoch": 1.91,
"learning_rate": 9.171597633136095e-07,
"loss": 0.0536,
"step": 24510
},
{
"epoch": 1.91,
"learning_rate": 9.093740267829338e-07,
"loss": 0.0484,
"step": 24520
},
{
"epoch": 1.91,
"learning_rate": 9.01588290252258e-07,
"loss": 0.0514,
"step": 24530
},
{
"epoch": 1.91,
"learning_rate": 8.938025537215823e-07,
"loss": 0.0488,
"step": 24540
},
{
"epoch": 1.91,
"learning_rate": 8.860168171909063e-07,
"loss": 0.0458,
"step": 24550
},
{
"epoch": 1.91,
"learning_rate": 8.782310806602305e-07,
"loss": 0.0555,
"step": 24560
},
{
"epoch": 1.91,
"learning_rate": 8.704453441295548e-07,
"loss": 0.0469,
"step": 24570
},
{
"epoch": 1.91,
"learning_rate": 8.62659607598879e-07,
"loss": 0.0444,
"step": 24580
},
{
"epoch": 1.91,
"learning_rate": 8.54873871068203e-07,
"loss": 0.0475,
"step": 24590
},
{
"epoch": 1.92,
"learning_rate": 8.470881345375273e-07,
"loss": 0.0501,
"step": 24600
},
{
"epoch": 1.92,
"learning_rate": 8.393023980068515e-07,
"loss": 0.0474,
"step": 24610
},
{
"epoch": 1.92,
"learning_rate": 8.315166614761758e-07,
"loss": 0.0438,
"step": 24620
},
{
"epoch": 1.92,
"learning_rate": 8.237309249454999e-07,
"loss": 0.0552,
"step": 24630
},
{
"epoch": 1.92,
"learning_rate": 8.159451884148241e-07,
"loss": 0.046,
"step": 24640
},
{
"epoch": 1.92,
"learning_rate": 8.081594518841484e-07,
"loss": 0.05,
"step": 24650
},
{
"epoch": 1.92,
"learning_rate": 8.003737153534725e-07,
"loss": 0.0518,
"step": 24660
},
{
"epoch": 1.92,
"learning_rate": 7.925879788227966e-07,
"loss": 0.0492,
"step": 24670
},
{
"epoch": 1.92,
"learning_rate": 7.848022422921209e-07,
"loss": 0.0509,
"step": 24680
},
{
"epoch": 1.92,
"learning_rate": 7.770165057614451e-07,
"loss": 0.0511,
"step": 24690
},
{
"epoch": 1.92,
"learning_rate": 7.692307692307694e-07,
"loss": 0.0505,
"step": 24700
},
{
"epoch": 1.92,
"learning_rate": 7.614450327000934e-07,
"loss": 0.043,
"step": 24710
},
{
"epoch": 1.92,
"learning_rate": 7.536592961694176e-07,
"loss": 0.0521,
"step": 24720
},
{
"epoch": 1.93,
"learning_rate": 7.458735596387419e-07,
"loss": 0.0427,
"step": 24730
},
{
"epoch": 1.93,
"learning_rate": 7.380878231080661e-07,
"loss": 0.0721,
"step": 24740
},
{
"epoch": 1.93,
"learning_rate": 7.303020865773902e-07,
"loss": 0.0533,
"step": 24750
},
{
"epoch": 1.93,
"learning_rate": 7.225163500467145e-07,
"loss": 0.052,
"step": 24760
},
{
"epoch": 1.93,
"learning_rate": 7.147306135160387e-07,
"loss": 0.0488,
"step": 24770
},
{
"epoch": 1.93,
"learning_rate": 7.069448769853629e-07,
"loss": 0.0465,
"step": 24780
},
{
"epoch": 1.93,
"learning_rate": 6.991591404546871e-07,
"loss": 0.0504,
"step": 24790
},
{
"epoch": 1.93,
"learning_rate": 6.913734039240112e-07,
"loss": 0.0436,
"step": 24800
},
{
"epoch": 1.93,
"learning_rate": 6.835876673933355e-07,
"loss": 0.0696,
"step": 24810
},
{
"epoch": 1.93,
"learning_rate": 6.758019308626597e-07,
"loss": 0.0497,
"step": 24820
},
{
"epoch": 1.93,
"learning_rate": 6.68016194331984e-07,
"loss": 0.0512,
"step": 24830
},
{
"epoch": 1.93,
"learning_rate": 6.60230457801308e-07,
"loss": 0.0539,
"step": 24840
},
{
"epoch": 1.93,
"learning_rate": 6.524447212706322e-07,
"loss": 0.0436,
"step": 24850
},
{
"epoch": 1.94,
"learning_rate": 6.446589847399565e-07,
"loss": 0.0455,
"step": 24860
},
{
"epoch": 1.94,
"learning_rate": 6.368732482092807e-07,
"loss": 0.0483,
"step": 24870
},
{
"epoch": 1.94,
"learning_rate": 6.290875116786048e-07,
"loss": 0.0462,
"step": 24880
},
{
"epoch": 1.94,
"learning_rate": 6.21301775147929e-07,
"loss": 0.0469,
"step": 24890
},
{
"epoch": 1.94,
"learning_rate": 6.135160386172532e-07,
"loss": 0.0565,
"step": 24900
},
{
"epoch": 1.94,
"learning_rate": 6.057303020865775e-07,
"loss": 0.0503,
"step": 24910
},
{
"epoch": 1.94,
"learning_rate": 5.979445655559017e-07,
"loss": 0.0479,
"step": 24920
},
{
"epoch": 1.94,
"learning_rate": 5.901588290252258e-07,
"loss": 0.0553,
"step": 24930
},
{
"epoch": 1.94,
"learning_rate": 5.823730924945501e-07,
"loss": 0.05,
"step": 24940
},
{
"epoch": 1.94,
"learning_rate": 5.745873559638742e-07,
"loss": 0.0501,
"step": 24950
},
{
"epoch": 1.94,
"learning_rate": 5.668016194331984e-07,
"loss": 0.0461,
"step": 24960
},
{
"epoch": 1.94,
"learning_rate": 5.590158829025226e-07,
"loss": 0.0465,
"step": 24970
},
{
"epoch": 1.94,
"learning_rate": 5.512301463718468e-07,
"loss": 0.0527,
"step": 24980
},
{
"epoch": 1.95,
"learning_rate": 5.43444409841171e-07,
"loss": 0.049,
"step": 24990
},
{
"epoch": 1.95,
"learning_rate": 5.356586733104952e-07,
"loss": 0.0606,
"step": 25000
},
{
"epoch": 1.95,
"learning_rate": 5.278729367798194e-07,
"loss": 0.0497,
"step": 25010
},
{
"epoch": 1.95,
"learning_rate": 5.200872002491436e-07,
"loss": 0.046,
"step": 25020
},
{
"epoch": 1.95,
"learning_rate": 5.123014637184678e-07,
"loss": 0.0531,
"step": 25030
},
{
"epoch": 1.95,
"learning_rate": 5.045157271877921e-07,
"loss": 0.0456,
"step": 25040
},
{
"epoch": 1.95,
"learning_rate": 4.967299906571162e-07,
"loss": 0.0539,
"step": 25050
},
{
"epoch": 1.95,
"learning_rate": 4.889442541264404e-07,
"loss": 0.0565,
"step": 25060
},
{
"epoch": 1.95,
"learning_rate": 4.811585175957647e-07,
"loss": 0.0444,
"step": 25070
},
{
"epoch": 1.95,
"learning_rate": 4.733727810650888e-07,
"loss": 0.0467,
"step": 25080
},
{
"epoch": 1.95,
"learning_rate": 4.65587044534413e-07,
"loss": 0.0465,
"step": 25090
},
{
"epoch": 1.95,
"learning_rate": 4.578013080037372e-07,
"loss": 0.0434,
"step": 25100
},
{
"epoch": 1.95,
"learning_rate": 4.500155714730614e-07,
"loss": 0.046,
"step": 25110
},
{
"epoch": 1.96,
"learning_rate": 4.4222983494238555e-07,
"loss": 0.0437,
"step": 25120
},
{
"epoch": 1.96,
"learning_rate": 4.344440984117098e-07,
"loss": 0.0507,
"step": 25130
},
{
"epoch": 1.96,
"learning_rate": 4.26658361881034e-07,
"loss": 0.046,
"step": 25140
},
{
"epoch": 1.96,
"learning_rate": 4.1887262535035817e-07,
"loss": 0.0529,
"step": 25150
},
{
"epoch": 1.96,
"learning_rate": 4.1108688881968236e-07,
"loss": 0.0452,
"step": 25160
},
{
"epoch": 1.96,
"learning_rate": 4.033011522890066e-07,
"loss": 0.0502,
"step": 25170
},
{
"epoch": 1.96,
"learning_rate": 3.9551541575833073e-07,
"loss": 0.0535,
"step": 25180
},
{
"epoch": 1.96,
"learning_rate": 3.8772967922765497e-07,
"loss": 0.0493,
"step": 25190
},
{
"epoch": 1.96,
"learning_rate": 3.7994394269697916e-07,
"loss": 0.0482,
"step": 25200
},
{
"epoch": 1.96,
"learning_rate": 3.7215820616630335e-07,
"loss": 0.0464,
"step": 25210
},
{
"epoch": 1.96,
"learning_rate": 3.6437246963562754e-07,
"loss": 0.0455,
"step": 25220
},
{
"epoch": 1.96,
"learning_rate": 3.565867331049518e-07,
"loss": 0.0492,
"step": 25230
},
{
"epoch": 1.97,
"learning_rate": 3.488009965742759e-07,
"loss": 0.0479,
"step": 25240
},
{
"epoch": 1.97,
"learning_rate": 3.4101526004360015e-07,
"loss": 0.0455,
"step": 25250
},
{
"epoch": 1.97,
"learning_rate": 3.332295235129244e-07,
"loss": 0.0465,
"step": 25260
},
{
"epoch": 1.97,
"learning_rate": 3.254437869822485e-07,
"loss": 0.0505,
"step": 25270
},
{
"epoch": 1.97,
"learning_rate": 3.1765805045157277e-07,
"loss": 0.0576,
"step": 25280
},
{
"epoch": 1.97,
"learning_rate": 3.0987231392089695e-07,
"loss": 0.0439,
"step": 25290
},
{
"epoch": 1.97,
"learning_rate": 3.0208657739022114e-07,
"loss": 0.0429,
"step": 25300
},
{
"epoch": 1.97,
"learning_rate": 2.9430084085954533e-07,
"loss": 0.0464,
"step": 25310
},
{
"epoch": 1.97,
"learning_rate": 2.865151043288695e-07,
"loss": 0.0443,
"step": 25320
},
{
"epoch": 1.97,
"learning_rate": 2.787293677981937e-07,
"loss": 0.0475,
"step": 25330
},
{
"epoch": 1.97,
"learning_rate": 2.709436312675179e-07,
"loss": 0.0553,
"step": 25340
},
{
"epoch": 1.97,
"learning_rate": 2.6315789473684213e-07,
"loss": 0.0485,
"step": 25350
},
{
"epoch": 1.97,
"learning_rate": 2.553721582061663e-07,
"loss": 0.0471,
"step": 25360
},
{
"epoch": 1.98,
"learning_rate": 2.4758642167549056e-07,
"loss": 0.0511,
"step": 25370
},
{
"epoch": 1.98,
"learning_rate": 2.3980068514481475e-07,
"loss": 0.0455,
"step": 25380
},
{
"epoch": 1.98,
"learning_rate": 2.3201494861413894e-07,
"loss": 0.0517,
"step": 25390
},
{
"epoch": 1.98,
"learning_rate": 2.2422921208346312e-07,
"loss": 0.0553,
"step": 25400
},
{
"epoch": 1.98,
"learning_rate": 2.164434755527873e-07,
"loss": 0.0453,
"step": 25410
},
{
"epoch": 1.98,
"learning_rate": 2.0865773902211152e-07,
"loss": 0.0576,
"step": 25420
},
{
"epoch": 1.98,
"learning_rate": 2.008720024914357e-07,
"loss": 0.0471,
"step": 25430
},
{
"epoch": 1.98,
"learning_rate": 1.930862659607599e-07,
"loss": 0.0471,
"step": 25440
},
{
"epoch": 1.98,
"learning_rate": 1.8530052943008411e-07,
"loss": 0.05,
"step": 25450
},
{
"epoch": 1.98,
"learning_rate": 1.775147928994083e-07,
"loss": 0.0434,
"step": 25460
},
{
"epoch": 1.98,
"learning_rate": 1.697290563687325e-07,
"loss": 0.0439,
"step": 25470
},
{
"epoch": 1.98,
"learning_rate": 1.619433198380567e-07,
"loss": 0.0468,
"step": 25480
},
{
"epoch": 1.98,
"learning_rate": 1.541575833073809e-07,
"loss": 0.0457,
"step": 25490
},
{
"epoch": 1.99,
"learning_rate": 1.4637184677670508e-07,
"loss": 0.0457,
"step": 25500
},
{
"epoch": 1.99,
"learning_rate": 1.385861102460293e-07,
"loss": 0.0485,
"step": 25510
},
{
"epoch": 1.99,
"learning_rate": 1.3080037371535348e-07,
"loss": 0.0418,
"step": 25520
},
{
"epoch": 1.99,
"learning_rate": 1.2301463718467767e-07,
"loss": 0.0558,
"step": 25530
},
{
"epoch": 1.99,
"learning_rate": 1.1522890065400187e-07,
"loss": 0.0544,
"step": 25540
},
{
"epoch": 1.99,
"learning_rate": 1.0744316412332607e-07,
"loss": 0.048,
"step": 25550
},
{
"epoch": 1.99,
"learning_rate": 9.965742759265028e-08,
"loss": 0.0439,
"step": 25560
},
{
"epoch": 1.99,
"learning_rate": 9.187169106197447e-08,
"loss": 0.0493,
"step": 25570
},
{
"epoch": 1.99,
"learning_rate": 8.408595453129867e-08,
"loss": 0.0447,
"step": 25580
},
{
"epoch": 1.99,
"learning_rate": 7.630021800062287e-08,
"loss": 0.041,
"step": 25590
},
{
"epoch": 1.99,
"learning_rate": 6.851448146994706e-08,
"loss": 0.047,
"step": 25600
},
{
"epoch": 1.99,
"learning_rate": 6.072874493927126e-08,
"loss": 0.0484,
"step": 25610
},
{
"epoch": 1.99,
"learning_rate": 5.2943008408595454e-08,
"loss": 0.0442,
"step": 25620
},
{
"epoch": 2.0,
"learning_rate": 4.5157271877919655e-08,
"loss": 0.0501,
"step": 25630
},
{
"epoch": 2.0,
"learning_rate": 3.7371535347243856e-08,
"loss": 0.0449,
"step": 25640
},
{
"epoch": 2.0,
"learning_rate": 2.958579881656805e-08,
"loss": 0.0474,
"step": 25650
},
{
"epoch": 2.0,
"learning_rate": 2.1800062285892244e-08,
"loss": 0.0452,
"step": 25660
},
{
"epoch": 2.0,
"learning_rate": 1.4014325755216445e-08,
"loss": 0.0536,
"step": 25670
},
{
"epoch": 2.0,
"learning_rate": 6.228589224540643e-09,
"loss": 0.0451,
"step": 25680
}
],
"max_steps": 25688,
"num_train_epochs": 2,
"total_flos": 1.9075085981712384e+17,
"trial_name": null,
"trial_params": null
}