AlignKD-Pretrain-1246k / trainer_state.json
jsun39's picture
Upload checkpoint for jsun39/AlignKD-Pretrain-1246k
9276bae verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999550887638503,
"global_step": 9741,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.0,
"loss": 12.8749,
"step": 5
},
{
"epoch": 0.0,
"learning_rate": 3.412969283276451e-07,
"loss": 12.095,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 6.825938566552902e-07,
"loss": 8.3313,
"step": 15
},
{
"epoch": 0.0,
"learning_rate": 1.0238907849829352e-06,
"loss": 7.0655,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 1.3651877133105804e-06,
"loss": 6.626,
"step": 25
},
{
"epoch": 0.0,
"learning_rate": 1.7064846416382255e-06,
"loss": 6.3086,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 2.0477815699658705e-06,
"loss": 6.0114,
"step": 35
},
{
"epoch": 0.0,
"learning_rate": 2.3890784982935154e-06,
"loss": 5.923,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 2.7303754266211608e-06,
"loss": 5.8138,
"step": 45
},
{
"epoch": 0.01,
"learning_rate": 3.0716723549488057e-06,
"loss": 5.7112,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 3.412969283276451e-06,
"loss": 5.6069,
"step": 55
},
{
"epoch": 0.01,
"learning_rate": 3.7542662116040956e-06,
"loss": 5.5401,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 4.095563139931741e-06,
"loss": 5.4575,
"step": 65
},
{
"epoch": 0.01,
"learning_rate": 4.436860068259386e-06,
"loss": 5.3794,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 4.778156996587031e-06,
"loss": 5.331,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 5.119453924914676e-06,
"loss": 5.2926,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 5.4607508532423215e-06,
"loss": 5.2757,
"step": 85
},
{
"epoch": 0.01,
"learning_rate": 5.802047781569966e-06,
"loss": 5.2307,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 6.143344709897611e-06,
"loss": 5.1463,
"step": 95
},
{
"epoch": 0.01,
"learning_rate": 6.484641638225257e-06,
"loss": 5.0758,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 6.825938566552902e-06,
"loss": 5.053,
"step": 105
},
{
"epoch": 0.01,
"learning_rate": 7.167235494880547e-06,
"loss": 5.0551,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 7.508532423208191e-06,
"loss": 5.0355,
"step": 115
},
{
"epoch": 0.01,
"learning_rate": 7.849829351535837e-06,
"loss": 4.9796,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 8.191126279863482e-06,
"loss": 4.951,
"step": 125
},
{
"epoch": 0.01,
"learning_rate": 8.532423208191128e-06,
"loss": 4.9085,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 8.873720136518773e-06,
"loss": 4.8731,
"step": 135
},
{
"epoch": 0.01,
"learning_rate": 9.215017064846417e-06,
"loss": 4.8692,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 9.556313993174062e-06,
"loss": 4.8161,
"step": 145
},
{
"epoch": 0.02,
"learning_rate": 9.897610921501706e-06,
"loss": 4.799,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 1.0238907849829352e-05,
"loss": 4.7847,
"step": 155
},
{
"epoch": 0.02,
"learning_rate": 1.0580204778156997e-05,
"loss": 4.7779,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 1.0921501706484643e-05,
"loss": 4.6951,
"step": 165
},
{
"epoch": 0.02,
"learning_rate": 1.126279863481229e-05,
"loss": 4.7078,
"step": 170
},
{
"epoch": 0.02,
"learning_rate": 1.1604095563139932e-05,
"loss": 4.6461,
"step": 175
},
{
"epoch": 0.02,
"learning_rate": 1.1945392491467578e-05,
"loss": 4.6271,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 1.2286689419795223e-05,
"loss": 4.6147,
"step": 185
},
{
"epoch": 0.02,
"learning_rate": 1.2627986348122867e-05,
"loss": 4.5607,
"step": 190
},
{
"epoch": 0.02,
"learning_rate": 1.2969283276450513e-05,
"loss": 4.5627,
"step": 195
},
{
"epoch": 0.02,
"learning_rate": 1.3310580204778158e-05,
"loss": 4.5616,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 1.3651877133105804e-05,
"loss": 4.5295,
"step": 205
},
{
"epoch": 0.02,
"learning_rate": 1.3993174061433447e-05,
"loss": 4.4991,
"step": 210
},
{
"epoch": 0.02,
"learning_rate": 1.4334470989761093e-05,
"loss": 4.4924,
"step": 215
},
{
"epoch": 0.02,
"learning_rate": 1.467576791808874e-05,
"loss": 4.4419,
"step": 220
},
{
"epoch": 0.02,
"learning_rate": 1.5017064846416382e-05,
"loss": 4.445,
"step": 225
},
{
"epoch": 0.02,
"learning_rate": 1.5358361774744027e-05,
"loss": 4.3713,
"step": 230
},
{
"epoch": 0.02,
"learning_rate": 1.5699658703071675e-05,
"loss": 4.4325,
"step": 235
},
{
"epoch": 0.02,
"learning_rate": 1.604095563139932e-05,
"loss": 4.3673,
"step": 240
},
{
"epoch": 0.03,
"learning_rate": 1.6382252559726964e-05,
"loss": 4.3136,
"step": 245
},
{
"epoch": 0.03,
"learning_rate": 1.6723549488054608e-05,
"loss": 4.3566,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 1.7064846416382256e-05,
"loss": 4.2961,
"step": 255
},
{
"epoch": 0.03,
"learning_rate": 1.7406143344709897e-05,
"loss": 4.3203,
"step": 260
},
{
"epoch": 0.03,
"learning_rate": 1.7747440273037545e-05,
"loss": 4.2783,
"step": 265
},
{
"epoch": 0.03,
"learning_rate": 1.808873720136519e-05,
"loss": 4.2523,
"step": 270
},
{
"epoch": 0.03,
"learning_rate": 1.8430034129692834e-05,
"loss": 4.2558,
"step": 275
},
{
"epoch": 0.03,
"learning_rate": 1.877133105802048e-05,
"loss": 4.2494,
"step": 280
},
{
"epoch": 0.03,
"learning_rate": 1.9112627986348123e-05,
"loss": 4.2571,
"step": 285
},
{
"epoch": 0.03,
"learning_rate": 1.945392491467577e-05,
"loss": 4.2041,
"step": 290
},
{
"epoch": 0.03,
"learning_rate": 1.9795221843003412e-05,
"loss": 4.2288,
"step": 295
},
{
"epoch": 0.03,
"learning_rate": 1.9999997788688342e-05,
"loss": 4.2369,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 1.9999972911443404e-05,
"loss": 4.1872,
"step": 305
},
{
"epoch": 0.03,
"learning_rate": 1.9999920392882944e-05,
"loss": 4.167,
"step": 310
},
{
"epoch": 0.03,
"learning_rate": 1.999984023315213e-05,
"loss": 4.1851,
"step": 315
},
{
"epoch": 0.03,
"learning_rate": 1.9999732432472544e-05,
"loss": 4.1391,
"step": 320
},
{
"epoch": 0.03,
"learning_rate": 1.999959699114215e-05,
"loss": 4.1194,
"step": 325
},
{
"epoch": 0.03,
"learning_rate": 1.9999433909535333e-05,
"loss": 4.1501,
"step": 330
},
{
"epoch": 0.03,
"learning_rate": 1.999924318810287e-05,
"loss": 4.1971,
"step": 335
},
{
"epoch": 0.03,
"learning_rate": 1.9999024827371946e-05,
"loss": 4.114,
"step": 340
},
{
"epoch": 0.04,
"learning_rate": 1.9998778827946136e-05,
"loss": 4.0516,
"step": 345
},
{
"epoch": 0.04,
"learning_rate": 1.9998505190505423e-05,
"loss": 4.1302,
"step": 350
},
{
"epoch": 0.04,
"learning_rate": 1.999820391580617e-05,
"loss": 4.0338,
"step": 355
},
{
"epoch": 0.04,
"learning_rate": 1.9997875004681147e-05,
"loss": 4.0644,
"step": 360
},
{
"epoch": 0.04,
"learning_rate": 1.999751845803951e-05,
"loss": 4.0183,
"step": 365
},
{
"epoch": 0.04,
"learning_rate": 1.99971342768668e-05,
"loss": 4.0488,
"step": 370
},
{
"epoch": 0.04,
"learning_rate": 1.999672246222496e-05,
"loss": 4.0974,
"step": 375
},
{
"epoch": 0.04,
"learning_rate": 1.9996283015252286e-05,
"loss": 4.0624,
"step": 380
},
{
"epoch": 0.04,
"learning_rate": 1.9995815937163477e-05,
"loss": 4.0256,
"step": 385
},
{
"epoch": 0.04,
"learning_rate": 1.9995321229249605e-05,
"loss": 4.0293,
"step": 390
},
{
"epoch": 0.04,
"learning_rate": 1.9994798892878112e-05,
"loss": 4.0388,
"step": 395
},
{
"epoch": 0.04,
"learning_rate": 1.9994248929492798e-05,
"loss": 4.0258,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 1.999367134061385e-05,
"loss": 4.014,
"step": 405
},
{
"epoch": 0.04,
"learning_rate": 1.99930661278378e-05,
"loss": 3.9845,
"step": 410
},
{
"epoch": 0.04,
"learning_rate": 1.999243329283754e-05,
"loss": 3.9661,
"step": 415
},
{
"epoch": 0.04,
"learning_rate": 1.9991772837362315e-05,
"loss": 3.9915,
"step": 420
},
{
"epoch": 0.04,
"learning_rate": 1.9991084763237715e-05,
"loss": 3.9972,
"step": 425
},
{
"epoch": 0.04,
"learning_rate": 1.9990369072365666e-05,
"loss": 3.9606,
"step": 430
},
{
"epoch": 0.04,
"learning_rate": 1.9989625766724453e-05,
"loss": 3.9885,
"step": 435
},
{
"epoch": 0.05,
"learning_rate": 1.998885484836866e-05,
"loss": 3.9738,
"step": 440
},
{
"epoch": 0.05,
"learning_rate": 1.998805631942922e-05,
"loss": 3.9257,
"step": 445
},
{
"epoch": 0.05,
"learning_rate": 1.9987230182113374e-05,
"loss": 3.9501,
"step": 450
},
{
"epoch": 0.05,
"learning_rate": 1.9986376438704686e-05,
"loss": 3.9264,
"step": 455
},
{
"epoch": 0.05,
"learning_rate": 1.998549509156302e-05,
"loss": 3.9535,
"step": 460
},
{
"epoch": 0.05,
"learning_rate": 1.9984586143124543e-05,
"loss": 3.9542,
"step": 465
},
{
"epoch": 0.05,
"learning_rate": 1.9983649595901706e-05,
"loss": 3.9401,
"step": 470
},
{
"epoch": 0.05,
"learning_rate": 1.998268545248327e-05,
"loss": 3.9526,
"step": 475
},
{
"epoch": 0.05,
"learning_rate": 1.998169371553425e-05,
"loss": 3.9065,
"step": 480
},
{
"epoch": 0.05,
"learning_rate": 1.9980674387795948e-05,
"loss": 3.9531,
"step": 485
},
{
"epoch": 0.05,
"learning_rate": 1.9979627472085927e-05,
"loss": 3.9276,
"step": 490
},
{
"epoch": 0.05,
"learning_rate": 1.9978552971298014e-05,
"loss": 3.9176,
"step": 495
},
{
"epoch": 0.05,
"learning_rate": 1.997745088840227e-05,
"loss": 3.9248,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 1.9976321226445007e-05,
"loss": 3.894,
"step": 505
},
{
"epoch": 0.05,
"learning_rate": 1.9975163988548775e-05,
"loss": 3.9489,
"step": 510
},
{
"epoch": 0.05,
"learning_rate": 1.997397917791233e-05,
"loss": 3.8736,
"step": 515
},
{
"epoch": 0.05,
"learning_rate": 1.997276679781066e-05,
"loss": 3.8586,
"step": 520
},
{
"epoch": 0.05,
"learning_rate": 1.9971526851594953e-05,
"loss": 3.8848,
"step": 525
},
{
"epoch": 0.05,
"learning_rate": 1.997025934269259e-05,
"loss": 3.9149,
"step": 530
},
{
"epoch": 0.05,
"learning_rate": 1.996896427460714e-05,
"loss": 3.8333,
"step": 535
},
{
"epoch": 0.06,
"learning_rate": 1.9967641650918352e-05,
"loss": 3.8496,
"step": 540
},
{
"epoch": 0.06,
"learning_rate": 1.9966291475282148e-05,
"loss": 3.8546,
"step": 545
},
{
"epoch": 0.06,
"learning_rate": 1.9964913751430593e-05,
"loss": 3.9015,
"step": 550
},
{
"epoch": 0.06,
"learning_rate": 1.9963508483171908e-05,
"loss": 3.8391,
"step": 555
},
{
"epoch": 0.06,
"learning_rate": 1.9962075674390456e-05,
"loss": 3.8628,
"step": 560
},
{
"epoch": 0.06,
"learning_rate": 1.9960615329046717e-05,
"loss": 3.8383,
"step": 565
},
{
"epoch": 0.06,
"learning_rate": 1.9959127451177287e-05,
"loss": 3.8381,
"step": 570
},
{
"epoch": 0.06,
"learning_rate": 1.9957612044894867e-05,
"loss": 3.8557,
"step": 575
},
{
"epoch": 0.06,
"learning_rate": 1.995606911438825e-05,
"loss": 3.8447,
"step": 580
},
{
"epoch": 0.06,
"learning_rate": 1.9954498663922318e-05,
"loss": 3.8571,
"step": 585
},
{
"epoch": 0.06,
"learning_rate": 1.9952900697838004e-05,
"loss": 3.8519,
"step": 590
},
{
"epoch": 0.06,
"learning_rate": 1.9951275220552314e-05,
"loss": 3.7803,
"step": 595
},
{
"epoch": 0.06,
"learning_rate": 1.9949622236558294e-05,
"loss": 3.8677,
"step": 600
},
{
"epoch": 0.06,
"learning_rate": 1.9947941750425016e-05,
"loss": 3.8477,
"step": 605
},
{
"epoch": 0.06,
"learning_rate": 1.994623376679758e-05,
"loss": 3.8065,
"step": 610
},
{
"epoch": 0.06,
"learning_rate": 1.9944498290397097e-05,
"loss": 3.8206,
"step": 615
},
{
"epoch": 0.06,
"learning_rate": 1.9942735326020658e-05,
"loss": 3.8404,
"step": 620
},
{
"epoch": 0.06,
"learning_rate": 1.994094487854134e-05,
"loss": 3.808,
"step": 625
},
{
"epoch": 0.06,
"learning_rate": 1.9939126952908198e-05,
"loss": 3.7804,
"step": 630
},
{
"epoch": 0.07,
"learning_rate": 1.993728155414622e-05,
"loss": 3.7677,
"step": 635
},
{
"epoch": 0.07,
"learning_rate": 1.993540868735635e-05,
"loss": 3.8159,
"step": 640
},
{
"epoch": 0.07,
"learning_rate": 1.9933508357715454e-05,
"loss": 3.7865,
"step": 645
},
{
"epoch": 0.07,
"learning_rate": 1.9931580570476306e-05,
"loss": 3.7753,
"step": 650
},
{
"epoch": 0.07,
"learning_rate": 1.9929625330967575e-05,
"loss": 3.7645,
"step": 655
},
{
"epoch": 0.07,
"learning_rate": 1.9927642644593818e-05,
"loss": 3.7898,
"step": 660
},
{
"epoch": 0.07,
"learning_rate": 1.9925632516835457e-05,
"loss": 3.7857,
"step": 665
},
{
"epoch": 0.07,
"learning_rate": 1.992359495324876e-05,
"loss": 3.7847,
"step": 670
},
{
"epoch": 0.07,
"learning_rate": 1.9921529959465842e-05,
"loss": 3.8126,
"step": 675
},
{
"epoch": 0.07,
"learning_rate": 1.9919437541194628e-05,
"loss": 3.7679,
"step": 680
},
{
"epoch": 0.07,
"learning_rate": 1.9917317704218852e-05,
"loss": 3.795,
"step": 685
},
{
"epoch": 0.07,
"learning_rate": 1.9915170454398045e-05,
"loss": 3.7215,
"step": 690
},
{
"epoch": 0.07,
"learning_rate": 1.9912995797667498e-05,
"loss": 3.7675,
"step": 695
},
{
"epoch": 0.07,
"learning_rate": 1.9910793740038266e-05,
"loss": 3.7704,
"step": 700
},
{
"epoch": 0.07,
"learning_rate": 1.9908564287597145e-05,
"loss": 3.7432,
"step": 705
},
{
"epoch": 0.07,
"learning_rate": 1.9906307446506647e-05,
"loss": 3.7335,
"step": 710
},
{
"epoch": 0.07,
"learning_rate": 1.9904023223005e-05,
"loss": 3.7434,
"step": 715
},
{
"epoch": 0.07,
"learning_rate": 1.990171162340611e-05,
"loss": 3.7501,
"step": 720
},
{
"epoch": 0.07,
"learning_rate": 1.989937265409956e-05,
"loss": 3.7638,
"step": 725
},
{
"epoch": 0.07,
"learning_rate": 1.9897006321550592e-05,
"loss": 3.7249,
"step": 730
},
{
"epoch": 0.08,
"learning_rate": 1.9894612632300077e-05,
"loss": 3.7536,
"step": 735
},
{
"epoch": 0.08,
"learning_rate": 1.9892191592964498e-05,
"loss": 3.7492,
"step": 740
},
{
"epoch": 0.08,
"learning_rate": 1.988974321023595e-05,
"loss": 3.7529,
"step": 745
},
{
"epoch": 0.08,
"learning_rate": 1.98872674908821e-05,
"loss": 3.787,
"step": 750
},
{
"epoch": 0.08,
"learning_rate": 1.9884764441746186e-05,
"loss": 3.7251,
"step": 755
},
{
"epoch": 0.08,
"learning_rate": 1.988223406974698e-05,
"loss": 3.7379,
"step": 760
},
{
"epoch": 0.08,
"learning_rate": 1.9879676381878783e-05,
"loss": 3.706,
"step": 765
},
{
"epoch": 0.08,
"learning_rate": 1.98770913852114e-05,
"loss": 3.7333,
"step": 770
},
{
"epoch": 0.08,
"learning_rate": 1.9874479086890117e-05,
"loss": 3.721,
"step": 775
},
{
"epoch": 0.08,
"learning_rate": 1.9871839494135696e-05,
"loss": 3.7456,
"step": 780
},
{
"epoch": 0.08,
"learning_rate": 1.9869172614244335e-05,
"loss": 3.7335,
"step": 785
},
{
"epoch": 0.08,
"learning_rate": 1.986647845458766e-05,
"loss": 3.7378,
"step": 790
},
{
"epoch": 0.08,
"learning_rate": 1.986375702261271e-05,
"loss": 3.7137,
"step": 795
},
{
"epoch": 0.08,
"learning_rate": 1.9861008325841893e-05,
"loss": 3.6932,
"step": 800
},
{
"epoch": 0.08,
"learning_rate": 1.9858232371872993e-05,
"loss": 3.6973,
"step": 805
},
{
"epoch": 0.08,
"learning_rate": 1.9855429168379127e-05,
"loss": 3.7263,
"step": 810
},
{
"epoch": 0.08,
"learning_rate": 1.985259872310875e-05,
"loss": 3.7263,
"step": 815
},
{
"epoch": 0.08,
"learning_rate": 1.9849741043885596e-05,
"loss": 3.6926,
"step": 820
},
{
"epoch": 0.08,
"learning_rate": 1.9846856138608693e-05,
"loss": 3.7449,
"step": 825
},
{
"epoch": 0.09,
"learning_rate": 1.9843944015252318e-05,
"loss": 3.7038,
"step": 830
},
{
"epoch": 0.09,
"learning_rate": 1.9841004681865988e-05,
"loss": 3.7182,
"step": 835
},
{
"epoch": 0.09,
"learning_rate": 1.9838038146574426e-05,
"loss": 3.6883,
"step": 840
},
{
"epoch": 0.09,
"learning_rate": 1.983504441757755e-05,
"loss": 3.7354,
"step": 845
},
{
"epoch": 0.09,
"learning_rate": 1.983202350315044e-05,
"loss": 3.7327,
"step": 850
},
{
"epoch": 0.09,
"learning_rate": 1.982897541164333e-05,
"loss": 3.6979,
"step": 855
},
{
"epoch": 0.09,
"learning_rate": 1.9825900151481562e-05,
"loss": 3.665,
"step": 860
},
{
"epoch": 0.09,
"learning_rate": 1.9822797731165587e-05,
"loss": 3.7058,
"step": 865
},
{
"epoch": 0.09,
"learning_rate": 1.981966815927092e-05,
"loss": 3.6884,
"step": 870
},
{
"epoch": 0.09,
"learning_rate": 1.981651144444814e-05,
"loss": 3.678,
"step": 875
},
{
"epoch": 0.09,
"learning_rate": 1.9813327595422843e-05,
"loss": 3.7046,
"step": 880
},
{
"epoch": 0.09,
"learning_rate": 1.981011662099563e-05,
"loss": 3.6846,
"step": 885
},
{
"epoch": 0.09,
"learning_rate": 1.9806878530042083e-05,
"loss": 3.6799,
"step": 890
},
{
"epoch": 0.09,
"learning_rate": 1.980361333151273e-05,
"loss": 3.6712,
"step": 895
},
{
"epoch": 0.09,
"learning_rate": 1.9800321034433043e-05,
"loss": 3.7067,
"step": 900
},
{
"epoch": 0.09,
"learning_rate": 1.979700164790338e-05,
"loss": 3.6772,
"step": 905
},
{
"epoch": 0.09,
"learning_rate": 1.9793655181098992e-05,
"loss": 3.6792,
"step": 910
},
{
"epoch": 0.09,
"learning_rate": 1.9790281643269974e-05,
"loss": 3.7357,
"step": 915
},
{
"epoch": 0.09,
"learning_rate": 1.9786881043741256e-05,
"loss": 3.6694,
"step": 920
},
{
"epoch": 0.09,
"learning_rate": 1.978345339191257e-05,
"loss": 3.6552,
"step": 925
},
{
"epoch": 0.1,
"learning_rate": 1.977999869725842e-05,
"loss": 3.671,
"step": 930
},
{
"epoch": 0.1,
"learning_rate": 1.9776516969328066e-05,
"loss": 3.6895,
"step": 935
},
{
"epoch": 0.1,
"learning_rate": 1.9773008217745483e-05,
"loss": 3.6742,
"step": 940
},
{
"epoch": 0.1,
"learning_rate": 1.976947245220935e-05,
"loss": 3.669,
"step": 945
},
{
"epoch": 0.1,
"learning_rate": 1.976590968249301e-05,
"loss": 3.6384,
"step": 950
},
{
"epoch": 0.1,
"learning_rate": 1.9762319918444466e-05,
"loss": 3.6433,
"step": 955
},
{
"epoch": 0.1,
"learning_rate": 1.975870316998631e-05,
"loss": 3.6324,
"step": 960
},
{
"epoch": 0.1,
"learning_rate": 1.9755059447115755e-05,
"loss": 3.6582,
"step": 965
},
{
"epoch": 0.1,
"learning_rate": 1.975138875990454e-05,
"loss": 3.6667,
"step": 970
},
{
"epoch": 0.1,
"learning_rate": 1.9747691118498963e-05,
"loss": 3.6767,
"step": 975
},
{
"epoch": 0.1,
"learning_rate": 1.9743966533119823e-05,
"loss": 3.6854,
"step": 980
},
{
"epoch": 0.1,
"learning_rate": 1.9740215014062386e-05,
"loss": 3.6838,
"step": 985
},
{
"epoch": 0.1,
"learning_rate": 1.973643657169637e-05,
"loss": 3.7078,
"step": 990
},
{
"epoch": 0.1,
"learning_rate": 1.9732631216465924e-05,
"loss": 3.627,
"step": 995
},
{
"epoch": 0.1,
"learning_rate": 1.972879895888957e-05,
"loss": 3.6671,
"step": 1000
},
{
"epoch": 0.1,
"learning_rate": 1.9724939809560208e-05,
"loss": 3.6825,
"step": 1005
},
{
"epoch": 0.1,
"learning_rate": 1.9721053779145057e-05,
"loss": 3.6226,
"step": 1010
},
{
"epoch": 0.1,
"learning_rate": 1.971714087838565e-05,
"loss": 3.6629,
"step": 1015
},
{
"epoch": 0.1,
"learning_rate": 1.9713201118097784e-05,
"loss": 3.6617,
"step": 1020
},
{
"epoch": 0.11,
"learning_rate": 1.970923450917151e-05,
"loss": 3.6485,
"step": 1025
},
{
"epoch": 0.11,
"learning_rate": 1.9705241062571084e-05,
"loss": 3.6363,
"step": 1030
},
{
"epoch": 0.11,
"learning_rate": 1.9701220789334945e-05,
"loss": 3.6343,
"step": 1035
},
{
"epoch": 0.11,
"learning_rate": 1.9697173700575694e-05,
"loss": 3.6646,
"step": 1040
},
{
"epoch": 0.11,
"learning_rate": 1.969309980748004e-05,
"loss": 3.6167,
"step": 1045
},
{
"epoch": 0.11,
"learning_rate": 1.968899912130879e-05,
"loss": 3.6257,
"step": 1050
},
{
"epoch": 0.11,
"learning_rate": 1.9684871653396817e-05,
"loss": 3.652,
"step": 1055
},
{
"epoch": 0.11,
"learning_rate": 1.968071741515301e-05,
"loss": 3.6486,
"step": 1060
},
{
"epoch": 0.11,
"learning_rate": 1.9676536418060266e-05,
"loss": 3.6246,
"step": 1065
},
{
"epoch": 0.11,
"learning_rate": 1.9672328673675438e-05,
"loss": 3.6233,
"step": 1070
},
{
"epoch": 0.11,
"learning_rate": 1.9668094193629322e-05,
"loss": 3.6361,
"step": 1075
},
{
"epoch": 0.11,
"learning_rate": 1.96638329896266e-05,
"loss": 3.6405,
"step": 1080
},
{
"epoch": 0.11,
"learning_rate": 1.9659545073445844e-05,
"loss": 3.618,
"step": 1085
},
{
"epoch": 0.11,
"learning_rate": 1.965523045693944e-05,
"loss": 3.6378,
"step": 1090
},
{
"epoch": 0.11,
"learning_rate": 1.9650889152033597e-05,
"loss": 3.6453,
"step": 1095
},
{
"epoch": 0.11,
"learning_rate": 1.9646521170728283e-05,
"loss": 3.6312,
"step": 1100
},
{
"epoch": 0.11,
"learning_rate": 1.9642126525097202e-05,
"loss": 3.6043,
"step": 1105
},
{
"epoch": 0.11,
"learning_rate": 1.9637705227287763e-05,
"loss": 3.6043,
"step": 1110
},
{
"epoch": 0.11,
"learning_rate": 1.963325728952106e-05,
"loss": 3.6487,
"step": 1115
},
{
"epoch": 0.11,
"learning_rate": 1.9628782724091795e-05,
"loss": 3.6493,
"step": 1120
},
{
"epoch": 0.12,
"learning_rate": 1.96242815433683e-05,
"loss": 3.6173,
"step": 1125
},
{
"epoch": 0.12,
"learning_rate": 1.9619753759792466e-05,
"loss": 3.655,
"step": 1130
},
{
"epoch": 0.12,
"learning_rate": 1.9615199385879712e-05,
"loss": 3.6231,
"step": 1135
},
{
"epoch": 0.12,
"learning_rate": 1.961061843421896e-05,
"loss": 3.6501,
"step": 1140
},
{
"epoch": 0.12,
"learning_rate": 1.96060109174726e-05,
"loss": 3.6312,
"step": 1145
},
{
"epoch": 0.12,
"learning_rate": 1.9601376848376443e-05,
"loss": 3.6004,
"step": 1150
},
{
"epoch": 0.12,
"learning_rate": 1.9596716239739708e-05,
"loss": 3.6282,
"step": 1155
},
{
"epoch": 0.12,
"learning_rate": 1.9592029104444964e-05,
"loss": 3.5783,
"step": 1160
},
{
"epoch": 0.12,
"learning_rate": 1.9587315455448097e-05,
"loss": 3.6057,
"step": 1165
},
{
"epoch": 0.12,
"learning_rate": 1.9582575305778297e-05,
"loss": 3.6139,
"step": 1170
},
{
"epoch": 0.12,
"learning_rate": 1.9577808668537995e-05,
"loss": 3.5894,
"step": 1175
},
{
"epoch": 0.12,
"learning_rate": 1.9573015556902836e-05,
"loss": 3.5998,
"step": 1180
},
{
"epoch": 0.12,
"learning_rate": 1.9568195984121648e-05,
"loss": 3.5962,
"step": 1185
},
{
"epoch": 0.12,
"learning_rate": 1.9563349963516403e-05,
"loss": 3.6213,
"step": 1190
},
{
"epoch": 0.12,
"learning_rate": 1.9558477508482175e-05,
"loss": 3.5908,
"step": 1195
},
{
"epoch": 0.12,
"learning_rate": 1.9553578632487103e-05,
"loss": 3.6214,
"step": 1200
},
{
"epoch": 0.12,
"learning_rate": 1.9548653349072363e-05,
"loss": 3.5958,
"step": 1205
},
{
"epoch": 0.12,
"learning_rate": 1.9543701671852127e-05,
"loss": 3.5878,
"step": 1210
},
{
"epoch": 0.12,
"learning_rate": 1.953872361451352e-05,
"loss": 3.6117,
"step": 1215
},
{
"epoch": 0.13,
"learning_rate": 1.9533719190816575e-05,
"loss": 3.5977,
"step": 1220
},
{
"epoch": 0.13,
"learning_rate": 1.9528688414594224e-05,
"loss": 3.6178,
"step": 1225
},
{
"epoch": 0.13,
"learning_rate": 1.952363129975223e-05,
"loss": 3.5998,
"step": 1230
},
{
"epoch": 0.13,
"learning_rate": 1.9518547860269157e-05,
"loss": 3.6139,
"step": 1235
},
{
"epoch": 0.13,
"learning_rate": 1.9513438110196346e-05,
"loss": 3.6015,
"step": 1240
},
{
"epoch": 0.13,
"learning_rate": 1.9508302063657853e-05,
"loss": 3.6142,
"step": 1245
},
{
"epoch": 0.13,
"learning_rate": 1.9503139734850426e-05,
"loss": 3.642,
"step": 1250
},
{
"epoch": 0.13,
"learning_rate": 1.9497951138043454e-05,
"loss": 3.5928,
"step": 1255
},
{
"epoch": 0.13,
"learning_rate": 1.9492736287578947e-05,
"loss": 3.609,
"step": 1260
},
{
"epoch": 0.13,
"learning_rate": 1.9487495197871476e-05,
"loss": 3.6197,
"step": 1265
},
{
"epoch": 0.13,
"learning_rate": 1.9482227883408135e-05,
"loss": 3.5956,
"step": 1270
},
{
"epoch": 0.13,
"learning_rate": 1.9476934358748522e-05,
"loss": 3.5974,
"step": 1275
},
{
"epoch": 0.13,
"learning_rate": 1.947161463852467e-05,
"loss": 3.5682,
"step": 1280
},
{
"epoch": 0.13,
"learning_rate": 1.946626873744103e-05,
"loss": 3.6134,
"step": 1285
},
{
"epoch": 0.13,
"learning_rate": 1.9460896670274408e-05,
"loss": 3.5874,
"step": 1290
},
{
"epoch": 0.13,
"learning_rate": 1.9455498451873952e-05,
"loss": 3.5733,
"step": 1295
},
{
"epoch": 0.13,
"learning_rate": 1.9450074097161087e-05,
"loss": 3.6074,
"step": 1300
},
{
"epoch": 0.13,
"learning_rate": 1.944462362112948e-05,
"loss": 3.5849,
"step": 1305
},
{
"epoch": 0.13,
"learning_rate": 1.9439147038845006e-05,
"loss": 3.5862,
"step": 1310
},
{
"epoch": 0.13,
"learning_rate": 1.94336443654457e-05,
"loss": 3.5972,
"step": 1315
},
{
"epoch": 0.14,
"learning_rate": 1.942811561614172e-05,
"loss": 3.591,
"step": 1320
},
{
"epoch": 0.14,
"learning_rate": 1.942256080621529e-05,
"loss": 3.5864,
"step": 1325
},
{
"epoch": 0.14,
"learning_rate": 1.941697995102069e-05,
"loss": 3.5939,
"step": 1330
},
{
"epoch": 0.14,
"learning_rate": 1.9411373065984166e-05,
"loss": 3.5934,
"step": 1335
},
{
"epoch": 0.14,
"learning_rate": 1.9405740166603936e-05,
"loss": 3.5437,
"step": 1340
},
{
"epoch": 0.14,
"learning_rate": 1.9400081268450107e-05,
"loss": 3.5745,
"step": 1345
},
{
"epoch": 0.14,
"learning_rate": 1.9394396387164677e-05,
"loss": 3.5901,
"step": 1350
},
{
"epoch": 0.14,
"learning_rate": 1.9388685538461435e-05,
"loss": 3.5659,
"step": 1355
},
{
"epoch": 0.14,
"learning_rate": 1.9382948738125966e-05,
"loss": 3.57,
"step": 1360
},
{
"epoch": 0.14,
"learning_rate": 1.937718600201558e-05,
"loss": 3.595,
"step": 1365
},
{
"epoch": 0.14,
"learning_rate": 1.9371397346059286e-05,
"loss": 3.6066,
"step": 1370
},
{
"epoch": 0.14,
"learning_rate": 1.936558278625773e-05,
"loss": 3.5567,
"step": 1375
},
{
"epoch": 0.14,
"learning_rate": 1.9359742338683165e-05,
"loss": 3.5779,
"step": 1380
},
{
"epoch": 0.14,
"learning_rate": 1.9353876019479402e-05,
"loss": 3.5831,
"step": 1385
},
{
"epoch": 0.14,
"learning_rate": 1.934798384486176e-05,
"loss": 3.5762,
"step": 1390
},
{
"epoch": 0.14,
"learning_rate": 1.934206583111703e-05,
"loss": 3.5685,
"step": 1395
},
{
"epoch": 0.14,
"learning_rate": 1.9336121994603424e-05,
"loss": 3.5789,
"step": 1400
},
{
"epoch": 0.14,
"learning_rate": 1.9330152351750535e-05,
"loss": 3.5516,
"step": 1405
},
{
"epoch": 0.14,
"learning_rate": 1.9324156919059286e-05,
"loss": 3.5779,
"step": 1410
},
{
"epoch": 0.15,
"learning_rate": 1.9318135713101883e-05,
"loss": 3.5857,
"step": 1415
},
{
"epoch": 0.15,
"learning_rate": 1.9312088750521778e-05,
"loss": 3.5835,
"step": 1420
},
{
"epoch": 0.15,
"learning_rate": 1.9306016048033617e-05,
"loss": 3.5684,
"step": 1425
},
{
"epoch": 0.15,
"learning_rate": 1.9299917622423196e-05,
"loss": 3.5699,
"step": 1430
},
{
"epoch": 0.15,
"learning_rate": 1.9293793490547404e-05,
"loss": 3.5745,
"step": 1435
},
{
"epoch": 0.15,
"learning_rate": 1.9287643669334202e-05,
"loss": 3.5781,
"step": 1440
},
{
"epoch": 0.15,
"learning_rate": 1.9281468175782546e-05,
"loss": 3.5244,
"step": 1445
},
{
"epoch": 0.15,
"learning_rate": 1.9275267026962358e-05,
"loss": 3.5479,
"step": 1450
},
{
"epoch": 0.15,
"learning_rate": 1.926904024001448e-05,
"loss": 3.5617,
"step": 1455
},
{
"epoch": 0.15,
"learning_rate": 1.9262787832150615e-05,
"loss": 3.5508,
"step": 1460
},
{
"epoch": 0.15,
"learning_rate": 1.9256509820653284e-05,
"loss": 3.5705,
"step": 1465
},
{
"epoch": 0.15,
"learning_rate": 1.9250206222875785e-05,
"loss": 3.5852,
"step": 1470
},
{
"epoch": 0.15,
"learning_rate": 1.9243877056242145e-05,
"loss": 3.553,
"step": 1475
},
{
"epoch": 0.15,
"learning_rate": 1.9237522338247053e-05,
"loss": 3.5588,
"step": 1480
},
{
"epoch": 0.15,
"learning_rate": 1.9231142086455838e-05,
"loss": 3.5374,
"step": 1485
},
{
"epoch": 0.15,
"learning_rate": 1.92247363185044e-05,
"loss": 3.5648,
"step": 1490
},
{
"epoch": 0.15,
"learning_rate": 1.921830505209917e-05,
"loss": 3.5486,
"step": 1495
},
{
"epoch": 0.15,
"learning_rate": 1.9211848305017072e-05,
"loss": 3.5476,
"step": 1500
},
{
"epoch": 0.15,
"learning_rate": 1.9205366095105443e-05,
"loss": 3.5538,
"step": 1505
},
{
"epoch": 0.16,
"learning_rate": 1.9198858440282016e-05,
"loss": 3.5734,
"step": 1510
},
{
"epoch": 0.16,
"learning_rate": 1.9192325358534855e-05,
"loss": 3.586,
"step": 1515
},
{
"epoch": 0.16,
"learning_rate": 1.9185766867922303e-05,
"loss": 3.5631,
"step": 1520
},
{
"epoch": 0.16,
"learning_rate": 1.9179182986572943e-05,
"loss": 3.5143,
"step": 1525
},
{
"epoch": 0.16,
"learning_rate": 1.917257373268554e-05,
"loss": 3.5349,
"step": 1530
},
{
"epoch": 0.16,
"learning_rate": 1.9165939124528984e-05,
"loss": 3.526,
"step": 1535
},
{
"epoch": 0.16,
"learning_rate": 1.9159279180442257e-05,
"loss": 3.5638,
"step": 1540
},
{
"epoch": 0.16,
"learning_rate": 1.9152593918834376e-05,
"loss": 3.5747,
"step": 1545
},
{
"epoch": 0.16,
"learning_rate": 1.914588335818433e-05,
"loss": 3.5447,
"step": 1550
},
{
"epoch": 0.16,
"learning_rate": 1.913914751704104e-05,
"loss": 3.541,
"step": 1555
},
{
"epoch": 0.16,
"learning_rate": 1.9132386414023306e-05,
"loss": 3.5198,
"step": 1560
},
{
"epoch": 0.16,
"learning_rate": 1.9125600067819765e-05,
"loss": 3.548,
"step": 1565
},
{
"epoch": 0.16,
"learning_rate": 1.9118788497188815e-05,
"loss": 3.4812,
"step": 1570
},
{
"epoch": 0.16,
"learning_rate": 1.911195172095858e-05,
"loss": 3.5336,
"step": 1575
},
{
"epoch": 0.16,
"learning_rate": 1.9105089758026872e-05,
"loss": 3.5517,
"step": 1580
},
{
"epoch": 0.16,
"learning_rate": 1.90982026273611e-05,
"loss": 3.5337,
"step": 1585
},
{
"epoch": 0.16,
"learning_rate": 1.9091290347998256e-05,
"loss": 3.5607,
"step": 1590
},
{
"epoch": 0.16,
"learning_rate": 1.908435293904484e-05,
"loss": 3.5372,
"step": 1595
},
{
"epoch": 0.16,
"learning_rate": 1.9077390419676813e-05,
"loss": 3.5373,
"step": 1600
},
{
"epoch": 0.16,
"learning_rate": 1.907040280913955e-05,
"loss": 3.5493,
"step": 1605
},
{
"epoch": 0.17,
"learning_rate": 1.9063390126747778e-05,
"loss": 3.5431,
"step": 1610
},
{
"epoch": 0.17,
"learning_rate": 1.9056352391885524e-05,
"loss": 3.5358,
"step": 1615
},
{
"epoch": 0.17,
"learning_rate": 1.904928962400607e-05,
"loss": 3.5254,
"step": 1620
},
{
"epoch": 0.17,
"learning_rate": 1.904220184263188e-05,
"loss": 3.5759,
"step": 1625
},
{
"epoch": 0.17,
"learning_rate": 1.9035089067354573e-05,
"loss": 3.5486,
"step": 1630
},
{
"epoch": 0.17,
"learning_rate": 1.9027951317834847e-05,
"loss": 3.5699,
"step": 1635
},
{
"epoch": 0.17,
"learning_rate": 1.9020788613802435e-05,
"loss": 3.5456,
"step": 1640
},
{
"epoch": 0.17,
"learning_rate": 1.9013600975056052e-05,
"loss": 3.5657,
"step": 1645
},
{
"epoch": 0.17,
"learning_rate": 1.9006388421463322e-05,
"loss": 3.5525,
"step": 1650
},
{
"epoch": 0.17,
"learning_rate": 1.899915097296075e-05,
"loss": 3.5306,
"step": 1655
},
{
"epoch": 0.17,
"learning_rate": 1.899188864955365e-05,
"loss": 3.5637,
"step": 1660
},
{
"epoch": 0.17,
"learning_rate": 1.8984601471316092e-05,
"loss": 3.5142,
"step": 1665
},
{
"epoch": 0.17,
"learning_rate": 1.897728945839085e-05,
"loss": 3.5379,
"step": 1670
},
{
"epoch": 0.17,
"learning_rate": 1.896995263098935e-05,
"loss": 3.5179,
"step": 1675
},
{
"epoch": 0.17,
"learning_rate": 1.8962591009391595e-05,
"loss": 3.5322,
"step": 1680
},
{
"epoch": 0.17,
"learning_rate": 1.8955204613946135e-05,
"loss": 3.5419,
"step": 1685
},
{
"epoch": 0.17,
"learning_rate": 1.894779346506999e-05,
"loss": 3.4724,
"step": 1690
},
{
"epoch": 0.17,
"learning_rate": 1.8940357583248613e-05,
"loss": 3.5225,
"step": 1695
},
{
"epoch": 0.17,
"learning_rate": 1.8932896989035814e-05,
"loss": 3.5276,
"step": 1700
},
{
"epoch": 0.18,
"learning_rate": 1.8925411703053708e-05,
"loss": 3.5402,
"step": 1705
},
{
"epoch": 0.18,
"learning_rate": 1.8917901745992667e-05,
"loss": 3.538,
"step": 1710
},
{
"epoch": 0.18,
"learning_rate": 1.8910367138611257e-05,
"loss": 3.5218,
"step": 1715
},
{
"epoch": 0.18,
"learning_rate": 1.8902807901736185e-05,
"loss": 3.488,
"step": 1720
},
{
"epoch": 0.18,
"learning_rate": 1.8895224056262226e-05,
"loss": 3.5459,
"step": 1725
},
{
"epoch": 0.18,
"learning_rate": 1.8887615623152188e-05,
"loss": 3.4957,
"step": 1730
},
{
"epoch": 0.18,
"learning_rate": 1.8879982623436835e-05,
"loss": 3.5491,
"step": 1735
},
{
"epoch": 0.18,
"learning_rate": 1.887232507821484e-05,
"loss": 3.511,
"step": 1740
},
{
"epoch": 0.18,
"learning_rate": 1.8864643008652726e-05,
"loss": 3.5164,
"step": 1745
},
{
"epoch": 0.18,
"learning_rate": 1.88569364359848e-05,
"loss": 3.5123,
"step": 1750
},
{
"epoch": 0.18,
"learning_rate": 1.8849205381513095e-05,
"loss": 3.4969,
"step": 1755
},
{
"epoch": 0.18,
"learning_rate": 1.884144986660733e-05,
"loss": 3.5236,
"step": 1760
},
{
"epoch": 0.18,
"learning_rate": 1.883366991270482e-05,
"loss": 3.5089,
"step": 1765
},
{
"epoch": 0.18,
"learning_rate": 1.8825865541310438e-05,
"loss": 3.5072,
"step": 1770
},
{
"epoch": 0.18,
"learning_rate": 1.8818036773996552e-05,
"loss": 3.4727,
"step": 1775
},
{
"epoch": 0.18,
"learning_rate": 1.8810183632402972e-05,
"loss": 3.5314,
"step": 1780
},
{
"epoch": 0.18,
"learning_rate": 1.8802306138236862e-05,
"loss": 3.5055,
"step": 1785
},
{
"epoch": 0.18,
"learning_rate": 1.879440431327272e-05,
"loss": 3.5259,
"step": 1790
},
{
"epoch": 0.18,
"learning_rate": 1.8786478179352285e-05,
"loss": 3.5075,
"step": 1795
},
{
"epoch": 0.18,
"learning_rate": 1.8778527758384492e-05,
"loss": 3.5221,
"step": 1800
},
{
"epoch": 0.19,
"learning_rate": 1.8770553072345407e-05,
"loss": 3.4877,
"step": 1805
},
{
"epoch": 0.19,
"learning_rate": 1.876255414327818e-05,
"loss": 3.5192,
"step": 1810
},
{
"epoch": 0.19,
"learning_rate": 1.8754530993292956e-05,
"loss": 3.4922,
"step": 1815
},
{
"epoch": 0.19,
"learning_rate": 1.8746483644566842e-05,
"loss": 3.5452,
"step": 1820
},
{
"epoch": 0.19,
"learning_rate": 1.873841211934382e-05,
"loss": 3.509,
"step": 1825
},
{
"epoch": 0.19,
"learning_rate": 1.8730316439934723e-05,
"loss": 3.5443,
"step": 1830
},
{
"epoch": 0.19,
"learning_rate": 1.8722196628717118e-05,
"loss": 3.5118,
"step": 1835
},
{
"epoch": 0.19,
"learning_rate": 1.8714052708135305e-05,
"loss": 3.5334,
"step": 1840
},
{
"epoch": 0.19,
"learning_rate": 1.8705884700700206e-05,
"loss": 3.5009,
"step": 1845
},
{
"epoch": 0.19,
"learning_rate": 1.8697692628989327e-05,
"loss": 3.5415,
"step": 1850
},
{
"epoch": 0.19,
"learning_rate": 1.86894765156467e-05,
"loss": 3.5182,
"step": 1855
},
{
"epoch": 0.19,
"learning_rate": 1.8681236383382804e-05,
"loss": 3.5114,
"step": 1860
},
{
"epoch": 0.19,
"learning_rate": 1.8672972254974507e-05,
"loss": 3.5119,
"step": 1865
},
{
"epoch": 0.19,
"learning_rate": 1.866468415326501e-05,
"loss": 3.4983,
"step": 1870
},
{
"epoch": 0.19,
"learning_rate": 1.8656372101163774e-05,
"loss": 3.5203,
"step": 1875
},
{
"epoch": 0.19,
"learning_rate": 1.8648036121646474e-05,
"loss": 3.5214,
"step": 1880
},
{
"epoch": 0.19,
"learning_rate": 1.8639676237754916e-05,
"loss": 3.5087,
"step": 1885
},
{
"epoch": 0.19,
"learning_rate": 1.8631292472596978e-05,
"loss": 3.5203,
"step": 1890
},
{
"epoch": 0.19,
"learning_rate": 1.862288484934655e-05,
"loss": 3.5087,
"step": 1895
},
{
"epoch": 0.2,
"learning_rate": 1.8614453391243482e-05,
"loss": 3.5026,
"step": 1900
},
{
"epoch": 0.2,
"learning_rate": 1.8605998121593486e-05,
"loss": 3.51,
"step": 1905
},
{
"epoch": 0.2,
"learning_rate": 1.8597519063768104e-05,
"loss": 3.5118,
"step": 1910
},
{
"epoch": 0.2,
"learning_rate": 1.8589016241204637e-05,
"loss": 3.4831,
"step": 1915
},
{
"epoch": 0.2,
"learning_rate": 1.8580489677406064e-05,
"loss": 3.5349,
"step": 1920
},
{
"epoch": 0.2,
"learning_rate": 1.8571939395940995e-05,
"loss": 3.4857,
"step": 1925
},
{
"epoch": 0.2,
"learning_rate": 1.8563365420443594e-05,
"loss": 3.4766,
"step": 1930
},
{
"epoch": 0.2,
"learning_rate": 1.8554767774613528e-05,
"loss": 3.5126,
"step": 1935
},
{
"epoch": 0.2,
"learning_rate": 1.8546146482215875e-05,
"loss": 3.555,
"step": 1940
},
{
"epoch": 0.2,
"learning_rate": 1.8537501567081097e-05,
"loss": 3.5108,
"step": 1945
},
{
"epoch": 0.2,
"learning_rate": 1.852883305310493e-05,
"loss": 3.5117,
"step": 1950
},
{
"epoch": 0.2,
"learning_rate": 1.852014096424836e-05,
"loss": 3.525,
"step": 1955
},
{
"epoch": 0.2,
"learning_rate": 1.851142532453753e-05,
"loss": 3.4482,
"step": 1960
},
{
"epoch": 0.2,
"learning_rate": 1.8502686158063676e-05,
"loss": 3.5066,
"step": 1965
},
{
"epoch": 0.2,
"learning_rate": 1.8493923488983066e-05,
"loss": 3.5157,
"step": 1970
},
{
"epoch": 0.2,
"learning_rate": 1.8485137341516947e-05,
"loss": 3.5266,
"step": 1975
},
{
"epoch": 0.2,
"learning_rate": 1.847632773995144e-05,
"loss": 3.4949,
"step": 1980
},
{
"epoch": 0.2,
"learning_rate": 1.8467494708637517e-05,
"loss": 3.495,
"step": 1985
},
{
"epoch": 0.2,
"learning_rate": 1.84586382719909e-05,
"loss": 3.4731,
"step": 1990
},
{
"epoch": 0.2,
"learning_rate": 1.8449758454492014e-05,
"loss": 3.488,
"step": 1995
},
{
"epoch": 0.21,
"learning_rate": 1.8440855280685907e-05,
"loss": 3.499,
"step": 2000
},
{
"epoch": 0.21,
"learning_rate": 1.8431928775182194e-05,
"loss": 3.5122,
"step": 2005
},
{
"epoch": 0.21,
"learning_rate": 1.842297896265497e-05,
"loss": 3.4933,
"step": 2010
},
{
"epoch": 0.21,
"learning_rate": 1.8414005867842765e-05,
"loss": 3.5144,
"step": 2015
},
{
"epoch": 0.21,
"learning_rate": 1.840500951554846e-05,
"loss": 3.4728,
"step": 2020
},
{
"epoch": 0.21,
"learning_rate": 1.8395989930639224e-05,
"loss": 3.4829,
"step": 2025
},
{
"epoch": 0.21,
"learning_rate": 1.838694713804645e-05,
"loss": 3.4995,
"step": 2030
},
{
"epoch": 0.21,
"learning_rate": 1.8377881162765662e-05,
"loss": 3.492,
"step": 2035
},
{
"epoch": 0.21,
"learning_rate": 1.8368792029856482e-05,
"loss": 3.5294,
"step": 2040
},
{
"epoch": 0.21,
"learning_rate": 1.8359679764442538e-05,
"loss": 3.4922,
"step": 2045
},
{
"epoch": 0.21,
"learning_rate": 1.8350544391711396e-05,
"loss": 3.5267,
"step": 2050
},
{
"epoch": 0.21,
"learning_rate": 1.8341385936914503e-05,
"loss": 3.5039,
"step": 2055
},
{
"epoch": 0.21,
"learning_rate": 1.8332204425367096e-05,
"loss": 3.4839,
"step": 2060
},
{
"epoch": 0.21,
"learning_rate": 1.8322999882448148e-05,
"loss": 3.4741,
"step": 2065
},
{
"epoch": 0.21,
"learning_rate": 1.83137723336003e-05,
"loss": 3.4617,
"step": 2070
},
{
"epoch": 0.21,
"learning_rate": 1.8304521804329773e-05,
"loss": 3.5136,
"step": 2075
},
{
"epoch": 0.21,
"learning_rate": 1.8295248320206323e-05,
"loss": 3.4694,
"step": 2080
},
{
"epoch": 0.21,
"learning_rate": 1.828595190686315e-05,
"loss": 3.4442,
"step": 2085
},
{
"epoch": 0.21,
"learning_rate": 1.827663258999683e-05,
"loss": 3.4919,
"step": 2090
},
{
"epoch": 0.22,
"learning_rate": 1.826729039536725e-05,
"loss": 3.4623,
"step": 2095
},
{
"epoch": 0.22,
"learning_rate": 1.8257925348797534e-05,
"loss": 3.5064,
"step": 2100
},
{
"epoch": 0.22,
"learning_rate": 1.8248537476173975e-05,
"loss": 3.4907,
"step": 2105
},
{
"epoch": 0.22,
"learning_rate": 1.823912680344596e-05,
"loss": 3.4883,
"step": 2110
},
{
"epoch": 0.22,
"learning_rate": 1.8229693356625892e-05,
"loss": 3.5198,
"step": 2115
},
{
"epoch": 0.22,
"learning_rate": 1.8220237161789134e-05,
"loss": 3.4987,
"step": 2120
},
{
"epoch": 0.22,
"learning_rate": 1.8210758245073923e-05,
"loss": 3.4645,
"step": 2125
},
{
"epoch": 0.22,
"learning_rate": 1.82012566326813e-05,
"loss": 3.4729,
"step": 2130
},
{
"epoch": 0.22,
"learning_rate": 1.8191732350875045e-05,
"loss": 3.4733,
"step": 2135
},
{
"epoch": 0.22,
"learning_rate": 1.8182185425981593e-05,
"loss": 3.4836,
"step": 2140
},
{
"epoch": 0.22,
"learning_rate": 1.817261588438998e-05,
"loss": 3.476,
"step": 2145
},
{
"epoch": 0.22,
"learning_rate": 1.816302375255174e-05,
"loss": 3.4627,
"step": 2150
},
{
"epoch": 0.22,
"learning_rate": 1.8153409056980868e-05,
"loss": 3.4819,
"step": 2155
},
{
"epoch": 0.22,
"learning_rate": 1.8143771824253712e-05,
"loss": 3.476,
"step": 2160
},
{
"epoch": 0.22,
"learning_rate": 1.8134112081008926e-05,
"loss": 3.474,
"step": 2165
},
{
"epoch": 0.22,
"learning_rate": 1.8124429853947387e-05,
"loss": 3.4573,
"step": 2170
},
{
"epoch": 0.22,
"learning_rate": 1.811472516983211e-05,
"loss": 3.4789,
"step": 2175
},
{
"epoch": 0.22,
"learning_rate": 1.8104998055488198e-05,
"loss": 3.4821,
"step": 2180
},
{
"epoch": 0.22,
"learning_rate": 1.8095248537802743e-05,
"loss": 3.4564,
"step": 2185
},
{
"epoch": 0.22,
"learning_rate": 1.8085476643724768e-05,
"loss": 3.4867,
"step": 2190
},
{
"epoch": 0.23,
"learning_rate": 1.8075682400265146e-05,
"loss": 3.468,
"step": 2195
},
{
"epoch": 0.23,
"learning_rate": 1.8065865834496535e-05,
"loss": 3.4569,
"step": 2200
},
{
"epoch": 0.23,
"learning_rate": 1.805602697355328e-05,
"loss": 3.4577,
"step": 2205
},
{
"epoch": 0.23,
"learning_rate": 1.804616584463136e-05,
"loss": 3.4858,
"step": 2210
},
{
"epoch": 0.23,
"learning_rate": 1.8036282474988307e-05,
"loss": 3.4827,
"step": 2215
},
{
"epoch": 0.23,
"learning_rate": 1.8026376891943137e-05,
"loss": 3.5044,
"step": 2220
},
{
"epoch": 0.23,
"learning_rate": 1.8016449122876247e-05,
"loss": 3.4965,
"step": 2225
},
{
"epoch": 0.23,
"learning_rate": 1.800649919522938e-05,
"loss": 3.4853,
"step": 2230
},
{
"epoch": 0.23,
"learning_rate": 1.7996527136505516e-05,
"loss": 3.4952,
"step": 2235
},
{
"epoch": 0.23,
"learning_rate": 1.7986532974268814e-05,
"loss": 3.4914,
"step": 2240
},
{
"epoch": 0.23,
"learning_rate": 1.7976516736144524e-05,
"loss": 3.4836,
"step": 2245
},
{
"epoch": 0.23,
"learning_rate": 1.7966478449818925e-05,
"loss": 3.4622,
"step": 2250
},
{
"epoch": 0.23,
"learning_rate": 1.7956418143039232e-05,
"loss": 3.4965,
"step": 2255
},
{
"epoch": 0.23,
"learning_rate": 1.7946335843613533e-05,
"loss": 3.4663,
"step": 2260
},
{
"epoch": 0.23,
"learning_rate": 1.7936231579410707e-05,
"loss": 3.496,
"step": 2265
},
{
"epoch": 0.23,
"learning_rate": 1.792610537836035e-05,
"loss": 3.4688,
"step": 2270
},
{
"epoch": 0.23,
"learning_rate": 1.7915957268452678e-05,
"loss": 3.4422,
"step": 2275
},
{
"epoch": 0.23,
"learning_rate": 1.7905787277738483e-05,
"loss": 3.4605,
"step": 2280
},
{
"epoch": 0.23,
"learning_rate": 1.7895595434329037e-05,
"loss": 3.4842,
"step": 2285
},
{
"epoch": 0.24,
"learning_rate": 1.7885381766396008e-05,
"loss": 3.4694,
"step": 2290
},
{
"epoch": 0.24,
"learning_rate": 1.7875146302171398e-05,
"loss": 3.4923,
"step": 2295
},
{
"epoch": 0.24,
"learning_rate": 1.7864889069947448e-05,
"loss": 3.4477,
"step": 2300
},
{
"epoch": 0.24,
"learning_rate": 1.7854610098076577e-05,
"loss": 3.4722,
"step": 2305
},
{
"epoch": 0.24,
"learning_rate": 1.7844309414971296e-05,
"loss": 3.4378,
"step": 2310
},
{
"epoch": 0.24,
"learning_rate": 1.783398704910412e-05,
"loss": 3.467,
"step": 2315
},
{
"epoch": 0.24,
"learning_rate": 1.78236430290075e-05,
"loss": 3.4667,
"step": 2320
},
{
"epoch": 0.24,
"learning_rate": 1.781327738327376e-05,
"loss": 3.485,
"step": 2325
},
{
"epoch": 0.24,
"learning_rate": 1.780289014055497e-05,
"loss": 3.4708,
"step": 2330
},
{
"epoch": 0.24,
"learning_rate": 1.7792481329562923e-05,
"loss": 3.4718,
"step": 2335
},
{
"epoch": 0.24,
"learning_rate": 1.778205097906902e-05,
"loss": 3.4674,
"step": 2340
},
{
"epoch": 0.24,
"learning_rate": 1.7771599117904193e-05,
"loss": 3.4815,
"step": 2345
},
{
"epoch": 0.24,
"learning_rate": 1.7761125774958846e-05,
"loss": 3.4772,
"step": 2350
},
{
"epoch": 0.24,
"learning_rate": 1.775063097918275e-05,
"loss": 3.4501,
"step": 2355
},
{
"epoch": 0.24,
"learning_rate": 1.7740114759584983e-05,
"loss": 3.4611,
"step": 2360
},
{
"epoch": 0.24,
"learning_rate": 1.7729577145233835e-05,
"loss": 3.4335,
"step": 2365
},
{
"epoch": 0.24,
"learning_rate": 1.7719018165256745e-05,
"loss": 3.4715,
"step": 2370
},
{
"epoch": 0.24,
"learning_rate": 1.7708437848840193e-05,
"loss": 3.4592,
"step": 2375
},
{
"epoch": 0.24,
"learning_rate": 1.769783622522965e-05,
"loss": 3.4746,
"step": 2380
},
{
"epoch": 0.24,
"learning_rate": 1.768721332372947e-05,
"loss": 3.4731,
"step": 2385
},
{
"epoch": 0.25,
"learning_rate": 1.7676569173702844e-05,
"loss": 3.497,
"step": 2390
},
{
"epoch": 0.25,
"learning_rate": 1.7665903804571668e-05,
"loss": 3.4437,
"step": 2395
},
{
"epoch": 0.25,
"learning_rate": 1.7655217245816513e-05,
"loss": 3.4544,
"step": 2400
},
{
"epoch": 0.25,
"learning_rate": 1.7644509526976515e-05,
"loss": 3.4832,
"step": 2405
},
{
"epoch": 0.25,
"learning_rate": 1.763378067764929e-05,
"loss": 3.4588,
"step": 2410
},
{
"epoch": 0.25,
"learning_rate": 1.7623030727490875e-05,
"loss": 3.4574,
"step": 2415
},
{
"epoch": 0.25,
"learning_rate": 1.7612259706215626e-05,
"loss": 3.4369,
"step": 2420
},
{
"epoch": 0.25,
"learning_rate": 1.7601467643596142e-05,
"loss": 3.4789,
"step": 2425
},
{
"epoch": 0.25,
"learning_rate": 1.7590654569463186e-05,
"loss": 3.4798,
"step": 2430
},
{
"epoch": 0.25,
"learning_rate": 1.7579820513705596e-05,
"loss": 3.4592,
"step": 2435
},
{
"epoch": 0.25,
"learning_rate": 1.7568965506270212e-05,
"loss": 3.4747,
"step": 2440
},
{
"epoch": 0.25,
"learning_rate": 1.7558089577161783e-05,
"loss": 3.4782,
"step": 2445
},
{
"epoch": 0.25,
"learning_rate": 1.7547192756442887e-05,
"loss": 3.4625,
"step": 2450
},
{
"epoch": 0.25,
"learning_rate": 1.7536275074233854e-05,
"loss": 3.4597,
"step": 2455
},
{
"epoch": 0.25,
"learning_rate": 1.7525336560712675e-05,
"loss": 3.4495,
"step": 2460
},
{
"epoch": 0.25,
"learning_rate": 1.751437724611492e-05,
"loss": 3.4815,
"step": 2465
},
{
"epoch": 0.25,
"learning_rate": 1.750339716073366e-05,
"loss": 3.4729,
"step": 2470
},
{
"epoch": 0.25,
"learning_rate": 1.749239633491938e-05,
"loss": 3.4658,
"step": 2475
},
{
"epoch": 0.25,
"learning_rate": 1.748137479907989e-05,
"loss": 3.4374,
"step": 2480
},
{
"epoch": 0.26,
"learning_rate": 1.747033258368024e-05,
"loss": 3.476,
"step": 2485
},
{
"epoch": 0.26,
"learning_rate": 1.7459269719242665e-05,
"loss": 3.4594,
"step": 2490
},
{
"epoch": 0.26,
"learning_rate": 1.744818623634645e-05,
"loss": 3.493,
"step": 2495
},
{
"epoch": 0.26,
"learning_rate": 1.743708216562788e-05,
"loss": 3.4566,
"step": 2500
},
{
"epoch": 0.26,
"learning_rate": 1.742595753778016e-05,
"loss": 3.4497,
"step": 2505
},
{
"epoch": 0.26,
"learning_rate": 1.7414812383553297e-05,
"loss": 3.4345,
"step": 2510
},
{
"epoch": 0.26,
"learning_rate": 1.7403646733754057e-05,
"loss": 3.4414,
"step": 2515
},
{
"epoch": 0.26,
"learning_rate": 1.7392460619245842e-05,
"loss": 3.4439,
"step": 2520
},
{
"epoch": 0.26,
"learning_rate": 1.7381254070948635e-05,
"loss": 3.4548,
"step": 2525
},
{
"epoch": 0.26,
"learning_rate": 1.7370027119838884e-05,
"loss": 3.4665,
"step": 2530
},
{
"epoch": 0.26,
"learning_rate": 1.7358779796949447e-05,
"loss": 3.4637,
"step": 2535
},
{
"epoch": 0.26,
"learning_rate": 1.7347512133369494e-05,
"loss": 3.4169,
"step": 2540
},
{
"epoch": 0.26,
"learning_rate": 1.7336224160244404e-05,
"loss": 3.45,
"step": 2545
},
{
"epoch": 0.26,
"learning_rate": 1.7324915908775708e-05,
"loss": 3.4598,
"step": 2550
},
{
"epoch": 0.26,
"learning_rate": 1.7313587410220988e-05,
"loss": 3.4528,
"step": 2555
},
{
"epoch": 0.26,
"learning_rate": 1.7302238695893788e-05,
"loss": 3.4456,
"step": 2560
},
{
"epoch": 0.26,
"learning_rate": 1.7290869797163533e-05,
"loss": 3.4426,
"step": 2565
},
{
"epoch": 0.26,
"learning_rate": 1.7279480745455433e-05,
"loss": 3.4472,
"step": 2570
},
{
"epoch": 0.26,
"learning_rate": 1.726807157225042e-05,
"loss": 3.4715,
"step": 2575
},
{
"epoch": 0.26,
"learning_rate": 1.725664230908503e-05,
"loss": 3.4379,
"step": 2580
},
{
"epoch": 0.27,
"learning_rate": 1.7245192987551336e-05,
"loss": 3.4433,
"step": 2585
},
{
"epoch": 0.27,
"learning_rate": 1.7233723639296857e-05,
"loss": 3.4492,
"step": 2590
},
{
"epoch": 0.27,
"learning_rate": 1.722223429602446e-05,
"loss": 3.48,
"step": 2595
},
{
"epoch": 0.27,
"learning_rate": 1.7210724989492298e-05,
"loss": 3.4469,
"step": 2600
},
{
"epoch": 0.27,
"learning_rate": 1.7199195751513685e-05,
"loss": 3.4716,
"step": 2605
},
{
"epoch": 0.27,
"learning_rate": 1.718764661395704e-05,
"loss": 3.4365,
"step": 2610
},
{
"epoch": 0.27,
"learning_rate": 1.7176077608745788e-05,
"loss": 3.4319,
"step": 2615
},
{
"epoch": 0.27,
"learning_rate": 1.7164488767858262e-05,
"loss": 3.4571,
"step": 2620
},
{
"epoch": 0.27,
"learning_rate": 1.7152880123327636e-05,
"loss": 3.468,
"step": 2625
},
{
"epoch": 0.27,
"learning_rate": 1.714125170724182e-05,
"loss": 3.4563,
"step": 2630
},
{
"epoch": 0.27,
"learning_rate": 1.712960355174336e-05,
"loss": 3.4546,
"step": 2635
},
{
"epoch": 0.27,
"learning_rate": 1.7117935689029386e-05,
"loss": 3.4337,
"step": 2640
},
{
"epoch": 0.27,
"learning_rate": 1.7106248151351493e-05,
"loss": 3.4386,
"step": 2645
},
{
"epoch": 0.27,
"learning_rate": 1.7094540971015663e-05,
"loss": 3.4529,
"step": 2650
},
{
"epoch": 0.27,
"learning_rate": 1.7082814180382165e-05,
"loss": 3.434,
"step": 2655
},
{
"epoch": 0.27,
"learning_rate": 1.7071067811865477e-05,
"loss": 3.4692,
"step": 2660
},
{
"epoch": 0.27,
"learning_rate": 1.70593018979342e-05,
"loss": 3.456,
"step": 2665
},
{
"epoch": 0.27,
"learning_rate": 1.7047516471110953e-05,
"loss": 3.456,
"step": 2670
},
{
"epoch": 0.27,
"learning_rate": 1.7035711563972297e-05,
"loss": 3.4733,
"step": 2675
},
{
"epoch": 0.28,
"learning_rate": 1.7023887209148636e-05,
"loss": 3.4323,
"step": 2680
},
{
"epoch": 0.28,
"learning_rate": 1.7012043439324128e-05,
"loss": 3.4326,
"step": 2685
},
{
"epoch": 0.28,
"learning_rate": 1.70001802872366e-05,
"loss": 3.4657,
"step": 2690
},
{
"epoch": 0.28,
"learning_rate": 1.6988297785677458e-05,
"loss": 3.464,
"step": 2695
},
{
"epoch": 0.28,
"learning_rate": 1.6976395967491585e-05,
"loss": 3.451,
"step": 2700
},
{
"epoch": 0.28,
"learning_rate": 1.696447486557726e-05,
"loss": 3.4533,
"step": 2705
},
{
"epoch": 0.28,
"learning_rate": 1.695253451288607e-05,
"loss": 3.4042,
"step": 2710
},
{
"epoch": 0.28,
"learning_rate": 1.6940574942422807e-05,
"loss": 3.4533,
"step": 2715
},
{
"epoch": 0.28,
"learning_rate": 1.692859618724539e-05,
"loss": 3.4485,
"step": 2720
},
{
"epoch": 0.28,
"learning_rate": 1.691659828046476e-05,
"loss": 3.4082,
"step": 2725
},
{
"epoch": 0.28,
"learning_rate": 1.6904581255244802e-05,
"loss": 3.4368,
"step": 2730
},
{
"epoch": 0.28,
"learning_rate": 1.6892545144802245e-05,
"loss": 3.435,
"step": 2735
},
{
"epoch": 0.28,
"learning_rate": 1.6880489982406568e-05,
"loss": 3.453,
"step": 2740
},
{
"epoch": 0.28,
"learning_rate": 1.6868415801379918e-05,
"loss": 3.4251,
"step": 2745
},
{
"epoch": 0.28,
"learning_rate": 1.6856322635097013e-05,
"loss": 3.4618,
"step": 2750
},
{
"epoch": 0.28,
"learning_rate": 1.6844210516985043e-05,
"loss": 3.4494,
"step": 2755
},
{
"epoch": 0.28,
"learning_rate": 1.683207948052359e-05,
"loss": 3.4315,
"step": 2760
},
{
"epoch": 0.28,
"learning_rate": 1.6819929559244515e-05,
"loss": 3.442,
"step": 2765
},
{
"epoch": 0.28,
"learning_rate": 1.6807760786731905e-05,
"loss": 3.4317,
"step": 2770
},
{
"epoch": 0.28,
"learning_rate": 1.679557319662193e-05,
"loss": 3.4345,
"step": 2775
},
{
"epoch": 0.29,
"learning_rate": 1.678336682260278e-05,
"loss": 3.4628,
"step": 2780
},
{
"epoch": 0.29,
"learning_rate": 1.677114169841458e-05,
"loss": 3.4578,
"step": 2785
},
{
"epoch": 0.29,
"learning_rate": 1.6758897857849268e-05,
"loss": 3.4315,
"step": 2790
},
{
"epoch": 0.29,
"learning_rate": 1.674663533475052e-05,
"loss": 3.4531,
"step": 2795
},
{
"epoch": 0.29,
"learning_rate": 1.673435416301366e-05,
"loss": 3.431,
"step": 2800
},
{
"epoch": 0.29,
"learning_rate": 1.6722054376585547e-05,
"loss": 3.4282,
"step": 2805
},
{
"epoch": 0.29,
"learning_rate": 1.6709736009464504e-05,
"loss": 3.4283,
"step": 2810
},
{
"epoch": 0.29,
"learning_rate": 1.6697399095700216e-05,
"loss": 3.4503,
"step": 2815
},
{
"epoch": 0.29,
"learning_rate": 1.6685043669393622e-05,
"loss": 3.446,
"step": 2820
},
{
"epoch": 0.29,
"learning_rate": 1.6672669764696838e-05,
"loss": 3.4529,
"step": 2825
},
{
"epoch": 0.29,
"learning_rate": 1.666027741581306e-05,
"loss": 3.4525,
"step": 2830
},
{
"epoch": 0.29,
"learning_rate": 1.664786665699646e-05,
"loss": 3.4641,
"step": 2835
},
{
"epoch": 0.29,
"learning_rate": 1.6635437522552106e-05,
"loss": 3.4218,
"step": 2840
},
{
"epoch": 0.29,
"learning_rate": 1.6622990046835846e-05,
"loss": 3.4395,
"step": 2845
},
{
"epoch": 0.29,
"learning_rate": 1.661052426425424e-05,
"loss": 3.4622,
"step": 2850
},
{
"epoch": 0.29,
"learning_rate": 1.6598040209264445e-05,
"loss": 3.4136,
"step": 2855
},
{
"epoch": 0.29,
"learning_rate": 1.658553791637412e-05,
"loss": 3.4263,
"step": 2860
},
{
"epoch": 0.29,
"learning_rate": 1.6573017420141344e-05,
"loss": 3.4418,
"step": 2865
},
{
"epoch": 0.29,
"learning_rate": 1.6560478755174506e-05,
"loss": 3.4332,
"step": 2870
},
{
"epoch": 0.3,
"learning_rate": 1.6547921956132226e-05,
"loss": 3.4528,
"step": 2875
},
{
"epoch": 0.3,
"learning_rate": 1.6535347057723235e-05,
"loss": 3.4053,
"step": 2880
},
{
"epoch": 0.3,
"learning_rate": 1.6522754094706304e-05,
"loss": 3.4493,
"step": 2885
},
{
"epoch": 0.3,
"learning_rate": 1.6510143101890136e-05,
"loss": 3.4355,
"step": 2890
},
{
"epoch": 0.3,
"learning_rate": 1.6497514114133266e-05,
"loss": 3.413,
"step": 2895
},
{
"epoch": 0.3,
"learning_rate": 1.648486716634397e-05,
"loss": 3.4323,
"step": 2900
},
{
"epoch": 0.3,
"learning_rate": 1.6472202293480172e-05,
"loss": 3.473,
"step": 2905
},
{
"epoch": 0.3,
"learning_rate": 1.6459519530549345e-05,
"loss": 3.4457,
"step": 2910
},
{
"epoch": 0.3,
"learning_rate": 1.64468189126084e-05,
"loss": 3.4162,
"step": 2915
},
{
"epoch": 0.3,
"learning_rate": 1.6434100474763623e-05,
"loss": 3.432,
"step": 2920
},
{
"epoch": 0.3,
"learning_rate": 1.6421364252170534e-05,
"loss": 3.438,
"step": 2925
},
{
"epoch": 0.3,
"learning_rate": 1.640861028003383e-05,
"loss": 3.4596,
"step": 2930
},
{
"epoch": 0.3,
"learning_rate": 1.6395838593607263e-05,
"loss": 3.3828,
"step": 2935
},
{
"epoch": 0.3,
"learning_rate": 1.6383049228193545e-05,
"loss": 3.438,
"step": 2940
},
{
"epoch": 0.3,
"learning_rate": 1.6370242219144262e-05,
"loss": 3.4488,
"step": 2945
},
{
"epoch": 0.3,
"learning_rate": 1.6357417601859772e-05,
"loss": 3.4436,
"step": 2950
},
{
"epoch": 0.3,
"learning_rate": 1.6344575411789097e-05,
"loss": 3.4508,
"step": 2955
},
{
"epoch": 0.3,
"learning_rate": 1.6331715684429834e-05,
"loss": 3.41,
"step": 2960
},
{
"epoch": 0.3,
"learning_rate": 1.6318838455328057e-05,
"loss": 3.4267,
"step": 2965
},
{
"epoch": 0.3,
"learning_rate": 1.6305943760078226e-05,
"loss": 3.4105,
"step": 2970
},
{
"epoch": 0.31,
"learning_rate": 1.6293031634323065e-05,
"loss": 3.44,
"step": 2975
},
{
"epoch": 0.31,
"learning_rate": 1.628010211375348e-05,
"loss": 3.4041,
"step": 2980
},
{
"epoch": 0.31,
"learning_rate": 1.6267155234108474e-05,
"loss": 3.4008,
"step": 2985
},
{
"epoch": 0.31,
"learning_rate": 1.625419103117502e-05,
"loss": 3.4276,
"step": 2990
},
{
"epoch": 0.31,
"learning_rate": 1.6241209540787973e-05,
"loss": 3.4329,
"step": 2995
},
{
"epoch": 0.31,
"learning_rate": 1.6228210798829978e-05,
"loss": 3.4045,
"step": 3000
},
{
"epoch": 0.31,
"learning_rate": 1.6215194841231365e-05,
"loss": 3.4173,
"step": 3005
},
{
"epoch": 0.31,
"learning_rate": 1.6202161703970057e-05,
"loss": 3.4396,
"step": 3010
},
{
"epoch": 0.31,
"learning_rate": 1.618911142307145e-05,
"loss": 3.4001,
"step": 3015
},
{
"epoch": 0.31,
"learning_rate": 1.617604403460834e-05,
"loss": 3.4269,
"step": 3020
},
{
"epoch": 0.31,
"learning_rate": 1.6162959574700798e-05,
"loss": 3.4284,
"step": 3025
},
{
"epoch": 0.31,
"learning_rate": 1.6149858079516097e-05,
"loss": 3.4375,
"step": 3030
},
{
"epoch": 0.31,
"learning_rate": 1.6136739585268593e-05,
"loss": 3.4387,
"step": 3035
},
{
"epoch": 0.31,
"learning_rate": 1.612360412821962e-05,
"loss": 3.4332,
"step": 3040
},
{
"epoch": 0.31,
"learning_rate": 1.6110451744677415e-05,
"loss": 3.3909,
"step": 3045
},
{
"epoch": 0.31,
"learning_rate": 1.6097282470996997e-05,
"loss": 3.3885,
"step": 3050
},
{
"epoch": 0.31,
"learning_rate": 1.6084096343580056e-05,
"loss": 3.425,
"step": 3055
},
{
"epoch": 0.31,
"learning_rate": 1.60708933988749e-05,
"loss": 3.4276,
"step": 3060
},
{
"epoch": 0.31,
"learning_rate": 1.605767367337629e-05,
"loss": 3.4234,
"step": 3065
},
{
"epoch": 0.32,
"learning_rate": 1.604443720362539e-05,
"loss": 3.4213,
"step": 3070
},
{
"epoch": 0.32,
"learning_rate": 1.6031184026209642e-05,
"loss": 3.4176,
"step": 3075
},
{
"epoch": 0.32,
"learning_rate": 1.601791417776267e-05,
"loss": 3.4254,
"step": 3080
},
{
"epoch": 0.32,
"learning_rate": 1.6004627694964187e-05,
"loss": 3.3915,
"step": 3085
},
{
"epoch": 0.32,
"learning_rate": 1.599132461453987e-05,
"loss": 3.4298,
"step": 3090
},
{
"epoch": 0.32,
"learning_rate": 1.5978004973261286e-05,
"loss": 3.4137,
"step": 3095
},
{
"epoch": 0.32,
"learning_rate": 1.5964668807945777e-05,
"loss": 3.4123,
"step": 3100
},
{
"epoch": 0.32,
"learning_rate": 1.5951316155456358e-05,
"loss": 3.413,
"step": 3105
},
{
"epoch": 0.32,
"learning_rate": 1.5937947052701615e-05,
"loss": 3.4029,
"step": 3110
},
{
"epoch": 0.32,
"learning_rate": 1.592456153663561e-05,
"loss": 3.4254,
"step": 3115
},
{
"epoch": 0.32,
"learning_rate": 1.5911159644257765e-05,
"loss": 3.4178,
"step": 3120
},
{
"epoch": 0.32,
"learning_rate": 1.5897741412612782e-05,
"loss": 3.4116,
"step": 3125
},
{
"epoch": 0.32,
"learning_rate": 1.5884306878790512e-05,
"loss": 3.44,
"step": 3130
},
{
"epoch": 0.32,
"learning_rate": 1.5870856079925877e-05,
"loss": 3.4486,
"step": 3135
},
{
"epoch": 0.32,
"learning_rate": 1.5857389053198753e-05,
"loss": 3.4174,
"step": 3140
},
{
"epoch": 0.32,
"learning_rate": 1.584390583583388e-05,
"loss": 3.3858,
"step": 3145
},
{
"epoch": 0.32,
"learning_rate": 1.583040646510074e-05,
"loss": 3.4183,
"step": 3150
},
{
"epoch": 0.32,
"learning_rate": 1.5816890978313476e-05,
"loss": 3.4317,
"step": 3155
},
{
"epoch": 0.32,
"learning_rate": 1.5803359412830763e-05,
"loss": 3.4108,
"step": 3160
},
{
"epoch": 0.32,
"learning_rate": 1.578981180605574e-05,
"loss": 3.4329,
"step": 3165
},
{
"epoch": 0.33,
"learning_rate": 1.577624819543587e-05,
"loss": 3.4029,
"step": 3170
},
{
"epoch": 0.33,
"learning_rate": 1.576266861846286e-05,
"loss": 3.4199,
"step": 3175
},
{
"epoch": 0.33,
"learning_rate": 1.574907311267255e-05,
"loss": 3.3975,
"step": 3180
},
{
"epoch": 0.33,
"learning_rate": 1.573546171564481e-05,
"loss": 3.3935,
"step": 3185
},
{
"epoch": 0.33,
"learning_rate": 1.5721834465003425e-05,
"loss": 3.3933,
"step": 3190
},
{
"epoch": 0.33,
"learning_rate": 1.5708191398416023e-05,
"loss": 3.425,
"step": 3195
},
{
"epoch": 0.33,
"learning_rate": 1.5694532553593925e-05,
"loss": 3.4013,
"step": 3200
},
{
"epoch": 0.33,
"learning_rate": 1.5680857968292087e-05,
"loss": 3.4289,
"step": 3205
},
{
"epoch": 0.33,
"learning_rate": 1.566716768030896e-05,
"loss": 3.4144,
"step": 3210
},
{
"epoch": 0.33,
"learning_rate": 1.56534617274864e-05,
"loss": 3.416,
"step": 3215
},
{
"epoch": 0.33,
"learning_rate": 1.563974014770957e-05,
"loss": 3.4148,
"step": 3220
},
{
"epoch": 0.33,
"learning_rate": 1.5626002978906827e-05,
"loss": 3.421,
"step": 3225
},
{
"epoch": 0.33,
"learning_rate": 1.561225025904961e-05,
"loss": 3.4306,
"step": 3230
},
{
"epoch": 0.33,
"learning_rate": 1.5598482026152353e-05,
"loss": 3.4607,
"step": 3235
},
{
"epoch": 0.33,
"learning_rate": 1.5584698318272367e-05,
"loss": 3.417,
"step": 3240
},
{
"epoch": 0.33,
"learning_rate": 1.557089917350973e-05,
"loss": 3.3939,
"step": 3245
},
{
"epoch": 0.33,
"learning_rate": 1.5557084630007206e-05,
"loss": 3.3773,
"step": 3250
},
{
"epoch": 0.33,
"learning_rate": 1.5543254725950104e-05,
"loss": 3.4396,
"step": 3255
},
{
"epoch": 0.33,
"learning_rate": 1.552940949956621e-05,
"loss": 3.4436,
"step": 3260
},
{
"epoch": 0.34,
"learning_rate": 1.5515548989125654e-05,
"loss": 3.3934,
"step": 3265
},
{
"epoch": 0.34,
"learning_rate": 1.5501673232940807e-05,
"loss": 3.4003,
"step": 3270
},
{
"epoch": 0.34,
"learning_rate": 1.54877822693662e-05,
"loss": 3.4258,
"step": 3275
},
{
"epoch": 0.34,
"learning_rate": 1.5473876136798374e-05,
"loss": 3.4066,
"step": 3280
},
{
"epoch": 0.34,
"learning_rate": 1.5459954873675825e-05,
"loss": 3.4054,
"step": 3285
},
{
"epoch": 0.34,
"learning_rate": 1.544601851847885e-05,
"loss": 3.4156,
"step": 3290
},
{
"epoch": 0.34,
"learning_rate": 1.543206710972948e-05,
"loss": 3.415,
"step": 3295
},
{
"epoch": 0.34,
"learning_rate": 1.5418100685991344e-05,
"loss": 3.4014,
"step": 3300
},
{
"epoch": 0.34,
"learning_rate": 1.5404119285869584e-05,
"loss": 3.3938,
"step": 3305
},
{
"epoch": 0.34,
"learning_rate": 1.539012294801073e-05,
"loss": 3.4192,
"step": 3310
},
{
"epoch": 0.34,
"learning_rate": 1.5376111711102604e-05,
"loss": 3.3927,
"step": 3315
},
{
"epoch": 0.34,
"learning_rate": 1.536208561387422e-05,
"loss": 3.4204,
"step": 3320
},
{
"epoch": 0.34,
"learning_rate": 1.5348044695095653e-05,
"loss": 3.4058,
"step": 3325
},
{
"epoch": 0.34,
"learning_rate": 1.5333988993577958e-05,
"loss": 3.3966,
"step": 3330
},
{
"epoch": 0.34,
"learning_rate": 1.5319918548173053e-05,
"loss": 3.4233,
"step": 3335
},
{
"epoch": 0.34,
"learning_rate": 1.5305833397773596e-05,
"loss": 3.4064,
"step": 3340
},
{
"epoch": 0.34,
"learning_rate": 1.529173358131291e-05,
"loss": 3.4137,
"step": 3345
},
{
"epoch": 0.34,
"learning_rate": 1.5277619137764843e-05,
"loss": 3.3975,
"step": 3350
},
{
"epoch": 0.34,
"learning_rate": 1.5263490106143684e-05,
"loss": 3.4172,
"step": 3355
},
{
"epoch": 0.34,
"learning_rate": 1.5249346525504032e-05,
"loss": 3.4074,
"step": 3360
},
{
"epoch": 0.35,
"learning_rate": 1.5235188434940717e-05,
"loss": 3.4161,
"step": 3365
},
{
"epoch": 0.35,
"learning_rate": 1.5221015873588672e-05,
"loss": 3.4484,
"step": 3370
},
{
"epoch": 0.35,
"learning_rate": 1.5206828880622821e-05,
"loss": 3.3988,
"step": 3375
},
{
"epoch": 0.35,
"learning_rate": 1.5192627495257992e-05,
"loss": 3.4127,
"step": 3380
},
{
"epoch": 0.35,
"learning_rate": 1.5178411756748781e-05,
"loss": 3.4079,
"step": 3385
},
{
"epoch": 0.35,
"learning_rate": 1.5164181704389471e-05,
"loss": 3.3974,
"step": 3390
},
{
"epoch": 0.35,
"learning_rate": 1.5149937377513904e-05,
"loss": 3.4187,
"step": 3395
},
{
"epoch": 0.35,
"learning_rate": 1.5135678815495381e-05,
"loss": 3.4162,
"step": 3400
},
{
"epoch": 0.35,
"learning_rate": 1.5121406057746546e-05,
"loss": 3.4463,
"step": 3405
},
{
"epoch": 0.35,
"learning_rate": 1.510711914371929e-05,
"loss": 3.399,
"step": 3410
},
{
"epoch": 0.35,
"learning_rate": 1.5092818112904628e-05,
"loss": 3.3848,
"step": 3415
},
{
"epoch": 0.35,
"learning_rate": 1.5078503004832599e-05,
"loss": 3.4137,
"step": 3420
},
{
"epoch": 0.35,
"learning_rate": 1.506417385907215e-05,
"loss": 3.4264,
"step": 3425
},
{
"epoch": 0.35,
"learning_rate": 1.5049830715231038e-05,
"loss": 3.3953,
"step": 3430
},
{
"epoch": 0.35,
"learning_rate": 1.5035473612955697e-05,
"loss": 3.3849,
"step": 3435
},
{
"epoch": 0.35,
"learning_rate": 1.502110259193116e-05,
"loss": 3.4107,
"step": 3440
},
{
"epoch": 0.35,
"learning_rate": 1.500671769188093e-05,
"loss": 3.3866,
"step": 3445
},
{
"epoch": 0.35,
"learning_rate": 1.4992318952566862e-05,
"loss": 3.4107,
"step": 3450
},
{
"epoch": 0.35,
"learning_rate": 1.497790641378908e-05,
"loss": 3.4024,
"step": 3455
},
{
"epoch": 0.36,
"learning_rate": 1.4963480115385847e-05,
"loss": 3.3854,
"step": 3460
},
{
"epoch": 0.36,
"learning_rate": 1.4949040097233453e-05,
"loss": 3.3878,
"step": 3465
},
{
"epoch": 0.36,
"learning_rate": 1.4934586399246116e-05,
"loss": 3.4319,
"step": 3470
},
{
"epoch": 0.36,
"learning_rate": 1.4920119061375868e-05,
"loss": 3.3934,
"step": 3475
},
{
"epoch": 0.36,
"learning_rate": 1.4905638123612443e-05,
"loss": 3.4121,
"step": 3480
},
{
"epoch": 0.36,
"learning_rate": 1.4891143625983169e-05,
"loss": 3.4061,
"step": 3485
},
{
"epoch": 0.36,
"learning_rate": 1.4876635608552845e-05,
"loss": 3.4153,
"step": 3490
},
{
"epoch": 0.36,
"learning_rate": 1.4862114111423658e-05,
"loss": 3.3985,
"step": 3495
},
{
"epoch": 0.36,
"learning_rate": 1.4847579174735036e-05,
"loss": 3.4101,
"step": 3500
},
{
"epoch": 0.36,
"learning_rate": 1.483303083866357e-05,
"loss": 3.4199,
"step": 3505
},
{
"epoch": 0.36,
"learning_rate": 1.4818469143422882e-05,
"loss": 3.3797,
"step": 3510
},
{
"epoch": 0.36,
"learning_rate": 1.4803894129263527e-05,
"loss": 3.4023,
"step": 3515
},
{
"epoch": 0.36,
"learning_rate": 1.4789305836472865e-05,
"loss": 3.3979,
"step": 3520
},
{
"epoch": 0.36,
"learning_rate": 1.4774704305374968e-05,
"loss": 3.3845,
"step": 3525
},
{
"epoch": 0.36,
"learning_rate": 1.4760089576330493e-05,
"loss": 3.4168,
"step": 3530
},
{
"epoch": 0.36,
"learning_rate": 1.4745461689736592e-05,
"loss": 3.4011,
"step": 3535
},
{
"epoch": 0.36,
"learning_rate": 1.4730820686026773e-05,
"loss": 3.3818,
"step": 3540
},
{
"epoch": 0.36,
"learning_rate": 1.4716166605670806e-05,
"loss": 3.3813,
"step": 3545
},
{
"epoch": 0.36,
"learning_rate": 1.4701499489174604e-05,
"loss": 3.4347,
"step": 3550
},
{
"epoch": 0.36,
"learning_rate": 1.4686819377080123e-05,
"loss": 3.3986,
"step": 3555
},
{
"epoch": 0.37,
"learning_rate": 1.4672126309965226e-05,
"loss": 3.3861,
"step": 3560
},
{
"epoch": 0.37,
"learning_rate": 1.46574203284436e-05,
"loss": 3.3919,
"step": 3565
},
{
"epoch": 0.37,
"learning_rate": 1.4642701473164618e-05,
"loss": 3.4078,
"step": 3570
},
{
"epoch": 0.37,
"learning_rate": 1.4627969784813247e-05,
"loss": 3.4109,
"step": 3575
},
{
"epoch": 0.37,
"learning_rate": 1.4613225304109917e-05,
"loss": 3.3927,
"step": 3580
},
{
"epoch": 0.37,
"learning_rate": 1.4598468071810425e-05,
"loss": 3.378,
"step": 3585
},
{
"epoch": 0.37,
"learning_rate": 1.4583698128705815e-05,
"loss": 3.4147,
"step": 3590
},
{
"epoch": 0.37,
"learning_rate": 1.456891551562226e-05,
"loss": 3.4059,
"step": 3595
},
{
"epoch": 0.37,
"learning_rate": 1.455412027342096e-05,
"loss": 3.3561,
"step": 3600
},
{
"epoch": 0.37,
"learning_rate": 1.4539312442998019e-05,
"loss": 3.4308,
"step": 3605
},
{
"epoch": 0.37,
"learning_rate": 1.4524492065284344e-05,
"loss": 3.3608,
"step": 3610
},
{
"epoch": 0.37,
"learning_rate": 1.4509659181245512e-05,
"loss": 3.4004,
"step": 3615
},
{
"epoch": 0.37,
"learning_rate": 1.4494813831881687e-05,
"loss": 3.3856,
"step": 3620
},
{
"epoch": 0.37,
"learning_rate": 1.4479956058227474e-05,
"loss": 3.3773,
"step": 3625
},
{
"epoch": 0.37,
"learning_rate": 1.4465085901351819e-05,
"loss": 3.3811,
"step": 3630
},
{
"epoch": 0.37,
"learning_rate": 1.445020340235791e-05,
"loss": 3.3973,
"step": 3635
},
{
"epoch": 0.37,
"learning_rate": 1.4435308602383043e-05,
"loss": 3.3748,
"step": 3640
},
{
"epoch": 0.37,
"learning_rate": 1.4420401542598514e-05,
"loss": 3.4191,
"step": 3645
},
{
"epoch": 0.37,
"learning_rate": 1.4405482264209512e-05,
"loss": 3.4051,
"step": 3650
},
{
"epoch": 0.38,
"learning_rate": 1.4390550808454993e-05,
"loss": 3.4239,
"step": 3655
},
{
"epoch": 0.38,
"learning_rate": 1.437560721660758e-05,
"loss": 3.4045,
"step": 3660
},
{
"epoch": 0.38,
"learning_rate": 1.4360651529973435e-05,
"loss": 3.3648,
"step": 3665
},
{
"epoch": 0.38,
"learning_rate": 1.434568378989216e-05,
"loss": 3.3625,
"step": 3670
},
{
"epoch": 0.38,
"learning_rate": 1.4330704037736665e-05,
"loss": 3.4145,
"step": 3675
},
{
"epoch": 0.38,
"learning_rate": 1.431571231491307e-05,
"loss": 3.3903,
"step": 3680
},
{
"epoch": 0.38,
"learning_rate": 1.4300708662860585e-05,
"loss": 3.4044,
"step": 3685
},
{
"epoch": 0.38,
"learning_rate": 1.4285693123051385e-05,
"loss": 3.4025,
"step": 3690
},
{
"epoch": 0.38,
"learning_rate": 1.4270665736990509e-05,
"loss": 3.3918,
"step": 3695
},
{
"epoch": 0.38,
"learning_rate": 1.4255626546215746e-05,
"loss": 3.3852,
"step": 3700
},
{
"epoch": 0.38,
"learning_rate": 1.4240575592297508e-05,
"loss": 3.3773,
"step": 3705
},
{
"epoch": 0.38,
"learning_rate": 1.4225512916838726e-05,
"loss": 3.4109,
"step": 3710
},
{
"epoch": 0.38,
"learning_rate": 1.4210438561474726e-05,
"loss": 3.398,
"step": 3715
},
{
"epoch": 0.38,
"learning_rate": 1.4195352567873124e-05,
"loss": 3.3736,
"step": 3720
},
{
"epoch": 0.38,
"learning_rate": 1.4180254977733703e-05,
"loss": 3.3859,
"step": 3725
},
{
"epoch": 0.38,
"learning_rate": 1.4165145832788305e-05,
"loss": 3.4007,
"step": 3730
},
{
"epoch": 0.38,
"learning_rate": 1.4150025174800704e-05,
"loss": 3.3933,
"step": 3735
},
{
"epoch": 0.38,
"learning_rate": 1.41348930455665e-05,
"loss": 3.4343,
"step": 3740
},
{
"epoch": 0.38,
"learning_rate": 1.4119749486913006e-05,
"loss": 3.3927,
"step": 3745
},
{
"epoch": 0.38,
"learning_rate": 1.4104594540699122e-05,
"loss": 3.3558,
"step": 3750
},
{
"epoch": 0.39,
"learning_rate": 1.4089428248815224e-05,
"loss": 3.3754,
"step": 3755
},
{
"epoch": 0.39,
"learning_rate": 1.4074250653183055e-05,
"loss": 3.3841,
"step": 3760
},
{
"epoch": 0.39,
"learning_rate": 1.4059061795755598e-05,
"loss": 3.4171,
"step": 3765
},
{
"epoch": 0.39,
"learning_rate": 1.4043861718516964e-05,
"loss": 3.3985,
"step": 3770
},
{
"epoch": 0.39,
"learning_rate": 1.4028650463482287e-05,
"loss": 3.3983,
"step": 3775
},
{
"epoch": 0.39,
"learning_rate": 1.4013428072697584e-05,
"loss": 3.3941,
"step": 3780
},
{
"epoch": 0.39,
"learning_rate": 1.3998194588239662e-05,
"loss": 3.3649,
"step": 3785
},
{
"epoch": 0.39,
"learning_rate": 1.398295005221599e-05,
"loss": 3.39,
"step": 3790
},
{
"epoch": 0.39,
"learning_rate": 1.3967694506764586e-05,
"loss": 3.392,
"step": 3795
},
{
"epoch": 0.39,
"learning_rate": 1.39524279940539e-05,
"loss": 3.4054,
"step": 3800
},
{
"epoch": 0.39,
"learning_rate": 1.3937150556282692e-05,
"loss": 3.383,
"step": 3805
},
{
"epoch": 0.39,
"learning_rate": 1.3921862235679929e-05,
"loss": 3.3944,
"step": 3810
},
{
"epoch": 0.39,
"learning_rate": 1.390656307450465e-05,
"loss": 3.4016,
"step": 3815
},
{
"epoch": 0.39,
"learning_rate": 1.3891253115045867e-05,
"loss": 3.3936,
"step": 3820
},
{
"epoch": 0.39,
"learning_rate": 1.3875932399622434e-05,
"loss": 3.4001,
"step": 3825
},
{
"epoch": 0.39,
"learning_rate": 1.386060097058294e-05,
"loss": 3.4204,
"step": 3830
},
{
"epoch": 0.39,
"learning_rate": 1.3845258870305587e-05,
"loss": 3.4111,
"step": 3835
},
{
"epoch": 0.39,
"learning_rate": 1.3829906141198076e-05,
"loss": 3.3971,
"step": 3840
},
{
"epoch": 0.39,
"learning_rate": 1.3814542825697476e-05,
"loss": 3.3963,
"step": 3845
},
{
"epoch": 0.4,
"learning_rate": 1.3799168966270139e-05,
"loss": 3.3946,
"step": 3850
},
{
"epoch": 0.4,
"learning_rate": 1.3783784605411539e-05,
"loss": 3.4239,
"step": 3855
},
{
"epoch": 0.4,
"learning_rate": 1.3768389785646196e-05,
"loss": 3.3657,
"step": 3860
},
{
"epoch": 0.4,
"learning_rate": 1.3752984549527529e-05,
"loss": 3.3875,
"step": 3865
},
{
"epoch": 0.4,
"learning_rate": 1.3737568939637753e-05,
"loss": 3.4093,
"step": 3870
},
{
"epoch": 0.4,
"learning_rate": 1.3722142998587757e-05,
"loss": 3.3958,
"step": 3875
},
{
"epoch": 0.4,
"learning_rate": 1.3706706769016991e-05,
"loss": 3.3936,
"step": 3880
},
{
"epoch": 0.4,
"learning_rate": 1.3691260293593332e-05,
"loss": 3.4068,
"step": 3885
},
{
"epoch": 0.4,
"learning_rate": 1.3675803615012993e-05,
"loss": 3.4127,
"step": 3890
},
{
"epoch": 0.4,
"learning_rate": 1.3660336776000379e-05,
"loss": 3.3843,
"step": 3895
},
{
"epoch": 0.4,
"learning_rate": 1.364485981930798e-05,
"loss": 3.3413,
"step": 3900
},
{
"epoch": 0.4,
"learning_rate": 1.3629372787716264e-05,
"loss": 3.3736,
"step": 3905
},
{
"epoch": 0.4,
"learning_rate": 1.3613875724033536e-05,
"loss": 3.4248,
"step": 3910
},
{
"epoch": 0.4,
"learning_rate": 1.3598368671095835e-05,
"loss": 3.3881,
"step": 3915
},
{
"epoch": 0.4,
"learning_rate": 1.3582851671766808e-05,
"loss": 3.4194,
"step": 3920
},
{
"epoch": 0.4,
"learning_rate": 1.3567324768937603e-05,
"loss": 3.348,
"step": 3925
},
{
"epoch": 0.4,
"learning_rate": 1.3551788005526738e-05,
"loss": 3.4205,
"step": 3930
},
{
"epoch": 0.4,
"learning_rate": 1.3536241424479985e-05,
"loss": 3.3955,
"step": 3935
},
{
"epoch": 0.4,
"learning_rate": 1.352068506877026e-05,
"loss": 3.3848,
"step": 3940
},
{
"epoch": 0.4,
"learning_rate": 1.3505118981397485e-05,
"loss": 3.4094,
"step": 3945
},
{
"epoch": 0.41,
"learning_rate": 1.3489543205388498e-05,
"loss": 3.3731,
"step": 3950
},
{
"epoch": 0.41,
"learning_rate": 1.3473957783796907e-05,
"loss": 3.3853,
"step": 3955
},
{
"epoch": 0.41,
"learning_rate": 1.345836275970298e-05,
"loss": 3.3809,
"step": 3960
},
{
"epoch": 0.41,
"learning_rate": 1.3442758176213539e-05,
"loss": 3.4384,
"step": 3965
},
{
"epoch": 0.41,
"learning_rate": 1.3427144076461818e-05,
"loss": 3.398,
"step": 3970
},
{
"epoch": 0.41,
"learning_rate": 1.341152050360736e-05,
"loss": 3.3624,
"step": 3975
},
{
"epoch": 0.41,
"learning_rate": 1.3395887500835894e-05,
"loss": 3.3804,
"step": 3980
},
{
"epoch": 0.41,
"learning_rate": 1.338024511135921e-05,
"loss": 3.3796,
"step": 3985
},
{
"epoch": 0.41,
"learning_rate": 1.3364593378415054e-05,
"loss": 3.3582,
"step": 3990
},
{
"epoch": 0.41,
"learning_rate": 1.3348932345266987e-05,
"loss": 3.3812,
"step": 3995
},
{
"epoch": 0.41,
"learning_rate": 1.3333262055204284e-05,
"loss": 3.3918,
"step": 4000
},
{
"epoch": 0.41,
"learning_rate": 1.33175825515418e-05,
"loss": 3.3735,
"step": 4005
},
{
"epoch": 0.41,
"learning_rate": 1.3301893877619874e-05,
"loss": 3.4029,
"step": 4010
},
{
"epoch": 0.41,
"learning_rate": 1.3286196076804174e-05,
"loss": 3.3907,
"step": 4015
},
{
"epoch": 0.41,
"learning_rate": 1.3270489192485606e-05,
"loss": 3.3601,
"step": 4020
},
{
"epoch": 0.41,
"learning_rate": 1.3254773268080182e-05,
"loss": 3.3961,
"step": 4025
},
{
"epoch": 0.41,
"learning_rate": 1.32390483470289e-05,
"loss": 3.3803,
"step": 4030
},
{
"epoch": 0.41,
"learning_rate": 1.3223314472797632e-05,
"loss": 3.3675,
"step": 4035
},
{
"epoch": 0.41,
"learning_rate": 1.3207571688876994e-05,
"loss": 3.3571,
"step": 4040
},
{
"epoch": 0.42,
"learning_rate": 1.3191820038782228e-05,
"loss": 3.3868,
"step": 4045
},
{
"epoch": 0.42,
"learning_rate": 1.3176059566053083e-05,
"loss": 3.3573,
"step": 4050
},
{
"epoch": 0.42,
"learning_rate": 1.31602903142537e-05,
"loss": 3.3634,
"step": 4055
},
{
"epoch": 0.42,
"learning_rate": 1.3144512326972485e-05,
"loss": 3.3925,
"step": 4060
},
{
"epoch": 0.42,
"learning_rate": 1.3128725647821984e-05,
"loss": 3.3932,
"step": 4065
},
{
"epoch": 0.42,
"learning_rate": 1.3112930320438774e-05,
"loss": 3.3967,
"step": 4070
},
{
"epoch": 0.42,
"learning_rate": 1.3097126388483342e-05,
"loss": 3.3996,
"step": 4075
},
{
"epoch": 0.42,
"learning_rate": 1.3081313895639945e-05,
"loss": 3.4227,
"step": 4080
},
{
"epoch": 0.42,
"learning_rate": 1.3065492885616518e-05,
"loss": 3.3741,
"step": 4085
},
{
"epoch": 0.42,
"learning_rate": 1.3049663402144528e-05,
"loss": 3.3489,
"step": 4090
},
{
"epoch": 0.42,
"learning_rate": 1.3033825488978868e-05,
"loss": 3.3507,
"step": 4095
},
{
"epoch": 0.42,
"learning_rate": 1.3017979189897738e-05,
"loss": 3.3763,
"step": 4100
},
{
"epoch": 0.42,
"learning_rate": 1.300212454870251e-05,
"loss": 3.3914,
"step": 4105
},
{
"epoch": 0.42,
"learning_rate": 1.2986261609217612e-05,
"loss": 3.3686,
"step": 4110
},
{
"epoch": 0.42,
"learning_rate": 1.2970390415290416e-05,
"loss": 3.3706,
"step": 4115
},
{
"epoch": 0.42,
"learning_rate": 1.2954511010791111e-05,
"loss": 3.3742,
"step": 4120
},
{
"epoch": 0.42,
"learning_rate": 1.2938623439612581e-05,
"loss": 3.3872,
"step": 4125
},
{
"epoch": 0.42,
"learning_rate": 1.2922727745670276e-05,
"loss": 3.3631,
"step": 4130
},
{
"epoch": 0.42,
"learning_rate": 1.2906823972902105e-05,
"loss": 3.3563,
"step": 4135
},
{
"epoch": 0.42,
"learning_rate": 1.2890912165268315e-05,
"loss": 3.3495,
"step": 4140
},
{
"epoch": 0.43,
"learning_rate": 1.2874992366751342e-05,
"loss": 3.3806,
"step": 4145
},
{
"epoch": 0.43,
"learning_rate": 1.2859064621355735e-05,
"loss": 3.378,
"step": 4150
},
{
"epoch": 0.43,
"learning_rate": 1.2843128973107988e-05,
"loss": 3.332,
"step": 4155
},
{
"epoch": 0.43,
"learning_rate": 1.282718546605645e-05,
"loss": 3.3848,
"step": 4160
},
{
"epoch": 0.43,
"learning_rate": 1.2811234144271193e-05,
"loss": 3.3649,
"step": 4165
},
{
"epoch": 0.43,
"learning_rate": 1.2795275051843893e-05,
"loss": 3.3807,
"step": 4170
},
{
"epoch": 0.43,
"learning_rate": 1.2779308232887692e-05,
"loss": 3.3834,
"step": 4175
},
{
"epoch": 0.43,
"learning_rate": 1.2763333731537102e-05,
"loss": 3.3647,
"step": 4180
},
{
"epoch": 0.43,
"learning_rate": 1.2747351591947862e-05,
"loss": 3.3481,
"step": 4185
},
{
"epoch": 0.43,
"learning_rate": 1.2731361858296833e-05,
"loss": 3.3643,
"step": 4190
},
{
"epoch": 0.43,
"learning_rate": 1.2715364574781864e-05,
"loss": 3.3734,
"step": 4195
},
{
"epoch": 0.43,
"learning_rate": 1.2699359785621663e-05,
"loss": 3.3656,
"step": 4200
},
{
"epoch": 0.43,
"learning_rate": 1.2683347535055694e-05,
"loss": 3.3747,
"step": 4205
},
{
"epoch": 0.43,
"learning_rate": 1.266732786734405e-05,
"loss": 3.3535,
"step": 4210
},
{
"epoch": 0.43,
"learning_rate": 1.2651300826767317e-05,
"loss": 3.332,
"step": 4215
},
{
"epoch": 0.43,
"learning_rate": 1.2635266457626461e-05,
"loss": 3.3766,
"step": 4220
},
{
"epoch": 0.43,
"learning_rate": 1.261922480424271e-05,
"loss": 3.372,
"step": 4225
},
{
"epoch": 0.43,
"learning_rate": 1.260317591095742e-05,
"loss": 3.3389,
"step": 4230
},
{
"epoch": 0.43,
"learning_rate": 1.2587119822131975e-05,
"loss": 3.3996,
"step": 4235
},
{
"epoch": 0.44,
"learning_rate": 1.2571056582147625e-05,
"loss": 3.3387,
"step": 4240
},
{
"epoch": 0.44,
"learning_rate": 1.2554986235405402e-05,
"loss": 3.36,
"step": 4245
},
{
"epoch": 0.44,
"learning_rate": 1.253890882632598e-05,
"loss": 3.3494,
"step": 4250
},
{
"epoch": 0.44,
"learning_rate": 1.2522824399349553e-05,
"loss": 3.3745,
"step": 4255
},
{
"epoch": 0.44,
"learning_rate": 1.2506732998935717e-05,
"loss": 3.3513,
"step": 4260
},
{
"epoch": 0.44,
"learning_rate": 1.2490634669563338e-05,
"loss": 3.376,
"step": 4265
},
{
"epoch": 0.44,
"learning_rate": 1.2474529455730429e-05,
"loss": 3.3596,
"step": 4270
},
{
"epoch": 0.44,
"learning_rate": 1.2458417401954048e-05,
"loss": 3.3764,
"step": 4275
},
{
"epoch": 0.44,
"learning_rate": 1.2442298552770151e-05,
"loss": 3.3739,
"step": 4280
},
{
"epoch": 0.44,
"learning_rate": 1.2426172952733482e-05,
"loss": 3.3721,
"step": 4285
},
{
"epoch": 0.44,
"learning_rate": 1.2410040646417431e-05,
"loss": 3.3646,
"step": 4290
},
{
"epoch": 0.44,
"learning_rate": 1.2393901678413944e-05,
"loss": 3.3435,
"step": 4295
},
{
"epoch": 0.44,
"learning_rate": 1.2377756093333371e-05,
"loss": 3.383,
"step": 4300
},
{
"epoch": 0.44,
"learning_rate": 1.2361603935804357e-05,
"loss": 3.3563,
"step": 4305
},
{
"epoch": 0.44,
"learning_rate": 1.2345445250473702e-05,
"loss": 3.3686,
"step": 4310
},
{
"epoch": 0.44,
"learning_rate": 1.2329280082006268e-05,
"loss": 3.3408,
"step": 4315
},
{
"epoch": 0.44,
"learning_rate": 1.2313108475084823e-05,
"loss": 3.3663,
"step": 4320
},
{
"epoch": 0.44,
"learning_rate": 1.2296930474409943e-05,
"loss": 3.3789,
"step": 4325
},
{
"epoch": 0.44,
"learning_rate": 1.2280746124699864e-05,
"loss": 3.3563,
"step": 4330
},
{
"epoch": 0.45,
"learning_rate": 1.2264555470690381e-05,
"loss": 3.3576,
"step": 4335
},
{
"epoch": 0.45,
"learning_rate": 1.2248358557134714e-05,
"loss": 3.3725,
"step": 4340
},
{
"epoch": 0.45,
"learning_rate": 1.2232155428803387e-05,
"loss": 3.3735,
"step": 4345
},
{
"epoch": 0.45,
"learning_rate": 1.2215946130484096e-05,
"loss": 3.386,
"step": 4350
},
{
"epoch": 0.45,
"learning_rate": 1.2199730706981594e-05,
"loss": 3.3951,
"step": 4355
},
{
"epoch": 0.45,
"learning_rate": 1.218350920311757e-05,
"loss": 3.3483,
"step": 4360
},
{
"epoch": 0.45,
"learning_rate": 1.2167281663730512e-05,
"loss": 3.3281,
"step": 4365
},
{
"epoch": 0.45,
"learning_rate": 1.21510481336756e-05,
"loss": 3.3764,
"step": 4370
},
{
"epoch": 0.45,
"learning_rate": 1.2134808657824564e-05,
"loss": 3.3487,
"step": 4375
},
{
"epoch": 0.45,
"learning_rate": 1.2118563281065574e-05,
"loss": 3.3574,
"step": 4380
},
{
"epoch": 0.45,
"learning_rate": 1.2102312048303111e-05,
"loss": 3.3336,
"step": 4385
},
{
"epoch": 0.45,
"learning_rate": 1.2086055004457844e-05,
"loss": 3.3471,
"step": 4390
},
{
"epoch": 0.45,
"learning_rate": 1.2069792194466499e-05,
"loss": 3.3387,
"step": 4395
},
{
"epoch": 0.45,
"learning_rate": 1.2053523663281745e-05,
"loss": 3.3496,
"step": 4400
},
{
"epoch": 0.45,
"learning_rate": 1.2037249455872065e-05,
"loss": 3.3761,
"step": 4405
},
{
"epoch": 0.45,
"learning_rate": 1.2020969617221627e-05,
"loss": 3.3492,
"step": 4410
},
{
"epoch": 0.45,
"learning_rate": 1.2004684192330176e-05,
"loss": 3.3398,
"step": 4415
},
{
"epoch": 0.45,
"learning_rate": 1.1988393226212884e-05,
"loss": 3.3646,
"step": 4420
},
{
"epoch": 0.45,
"learning_rate": 1.1972096763900252e-05,
"loss": 3.41,
"step": 4425
},
{
"epoch": 0.45,
"learning_rate": 1.1955794850437962e-05,
"loss": 3.3711,
"step": 4430
},
{
"epoch": 0.46,
"learning_rate": 1.1939487530886776e-05,
"loss": 3.371,
"step": 4435
},
{
"epoch": 0.46,
"learning_rate": 1.1923174850322385e-05,
"loss": 3.347,
"step": 4440
},
{
"epoch": 0.46,
"learning_rate": 1.1906856853835312e-05,
"loss": 3.3773,
"step": 4445
},
{
"epoch": 0.46,
"learning_rate": 1.1890533586530766e-05,
"loss": 3.3762,
"step": 4450
},
{
"epoch": 0.46,
"learning_rate": 1.1874205093528525e-05,
"loss": 3.3312,
"step": 4455
},
{
"epoch": 0.46,
"learning_rate": 1.1857871419962823e-05,
"loss": 3.3581,
"step": 4460
},
{
"epoch": 0.46,
"learning_rate": 1.1841532610982194e-05,
"loss": 3.3483,
"step": 4465
},
{
"epoch": 0.46,
"learning_rate": 1.182518871174938e-05,
"loss": 3.3597,
"step": 4470
},
{
"epoch": 0.46,
"learning_rate": 1.1808839767441196e-05,
"loss": 3.3726,
"step": 4475
},
{
"epoch": 0.46,
"learning_rate": 1.1792485823248396e-05,
"loss": 3.3494,
"step": 4480
},
{
"epoch": 0.46,
"learning_rate": 1.1776126924375553e-05,
"loss": 3.3468,
"step": 4485
},
{
"epoch": 0.46,
"learning_rate": 1.1759763116040936e-05,
"loss": 3.3237,
"step": 4490
},
{
"epoch": 0.46,
"learning_rate": 1.174339444347639e-05,
"loss": 3.3899,
"step": 4495
},
{
"epoch": 0.46,
"learning_rate": 1.1727020951927206e-05,
"loss": 3.3624,
"step": 4500
},
{
"epoch": 0.46,
"learning_rate": 1.1710642686651981e-05,
"loss": 3.3505,
"step": 4505
},
{
"epoch": 0.46,
"learning_rate": 1.1694259692922525e-05,
"loss": 3.403,
"step": 4510
},
{
"epoch": 0.46,
"learning_rate": 1.1677872016023707e-05,
"loss": 3.3984,
"step": 4515
},
{
"epoch": 0.46,
"learning_rate": 1.1661479701253348e-05,
"loss": 3.3749,
"step": 4520
},
{
"epoch": 0.46,
"learning_rate": 1.1645082793922085e-05,
"loss": 3.3832,
"step": 4525
},
{
"epoch": 0.47,
"learning_rate": 1.1628681339353244e-05,
"loss": 3.3484,
"step": 4530
},
{
"epoch": 0.47,
"learning_rate": 1.161227538288273e-05,
"loss": 3.3754,
"step": 4535
},
{
"epoch": 0.47,
"learning_rate": 1.1595864969858888e-05,
"loss": 3.3544,
"step": 4540
},
{
"epoch": 0.47,
"learning_rate": 1.1579450145642382e-05,
"loss": 3.3428,
"step": 4545
},
{
"epoch": 0.47,
"learning_rate": 1.1563030955606067e-05,
"loss": 3.3489,
"step": 4550
},
{
"epoch": 0.47,
"learning_rate": 1.1546607445134865e-05,
"loss": 3.3652,
"step": 4555
},
{
"epoch": 0.47,
"learning_rate": 1.1530179659625647e-05,
"loss": 3.3745,
"step": 4560
},
{
"epoch": 0.47,
"learning_rate": 1.1513747644487091e-05,
"loss": 3.375,
"step": 4565
},
{
"epoch": 0.47,
"learning_rate": 1.149731144513958e-05,
"loss": 3.3633,
"step": 4570
},
{
"epoch": 0.47,
"learning_rate": 1.1480871107015047e-05,
"loss": 3.3774,
"step": 4575
},
{
"epoch": 0.47,
"learning_rate": 1.1464426675556873e-05,
"loss": 3.3696,
"step": 4580
},
{
"epoch": 0.47,
"learning_rate": 1.1447978196219754e-05,
"loss": 3.332,
"step": 4585
},
{
"epoch": 0.47,
"learning_rate": 1.1431525714469576e-05,
"loss": 3.3364,
"step": 4590
},
{
"epoch": 0.47,
"learning_rate": 1.141506927578328e-05,
"loss": 3.3759,
"step": 4595
},
{
"epoch": 0.47,
"learning_rate": 1.139860892564876e-05,
"loss": 3.347,
"step": 4600
},
{
"epoch": 0.47,
"learning_rate": 1.1382144709564703e-05,
"loss": 3.3402,
"step": 4605
},
{
"epoch": 0.47,
"learning_rate": 1.1365676673040502e-05,
"loss": 3.327,
"step": 4610
},
{
"epoch": 0.47,
"learning_rate": 1.134920486159609e-05,
"loss": 3.3341,
"step": 4615
},
{
"epoch": 0.47,
"learning_rate": 1.1332729320761846e-05,
"loss": 3.3715,
"step": 4620
},
{
"epoch": 0.47,
"learning_rate": 1.1316250096078458e-05,
"loss": 3.3556,
"step": 4625
},
{
"epoch": 0.48,
"learning_rate": 1.1299767233096794e-05,
"loss": 3.3632,
"step": 4630
},
{
"epoch": 0.48,
"learning_rate": 1.128328077737778e-05,
"loss": 3.3481,
"step": 4635
},
{
"epoch": 0.48,
"learning_rate": 1.126679077449227e-05,
"loss": 3.3603,
"step": 4640
},
{
"epoch": 0.48,
"learning_rate": 1.1250297270020922e-05,
"loss": 3.3334,
"step": 4645
},
{
"epoch": 0.48,
"learning_rate": 1.1233800309554083e-05,
"loss": 3.3521,
"step": 4650
},
{
"epoch": 0.48,
"learning_rate": 1.1217299938691639e-05,
"loss": 3.356,
"step": 4655
},
{
"epoch": 0.48,
"learning_rate": 1.1200796203042912e-05,
"loss": 3.3798,
"step": 4660
},
{
"epoch": 0.48,
"learning_rate": 1.1184289148226521e-05,
"loss": 3.354,
"step": 4665
},
{
"epoch": 0.48,
"learning_rate": 1.116777881987026e-05,
"loss": 3.3578,
"step": 4670
},
{
"epoch": 0.48,
"learning_rate": 1.1151265263610975e-05,
"loss": 3.3504,
"step": 4675
},
{
"epoch": 0.48,
"learning_rate": 1.113474852509443e-05,
"loss": 3.3549,
"step": 4680
},
{
"epoch": 0.48,
"learning_rate": 1.1118228649975185e-05,
"loss": 3.3461,
"step": 4685
},
{
"epoch": 0.48,
"learning_rate": 1.1101705683916473e-05,
"loss": 3.3459,
"step": 4690
},
{
"epoch": 0.48,
"learning_rate": 1.1085179672590072e-05,
"loss": 3.3561,
"step": 4695
},
{
"epoch": 0.48,
"learning_rate": 1.1068650661676173e-05,
"loss": 3.3829,
"step": 4700
},
{
"epoch": 0.48,
"learning_rate": 1.1052118696863258e-05,
"loss": 3.3625,
"step": 4705
},
{
"epoch": 0.48,
"learning_rate": 1.103558382384798e-05,
"loss": 3.3398,
"step": 4710
},
{
"epoch": 0.48,
"learning_rate": 1.1019046088335023e-05,
"loss": 3.3633,
"step": 4715
},
{
"epoch": 0.48,
"learning_rate": 1.1002505536036997e-05,
"loss": 3.3527,
"step": 4720
},
{
"epoch": 0.49,
"learning_rate": 1.0985962212674275e-05,
"loss": 3.3526,
"step": 4725
},
{
"epoch": 0.49,
"learning_rate": 1.096941616397491e-05,
"loss": 3.3715,
"step": 4730
},
{
"epoch": 0.49,
"learning_rate": 1.095286743567448e-05,
"loss": 3.3388,
"step": 4735
},
{
"epoch": 0.49,
"learning_rate": 1.0936316073515973e-05,
"loss": 3.353,
"step": 4740
},
{
"epoch": 0.49,
"learning_rate": 1.0919762123249656e-05,
"loss": 3.3712,
"step": 4745
},
{
"epoch": 0.49,
"learning_rate": 1.0903205630632942e-05,
"loss": 3.3622,
"step": 4750
},
{
"epoch": 0.49,
"learning_rate": 1.0886646641430288e-05,
"loss": 3.3788,
"step": 4755
},
{
"epoch": 0.49,
"learning_rate": 1.0870085201413034e-05,
"loss": 3.3842,
"step": 4760
},
{
"epoch": 0.49,
"learning_rate": 1.0853521356359312e-05,
"loss": 3.3665,
"step": 4765
},
{
"epoch": 0.49,
"learning_rate": 1.0836955152053883e-05,
"loss": 3.3418,
"step": 4770
},
{
"epoch": 0.49,
"learning_rate": 1.0820386634288045e-05,
"loss": 3.3411,
"step": 4775
},
{
"epoch": 0.49,
"learning_rate": 1.0803815848859485e-05,
"loss": 3.3331,
"step": 4780
},
{
"epoch": 0.49,
"learning_rate": 1.0787242841572154e-05,
"loss": 3.374,
"step": 4785
},
{
"epoch": 0.49,
"learning_rate": 1.0770667658236156e-05,
"loss": 3.3842,
"step": 4790
},
{
"epoch": 0.49,
"learning_rate": 1.0754090344667591e-05,
"loss": 3.3619,
"step": 4795
},
{
"epoch": 0.49,
"learning_rate": 1.0737510946688468e-05,
"loss": 3.3549,
"step": 4800
},
{
"epoch": 0.49,
"learning_rate": 1.0720929510126543e-05,
"loss": 3.3514,
"step": 4805
},
{
"epoch": 0.49,
"learning_rate": 1.0704346080815218e-05,
"loss": 3.3746,
"step": 4810
},
{
"epoch": 0.49,
"learning_rate": 1.068776070459339e-05,
"loss": 3.3746,
"step": 4815
},
{
"epoch": 0.49,
"learning_rate": 1.067117342730535e-05,
"loss": 3.3469,
"step": 4820
},
{
"epoch": 0.5,
"learning_rate": 1.0654584294800636e-05,
"loss": 3.3524,
"step": 4825
},
{
"epoch": 0.5,
"learning_rate": 1.0637993352933917e-05,
"loss": 3.3609,
"step": 4830
},
{
"epoch": 0.5,
"learning_rate": 1.062140064756487e-05,
"loss": 3.3318,
"step": 4835
},
{
"epoch": 0.5,
"learning_rate": 1.0604806224558028e-05,
"loss": 3.3615,
"step": 4840
},
{
"epoch": 0.5,
"learning_rate": 1.058821012978269e-05,
"loss": 3.3568,
"step": 4845
},
{
"epoch": 0.5,
"learning_rate": 1.057161240911277e-05,
"loss": 3.3374,
"step": 4850
},
{
"epoch": 0.5,
"learning_rate": 1.0555013108426675e-05,
"loss": 3.3645,
"step": 4855
},
{
"epoch": 0.5,
"learning_rate": 1.053841227360718e-05,
"loss": 3.363,
"step": 4860
},
{
"epoch": 0.5,
"learning_rate": 1.0521809950541298e-05,
"loss": 3.3724,
"step": 4865
},
{
"epoch": 0.5,
"learning_rate": 1.050520618512016e-05,
"loss": 3.3864,
"step": 4870
},
{
"epoch": 0.5,
"learning_rate": 1.0488601023238885e-05,
"loss": 3.3368,
"step": 4875
},
{
"epoch": 0.5,
"learning_rate": 1.0471994510796444e-05,
"loss": 3.3616,
"step": 4880
},
{
"epoch": 0.5,
"learning_rate": 1.045538669369555e-05,
"loss": 3.3979,
"step": 4885
},
{
"epoch": 0.5,
"learning_rate": 1.043877761784252e-05,
"loss": 3.3696,
"step": 4890
},
{
"epoch": 0.5,
"learning_rate": 1.0422167329147145e-05,
"loss": 3.3609,
"step": 4895
},
{
"epoch": 0.5,
"learning_rate": 1.0405555873522576e-05,
"loss": 3.3782,
"step": 4900
},
{
"epoch": 0.5,
"learning_rate": 1.0388943296885181e-05,
"loss": 3.3268,
"step": 4905
},
{
"epoch": 0.5,
"learning_rate": 1.037232964515444e-05,
"loss": 3.3384,
"step": 4910
},
{
"epoch": 0.5,
"learning_rate": 1.0355714964252786e-05,
"loss": 3.3798,
"step": 4915
},
{
"epoch": 0.51,
"learning_rate": 1.0339099300105513e-05,
"loss": 3.3563,
"step": 4920
},
{
"epoch": 0.51,
"learning_rate": 1.0322482698640631e-05,
"loss": 3.3752,
"step": 4925
},
{
"epoch": 0.51,
"learning_rate": 1.0305865205788728e-05,
"loss": 3.3422,
"step": 4930
},
{
"epoch": 0.51,
"learning_rate": 1.0289246867482868e-05,
"loss": 3.3152,
"step": 4935
},
{
"epoch": 0.51,
"learning_rate": 1.027262772965845e-05,
"loss": 3.35,
"step": 4940
},
{
"epoch": 0.51,
"learning_rate": 1.0256007838253084e-05,
"loss": 3.3226,
"step": 4945
},
{
"epoch": 0.51,
"learning_rate": 1.0239387239206455e-05,
"loss": 3.3701,
"step": 4950
},
{
"epoch": 0.51,
"learning_rate": 1.0222765978460211e-05,
"loss": 3.3537,
"step": 4955
},
{
"epoch": 0.51,
"learning_rate": 1.0206144101957831e-05,
"loss": 3.3488,
"step": 4960
},
{
"epoch": 0.51,
"learning_rate": 1.0189521655644495e-05,
"loss": 3.3563,
"step": 4965
},
{
"epoch": 0.51,
"learning_rate": 1.0172898685466947e-05,
"loss": 3.3255,
"step": 4970
},
{
"epoch": 0.51,
"learning_rate": 1.0156275237373394e-05,
"loss": 3.3641,
"step": 4975
},
{
"epoch": 0.51,
"learning_rate": 1.0139651357313354e-05,
"loss": 3.3227,
"step": 4980
},
{
"epoch": 0.51,
"learning_rate": 1.0123027091237549e-05,
"loss": 3.3251,
"step": 4985
},
{
"epoch": 0.51,
"learning_rate": 1.0106402485097757e-05,
"loss": 3.3351,
"step": 4990
},
{
"epoch": 0.51,
"learning_rate": 1.00897775848467e-05,
"loss": 3.3688,
"step": 4995
},
{
"epoch": 0.51,
"learning_rate": 1.0073152436437918e-05,
"loss": 3.3765,
"step": 5000
},
{
"epoch": 0.51,
"learning_rate": 1.0056527085825629e-05,
"loss": 3.3487,
"step": 5005
},
{
"epoch": 0.51,
"learning_rate": 1.0039901578964619e-05,
"loss": 3.3531,
"step": 5010
},
{
"epoch": 0.51,
"learning_rate": 1.0023275961810095e-05,
"loss": 3.3649,
"step": 5015
},
{
"epoch": 0.52,
"learning_rate": 1.0006650280317573e-05,
"loss": 3.3219,
"step": 5020
},
{
"epoch": 0.52,
"learning_rate": 9.990024580442754e-06,
"loss": 3.3799,
"step": 5025
},
{
"epoch": 0.52,
"learning_rate": 9.973398908141383e-06,
"loss": 3.3305,
"step": 5030
},
{
"epoch": 0.52,
"learning_rate": 9.956773309369128e-06,
"loss": 3.3466,
"step": 5035
},
{
"epoch": 0.52,
"learning_rate": 9.940147830081455e-06,
"loss": 3.3418,
"step": 5040
},
{
"epoch": 0.52,
"learning_rate": 9.9235225162335e-06,
"loss": 3.3444,
"step": 5045
},
{
"epoch": 0.52,
"learning_rate": 9.906897413779949e-06,
"loss": 3.3251,
"step": 5050
},
{
"epoch": 0.52,
"learning_rate": 9.890272568674886e-06,
"loss": 3.3345,
"step": 5055
},
{
"epoch": 0.52,
"learning_rate": 9.873648026871701e-06,
"loss": 3.3607,
"step": 5060
},
{
"epoch": 0.52,
"learning_rate": 9.857023834322937e-06,
"loss": 3.3777,
"step": 5065
},
{
"epoch": 0.52,
"learning_rate": 9.840400036980176e-06,
"loss": 3.348,
"step": 5070
},
{
"epoch": 0.52,
"learning_rate": 9.823776680793904e-06,
"loss": 3.3405,
"step": 5075
},
{
"epoch": 0.52,
"learning_rate": 9.807153811713386e-06,
"loss": 3.3557,
"step": 5080
},
{
"epoch": 0.52,
"learning_rate": 9.790531475686546e-06,
"loss": 3.3467,
"step": 5085
},
{
"epoch": 0.52,
"learning_rate": 9.773909718659831e-06,
"loss": 3.3388,
"step": 5090
},
{
"epoch": 0.52,
"learning_rate": 9.757288586578093e-06,
"loss": 3.3561,
"step": 5095
},
{
"epoch": 0.52,
"learning_rate": 9.74066812538445e-06,
"loss": 3.3206,
"step": 5100
},
{
"epoch": 0.52,
"learning_rate": 9.724048381020162e-06,
"loss": 3.3642,
"step": 5105
},
{
"epoch": 0.52,
"learning_rate": 9.707429399424514e-06,
"loss": 3.3225,
"step": 5110
},
{
"epoch": 0.53,
"learning_rate": 9.690811226534688e-06,
"loss": 3.3208,
"step": 5115
},
{
"epoch": 0.53,
"learning_rate": 9.67419390828562e-06,
"loss": 3.347,
"step": 5120
},
{
"epoch": 0.53,
"learning_rate": 9.657577490609893e-06,
"loss": 3.3575,
"step": 5125
},
{
"epoch": 0.53,
"learning_rate": 9.64096201943759e-06,
"loss": 3.3727,
"step": 5130
},
{
"epoch": 0.53,
"learning_rate": 9.624347540696184e-06,
"loss": 3.3379,
"step": 5135
},
{
"epoch": 0.53,
"learning_rate": 9.607734100310408e-06,
"loss": 3.3449,
"step": 5140
},
{
"epoch": 0.53,
"learning_rate": 9.59112174420212e-06,
"loss": 3.3356,
"step": 5145
},
{
"epoch": 0.53,
"learning_rate": 9.57451051829018e-06,
"loss": 3.3262,
"step": 5150
},
{
"epoch": 0.53,
"learning_rate": 9.557900468490327e-06,
"loss": 3.3648,
"step": 5155
},
{
"epoch": 0.53,
"learning_rate": 9.541291640715047e-06,
"loss": 3.359,
"step": 5160
},
{
"epoch": 0.53,
"learning_rate": 9.524684080873456e-06,
"loss": 3.3426,
"step": 5165
},
{
"epoch": 0.53,
"learning_rate": 9.50807783487115e-06,
"loss": 3.3423,
"step": 5170
},
{
"epoch": 0.53,
"learning_rate": 9.491472948610105e-06,
"loss": 3.3503,
"step": 5175
},
{
"epoch": 0.53,
"learning_rate": 9.474869467988534e-06,
"loss": 3.316,
"step": 5180
},
{
"epoch": 0.53,
"learning_rate": 9.45826743890077e-06,
"loss": 3.3335,
"step": 5185
},
{
"epoch": 0.53,
"learning_rate": 9.441666907237127e-06,
"loss": 3.3285,
"step": 5190
},
{
"epoch": 0.53,
"learning_rate": 9.425067918883781e-06,
"loss": 3.3326,
"step": 5195
},
{
"epoch": 0.53,
"learning_rate": 9.408470519722646e-06,
"loss": 3.3575,
"step": 5200
},
{
"epoch": 0.53,
"learning_rate": 9.391874755631241e-06,
"loss": 3.3475,
"step": 5205
},
{
"epoch": 0.53,
"learning_rate": 9.375280672482567e-06,
"loss": 3.3359,
"step": 5210
},
{
"epoch": 0.54,
"learning_rate": 9.358688316144972e-06,
"loss": 3.3548,
"step": 5215
},
{
"epoch": 0.54,
"learning_rate": 9.342097732482041e-06,
"loss": 3.3333,
"step": 5220
},
{
"epoch": 0.54,
"learning_rate": 9.32550896735245e-06,
"loss": 3.3762,
"step": 5225
},
{
"epoch": 0.54,
"learning_rate": 9.308922066609858e-06,
"loss": 3.3474,
"step": 5230
},
{
"epoch": 0.54,
"learning_rate": 9.292337076102758e-06,
"loss": 3.3459,
"step": 5235
},
{
"epoch": 0.54,
"learning_rate": 9.275754041674373e-06,
"loss": 3.3069,
"step": 5240
},
{
"epoch": 0.54,
"learning_rate": 9.259173009162515e-06,
"loss": 3.3358,
"step": 5245
},
{
"epoch": 0.54,
"learning_rate": 9.242594024399467e-06,
"loss": 3.3431,
"step": 5250
},
{
"epoch": 0.54,
"learning_rate": 9.226017133211843e-06,
"loss": 3.3378,
"step": 5255
},
{
"epoch": 0.54,
"learning_rate": 9.209442381420476e-06,
"loss": 3.3214,
"step": 5260
},
{
"epoch": 0.54,
"learning_rate": 9.192869814840288e-06,
"loss": 3.3363,
"step": 5265
},
{
"epoch": 0.54,
"learning_rate": 9.176299479280155e-06,
"loss": 3.3669,
"step": 5270
},
{
"epoch": 0.54,
"learning_rate": 9.159731420542786e-06,
"loss": 3.3256,
"step": 5275
},
{
"epoch": 0.54,
"learning_rate": 9.143165684424604e-06,
"loss": 3.3208,
"step": 5280
},
{
"epoch": 0.54,
"learning_rate": 9.126602316715601e-06,
"loss": 3.3535,
"step": 5285
},
{
"epoch": 0.54,
"learning_rate": 9.110041363199233e-06,
"loss": 3.328,
"step": 5290
},
{
"epoch": 0.54,
"learning_rate": 9.093482869652279e-06,
"loss": 3.3766,
"step": 5295
},
{
"epoch": 0.54,
"learning_rate": 9.076926881844713e-06,
"loss": 3.3399,
"step": 5300
},
{
"epoch": 0.54,
"learning_rate": 9.06037344553959e-06,
"loss": 3.3526,
"step": 5305
},
{
"epoch": 0.55,
"learning_rate": 9.043822606492907e-06,
"loss": 3.3426,
"step": 5310
},
{
"epoch": 0.55,
"learning_rate": 9.027274410453489e-06,
"loss": 3.3251,
"step": 5315
},
{
"epoch": 0.55,
"learning_rate": 9.010728903162846e-06,
"loss": 3.3554,
"step": 5320
},
{
"epoch": 0.55,
"learning_rate": 8.994186130355063e-06,
"loss": 3.3117,
"step": 5325
},
{
"epoch": 0.55,
"learning_rate": 8.977646137756662e-06,
"loss": 3.3107,
"step": 5330
},
{
"epoch": 0.55,
"learning_rate": 8.961108971086489e-06,
"loss": 3.3492,
"step": 5335
},
{
"epoch": 0.55,
"learning_rate": 8.944574676055564e-06,
"loss": 3.3525,
"step": 5340
},
{
"epoch": 0.55,
"learning_rate": 8.928043298366979e-06,
"loss": 3.3634,
"step": 5345
},
{
"epoch": 0.55,
"learning_rate": 8.911514883715763e-06,
"loss": 3.3353,
"step": 5350
},
{
"epoch": 0.55,
"learning_rate": 8.894989477788753e-06,
"loss": 3.3184,
"step": 5355
},
{
"epoch": 0.55,
"learning_rate": 8.878467126264467e-06,
"loss": 3.343,
"step": 5360
},
{
"epoch": 0.55,
"learning_rate": 8.86194787481298e-06,
"loss": 3.3254,
"step": 5365
},
{
"epoch": 0.55,
"learning_rate": 8.8454317690958e-06,
"loss": 3.3286,
"step": 5370
},
{
"epoch": 0.55,
"learning_rate": 8.828918854765742e-06,
"loss": 3.3469,
"step": 5375
},
{
"epoch": 0.55,
"learning_rate": 8.812409177466796e-06,
"loss": 3.3302,
"step": 5380
},
{
"epoch": 0.55,
"learning_rate": 8.795902782834006e-06,
"loss": 3.3324,
"step": 5385
},
{
"epoch": 0.55,
"learning_rate": 8.779399716493342e-06,
"loss": 3.3155,
"step": 5390
},
{
"epoch": 0.55,
"learning_rate": 8.762900024061572e-06,
"loss": 3.3336,
"step": 5395
},
{
"epoch": 0.55,
"learning_rate": 8.746403751146142e-06,
"loss": 3.3134,
"step": 5400
},
{
"epoch": 0.55,
"learning_rate": 8.729910943345044e-06,
"loss": 3.3502,
"step": 5405
},
{
"epoch": 0.56,
"learning_rate": 8.713421646246692e-06,
"loss": 3.3115,
"step": 5410
},
{
"epoch": 0.56,
"learning_rate": 8.696935905429793e-06,
"loss": 3.3137,
"step": 5415
},
{
"epoch": 0.56,
"learning_rate": 8.68045376646323e-06,
"loss": 3.3459,
"step": 5420
},
{
"epoch": 0.56,
"learning_rate": 8.663975274905926e-06,
"loss": 3.3688,
"step": 5425
},
{
"epoch": 0.56,
"learning_rate": 8.647500476306724e-06,
"loss": 3.3532,
"step": 5430
},
{
"epoch": 0.56,
"learning_rate": 8.631029416204255e-06,
"loss": 3.3263,
"step": 5435
},
{
"epoch": 0.56,
"learning_rate": 8.61456214012682e-06,
"loss": 3.3302,
"step": 5440
},
{
"epoch": 0.56,
"learning_rate": 8.598098693592263e-06,
"loss": 3.3703,
"step": 5445
},
{
"epoch": 0.56,
"learning_rate": 8.581639122107837e-06,
"loss": 3.3089,
"step": 5450
},
{
"epoch": 0.56,
"learning_rate": 8.565183471170084e-06,
"loss": 3.3272,
"step": 5455
},
{
"epoch": 0.56,
"learning_rate": 8.548731786264713e-06,
"loss": 3.3306,
"step": 5460
},
{
"epoch": 0.56,
"learning_rate": 8.532284112866469e-06,
"loss": 3.3481,
"step": 5465
},
{
"epoch": 0.56,
"learning_rate": 8.515840496439009e-06,
"loss": 3.3229,
"step": 5470
},
{
"epoch": 0.56,
"learning_rate": 8.499400982434773e-06,
"loss": 3.3144,
"step": 5475
},
{
"epoch": 0.56,
"learning_rate": 8.482965616294863e-06,
"loss": 3.3318,
"step": 5480
},
{
"epoch": 0.56,
"learning_rate": 8.466534443448918e-06,
"loss": 3.29,
"step": 5485
},
{
"epoch": 0.56,
"learning_rate": 8.450107509314983e-06,
"loss": 3.3388,
"step": 5490
},
{
"epoch": 0.56,
"learning_rate": 8.433684859299394e-06,
"loss": 3.3479,
"step": 5495
},
{
"epoch": 0.56,
"learning_rate": 8.41726653879663e-06,
"loss": 3.2932,
"step": 5500
},
{
"epoch": 0.57,
"learning_rate": 8.400852593189214e-06,
"loss": 3.3142,
"step": 5505
},
{
"epoch": 0.57,
"learning_rate": 8.384443067847578e-06,
"loss": 3.3353,
"step": 5510
},
{
"epoch": 0.57,
"learning_rate": 8.36803800812993e-06,
"loss": 3.3384,
"step": 5515
},
{
"epoch": 0.57,
"learning_rate": 8.351637459382133e-06,
"loss": 3.3627,
"step": 5520
},
{
"epoch": 0.57,
"learning_rate": 8.335241466937585e-06,
"loss": 3.3282,
"step": 5525
},
{
"epoch": 0.57,
"learning_rate": 8.31885007611709e-06,
"loss": 3.3118,
"step": 5530
},
{
"epoch": 0.57,
"learning_rate": 8.302463332228734e-06,
"loss": 3.3339,
"step": 5535
},
{
"epoch": 0.57,
"learning_rate": 8.286081280567751e-06,
"loss": 3.3178,
"step": 5540
},
{
"epoch": 0.57,
"learning_rate": 8.269703966416412e-06,
"loss": 3.3302,
"step": 5545
},
{
"epoch": 0.57,
"learning_rate": 8.253331435043888e-06,
"loss": 3.3292,
"step": 5550
},
{
"epoch": 0.57,
"learning_rate": 8.236963731706137e-06,
"loss": 3.3259,
"step": 5555
},
{
"epoch": 0.57,
"learning_rate": 8.22060090164576e-06,
"loss": 3.2972,
"step": 5560
},
{
"epoch": 0.57,
"learning_rate": 8.204242990091898e-06,
"loss": 3.3338,
"step": 5565
},
{
"epoch": 0.57,
"learning_rate": 8.187890042260094e-06,
"loss": 3.3246,
"step": 5570
},
{
"epoch": 0.57,
"learning_rate": 8.171542103352166e-06,
"loss": 3.3236,
"step": 5575
},
{
"epoch": 0.57,
"learning_rate": 8.155199218556098e-06,
"loss": 3.3122,
"step": 5580
},
{
"epoch": 0.57,
"learning_rate": 8.138861433045887e-06,
"loss": 3.3686,
"step": 5585
},
{
"epoch": 0.57,
"learning_rate": 8.122528791981447e-06,
"loss": 3.3435,
"step": 5590
},
{
"epoch": 0.57,
"learning_rate": 8.106201340508468e-06,
"loss": 3.3379,
"step": 5595
},
{
"epoch": 0.57,
"learning_rate": 8.089879123758301e-06,
"loss": 3.3528,
"step": 5600
},
{
"epoch": 0.58,
"learning_rate": 8.073562186847816e-06,
"loss": 3.3556,
"step": 5605
},
{
"epoch": 0.58,
"learning_rate": 8.057250574879296e-06,
"loss": 3.3303,
"step": 5610
},
{
"epoch": 0.58,
"learning_rate": 8.040944332940313e-06,
"loss": 3.3402,
"step": 5615
},
{
"epoch": 0.58,
"learning_rate": 8.024643506103574e-06,
"loss": 3.3234,
"step": 5620
},
{
"epoch": 0.58,
"learning_rate": 8.008348139426838e-06,
"loss": 3.3514,
"step": 5625
},
{
"epoch": 0.58,
"learning_rate": 7.992058277952765e-06,
"loss": 3.3101,
"step": 5630
},
{
"epoch": 0.58,
"learning_rate": 7.975773966708794e-06,
"loss": 3.3163,
"step": 5635
},
{
"epoch": 0.58,
"learning_rate": 7.959495250707026e-06,
"loss": 3.3123,
"step": 5640
},
{
"epoch": 0.58,
"learning_rate": 7.943222174944097e-06,
"loss": 3.3163,
"step": 5645
},
{
"epoch": 0.58,
"learning_rate": 7.92695478440105e-06,
"loss": 3.3455,
"step": 5650
},
{
"epoch": 0.58,
"learning_rate": 7.910693124043214e-06,
"loss": 3.3545,
"step": 5655
},
{
"epoch": 0.58,
"learning_rate": 7.89443723882008e-06,
"loss": 3.3437,
"step": 5660
},
{
"epoch": 0.58,
"learning_rate": 7.878187173665174e-06,
"loss": 3.3227,
"step": 5665
},
{
"epoch": 0.58,
"learning_rate": 7.861942973495939e-06,
"loss": 3.3427,
"step": 5670
},
{
"epoch": 0.58,
"learning_rate": 7.845704683213598e-06,
"loss": 3.3086,
"step": 5675
},
{
"epoch": 0.58,
"learning_rate": 7.829472347703046e-06,
"loss": 3.3765,
"step": 5680
},
{
"epoch": 0.58,
"learning_rate": 7.813246011832712e-06,
"loss": 3.3161,
"step": 5685
},
{
"epoch": 0.58,
"learning_rate": 7.79702572045445e-06,
"loss": 3.3682,
"step": 5690
},
{
"epoch": 0.58,
"learning_rate": 7.780811518403397e-06,
"loss": 3.3335,
"step": 5695
},
{
"epoch": 0.59,
"learning_rate": 7.764603450497861e-06,
"loss": 3.3127,
"step": 5700
},
{
"epoch": 0.59,
"learning_rate": 7.748401561539196e-06,
"loss": 3.2919,
"step": 5705
},
{
"epoch": 0.59,
"learning_rate": 7.732205896311678e-06,
"loss": 3.3443,
"step": 5710
},
{
"epoch": 0.59,
"learning_rate": 7.716016499582376e-06,
"loss": 3.303,
"step": 5715
},
{
"epoch": 0.59,
"learning_rate": 7.699833416101033e-06,
"loss": 3.3185,
"step": 5720
},
{
"epoch": 0.59,
"learning_rate": 7.683656690599942e-06,
"loss": 3.3215,
"step": 5725
},
{
"epoch": 0.59,
"learning_rate": 7.667486367793822e-06,
"loss": 3.3261,
"step": 5730
},
{
"epoch": 0.59,
"learning_rate": 7.651322492379694e-06,
"loss": 3.3445,
"step": 5735
},
{
"epoch": 0.59,
"learning_rate": 7.635165109036756e-06,
"loss": 3.3505,
"step": 5740
},
{
"epoch": 0.59,
"learning_rate": 7.619014262426262e-06,
"loss": 3.306,
"step": 5745
},
{
"epoch": 0.59,
"learning_rate": 7.602869997191398e-06,
"loss": 3.3131,
"step": 5750
},
{
"epoch": 0.59,
"learning_rate": 7.586732357957158e-06,
"loss": 3.3371,
"step": 5755
},
{
"epoch": 0.59,
"learning_rate": 7.570601389330222e-06,
"loss": 3.3456,
"step": 5760
},
{
"epoch": 0.59,
"learning_rate": 7.554477135898828e-06,
"loss": 3.2972,
"step": 5765
},
{
"epoch": 0.59,
"learning_rate": 7.538359642232654e-06,
"loss": 3.3107,
"step": 5770
},
{
"epoch": 0.59,
"learning_rate": 7.522248952882695e-06,
"loss": 3.3091,
"step": 5775
},
{
"epoch": 0.59,
"learning_rate": 7.506145112381138e-06,
"loss": 3.3101,
"step": 5780
},
{
"epoch": 0.59,
"learning_rate": 7.490048165241233e-06,
"loss": 3.3269,
"step": 5785
},
{
"epoch": 0.59,
"learning_rate": 7.473958155957182e-06,
"loss": 3.3064,
"step": 5790
},
{
"epoch": 0.59,
"learning_rate": 7.457875129004008e-06,
"loss": 3.2897,
"step": 5795
},
{
"epoch": 0.6,
"learning_rate": 7.441799128837437e-06,
"loss": 3.2875,
"step": 5800
},
{
"epoch": 0.6,
"learning_rate": 7.425730199893761e-06,
"loss": 3.3285,
"step": 5805
},
{
"epoch": 0.6,
"learning_rate": 7.40966838658974e-06,
"loss": 3.3292,
"step": 5810
},
{
"epoch": 0.6,
"learning_rate": 7.3936137333224565e-06,
"loss": 3.3438,
"step": 5815
},
{
"epoch": 0.6,
"learning_rate": 7.3775662844692075e-06,
"loss": 3.3151,
"step": 5820
},
{
"epoch": 0.6,
"learning_rate": 7.361526084387369e-06,
"loss": 3.2861,
"step": 5825
},
{
"epoch": 0.6,
"learning_rate": 7.345493177414284e-06,
"loss": 3.3326,
"step": 5830
},
{
"epoch": 0.6,
"learning_rate": 7.3294676078671405e-06,
"loss": 3.342,
"step": 5835
},
{
"epoch": 0.6,
"learning_rate": 7.313449420042837e-06,
"loss": 3.3066,
"step": 5840
},
{
"epoch": 0.6,
"learning_rate": 7.297438658217878e-06,
"loss": 3.3273,
"step": 5845
},
{
"epoch": 0.6,
"learning_rate": 7.2814353666482276e-06,
"loss": 3.3008,
"step": 5850
},
{
"epoch": 0.6,
"learning_rate": 7.265439589569212e-06,
"loss": 3.3439,
"step": 5855
},
{
"epoch": 0.6,
"learning_rate": 7.249451371195384e-06,
"loss": 3.3129,
"step": 5860
},
{
"epoch": 0.6,
"learning_rate": 7.233470755720402e-06,
"loss": 3.35,
"step": 5865
},
{
"epoch": 0.6,
"learning_rate": 7.217497787316909e-06,
"loss": 3.3494,
"step": 5870
},
{
"epoch": 0.6,
"learning_rate": 7.201532510136411e-06,
"loss": 3.3338,
"step": 5875
},
{
"epoch": 0.6,
"learning_rate": 7.18557496830915e-06,
"loss": 3.3099,
"step": 5880
},
{
"epoch": 0.6,
"learning_rate": 7.169625205943995e-06,
"loss": 3.3496,
"step": 5885
},
{
"epoch": 0.6,
"learning_rate": 7.153683267128304e-06,
"loss": 3.3073,
"step": 5890
},
{
"epoch": 0.61,
"learning_rate": 7.137749195927815e-06,
"loss": 3.3288,
"step": 5895
},
{
"epoch": 0.61,
"learning_rate": 7.121823036386514e-06,
"loss": 3.3173,
"step": 5900
},
{
"epoch": 0.61,
"learning_rate": 7.10590483252652e-06,
"loss": 3.3351,
"step": 5905
},
{
"epoch": 0.61,
"learning_rate": 7.089994628347965e-06,
"loss": 3.3412,
"step": 5910
},
{
"epoch": 0.61,
"learning_rate": 7.074092467828864e-06,
"loss": 3.3308,
"step": 5915
},
{
"epoch": 0.61,
"learning_rate": 7.058198394924997e-06,
"loss": 3.2986,
"step": 5920
},
{
"epoch": 0.61,
"learning_rate": 7.042312453569793e-06,
"loss": 3.3275,
"step": 5925
},
{
"epoch": 0.61,
"learning_rate": 7.026434687674204e-06,
"loss": 3.3116,
"step": 5930
},
{
"epoch": 0.61,
"learning_rate": 7.010565141126584e-06,
"loss": 3.3234,
"step": 5935
},
{
"epoch": 0.61,
"learning_rate": 6.994703857792562e-06,
"loss": 3.3192,
"step": 5940
},
{
"epoch": 0.61,
"learning_rate": 6.978850881514934e-06,
"loss": 3.3206,
"step": 5945
},
{
"epoch": 0.61,
"learning_rate": 6.963006256113527e-06,
"loss": 3.3144,
"step": 5950
},
{
"epoch": 0.61,
"learning_rate": 6.94717002538509e-06,
"loss": 3.3564,
"step": 5955
},
{
"epoch": 0.61,
"learning_rate": 6.931342233103171e-06,
"loss": 3.2916,
"step": 5960
},
{
"epoch": 0.61,
"learning_rate": 6.915522923017983e-06,
"loss": 3.3106,
"step": 5965
},
{
"epoch": 0.61,
"learning_rate": 6.8997121388563e-06,
"loss": 3.3251,
"step": 5970
},
{
"epoch": 0.61,
"learning_rate": 6.883909924321328e-06,
"loss": 3.3333,
"step": 5975
},
{
"epoch": 0.61,
"learning_rate": 6.868116323092589e-06,
"loss": 3.3074,
"step": 5980
},
{
"epoch": 0.61,
"learning_rate": 6.852331378825785e-06,
"loss": 3.327,
"step": 5985
},
{
"epoch": 0.61,
"learning_rate": 6.8365551351527e-06,
"loss": 3.3249,
"step": 5990
},
{
"epoch": 0.62,
"learning_rate": 6.820787635681068e-06,
"loss": 3.3421,
"step": 5995
},
{
"epoch": 0.62,
"learning_rate": 6.80502892399445e-06,
"loss": 3.3011,
"step": 6000
},
{
"epoch": 0.62,
"learning_rate": 6.78927904365211e-06,
"loss": 3.3154,
"step": 6005
},
{
"epoch": 0.62,
"learning_rate": 6.773538038188912e-06,
"loss": 3.3476,
"step": 6010
},
{
"epoch": 0.62,
"learning_rate": 6.757805951115182e-06,
"loss": 3.3043,
"step": 6015
},
{
"epoch": 0.62,
"learning_rate": 6.742082825916599e-06,
"loss": 3.3314,
"step": 6020
},
{
"epoch": 0.62,
"learning_rate": 6.726368706054063e-06,
"loss": 3.2959,
"step": 6025
},
{
"epoch": 0.62,
"learning_rate": 6.710663634963588e-06,
"loss": 3.3122,
"step": 6030
},
{
"epoch": 0.62,
"learning_rate": 6.694967656056175e-06,
"loss": 3.3096,
"step": 6035
},
{
"epoch": 0.62,
"learning_rate": 6.67928081271769e-06,
"loss": 3.3262,
"step": 6040
},
{
"epoch": 0.62,
"learning_rate": 6.663603148308754e-06,
"loss": 3.3224,
"step": 6045
},
{
"epoch": 0.62,
"learning_rate": 6.6479347061646046e-06,
"loss": 3.3478,
"step": 6050
},
{
"epoch": 0.62,
"learning_rate": 6.632275529594997e-06,
"loss": 3.3353,
"step": 6055
},
{
"epoch": 0.62,
"learning_rate": 6.616625661884073e-06,
"loss": 3.3296,
"step": 6060
},
{
"epoch": 0.62,
"learning_rate": 6.600985146290246e-06,
"loss": 3.3236,
"step": 6065
},
{
"epoch": 0.62,
"learning_rate": 6.585354026046069e-06,
"loss": 3.2962,
"step": 6070
},
{
"epoch": 0.62,
"learning_rate": 6.569732344358137e-06,
"loss": 3.2868,
"step": 6075
},
{
"epoch": 0.62,
"learning_rate": 6.554120144406948e-06,
"loss": 3.2703,
"step": 6080
},
{
"epoch": 0.62,
"learning_rate": 6.5385174693467955e-06,
"loss": 3.3199,
"step": 6085
},
{
"epoch": 0.63,
"learning_rate": 6.522924362305639e-06,
"loss": 3.3387,
"step": 6090
},
{
"epoch": 0.63,
"learning_rate": 6.507340866384997e-06,
"loss": 3.3014,
"step": 6095
},
{
"epoch": 0.63,
"learning_rate": 6.491767024659818e-06,
"loss": 3.2966,
"step": 6100
},
{
"epoch": 0.63,
"learning_rate": 6.476202880178369e-06,
"loss": 3.3131,
"step": 6105
},
{
"epoch": 0.63,
"learning_rate": 6.460648475962104e-06,
"loss": 3.3123,
"step": 6110
},
{
"epoch": 0.63,
"learning_rate": 6.445103855005563e-06,
"loss": 3.3045,
"step": 6115
},
{
"epoch": 0.63,
"learning_rate": 6.429569060276237e-06,
"loss": 3.3193,
"step": 6120
},
{
"epoch": 0.63,
"learning_rate": 6.414044134714461e-06,
"loss": 3.2906,
"step": 6125
},
{
"epoch": 0.63,
"learning_rate": 6.398529121233291e-06,
"loss": 3.2899,
"step": 6130
},
{
"epoch": 0.63,
"learning_rate": 6.3830240627183745e-06,
"loss": 3.3199,
"step": 6135
},
{
"epoch": 0.63,
"learning_rate": 6.36752900202785e-06,
"loss": 3.301,
"step": 6140
},
{
"epoch": 0.63,
"learning_rate": 6.352043981992222e-06,
"loss": 3.3046,
"step": 6145
},
{
"epoch": 0.63,
"learning_rate": 6.336569045414238e-06,
"loss": 3.3354,
"step": 6150
},
{
"epoch": 0.63,
"learning_rate": 6.321104235068775e-06,
"loss": 3.3019,
"step": 6155
},
{
"epoch": 0.63,
"learning_rate": 6.305649593702721e-06,
"loss": 3.2985,
"step": 6160
},
{
"epoch": 0.63,
"learning_rate": 6.290205164034849e-06,
"loss": 3.318,
"step": 6165
},
{
"epoch": 0.63,
"learning_rate": 6.274770988755712e-06,
"loss": 3.3463,
"step": 6170
},
{
"epoch": 0.63,
"learning_rate": 6.259347110527516e-06,
"loss": 3.3031,
"step": 6175
},
{
"epoch": 0.63,
"learning_rate": 6.243933571984009e-06,
"loss": 3.2965,
"step": 6180
},
{
"epoch": 0.63,
"learning_rate": 6.228530415730349e-06,
"loss": 3.2875,
"step": 6185
},
{
"epoch": 0.64,
"learning_rate": 6.213137684343002e-06,
"loss": 3.337,
"step": 6190
},
{
"epoch": 0.64,
"learning_rate": 6.197755420369622e-06,
"loss": 3.3082,
"step": 6195
},
{
"epoch": 0.64,
"learning_rate": 6.182383666328925e-06,
"loss": 3.3336,
"step": 6200
},
{
"epoch": 0.64,
"learning_rate": 6.1670224647105714e-06,
"loss": 3.3176,
"step": 6205
},
{
"epoch": 0.64,
"learning_rate": 6.151671857975061e-06,
"loss": 3.3146,
"step": 6210
},
{
"epoch": 0.64,
"learning_rate": 6.136331888553606e-06,
"loss": 3.3012,
"step": 6215
},
{
"epoch": 0.64,
"learning_rate": 6.121002598848017e-06,
"loss": 3.3134,
"step": 6220
},
{
"epoch": 0.64,
"learning_rate": 6.105684031230577e-06,
"loss": 3.3179,
"step": 6225
},
{
"epoch": 0.64,
"learning_rate": 6.090376228043938e-06,
"loss": 3.3075,
"step": 6230
},
{
"epoch": 0.64,
"learning_rate": 6.075079231600999e-06,
"loss": 3.3314,
"step": 6235
},
{
"epoch": 0.64,
"learning_rate": 6.059793084184782e-06,
"loss": 3.3294,
"step": 6240
},
{
"epoch": 0.64,
"learning_rate": 6.0445178280483285e-06,
"loss": 3.2846,
"step": 6245
},
{
"epoch": 0.64,
"learning_rate": 6.029253505414565e-06,
"loss": 3.2943,
"step": 6250
},
{
"epoch": 0.64,
"learning_rate": 6.014000158476204e-06,
"loss": 3.3094,
"step": 6255
},
{
"epoch": 0.64,
"learning_rate": 5.998757829395617e-06,
"loss": 3.2884,
"step": 6260
},
{
"epoch": 0.64,
"learning_rate": 5.983526560304723e-06,
"loss": 3.3439,
"step": 6265
},
{
"epoch": 0.64,
"learning_rate": 5.968306393304863e-06,
"loss": 3.3057,
"step": 6270
},
{
"epoch": 0.64,
"learning_rate": 5.9530973704666984e-06,
"loss": 3.3483,
"step": 6275
},
{
"epoch": 0.64,
"learning_rate": 5.9378995338300815e-06,
"loss": 3.2989,
"step": 6280
},
{
"epoch": 0.65,
"learning_rate": 5.9227129254039486e-06,
"loss": 3.2861,
"step": 6285
},
{
"epoch": 0.65,
"learning_rate": 5.907537587166191e-06,
"loss": 3.3116,
"step": 6290
},
{
"epoch": 0.65,
"learning_rate": 5.892373561063558e-06,
"loss": 3.3052,
"step": 6295
},
{
"epoch": 0.65,
"learning_rate": 5.877220889011526e-06,
"loss": 3.3233,
"step": 6300
},
{
"epoch": 0.65,
"learning_rate": 5.862079612894187e-06,
"loss": 3.3429,
"step": 6305
},
{
"epoch": 0.65,
"learning_rate": 5.846949774564133e-06,
"loss": 3.2817,
"step": 6310
},
{
"epoch": 0.65,
"learning_rate": 5.8318314158423395e-06,
"loss": 3.2818,
"step": 6315
},
{
"epoch": 0.65,
"learning_rate": 5.8167245785180535e-06,
"loss": 3.3262,
"step": 6320
},
{
"epoch": 0.65,
"learning_rate": 5.801629304348675e-06,
"loss": 3.3519,
"step": 6325
},
{
"epoch": 0.65,
"learning_rate": 5.789561438525277e-06,
"loss": 3.335,
"step": 6330
},
{
"epoch": 0.65,
"learning_rate": 5.774487083161278e-06,
"loss": 3.3075,
"step": 6335
},
{
"epoch": 0.65,
"learning_rate": 5.759424407702493e-06,
"loss": 3.3132,
"step": 6340
},
{
"epoch": 0.65,
"learning_rate": 5.744373453784256e-06,
"loss": 3.3155,
"step": 6345
},
{
"epoch": 0.65,
"learning_rate": 5.729334263009495e-06,
"loss": 3.3061,
"step": 6350
},
{
"epoch": 0.65,
"learning_rate": 5.714306876948621e-06,
"loss": 3.3282,
"step": 6355
},
{
"epoch": 0.65,
"learning_rate": 5.699291337139419e-06,
"loss": 3.317,
"step": 6360
},
{
"epoch": 0.65,
"learning_rate": 5.684287685086931e-06,
"loss": 3.3401,
"step": 6365
},
{
"epoch": 0.65,
"learning_rate": 5.669295962263337e-06,
"loss": 3.3114,
"step": 6370
},
{
"epoch": 0.65,
"learning_rate": 5.654316210107843e-06,
"loss": 3.2913,
"step": 6375
},
{
"epoch": 0.65,
"learning_rate": 5.6393484700265666e-06,
"loss": 3.2988,
"step": 6380
},
{
"epoch": 0.66,
"learning_rate": 5.624392783392422e-06,
"loss": 3.3231,
"step": 6385
},
{
"epoch": 0.66,
"learning_rate": 5.609449191545009e-06,
"loss": 3.3397,
"step": 6390
},
{
"epoch": 0.66,
"learning_rate": 5.5945177357904935e-06,
"loss": 3.3236,
"step": 6395
},
{
"epoch": 0.66,
"learning_rate": 5.579598457401489e-06,
"loss": 3.3173,
"step": 6400
},
{
"epoch": 0.66,
"learning_rate": 5.564691397616961e-06,
"loss": 3.3354,
"step": 6405
},
{
"epoch": 0.66,
"learning_rate": 5.549796597642093e-06,
"loss": 3.3391,
"step": 6410
},
{
"epoch": 0.66,
"learning_rate": 5.534914098648185e-06,
"loss": 3.3339,
"step": 6415
},
{
"epoch": 0.66,
"learning_rate": 5.52004394177253e-06,
"loss": 3.3056,
"step": 6420
},
{
"epoch": 0.66,
"learning_rate": 5.505186168118314e-06,
"loss": 3.3162,
"step": 6425
},
{
"epoch": 0.66,
"learning_rate": 5.490340818754485e-06,
"loss": 3.2815,
"step": 6430
},
{
"epoch": 0.66,
"learning_rate": 5.47550793471566e-06,
"loss": 3.302,
"step": 6435
},
{
"epoch": 0.66,
"learning_rate": 5.460687557001983e-06,
"loss": 3.2952,
"step": 6440
},
{
"epoch": 0.66,
"learning_rate": 5.445879726579042e-06,
"loss": 3.3209,
"step": 6445
},
{
"epoch": 0.66,
"learning_rate": 5.431084484377742e-06,
"loss": 3.356,
"step": 6450
},
{
"epoch": 0.66,
"learning_rate": 5.416301871294186e-06,
"loss": 3.3438,
"step": 6455
},
{
"epoch": 0.66,
"learning_rate": 5.401531928189574e-06,
"loss": 3.3069,
"step": 6460
},
{
"epoch": 0.66,
"learning_rate": 5.386774695890083e-06,
"loss": 3.344,
"step": 6465
},
{
"epoch": 0.66,
"learning_rate": 5.372030215186753e-06,
"loss": 3.3163,
"step": 6470
},
{
"epoch": 0.66,
"learning_rate": 5.357298526835381e-06,
"loss": 3.3111,
"step": 6475
},
{
"epoch": 0.67,
"learning_rate": 5.342579671556402e-06,
"loss": 3.3036,
"step": 6480
},
{
"epoch": 0.67,
"learning_rate": 5.327873690034775e-06,
"loss": 3.3204,
"step": 6485
},
{
"epoch": 0.67,
"learning_rate": 5.313180622919883e-06,
"loss": 3.3088,
"step": 6490
},
{
"epoch": 0.67,
"learning_rate": 5.298500510825399e-06,
"loss": 3.2783,
"step": 6495
},
{
"epoch": 0.67,
"learning_rate": 5.2838333943291984e-06,
"loss": 3.3212,
"step": 6500
},
{
"epoch": 0.67,
"learning_rate": 5.269179313973232e-06,
"loss": 3.3111,
"step": 6505
},
{
"epoch": 0.67,
"learning_rate": 5.254538310263411e-06,
"loss": 3.2709,
"step": 6510
},
{
"epoch": 0.67,
"learning_rate": 5.239910423669509e-06,
"loss": 3.3042,
"step": 6515
},
{
"epoch": 0.67,
"learning_rate": 5.225295694625036e-06,
"loss": 3.3092,
"step": 6520
},
{
"epoch": 0.67,
"learning_rate": 5.210694163527138e-06,
"loss": 3.2934,
"step": 6525
},
{
"epoch": 0.67,
"learning_rate": 5.196105870736479e-06,
"loss": 3.3031,
"step": 6530
},
{
"epoch": 0.67,
"learning_rate": 5.181530856577121e-06,
"loss": 3.3218,
"step": 6535
},
{
"epoch": 0.67,
"learning_rate": 5.166969161336435e-06,
"loss": 3.2857,
"step": 6540
},
{
"epoch": 0.67,
"learning_rate": 5.152420825264968e-06,
"loss": 3.303,
"step": 6545
},
{
"epoch": 0.67,
"learning_rate": 5.1378858885763475e-06,
"loss": 3.2928,
"step": 6550
},
{
"epoch": 0.67,
"learning_rate": 5.123364391447156e-06,
"loss": 3.3229,
"step": 6555
},
{
"epoch": 0.67,
"learning_rate": 5.1088563740168355e-06,
"loss": 3.3101,
"step": 6560
},
{
"epoch": 0.67,
"learning_rate": 5.094361876387557e-06,
"loss": 3.3175,
"step": 6565
},
{
"epoch": 0.67,
"learning_rate": 5.079880938624133e-06,
"loss": 3.3263,
"step": 6570
},
{
"epoch": 0.67,
"learning_rate": 5.065413600753888e-06,
"loss": 3.2984,
"step": 6575
},
{
"epoch": 0.68,
"learning_rate": 5.050959902766552e-06,
"loss": 3.3193,
"step": 6580
},
{
"epoch": 0.68,
"learning_rate": 5.036519884614157e-06,
"loss": 3.324,
"step": 6585
},
{
"epoch": 0.68,
"learning_rate": 5.022093586210921e-06,
"loss": 3.3079,
"step": 6590
},
{
"epoch": 0.68,
"learning_rate": 5.0076810474331395e-06,
"loss": 3.3138,
"step": 6595
},
{
"epoch": 0.68,
"learning_rate": 4.993282308119074e-06,
"loss": 3.3108,
"step": 6600
},
{
"epoch": 0.68,
"learning_rate": 4.9788974080688416e-06,
"loss": 3.2662,
"step": 6605
},
{
"epoch": 0.68,
"learning_rate": 4.964526387044304e-06,
"loss": 3.3211,
"step": 6610
},
{
"epoch": 0.68,
"learning_rate": 4.950169284768968e-06,
"loss": 3.3244,
"step": 6615
},
{
"epoch": 0.68,
"learning_rate": 4.9358261409278515e-06,
"loss": 3.3103,
"step": 6620
},
{
"epoch": 0.68,
"learning_rate": 4.921496995167404e-06,
"loss": 3.3248,
"step": 6625
},
{
"epoch": 0.68,
"learning_rate": 4.9071818870953745e-06,
"loss": 3.3114,
"step": 6630
},
{
"epoch": 0.68,
"learning_rate": 4.892880856280713e-06,
"loss": 3.3497,
"step": 6635
},
{
"epoch": 0.68,
"learning_rate": 4.878593942253456e-06,
"loss": 3.292,
"step": 6640
},
{
"epoch": 0.68,
"learning_rate": 4.864321184504622e-06,
"loss": 3.3124,
"step": 6645
},
{
"epoch": 0.68,
"learning_rate": 4.850062622486098e-06,
"loss": 3.301,
"step": 6650
},
{
"epoch": 0.68,
"learning_rate": 4.835818295610531e-06,
"loss": 3.2937,
"step": 6655
},
{
"epoch": 0.68,
"learning_rate": 4.821588243251223e-06,
"loss": 3.3096,
"step": 6660
},
{
"epoch": 0.68,
"learning_rate": 4.807372504742013e-06,
"loss": 3.2841,
"step": 6665
},
{
"epoch": 0.68,
"learning_rate": 4.7931711193771805e-06,
"loss": 3.2898,
"step": 6670
},
{
"epoch": 0.69,
"learning_rate": 4.77898412641133e-06,
"loss": 3.3198,
"step": 6675
},
{
"epoch": 0.69,
"learning_rate": 4.764811565059283e-06,
"loss": 3.3456,
"step": 6680
},
{
"epoch": 0.69,
"learning_rate": 4.750653474495969e-06,
"loss": 3.3202,
"step": 6685
},
{
"epoch": 0.69,
"learning_rate": 4.7365098938563195e-06,
"loss": 3.3289,
"step": 6690
},
{
"epoch": 0.69,
"learning_rate": 4.722380862235156e-06,
"loss": 3.3172,
"step": 6695
},
{
"epoch": 0.69,
"learning_rate": 4.708266418687092e-06,
"loss": 3.2886,
"step": 6700
},
{
"epoch": 0.69,
"learning_rate": 4.694166602226404e-06,
"loss": 3.315,
"step": 6705
},
{
"epoch": 0.69,
"learning_rate": 4.680081451826949e-06,
"loss": 3.3038,
"step": 6710
},
{
"epoch": 0.69,
"learning_rate": 4.666011006422041e-06,
"loss": 3.297,
"step": 6715
},
{
"epoch": 0.69,
"learning_rate": 4.651955304904348e-06,
"loss": 3.3293,
"step": 6720
},
{
"epoch": 0.69,
"learning_rate": 4.637914386125781e-06,
"loss": 3.3173,
"step": 6725
},
{
"epoch": 0.69,
"learning_rate": 4.623888288897395e-06,
"loss": 3.2995,
"step": 6730
},
{
"epoch": 0.69,
"learning_rate": 4.60987705198927e-06,
"loss": 3.317,
"step": 6735
},
{
"epoch": 0.69,
"learning_rate": 4.595880714130415e-06,
"loss": 3.3505,
"step": 6740
},
{
"epoch": 0.69,
"learning_rate": 4.581899314008657e-06,
"loss": 3.2903,
"step": 6745
},
{
"epoch": 0.69,
"learning_rate": 4.5679328902705224e-06,
"loss": 3.3395,
"step": 6750
},
{
"epoch": 0.69,
"learning_rate": 4.553981481521156e-06,
"loss": 3.2834,
"step": 6755
},
{
"epoch": 0.69,
"learning_rate": 4.540045126324182e-06,
"loss": 3.3247,
"step": 6760
},
{
"epoch": 0.69,
"learning_rate": 4.52612386320163e-06,
"loss": 3.3246,
"step": 6765
},
{
"epoch": 0.69,
"learning_rate": 4.512217730633806e-06,
"loss": 3.2869,
"step": 6770
},
{
"epoch": 0.7,
"learning_rate": 4.498326767059196e-06,
"loss": 3.2991,
"step": 6775
},
{
"epoch": 0.7,
"learning_rate": 4.484451010874351e-06,
"loss": 3.2981,
"step": 6780
},
{
"epoch": 0.7,
"learning_rate": 4.4705905004337925e-06,
"loss": 3.311,
"step": 6785
},
{
"epoch": 0.7,
"learning_rate": 4.456745274049898e-06,
"loss": 3.3263,
"step": 6790
},
{
"epoch": 0.7,
"learning_rate": 4.442915369992802e-06,
"loss": 3.3202,
"step": 6795
},
{
"epoch": 0.7,
"learning_rate": 4.4291008264902744e-06,
"loss": 3.3266,
"step": 6800
},
{
"epoch": 0.7,
"learning_rate": 4.415301681727638e-06,
"loss": 3.3141,
"step": 6805
},
{
"epoch": 0.7,
"learning_rate": 4.40151797384765e-06,
"loss": 3.3069,
"step": 6810
},
{
"epoch": 0.7,
"learning_rate": 4.387749740950392e-06,
"loss": 3.3015,
"step": 6815
},
{
"epoch": 0.7,
"learning_rate": 4.373997021093176e-06,
"loss": 3.3116,
"step": 6820
},
{
"epoch": 0.7,
"learning_rate": 4.360259852290431e-06,
"loss": 3.2859,
"step": 6825
},
{
"epoch": 0.7,
"learning_rate": 4.3465382725136015e-06,
"loss": 3.317,
"step": 6830
},
{
"epoch": 0.7,
"learning_rate": 4.332832319691044e-06,
"loss": 3.3087,
"step": 6835
},
{
"epoch": 0.7,
"learning_rate": 4.319142031707918e-06,
"loss": 3.3001,
"step": 6840
},
{
"epoch": 0.7,
"learning_rate": 4.305467446406077e-06,
"loss": 3.3174,
"step": 6845
},
{
"epoch": 0.7,
"learning_rate": 4.291808601583982e-06,
"loss": 3.3165,
"step": 6850
},
{
"epoch": 0.7,
"learning_rate": 4.278165534996577e-06,
"loss": 3.3251,
"step": 6855
},
{
"epoch": 0.7,
"learning_rate": 4.264538284355194e-06,
"loss": 3.308,
"step": 6860
},
{
"epoch": 0.7,
"learning_rate": 4.250926887327451e-06,
"loss": 3.2723,
"step": 6865
},
{
"epoch": 0.71,
"learning_rate": 4.2373313815371395e-06,
"loss": 3.3212,
"step": 6870
},
{
"epoch": 0.71,
"learning_rate": 4.22375180456413e-06,
"loss": 3.2993,
"step": 6875
},
{
"epoch": 0.71,
"learning_rate": 4.2101881939442645e-06,
"loss": 3.3228,
"step": 6880
},
{
"epoch": 0.71,
"learning_rate": 4.1966405871692394e-06,
"loss": 3.2958,
"step": 6885
},
{
"epoch": 0.71,
"learning_rate": 4.18310902168653e-06,
"loss": 3.2818,
"step": 6890
},
{
"epoch": 0.71,
"learning_rate": 4.169593534899262e-06,
"loss": 3.3064,
"step": 6895
},
{
"epoch": 0.71,
"learning_rate": 4.156094164166122e-06,
"loss": 3.2605,
"step": 6900
},
{
"epoch": 0.71,
"learning_rate": 4.142610946801248e-06,
"loss": 3.3101,
"step": 6905
},
{
"epoch": 0.71,
"learning_rate": 4.129143920074126e-06,
"loss": 3.3303,
"step": 6910
},
{
"epoch": 0.71,
"learning_rate": 4.11569312120949e-06,
"loss": 3.304,
"step": 6915
},
{
"epoch": 0.71,
"learning_rate": 4.10225858738722e-06,
"loss": 3.3225,
"step": 6920
},
{
"epoch": 0.71,
"learning_rate": 4.088840355742238e-06,
"loss": 3.2888,
"step": 6925
},
{
"epoch": 0.71,
"learning_rate": 4.075438463364394e-06,
"loss": 3.3037,
"step": 6930
},
{
"epoch": 0.71,
"learning_rate": 4.062052947298387e-06,
"loss": 3.2861,
"step": 6935
},
{
"epoch": 0.71,
"learning_rate": 4.0486838445436445e-06,
"loss": 3.3013,
"step": 6940
},
{
"epoch": 0.71,
"learning_rate": 4.035331192054225e-06,
"loss": 3.3137,
"step": 6945
},
{
"epoch": 0.71,
"learning_rate": 4.021995026738715e-06,
"loss": 3.3227,
"step": 6950
},
{
"epoch": 0.71,
"learning_rate": 4.008675385460131e-06,
"loss": 3.3012,
"step": 6955
},
{
"epoch": 0.71,
"learning_rate": 3.995372305035815e-06,
"loss": 3.3244,
"step": 6960
},
{
"epoch": 0.71,
"learning_rate": 3.982085822237332e-06,
"loss": 3.3113,
"step": 6965
},
{
"epoch": 0.72,
"learning_rate": 3.968815973790361e-06,
"loss": 3.2902,
"step": 6970
},
{
"epoch": 0.72,
"learning_rate": 3.955562796374614e-06,
"loss": 3.3019,
"step": 6975
},
{
"epoch": 0.72,
"learning_rate": 3.942326326623713e-06,
"loss": 3.2806,
"step": 6980
},
{
"epoch": 0.72,
"learning_rate": 3.9291066011251024e-06,
"loss": 3.306,
"step": 6985
},
{
"epoch": 0.72,
"learning_rate": 3.915903656419942e-06,
"loss": 3.3425,
"step": 6990
},
{
"epoch": 0.72,
"learning_rate": 3.902717529003005e-06,
"loss": 3.2898,
"step": 6995
},
{
"epoch": 0.72,
"learning_rate": 3.8895482553225874e-06,
"loss": 3.304,
"step": 7000
},
{
"epoch": 0.72,
"learning_rate": 3.876395871780381e-06,
"loss": 3.2967,
"step": 7005
},
{
"epoch": 0.72,
"learning_rate": 3.863260414731411e-06,
"loss": 3.2621,
"step": 7010
},
{
"epoch": 0.72,
"learning_rate": 3.8501419204839085e-06,
"loss": 3.3084,
"step": 7015
},
{
"epoch": 0.72,
"learning_rate": 3.837040425299209e-06,
"loss": 3.2824,
"step": 7020
},
{
"epoch": 0.72,
"learning_rate": 3.8239559653916684e-06,
"loss": 3.3124,
"step": 7025
},
{
"epoch": 0.72,
"learning_rate": 3.8108885769285555e-06,
"loss": 3.2996,
"step": 7030
},
{
"epoch": 0.72,
"learning_rate": 3.7978382960299476e-06,
"loss": 3.2839,
"step": 7035
},
{
"epoch": 0.72,
"learning_rate": 3.7848051587686363e-06,
"loss": 3.3113,
"step": 7040
},
{
"epoch": 0.72,
"learning_rate": 3.771789201170025e-06,
"loss": 3.2749,
"step": 7045
},
{
"epoch": 0.72,
"learning_rate": 3.7587904592120307e-06,
"loss": 3.3367,
"step": 7050
},
{
"epoch": 0.72,
"learning_rate": 3.7458089688249823e-06,
"loss": 3.311,
"step": 7055
},
{
"epoch": 0.72,
"learning_rate": 3.7328447658915277e-06,
"loss": 3.2968,
"step": 7060
},
{
"epoch": 0.73,
"learning_rate": 3.719897886246521e-06,
"loss": 3.2959,
"step": 7065
},
{
"epoch": 0.73,
"learning_rate": 3.7069683656769396e-06,
"loss": 3.2592,
"step": 7070
},
{
"epoch": 0.73,
"learning_rate": 3.694056239921776e-06,
"loss": 3.317,
"step": 7075
},
{
"epoch": 0.73,
"learning_rate": 3.6811615446719418e-06,
"loss": 3.3112,
"step": 7080
},
{
"epoch": 0.73,
"learning_rate": 3.6682843155701684e-06,
"loss": 3.3045,
"step": 7085
},
{
"epoch": 0.73,
"learning_rate": 3.655424588210906e-06,
"loss": 3.3146,
"step": 7090
},
{
"epoch": 0.73,
"learning_rate": 3.6425823981402297e-06,
"loss": 3.304,
"step": 7095
},
{
"epoch": 0.73,
"learning_rate": 3.6297577808557406e-06,
"loss": 3.2753,
"step": 7100
},
{
"epoch": 0.73,
"learning_rate": 3.616950771806459e-06,
"loss": 3.2887,
"step": 7105
},
{
"epoch": 0.73,
"learning_rate": 3.604161406392742e-06,
"loss": 3.2948,
"step": 7110
},
{
"epoch": 0.73,
"learning_rate": 3.5913897199661716e-06,
"loss": 3.3032,
"step": 7115
},
{
"epoch": 0.73,
"learning_rate": 3.5786357478294677e-06,
"loss": 3.3039,
"step": 7120
},
{
"epoch": 0.73,
"learning_rate": 3.5658995252363805e-06,
"loss": 3.2876,
"step": 7125
},
{
"epoch": 0.73,
"learning_rate": 3.5531810873916005e-06,
"loss": 3.2972,
"step": 7130
},
{
"epoch": 0.73,
"learning_rate": 3.540480469450659e-06,
"loss": 3.2791,
"step": 7135
},
{
"epoch": 0.73,
"learning_rate": 3.52779770651983e-06,
"loss": 3.2985,
"step": 7140
},
{
"epoch": 0.73,
"learning_rate": 3.5151328336560363e-06,
"loss": 3.3382,
"step": 7145
},
{
"epoch": 0.73,
"learning_rate": 3.50248588586674e-06,
"loss": 3.3049,
"step": 7150
},
{
"epoch": 0.73,
"learning_rate": 3.4898568981098678e-06,
"loss": 3.2783,
"step": 7155
},
{
"epoch": 0.74,
"learning_rate": 3.477245905293698e-06,
"loss": 3.2496,
"step": 7160
},
{
"epoch": 0.74,
"learning_rate": 3.464652942276767e-06,
"loss": 3.2848,
"step": 7165
},
{
"epoch": 0.74,
"learning_rate": 3.452078043867777e-06,
"loss": 3.2948,
"step": 7170
},
{
"epoch": 0.74,
"learning_rate": 3.4395212448254944e-06,
"loss": 3.3074,
"step": 7175
},
{
"epoch": 0.74,
"learning_rate": 3.4269825798586576e-06,
"loss": 3.2856,
"step": 7180
},
{
"epoch": 0.74,
"learning_rate": 3.4144620836258835e-06,
"loss": 3.2771,
"step": 7185
},
{
"epoch": 0.74,
"learning_rate": 3.4019597907355586e-06,
"loss": 3.3349,
"step": 7190
},
{
"epoch": 0.74,
"learning_rate": 3.389475735745761e-06,
"loss": 3.3313,
"step": 7195
},
{
"epoch": 0.74,
"learning_rate": 3.377009953164154e-06,
"loss": 3.2901,
"step": 7200
},
{
"epoch": 0.74,
"learning_rate": 3.3645624774478967e-06,
"loss": 3.2823,
"step": 7205
},
{
"epoch": 0.74,
"learning_rate": 3.3521333430035397e-06,
"loss": 3.284,
"step": 7210
},
{
"epoch": 0.74,
"learning_rate": 3.3397225841869408e-06,
"loss": 3.2731,
"step": 7215
},
{
"epoch": 0.74,
"learning_rate": 3.327330235303161e-06,
"loss": 3.2848,
"step": 7220
},
{
"epoch": 0.74,
"learning_rate": 3.314956330606378e-06,
"loss": 3.31,
"step": 7225
},
{
"epoch": 0.74,
"learning_rate": 3.3026009042997864e-06,
"loss": 3.3137,
"step": 7230
},
{
"epoch": 0.74,
"learning_rate": 3.2902639905354948e-06,
"loss": 3.3119,
"step": 7235
},
{
"epoch": 0.74,
"learning_rate": 3.2779456234144545e-06,
"loss": 3.3162,
"step": 7240
},
{
"epoch": 0.74,
"learning_rate": 3.265645836986343e-06,
"loss": 3.2823,
"step": 7245
},
{
"epoch": 0.74,
"learning_rate": 3.253364665249481e-06,
"loss": 3.3369,
"step": 7250
},
{
"epoch": 0.74,
"learning_rate": 3.241102142150734e-06,
"loss": 3.3076,
"step": 7255
},
{
"epoch": 0.75,
"learning_rate": 3.2288583015854234e-06,
"loss": 3.3118,
"step": 7260
},
{
"epoch": 0.75,
"learning_rate": 3.2166331773972227e-06,
"loss": 3.2874,
"step": 7265
},
{
"epoch": 0.75,
"learning_rate": 3.204426803378076e-06,
"loss": 3.28,
"step": 7270
},
{
"epoch": 0.75,
"learning_rate": 3.192239213268099e-06,
"loss": 3.2863,
"step": 7275
},
{
"epoch": 0.75,
"learning_rate": 3.1800704407554884e-06,
"loss": 3.2691,
"step": 7280
},
{
"epoch": 0.75,
"learning_rate": 3.1679205194764173e-06,
"loss": 3.2996,
"step": 7285
},
{
"epoch": 0.75,
"learning_rate": 3.1557894830149616e-06,
"loss": 3.302,
"step": 7290
},
{
"epoch": 0.75,
"learning_rate": 3.1436773649029906e-06,
"loss": 3.2936,
"step": 7295
},
{
"epoch": 0.75,
"learning_rate": 3.1315841986200847e-06,
"loss": 3.2785,
"step": 7300
},
{
"epoch": 0.75,
"learning_rate": 3.1195100175934357e-06,
"loss": 3.3168,
"step": 7305
},
{
"epoch": 0.75,
"learning_rate": 3.107454855197759e-06,
"loss": 3.3032,
"step": 7310
},
{
"epoch": 0.75,
"learning_rate": 3.0954187447551996e-06,
"loss": 3.2976,
"step": 7315
},
{
"epoch": 0.75,
"learning_rate": 3.0834017195352405e-06,
"loss": 3.2897,
"step": 7320
},
{
"epoch": 0.75,
"learning_rate": 3.0714038127546142e-06,
"loss": 3.3018,
"step": 7325
},
{
"epoch": 0.75,
"learning_rate": 3.0594250575771954e-06,
"loss": 3.299,
"step": 7330
},
{
"epoch": 0.75,
"learning_rate": 3.047465487113933e-06,
"loss": 3.2533,
"step": 7335
},
{
"epoch": 0.75,
"learning_rate": 3.035525134422743e-06,
"loss": 3.3027,
"step": 7340
},
{
"epoch": 0.75,
"learning_rate": 3.023604032508419e-06,
"loss": 3.2931,
"step": 7345
},
{
"epoch": 0.75,
"learning_rate": 3.011702214322545e-06,
"loss": 3.3184,
"step": 7350
},
{
"epoch": 0.76,
"learning_rate": 2.999819712763402e-06,
"loss": 3.2858,
"step": 7355
},
{
"epoch": 0.76,
"learning_rate": 2.9879565606758755e-06,
"loss": 3.3183,
"step": 7360
},
{
"epoch": 0.76,
"learning_rate": 2.9784799926630415e-06,
"loss": 3.2936,
"step": 7365
},
{
"epoch": 0.76,
"learning_rate": 2.9666517522227576e-06,
"loss": 3.2514,
"step": 7370
},
{
"epoch": 0.76,
"learning_rate": 2.9548429529349452e-06,
"loss": 3.2958,
"step": 7375
},
{
"epoch": 0.76,
"learning_rate": 2.943053627440771e-06,
"loss": 3.2819,
"step": 7380
},
{
"epoch": 0.76,
"learning_rate": 2.931283808327562e-06,
"loss": 3.2744,
"step": 7385
},
{
"epoch": 0.76,
"learning_rate": 2.9195335281287395e-06,
"loss": 3.3165,
"step": 7390
},
{
"epoch": 0.76,
"learning_rate": 2.9078028193237107e-06,
"loss": 3.2909,
"step": 7395
},
{
"epoch": 0.76,
"learning_rate": 2.8960917143377865e-06,
"loss": 3.2847,
"step": 7400
},
{
"epoch": 0.76,
"learning_rate": 2.8844002455420894e-06,
"loss": 3.2775,
"step": 7405
},
{
"epoch": 0.76,
"learning_rate": 2.8727284452534634e-06,
"loss": 3.2883,
"step": 7410
},
{
"epoch": 0.76,
"learning_rate": 2.8610763457343895e-06,
"loss": 3.2857,
"step": 7415
},
{
"epoch": 0.76,
"learning_rate": 2.849443979192892e-06,
"loss": 3.2906,
"step": 7420
},
{
"epoch": 0.76,
"learning_rate": 2.837831377782443e-06,
"loss": 3.3134,
"step": 7425
},
{
"epoch": 0.76,
"learning_rate": 2.8262385736018925e-06,
"loss": 3.3238,
"step": 7430
},
{
"epoch": 0.76,
"learning_rate": 2.814665598695362e-06,
"loss": 3.282,
"step": 7435
},
{
"epoch": 0.76,
"learning_rate": 2.803112485052163e-06,
"loss": 3.2921,
"step": 7440
},
{
"epoch": 0.76,
"learning_rate": 2.7915792646067088e-06,
"loss": 3.3177,
"step": 7445
},
{
"epoch": 0.76,
"learning_rate": 2.7800659692384237e-06,
"loss": 3.3009,
"step": 7450
},
{
"epoch": 0.77,
"learning_rate": 2.7685726307716564e-06,
"loss": 3.3029,
"step": 7455
},
{
"epoch": 0.77,
"learning_rate": 2.7570992809755937e-06,
"loss": 3.2818,
"step": 7460
},
{
"epoch": 0.77,
"learning_rate": 2.7479350142952733e-06,
"loss": 3.2972,
"step": 7465
},
{
"epoch": 0.77,
"learning_rate": 2.736497723988406e-06,
"loss": 3.2862,
"step": 7470
},
{
"epoch": 0.77,
"learning_rate": 2.725080511011745e-06,
"loss": 3.2853,
"step": 7475
},
{
"epoch": 0.77,
"learning_rate": 2.713683406924057e-06,
"loss": 3.2694,
"step": 7480
},
{
"epoch": 0.77,
"learning_rate": 2.702306443228516e-06,
"loss": 3.2785,
"step": 7485
},
{
"epoch": 0.77,
"learning_rate": 2.6909496513726354e-06,
"loss": 3.2912,
"step": 7490
},
{
"epoch": 0.77,
"learning_rate": 2.6796130627481663e-06,
"loss": 3.298,
"step": 7495
},
{
"epoch": 0.77,
"learning_rate": 2.668296708691015e-06,
"loss": 3.2919,
"step": 7500
},
{
"epoch": 0.77,
"learning_rate": 2.6570006204811595e-06,
"loss": 3.2967,
"step": 7505
},
{
"epoch": 0.77,
"learning_rate": 2.6457248293425576e-06,
"loss": 3.2963,
"step": 7510
},
{
"epoch": 0.77,
"learning_rate": 2.634469366443063e-06,
"loss": 3.323,
"step": 7515
},
{
"epoch": 0.77,
"learning_rate": 2.623234262894343e-06,
"loss": 3.2898,
"step": 7520
},
{
"epoch": 0.77,
"learning_rate": 2.6120195497517818e-06,
"loss": 3.3104,
"step": 7525
},
{
"epoch": 0.77,
"learning_rate": 2.600825258014407e-06,
"loss": 3.2927,
"step": 7530
},
{
"epoch": 0.77,
"learning_rate": 2.589651418624798e-06,
"loss": 3.3129,
"step": 7535
},
{
"epoch": 0.77,
"learning_rate": 2.578498062468999e-06,
"loss": 3.3192,
"step": 7540
},
{
"epoch": 0.77,
"learning_rate": 2.567365220376441e-06,
"loss": 3.3048,
"step": 7545
},
{
"epoch": 0.78,
"learning_rate": 2.556252923119843e-06,
"loss": 3.3125,
"step": 7550
},
{
"epoch": 0.78,
"learning_rate": 2.5451612014151427e-06,
"loss": 3.2848,
"step": 7555
},
{
"epoch": 0.78,
"learning_rate": 2.5340900859214003e-06,
"loss": 3.2921,
"step": 7560
},
{
"epoch": 0.78,
"learning_rate": 2.5230396072407204e-06,
"loss": 3.2902,
"step": 7565
},
{
"epoch": 0.78,
"learning_rate": 2.5120097959181578e-06,
"loss": 3.2713,
"step": 7570
},
{
"epoch": 0.78,
"learning_rate": 2.501000682441647e-06,
"loss": 3.2878,
"step": 7575
},
{
"epoch": 0.78,
"learning_rate": 2.4900122972419083e-06,
"loss": 3.3087,
"step": 7580
},
{
"epoch": 0.78,
"learning_rate": 2.4790446706923664e-06,
"loss": 3.2623,
"step": 7585
},
{
"epoch": 0.78,
"learning_rate": 2.4680978331090655e-06,
"loss": 3.2834,
"step": 7590
},
{
"epoch": 0.78,
"learning_rate": 2.4571718147505872e-06,
"loss": 3.2755,
"step": 7595
},
{
"epoch": 0.78,
"learning_rate": 2.4462666458179664e-06,
"loss": 3.3104,
"step": 7600
},
{
"epoch": 0.78,
"learning_rate": 2.4353823564546064e-06,
"loss": 3.3256,
"step": 7605
},
{
"epoch": 0.78,
"learning_rate": 2.424518976746194e-06,
"loss": 3.2682,
"step": 7610
},
{
"epoch": 0.78,
"learning_rate": 2.4136765367206216e-06,
"loss": 3.255,
"step": 7615
},
{
"epoch": 0.78,
"learning_rate": 2.405017681413605e-06,
"loss": 3.2806,
"step": 7620
},
{
"epoch": 0.78,
"learning_rate": 2.394213008302627e-06,
"loss": 3.2818,
"step": 7625
},
{
"epoch": 0.78,
"learning_rate": 2.3834293586444e-06,
"loss": 3.2964,
"step": 7630
},
{
"epoch": 0.78,
"learning_rate": 2.372666762246433e-06,
"loss": 3.3073,
"step": 7635
},
{
"epoch": 0.78,
"learning_rate": 2.3619252488580345e-06,
"loss": 3.3,
"step": 7640
},
{
"epoch": 0.78,
"learning_rate": 2.3512048481702454e-06,
"loss": 3.2655,
"step": 7645
},
{
"epoch": 0.79,
"learning_rate": 2.3405055898157416e-06,
"loss": 3.3318,
"step": 7650
},
{
"epoch": 0.79,
"learning_rate": 2.3298275033687613e-06,
"loss": 3.298,
"step": 7655
},
{
"epoch": 0.79,
"learning_rate": 2.3191706183450225e-06,
"loss": 3.3327,
"step": 7660
},
{
"epoch": 0.79,
"learning_rate": 2.3085349642016317e-06,
"loss": 3.2962,
"step": 7665
},
{
"epoch": 0.79,
"learning_rate": 2.297920570337019e-06,
"loss": 3.3318,
"step": 7670
},
{
"epoch": 0.79,
"learning_rate": 2.287327466090845e-06,
"loss": 3.31,
"step": 7675
},
{
"epoch": 0.79,
"learning_rate": 2.2767556807439216e-06,
"loss": 3.2856,
"step": 7680
},
{
"epoch": 0.79,
"learning_rate": 2.2662052435181335e-06,
"loss": 3.3154,
"step": 7685
},
{
"epoch": 0.79,
"learning_rate": 2.2556761835763576e-06,
"loss": 3.2669,
"step": 7690
},
{
"epoch": 0.79,
"learning_rate": 2.245168530022378e-06,
"loss": 3.2833,
"step": 7695
},
{
"epoch": 0.79,
"learning_rate": 2.234682311900812e-06,
"loss": 3.2926,
"step": 7700
},
{
"epoch": 0.79,
"learning_rate": 2.2242175581970247e-06,
"loss": 3.2746,
"step": 7705
},
{
"epoch": 0.79,
"learning_rate": 2.213774297837047e-06,
"loss": 3.304,
"step": 7710
},
{
"epoch": 0.79,
"learning_rate": 2.2033525596875027e-06,
"loss": 3.3048,
"step": 7715
},
{
"epoch": 0.79,
"learning_rate": 2.192952372555528e-06,
"loss": 3.2855,
"step": 7720
},
{
"epoch": 0.79,
"learning_rate": 2.182573765188686e-06,
"loss": 3.2732,
"step": 7725
},
{
"epoch": 0.79,
"learning_rate": 2.1722167662748874e-06,
"loss": 3.2926,
"step": 7730
},
{
"epoch": 0.79,
"learning_rate": 2.161881404442321e-06,
"loss": 3.285,
"step": 7735
},
{
"epoch": 0.79,
"learning_rate": 2.151567708259361e-06,
"loss": 3.3023,
"step": 7740
},
{
"epoch": 0.8,
"learning_rate": 2.1412757062345022e-06,
"loss": 3.2894,
"step": 7745
},
{
"epoch": 0.8,
"learning_rate": 2.1310054268162628e-06,
"loss": 3.2888,
"step": 7750
},
{
"epoch": 0.8,
"learning_rate": 2.120756898393126e-06,
"loss": 3.2795,
"step": 7755
},
{
"epoch": 0.8,
"learning_rate": 2.1105301492934503e-06,
"loss": 3.2553,
"step": 7760
},
{
"epoch": 0.8,
"learning_rate": 2.1003252077853906e-06,
"loss": 3.248,
"step": 7765
},
{
"epoch": 0.8,
"learning_rate": 2.090142102076825e-06,
"loss": 3.3011,
"step": 7770
},
{
"epoch": 0.8,
"learning_rate": 2.0799808603152737e-06,
"loss": 3.3006,
"step": 7775
},
{
"epoch": 0.8,
"learning_rate": 2.069841510587821e-06,
"loss": 3.2864,
"step": 7780
},
{
"epoch": 0.8,
"learning_rate": 2.0597240809210404e-06,
"loss": 3.2518,
"step": 7785
},
{
"epoch": 0.8,
"learning_rate": 2.0496285992809163e-06,
"loss": 3.2973,
"step": 7790
},
{
"epoch": 0.8,
"learning_rate": 2.039555093572757e-06,
"loss": 3.313,
"step": 7795
},
{
"epoch": 0.8,
"learning_rate": 2.0295035916411377e-06,
"loss": 3.2947,
"step": 7800
},
{
"epoch": 0.8,
"learning_rate": 2.0194741212698066e-06,
"loss": 3.2519,
"step": 7805
},
{
"epoch": 0.8,
"learning_rate": 2.0094667101816133e-06,
"loss": 3.3003,
"step": 7810
},
{
"epoch": 0.8,
"learning_rate": 1.9994813860384342e-06,
"loss": 3.3083,
"step": 7815
},
{
"epoch": 0.8,
"learning_rate": 1.989518176441094e-06,
"loss": 3.2958,
"step": 7820
},
{
"epoch": 0.8,
"learning_rate": 1.9795771089292913e-06,
"loss": 3.2794,
"step": 7825
},
{
"epoch": 0.8,
"learning_rate": 1.9696582109815145e-06,
"loss": 3.2749,
"step": 7830
},
{
"epoch": 0.8,
"learning_rate": 1.959761510014979e-06,
"loss": 3.276,
"step": 7835
},
{
"epoch": 0.8,
"learning_rate": 1.9498870333855436e-06,
"loss": 3.3054,
"step": 7840
},
{
"epoch": 0.81,
"learning_rate": 1.9400348083876308e-06,
"loss": 3.2713,
"step": 7845
},
{
"epoch": 0.81,
"learning_rate": 1.9302048622541635e-06,
"loss": 3.2601,
"step": 7850
},
{
"epoch": 0.81,
"learning_rate": 1.9203972221564772e-06,
"loss": 3.3,
"step": 7855
},
{
"epoch": 0.81,
"learning_rate": 1.9106119152042545e-06,
"loss": 3.2794,
"step": 7860
},
{
"epoch": 0.81,
"learning_rate": 1.9008489684454456e-06,
"loss": 3.2949,
"step": 7865
},
{
"epoch": 0.81,
"learning_rate": 1.8911084088661903e-06,
"loss": 3.262,
"step": 7870
},
{
"epoch": 0.81,
"learning_rate": 1.8813902633907499e-06,
"loss": 3.3085,
"step": 7875
},
{
"epoch": 0.81,
"learning_rate": 1.8716945588814339e-06,
"loss": 3.3054,
"step": 7880
},
{
"epoch": 0.81,
"learning_rate": 1.8620213221385108e-06,
"loss": 3.282,
"step": 7885
},
{
"epoch": 0.81,
"learning_rate": 1.8523705799001556e-06,
"loss": 3.2407,
"step": 7890
},
{
"epoch": 0.81,
"learning_rate": 1.842742358842362e-06,
"loss": 3.3074,
"step": 7895
},
{
"epoch": 0.81,
"learning_rate": 1.8331366855788702e-06,
"loss": 3.2899,
"step": 7900
},
{
"epoch": 0.81,
"learning_rate": 1.8235535866610975e-06,
"loss": 3.2802,
"step": 7905
},
{
"epoch": 0.81,
"learning_rate": 1.8139930885780621e-06,
"loss": 3.2734,
"step": 7910
},
{
"epoch": 0.81,
"learning_rate": 1.8044552177563101e-06,
"loss": 3.2506,
"step": 7915
},
{
"epoch": 0.81,
"learning_rate": 1.7949400005598416e-06,
"loss": 3.2716,
"step": 7920
},
{
"epoch": 0.81,
"learning_rate": 1.7854474632900431e-06,
"loss": 3.2922,
"step": 7925
},
{
"epoch": 0.81,
"learning_rate": 1.7759776321856014e-06,
"loss": 3.2956,
"step": 7930
},
{
"epoch": 0.81,
"learning_rate": 1.7665305334224514e-06,
"loss": 3.2613,
"step": 7935
},
{
"epoch": 0.82,
"learning_rate": 1.7571061931136845e-06,
"loss": 3.3031,
"step": 7940
},
{
"epoch": 0.82,
"learning_rate": 1.7477046373094908e-06,
"loss": 3.3053,
"step": 7945
},
{
"epoch": 0.82,
"learning_rate": 1.7383258919970746e-06,
"loss": 3.2591,
"step": 7950
},
{
"epoch": 0.82,
"learning_rate": 1.7289699831005946e-06,
"loss": 3.3021,
"step": 7955
},
{
"epoch": 0.82,
"learning_rate": 1.7196369364810816e-06,
"loss": 3.3034,
"step": 7960
},
{
"epoch": 0.82,
"learning_rate": 1.7103267779363786e-06,
"loss": 3.3135,
"step": 7965
},
{
"epoch": 0.82,
"learning_rate": 1.701039533201052e-06,
"loss": 3.2869,
"step": 7970
},
{
"epoch": 0.82,
"learning_rate": 1.6917752279463406e-06,
"loss": 3.307,
"step": 7975
},
{
"epoch": 0.82,
"learning_rate": 1.6825338877800712e-06,
"loss": 3.2935,
"step": 7980
},
{
"epoch": 0.82,
"learning_rate": 1.673315538246595e-06,
"loss": 3.2563,
"step": 7985
},
{
"epoch": 0.82,
"learning_rate": 1.6641202048267102e-06,
"loss": 3.3076,
"step": 7990
},
{
"epoch": 0.82,
"learning_rate": 1.6549479129375966e-06,
"loss": 3.3103,
"step": 7995
},
{
"epoch": 0.82,
"learning_rate": 1.6457986879327459e-06,
"loss": 3.2997,
"step": 8000
},
{
"epoch": 0.82,
"learning_rate": 1.6366725551018868e-06,
"loss": 3.2573,
"step": 8005
},
{
"epoch": 0.82,
"learning_rate": 1.6275695396709223e-06,
"loss": 3.2803,
"step": 8010
},
{
"epoch": 0.82,
"learning_rate": 1.618489666801848e-06,
"loss": 3.2775,
"step": 8015
},
{
"epoch": 0.82,
"learning_rate": 1.6094329615926974e-06,
"loss": 3.269,
"step": 8020
},
{
"epoch": 0.82,
"learning_rate": 1.6003994490774622e-06,
"loss": 3.2442,
"step": 8025
},
{
"epoch": 0.82,
"learning_rate": 1.5913891542260284e-06,
"loss": 3.3082,
"step": 8030
},
{
"epoch": 0.82,
"learning_rate": 1.5824021019441016e-06,
"loss": 3.244,
"step": 8035
},
{
"epoch": 0.83,
"learning_rate": 1.573438317073146e-06,
"loss": 3.3122,
"step": 8040
},
{
"epoch": 0.83,
"learning_rate": 1.5644978243903087e-06,
"loss": 3.299,
"step": 8045
},
{
"epoch": 0.83,
"learning_rate": 1.5555806486083559e-06,
"loss": 3.2703,
"step": 8050
},
{
"epoch": 0.83,
"learning_rate": 1.5466868143755975e-06,
"loss": 3.3111,
"step": 8055
},
{
"epoch": 0.83,
"learning_rate": 1.53781634627583e-06,
"loss": 3.2756,
"step": 8060
},
{
"epoch": 0.83,
"learning_rate": 1.528969268828261e-06,
"loss": 3.2836,
"step": 8065
},
{
"epoch": 0.83,
"learning_rate": 1.520145606487442e-06,
"loss": 3.2704,
"step": 8070
},
{
"epoch": 0.83,
"learning_rate": 1.5113453836432034e-06,
"loss": 3.2674,
"step": 8075
},
{
"epoch": 0.83,
"learning_rate": 1.502568624620584e-06,
"loss": 3.298,
"step": 8080
},
{
"epoch": 0.83,
"learning_rate": 1.4938153536797684e-06,
"loss": 3.2892,
"step": 8085
},
{
"epoch": 0.83,
"learning_rate": 1.4850855950160103e-06,
"loss": 3.2547,
"step": 8090
},
{
"epoch": 0.83,
"learning_rate": 1.4763793727595788e-06,
"loss": 3.2729,
"step": 8095
},
{
"epoch": 0.83,
"learning_rate": 1.4676967109756823e-06,
"loss": 3.2964,
"step": 8100
},
{
"epoch": 0.83,
"learning_rate": 1.4590376336644086e-06,
"loss": 3.2885,
"step": 8105
},
{
"epoch": 0.83,
"learning_rate": 1.4504021647606448e-06,
"loss": 3.2628,
"step": 8110
},
{
"epoch": 0.83,
"learning_rate": 1.4417903281340306e-06,
"loss": 3.293,
"step": 8115
},
{
"epoch": 0.83,
"learning_rate": 1.4332021475888801e-06,
"loss": 3.2924,
"step": 8120
},
{
"epoch": 0.83,
"learning_rate": 1.4246376468641198e-06,
"loss": 3.2881,
"step": 8125
},
{
"epoch": 0.83,
"learning_rate": 1.4160968496332183e-06,
"loss": 3.3112,
"step": 8130
},
{
"epoch": 0.84,
"learning_rate": 1.4075797795041279e-06,
"loss": 3.2929,
"step": 8135
},
{
"epoch": 0.84,
"learning_rate": 1.3990864600192133e-06,
"loss": 3.2537,
"step": 8140
},
{
"epoch": 0.84,
"learning_rate": 1.390616914655195e-06,
"loss": 3.2784,
"step": 8145
},
{
"epoch": 0.84,
"learning_rate": 1.3821711668230675e-06,
"loss": 3.2776,
"step": 8150
},
{
"epoch": 0.84,
"learning_rate": 1.3737492398680551e-06,
"loss": 3.2766,
"step": 8155
},
{
"epoch": 0.84,
"learning_rate": 1.3653511570695355e-06,
"loss": 3.2719,
"step": 8160
},
{
"epoch": 0.84,
"learning_rate": 1.356976941640976e-06,
"loss": 3.289,
"step": 8165
},
{
"epoch": 0.84,
"learning_rate": 1.3486266167298733e-06,
"loss": 3.2855,
"step": 8170
},
{
"epoch": 0.84,
"learning_rate": 1.340300205417686e-06,
"loss": 3.3072,
"step": 8175
},
{
"epoch": 0.84,
"learning_rate": 1.331997730719773e-06,
"loss": 3.2851,
"step": 8180
},
{
"epoch": 0.84,
"learning_rate": 1.3237192155853284e-06,
"loss": 3.2476,
"step": 8185
},
{
"epoch": 0.84,
"learning_rate": 1.3154646828973217e-06,
"loss": 3.2732,
"step": 8190
},
{
"epoch": 0.84,
"learning_rate": 1.3072341554724232e-06,
"loss": 3.3164,
"step": 8195
},
{
"epoch": 0.84,
"learning_rate": 1.2990276560609594e-06,
"loss": 3.2812,
"step": 8200
},
{
"epoch": 0.84,
"learning_rate": 1.2908452073468348e-06,
"loss": 3.2912,
"step": 8205
},
{
"epoch": 0.84,
"learning_rate": 1.282686831947474e-06,
"loss": 3.2888,
"step": 8210
},
{
"epoch": 0.84,
"learning_rate": 1.2745525524137626e-06,
"loss": 3.2631,
"step": 8215
},
{
"epoch": 0.84,
"learning_rate": 1.2664423912299807e-06,
"loss": 3.307,
"step": 8220
},
{
"epoch": 0.84,
"learning_rate": 1.2583563708137393e-06,
"loss": 3.2872,
"step": 8225
},
{
"epoch": 0.84,
"learning_rate": 1.2502945135159272e-06,
"loss": 3.2584,
"step": 8230
},
{
"epoch": 0.85,
"learning_rate": 1.2422568416206337e-06,
"loss": 3.2873,
"step": 8235
},
{
"epoch": 0.85,
"learning_rate": 1.2342433773451036e-06,
"loss": 3.2726,
"step": 8240
},
{
"epoch": 0.85,
"learning_rate": 1.2262541428396668e-06,
"loss": 3.2613,
"step": 8245
},
{
"epoch": 0.85,
"learning_rate": 1.2182891601876778e-06,
"loss": 3.2725,
"step": 8250
},
{
"epoch": 0.85,
"learning_rate": 1.2103484514054564e-06,
"loss": 3.2914,
"step": 8255
},
{
"epoch": 0.85,
"learning_rate": 1.202432038442226e-06,
"loss": 3.3065,
"step": 8260
},
{
"epoch": 0.85,
"learning_rate": 1.194539943180052e-06,
"loss": 3.315,
"step": 8265
},
{
"epoch": 0.85,
"learning_rate": 1.1866721874337827e-06,
"loss": 3.2763,
"step": 8270
},
{
"epoch": 0.85,
"learning_rate": 1.1788287929509924e-06,
"loss": 3.2773,
"step": 8275
},
{
"epoch": 0.85,
"learning_rate": 1.1710097814119093e-06,
"loss": 3.2957,
"step": 8280
},
{
"epoch": 0.85,
"learning_rate": 1.1632151744293707e-06,
"loss": 3.273,
"step": 8285
},
{
"epoch": 0.85,
"learning_rate": 1.1554449935487533e-06,
"loss": 3.2953,
"step": 8290
},
{
"epoch": 0.85,
"learning_rate": 1.1476992602479175e-06,
"loss": 3.2936,
"step": 8295
},
{
"epoch": 0.85,
"learning_rate": 1.139977995937147e-06,
"loss": 3.2597,
"step": 8300
},
{
"epoch": 0.85,
"learning_rate": 1.1322812219590917e-06,
"loss": 3.2854,
"step": 8305
},
{
"epoch": 0.85,
"learning_rate": 1.1246089595887023e-06,
"loss": 3.2469,
"step": 8310
},
{
"epoch": 0.85,
"learning_rate": 1.1169612300331834e-06,
"loss": 3.2948,
"step": 8315
},
{
"epoch": 0.85,
"learning_rate": 1.1093380544319166e-06,
"loss": 3.2602,
"step": 8320
},
{
"epoch": 0.85,
"learning_rate": 1.101739453856423e-06,
"loss": 3.2798,
"step": 8325
},
{
"epoch": 0.86,
"learning_rate": 1.0941654493102893e-06,
"loss": 3.2818,
"step": 8330
},
{
"epoch": 0.86,
"learning_rate": 1.0866160617291188e-06,
"loss": 3.2841,
"step": 8335
},
{
"epoch": 0.86,
"learning_rate": 1.0790913119804691e-06,
"loss": 3.2983,
"step": 8340
},
{
"epoch": 0.86,
"learning_rate": 1.0715912208637925e-06,
"loss": 3.2592,
"step": 8345
},
{
"epoch": 0.86,
"learning_rate": 1.0641158091103832e-06,
"loss": 3.3059,
"step": 8350
},
{
"epoch": 0.86,
"learning_rate": 1.0566650973833204e-06,
"loss": 3.2818,
"step": 8355
},
{
"epoch": 0.86,
"learning_rate": 1.0492391062774076e-06,
"loss": 3.248,
"step": 8360
},
{
"epoch": 0.86,
"learning_rate": 1.0418378563191157e-06,
"loss": 3.2681,
"step": 8365
},
{
"epoch": 0.86,
"learning_rate": 1.0344613679665306e-06,
"loss": 3.2611,
"step": 8370
},
{
"epoch": 0.86,
"learning_rate": 1.027109661609288e-06,
"loss": 3.2629,
"step": 8375
},
{
"epoch": 0.86,
"learning_rate": 1.019782757568528e-06,
"loss": 3.2744,
"step": 8380
},
{
"epoch": 0.86,
"learning_rate": 1.0124806760968341e-06,
"loss": 3.3047,
"step": 8385
},
{
"epoch": 0.86,
"learning_rate": 1.0052034373781716e-06,
"loss": 3.277,
"step": 8390
},
{
"epoch": 0.86,
"learning_rate": 9.979510615278432e-07,
"loss": 3.292,
"step": 8395
},
{
"epoch": 0.86,
"learning_rate": 9.90723568592422e-07,
"loss": 3.2525,
"step": 8400
},
{
"epoch": 0.86,
"learning_rate": 9.835209785497045e-07,
"loss": 3.2571,
"step": 8405
},
{
"epoch": 0.86,
"learning_rate": 9.763433113086528e-07,
"loss": 3.2751,
"step": 8410
},
{
"epoch": 0.86,
"learning_rate": 9.69190586709332e-07,
"loss": 3.2489,
"step": 8415
},
{
"epoch": 0.86,
"learning_rate": 9.620628245228714e-07,
"loss": 3.2893,
"step": 8420
},
{
"epoch": 0.86,
"learning_rate": 9.549600444513952e-07,
"loss": 3.3105,
"step": 8425
},
{
"epoch": 0.87,
"learning_rate": 9.478822661279763e-07,
"loss": 3.2897,
"step": 8430
},
{
"epoch": 0.87,
"learning_rate": 9.408295091165765e-07,
"loss": 3.2818,
"step": 8435
},
{
"epoch": 0.87,
"learning_rate": 9.33801792911998e-07,
"loss": 3.2626,
"step": 8440
},
{
"epoch": 0.87,
"learning_rate": 9.267991369398255e-07,
"loss": 3.3027,
"step": 8445
},
{
"epoch": 0.87,
"learning_rate": 9.198215605563732e-07,
"loss": 3.3029,
"step": 8450
},
{
"epoch": 0.87,
"learning_rate": 9.128690830486341e-07,
"loss": 3.2613,
"step": 8455
},
{
"epoch": 0.87,
"learning_rate": 9.059417236342194e-07,
"loss": 3.2515,
"step": 8460
},
{
"epoch": 0.87,
"learning_rate": 8.990395014613163e-07,
"loss": 3.2854,
"step": 8465
},
{
"epoch": 0.87,
"learning_rate": 8.921624356086256e-07,
"loss": 3.2896,
"step": 8470
},
{
"epoch": 0.87,
"learning_rate": 8.853105450853128e-07,
"loss": 3.2407,
"step": 8475
},
{
"epoch": 0.87,
"learning_rate": 8.784838488309577e-07,
"loss": 3.2668,
"step": 8480
},
{
"epoch": 0.87,
"learning_rate": 8.716823657154971e-07,
"loss": 3.2732,
"step": 8485
},
{
"epoch": 0.87,
"learning_rate": 8.649061145391758e-07,
"loss": 3.253,
"step": 8490
},
{
"epoch": 0.87,
"learning_rate": 8.581551140324962e-07,
"loss": 3.2737,
"step": 8495
},
{
"epoch": 0.87,
"learning_rate": 8.514293828561593e-07,
"loss": 3.2807,
"step": 8500
},
{
"epoch": 0.87,
"learning_rate": 8.447289396010228e-07,
"loss": 3.2653,
"step": 8505
},
{
"epoch": 0.87,
"learning_rate": 8.380538027880425e-07,
"loss": 3.2816,
"step": 8510
},
{
"epoch": 0.87,
"learning_rate": 8.314039908682247e-07,
"loss": 3.2654,
"step": 8515
},
{
"epoch": 0.87,
"learning_rate": 8.247795222225763e-07,
"loss": 3.326,
"step": 8520
},
{
"epoch": 0.88,
"learning_rate": 8.181804151620465e-07,
"loss": 3.2938,
"step": 8525
},
{
"epoch": 0.88,
"learning_rate": 8.116066879274875e-07,
"loss": 3.2424,
"step": 8530
},
{
"epoch": 0.88,
"learning_rate": 8.050583586895944e-07,
"loss": 3.2997,
"step": 8535
},
{
"epoch": 0.88,
"learning_rate": 7.985354455488615e-07,
"loss": 3.3034,
"step": 8540
},
{
"epoch": 0.88,
"learning_rate": 7.920379665355237e-07,
"loss": 3.283,
"step": 8545
},
{
"epoch": 0.88,
"learning_rate": 7.855659396095183e-07,
"loss": 3.2677,
"step": 8550
},
{
"epoch": 0.88,
"learning_rate": 7.791193826604282e-07,
"loss": 3.2842,
"step": 8555
},
{
"epoch": 0.88,
"learning_rate": 7.726983135074328e-07,
"loss": 3.3262,
"step": 8560
},
{
"epoch": 0.88,
"learning_rate": 7.663027498992592e-07,
"loss": 3.2995,
"step": 8565
},
{
"epoch": 0.88,
"learning_rate": 7.599327095141363e-07,
"loss": 3.2576,
"step": 8570
},
{
"epoch": 0.88,
"learning_rate": 7.535882099597391e-07,
"loss": 3.2752,
"step": 8575
},
{
"epoch": 0.88,
"learning_rate": 7.472692687731498e-07,
"loss": 3.2643,
"step": 8580
},
{
"epoch": 0.88,
"learning_rate": 7.409759034207975e-07,
"loss": 3.2734,
"step": 8585
},
{
"epoch": 0.88,
"learning_rate": 7.347081312984194e-07,
"loss": 3.2759,
"step": 8590
},
{
"epoch": 0.88,
"learning_rate": 7.284659697310104e-07,
"loss": 3.2535,
"step": 8595
},
{
"epoch": 0.88,
"learning_rate": 7.222494359727716e-07,
"loss": 3.2791,
"step": 8600
},
{
"epoch": 0.88,
"learning_rate": 7.160585472070669e-07,
"loss": 3.2831,
"step": 8605
},
{
"epoch": 0.88,
"learning_rate": 7.098933205463742e-07,
"loss": 3.2818,
"step": 8610
},
{
"epoch": 0.88,
"learning_rate": 7.037537730322363e-07,
"loss": 3.2748,
"step": 8615
},
{
"epoch": 0.88,
"learning_rate": 6.97639921635217e-07,
"loss": 3.2676,
"step": 8620
},
{
"epoch": 0.89,
"learning_rate": 6.915517832548524e-07,
"loss": 3.3018,
"step": 8625
},
{
"epoch": 0.89,
"learning_rate": 6.854893747196034e-07,
"loss": 3.2746,
"step": 8630
},
{
"epoch": 0.89,
"learning_rate": 6.794527127868078e-07,
"loss": 3.3115,
"step": 8635
},
{
"epoch": 0.89,
"learning_rate": 6.734418141426391e-07,
"loss": 3.2836,
"step": 8640
},
{
"epoch": 0.89,
"learning_rate": 6.674566954020589e-07,
"loss": 3.2557,
"step": 8645
},
{
"epoch": 0.89,
"learning_rate": 6.61497373108766e-07,
"loss": 3.2776,
"step": 8650
},
{
"epoch": 0.89,
"learning_rate": 6.55563863735157e-07,
"loss": 3.2634,
"step": 8655
},
{
"epoch": 0.89,
"learning_rate": 6.496561836822745e-07,
"loss": 3.2757,
"step": 8660
},
{
"epoch": 0.89,
"learning_rate": 6.43774349279771e-07,
"loss": 3.2799,
"step": 8665
},
{
"epoch": 0.89,
"learning_rate": 6.37918376785851e-07,
"loss": 3.2686,
"step": 8670
},
{
"epoch": 0.89,
"learning_rate": 6.320882823872409e-07,
"loss": 3.2869,
"step": 8675
},
{
"epoch": 0.89,
"learning_rate": 6.262840821991278e-07,
"loss": 3.2576,
"step": 8680
},
{
"epoch": 0.89,
"learning_rate": 6.205057922651303e-07,
"loss": 3.3013,
"step": 8685
},
{
"epoch": 0.89,
"learning_rate": 6.147534285572443e-07,
"loss": 3.2858,
"step": 8690
},
{
"epoch": 0.89,
"learning_rate": 6.090270069758042e-07,
"loss": 3.3093,
"step": 8695
},
{
"epoch": 0.89,
"learning_rate": 6.03326543349434e-07,
"loss": 3.2905,
"step": 8700
},
{
"epoch": 0.89,
"learning_rate": 5.976520534350094e-07,
"loss": 3.298,
"step": 8705
},
{
"epoch": 0.89,
"learning_rate": 5.920035529176082e-07,
"loss": 3.2839,
"step": 8710
},
{
"epoch": 0.89,
"learning_rate": 5.863810574104723e-07,
"loss": 3.2714,
"step": 8715
},
{
"epoch": 0.9,
"learning_rate": 5.807845824549596e-07,
"loss": 3.2802,
"step": 8720
},
{
"epoch": 0.9,
"learning_rate": 5.75214143520505e-07,
"loss": 3.2793,
"step": 8725
},
{
"epoch": 0.9,
"learning_rate": 5.696697560045772e-07,
"loss": 3.2689,
"step": 8730
},
{
"epoch": 0.9,
"learning_rate": 5.641514352326338e-07,
"loss": 3.2587,
"step": 8735
},
{
"epoch": 0.9,
"learning_rate": 5.586591964580812e-07,
"loss": 3.274,
"step": 8740
},
{
"epoch": 0.9,
"learning_rate": 5.531930548622311e-07,
"loss": 3.284,
"step": 8745
},
{
"epoch": 0.9,
"learning_rate": 5.477530255542573e-07,
"loss": 3.2707,
"step": 8750
},
{
"epoch": 0.9,
"learning_rate": 5.423391235711584e-07,
"loss": 3.2782,
"step": 8755
},
{
"epoch": 0.9,
"learning_rate": 5.369513638777147e-07,
"loss": 3.2873,
"step": 8760
},
{
"epoch": 0.9,
"learning_rate": 5.315897613664378e-07,
"loss": 3.2874,
"step": 8765
},
{
"epoch": 0.9,
"learning_rate": 5.262543308575451e-07,
"loss": 3.2797,
"step": 8770
},
{
"epoch": 0.9,
"learning_rate": 5.209450870989086e-07,
"loss": 3.3091,
"step": 8775
},
{
"epoch": 0.9,
"learning_rate": 5.156620447660165e-07,
"loss": 3.2891,
"step": 8780
},
{
"epoch": 0.9,
"learning_rate": 5.104052184619324e-07,
"loss": 3.295,
"step": 8785
},
{
"epoch": 0.9,
"learning_rate": 5.051746227172538e-07,
"loss": 3.2895,
"step": 8790
},
{
"epoch": 0.9,
"learning_rate": 4.999702719900767e-07,
"loss": 3.2923,
"step": 8795
},
{
"epoch": 0.9,
"learning_rate": 4.947921806659495e-07,
"loss": 3.3033,
"step": 8800
},
{
"epoch": 0.9,
"learning_rate": 4.89640363057834e-07,
"loss": 3.2588,
"step": 8805
},
{
"epoch": 0.9,
"learning_rate": 4.845148334060734e-07,
"loss": 3.2688,
"step": 8810
},
{
"epoch": 0.9,
"learning_rate": 4.794156058783428e-07,
"loss": 3.3199,
"step": 8815
},
{
"epoch": 0.91,
"learning_rate": 4.7434269456961725e-07,
"loss": 3.2976,
"step": 8820
},
{
"epoch": 0.91,
"learning_rate": 4.692961135021268e-07,
"loss": 3.2956,
"step": 8825
},
{
"epoch": 0.91,
"learning_rate": 4.6427587662532636e-07,
"loss": 3.2688,
"step": 8830
},
{
"epoch": 0.91,
"learning_rate": 4.5928199781584584e-07,
"loss": 3.3109,
"step": 8835
},
{
"epoch": 0.91,
"learning_rate": 4.5431449087746216e-07,
"loss": 3.2951,
"step": 8840
},
{
"epoch": 0.91,
"learning_rate": 4.4937336954105516e-07,
"loss": 3.243,
"step": 8845
},
{
"epoch": 0.91,
"learning_rate": 4.444586474645707e-07,
"loss": 3.2626,
"step": 8850
},
{
"epoch": 0.91,
"learning_rate": 4.395703382329852e-07,
"loss": 3.2786,
"step": 8855
},
{
"epoch": 0.91,
"learning_rate": 4.3470845535826255e-07,
"loss": 3.2755,
"step": 8860
},
{
"epoch": 0.91,
"learning_rate": 4.2987301227932377e-07,
"loss": 3.2818,
"step": 8865
},
{
"epoch": 0.91,
"learning_rate": 4.2506402236200616e-07,
"loss": 3.2512,
"step": 8870
},
{
"epoch": 0.91,
"learning_rate": 4.202814988990278e-07,
"loss": 3.2715,
"step": 8875
},
{
"epoch": 0.91,
"learning_rate": 4.1552545510994746e-07,
"loss": 3.2619,
"step": 8880
},
{
"epoch": 0.91,
"learning_rate": 4.107959041411314e-07,
"loss": 3.2924,
"step": 8885
},
{
"epoch": 0.91,
"learning_rate": 4.0609285906571536e-07,
"loss": 3.2535,
"step": 8890
},
{
"epoch": 0.91,
"learning_rate": 4.01416332883573e-07,
"loss": 3.2827,
"step": 8895
},
{
"epoch": 0.91,
"learning_rate": 3.9676633852126834e-07,
"loss": 3.2735,
"step": 8900
},
{
"epoch": 0.91,
"learning_rate": 3.9214288883203444e-07,
"loss": 3.2674,
"step": 8905
},
{
"epoch": 0.91,
"learning_rate": 3.875459965957307e-07,
"loss": 3.2839,
"step": 8910
},
{
"epoch": 0.92,
"learning_rate": 3.829756745188029e-07,
"loss": 3.2896,
"step": 8915
},
{
"epoch": 0.92,
"learning_rate": 3.7843193523426026e-07,
"loss": 3.3003,
"step": 8920
},
{
"epoch": 0.92,
"learning_rate": 3.7391479130162833e-07,
"loss": 3.2831,
"step": 8925
},
{
"epoch": 0.92,
"learning_rate": 3.6942425520692047e-07,
"loss": 3.2677,
"step": 8930
},
{
"epoch": 0.92,
"learning_rate": 3.6496033936260334e-07,
"loss": 3.2695,
"step": 8935
},
{
"epoch": 0.92,
"learning_rate": 3.6052305610756235e-07,
"loss": 3.2717,
"step": 8940
},
{
"epoch": 0.92,
"learning_rate": 3.5611241770706293e-07,
"loss": 3.2862,
"step": 8945
},
{
"epoch": 0.92,
"learning_rate": 3.5172843635272403e-07,
"loss": 3.2815,
"step": 8950
},
{
"epoch": 0.92,
"learning_rate": 3.473711241624789e-07,
"loss": 3.3103,
"step": 8955
},
{
"epoch": 0.92,
"learning_rate": 3.430404931805464e-07,
"loss": 3.2718,
"step": 8960
},
{
"epoch": 0.92,
"learning_rate": 3.3873655537739026e-07,
"loss": 3.263,
"step": 8965
},
{
"epoch": 0.92,
"learning_rate": 3.3445932264969504e-07,
"loss": 3.2483,
"step": 8970
},
{
"epoch": 0.92,
"learning_rate": 3.302088068203246e-07,
"loss": 3.2871,
"step": 8975
},
{
"epoch": 0.92,
"learning_rate": 3.259850196382985e-07,
"loss": 3.294,
"step": 8980
},
{
"epoch": 0.92,
"learning_rate": 3.217879727787487e-07,
"loss": 3.2767,
"step": 8985
},
{
"epoch": 0.92,
"learning_rate": 3.176176778428974e-07,
"loss": 3.2855,
"step": 8990
},
{
"epoch": 0.92,
"learning_rate": 3.134741463580204e-07,
"loss": 3.2995,
"step": 8995
},
{
"epoch": 0.92,
"learning_rate": 3.093573897774149e-07,
"loss": 3.2776,
"step": 9000
},
{
"epoch": 0.92,
"learning_rate": 3.052674194803662e-07,
"loss": 3.2495,
"step": 9005
},
{
"epoch": 0.92,
"learning_rate": 3.0120424677212434e-07,
"loss": 3.2901,
"step": 9010
},
{
"epoch": 0.93,
"learning_rate": 2.971678828838609e-07,
"loss": 3.2924,
"step": 9015
},
{
"epoch": 0.93,
"learning_rate": 2.931583389726478e-07,
"loss": 3.2595,
"step": 9020
},
{
"epoch": 0.93,
"learning_rate": 2.8917562612142293e-07,
"loss": 3.2705,
"step": 9025
},
{
"epoch": 0.93,
"learning_rate": 2.852197553389568e-07,
"loss": 3.2435,
"step": 9030
},
{
"epoch": 0.93,
"learning_rate": 2.812907375598273e-07,
"loss": 3.3048,
"step": 9035
},
{
"epoch": 0.93,
"learning_rate": 2.7738858364438457e-07,
"loss": 3.2694,
"step": 9040
},
{
"epoch": 0.93,
"learning_rate": 2.7351330437872525e-07,
"loss": 3.2592,
"step": 9045
},
{
"epoch": 0.93,
"learning_rate": 2.696649104746607e-07,
"loss": 3.2565,
"step": 9050
},
{
"epoch": 0.93,
"learning_rate": 2.6584341256968624e-07,
"loss": 3.275,
"step": 9055
},
{
"epoch": 0.93,
"learning_rate": 2.6204882122695343e-07,
"loss": 3.268,
"step": 9060
},
{
"epoch": 0.93,
"learning_rate": 2.58281146935242e-07,
"loss": 3.2635,
"step": 9065
},
{
"epoch": 0.93,
"learning_rate": 2.5454040010892354e-07,
"loss": 3.2905,
"step": 9070
},
{
"epoch": 0.93,
"learning_rate": 2.5082659108794595e-07,
"loss": 3.3049,
"step": 9075
},
{
"epoch": 0.93,
"learning_rate": 2.4713973013779204e-07,
"loss": 3.2745,
"step": 9080
},
{
"epoch": 0.93,
"learning_rate": 2.434798274494587e-07,
"loss": 3.2704,
"step": 9085
},
{
"epoch": 0.93,
"learning_rate": 2.398468931394249e-07,
"loss": 3.2951,
"step": 9090
},
{
"epoch": 0.93,
"learning_rate": 2.3624093724962883e-07,
"loss": 3.2698,
"step": 9095
},
{
"epoch": 0.93,
"learning_rate": 2.3266196974743084e-07,
"loss": 3.2305,
"step": 9100
},
{
"epoch": 0.93,
"learning_rate": 2.291100005255964e-07,
"loss": 3.2776,
"step": 9105
},
{
"epoch": 0.94,
"learning_rate": 2.2558503940226296e-07,
"loss": 3.2877,
"step": 9110
},
{
"epoch": 0.94,
"learning_rate": 2.2208709612091096e-07,
"loss": 3.2611,
"step": 9115
},
{
"epoch": 0.94,
"learning_rate": 2.1861618035034394e-07,
"loss": 3.2604,
"step": 9120
},
{
"epoch": 0.94,
"learning_rate": 2.1517230168465408e-07,
"loss": 3.2839,
"step": 9125
},
{
"epoch": 0.94,
"learning_rate": 2.1175546964320226e-07,
"loss": 3.3282,
"step": 9130
},
{
"epoch": 0.94,
"learning_rate": 2.083656936705858e-07,
"loss": 3.2492,
"step": 9135
},
{
"epoch": 0.94,
"learning_rate": 2.050029831366185e-07,
"loss": 3.2816,
"step": 9140
},
{
"epoch": 0.94,
"learning_rate": 2.0166734733629843e-07,
"loss": 3.2894,
"step": 9145
},
{
"epoch": 0.94,
"learning_rate": 1.983587954897881e-07,
"loss": 3.2791,
"step": 9150
},
{
"epoch": 0.94,
"learning_rate": 1.9507733674238305e-07,
"loss": 3.265,
"step": 9155
},
{
"epoch": 0.94,
"learning_rate": 1.918229801644944e-07,
"loss": 3.2694,
"step": 9160
},
{
"epoch": 0.94,
"learning_rate": 1.885957347516132e-07,
"loss": 3.2654,
"step": 9165
},
{
"epoch": 0.94,
"learning_rate": 1.8539560942429592e-07,
"loss": 3.301,
"step": 9170
},
{
"epoch": 0.94,
"learning_rate": 1.822226130281335e-07,
"loss": 3.2797,
"step": 9175
},
{
"epoch": 0.94,
"learning_rate": 1.7907675433372907e-07,
"loss": 3.2806,
"step": 9180
},
{
"epoch": 0.94,
"learning_rate": 1.7595804203667355e-07,
"loss": 3.3021,
"step": 9185
},
{
"epoch": 0.94,
"learning_rate": 1.7286648475752122e-07,
"loss": 3.2882,
"step": 9190
},
{
"epoch": 0.94,
"learning_rate": 1.6980209104176747e-07,
"loss": 3.2911,
"step": 9195
},
{
"epoch": 0.94,
"learning_rate": 1.6676486935982116e-07,
"loss": 3.2956,
"step": 9200
},
{
"epoch": 0.94,
"learning_rate": 1.6375482810698673e-07,
"loss": 3.2737,
"step": 9205
},
{
"epoch": 0.95,
"learning_rate": 1.6077197560343537e-07,
"loss": 3.2746,
"step": 9210
},
{
"epoch": 0.95,
"learning_rate": 1.5781632009418513e-07,
"loss": 3.2838,
"step": 9215
},
{
"epoch": 0.95,
"learning_rate": 1.5488786974908188e-07,
"loss": 3.3016,
"step": 9220
},
{
"epoch": 0.95,
"learning_rate": 1.5198663266276724e-07,
"loss": 3.286,
"step": 9225
},
{
"epoch": 0.95,
"learning_rate": 1.4911261685466416e-07,
"loss": 3.2825,
"step": 9230
},
{
"epoch": 0.95,
"learning_rate": 1.4626583026895235e-07,
"loss": 3.2414,
"step": 9235
},
{
"epoch": 0.95,
"learning_rate": 1.4344628077454626e-07,
"loss": 3.2936,
"step": 9240
},
{
"epoch": 0.95,
"learning_rate": 1.406539761650727e-07,
"loss": 3.2469,
"step": 9245
},
{
"epoch": 0.95,
"learning_rate": 1.3788892415884881e-07,
"loss": 3.2852,
"step": 9250
},
{
"epoch": 0.95,
"learning_rate": 1.3515113239886302e-07,
"loss": 3.283,
"step": 9255
},
{
"epoch": 0.95,
"learning_rate": 1.3244060845275298e-07,
"loss": 3.259,
"step": 9260
},
{
"epoch": 0.95,
"learning_rate": 1.2975735981278327e-07,
"loss": 3.2594,
"step": 9265
},
{
"epoch": 0.95,
"learning_rate": 1.2710139389582654e-07,
"loss": 3.25,
"step": 9270
},
{
"epoch": 0.95,
"learning_rate": 1.2447271804334137e-07,
"loss": 3.2664,
"step": 9275
},
{
"epoch": 0.95,
"learning_rate": 1.2187133952135445e-07,
"loss": 3.2906,
"step": 9280
},
{
"epoch": 0.95,
"learning_rate": 1.1929726552043607e-07,
"loss": 3.2723,
"step": 9285
},
{
"epoch": 0.95,
"learning_rate": 1.1675050315568703e-07,
"loss": 3.2456,
"step": 9290
},
{
"epoch": 0.95,
"learning_rate": 1.1423105946671064e-07,
"loss": 3.2726,
"step": 9295
},
{
"epoch": 0.95,
"learning_rate": 1.1173894141759955e-07,
"loss": 3.2828,
"step": 9300
},
{
"epoch": 0.96,
"learning_rate": 1.092741558969146e-07,
"loss": 3.2799,
"step": 9305
},
{
"epoch": 0.96,
"learning_rate": 1.068367097176659e-07,
"loss": 3.2998,
"step": 9310
},
{
"epoch": 0.96,
"learning_rate": 1.0442660961729523e-07,
"loss": 3.296,
"step": 9315
},
{
"epoch": 0.96,
"learning_rate": 1.020438622576514e-07,
"loss": 3.2948,
"step": 9320
},
{
"epoch": 0.96,
"learning_rate": 9.968847422498152e-08,
"loss": 3.2566,
"step": 9325
},
{
"epoch": 0.96,
"learning_rate": 9.736045202990651e-08,
"loss": 3.2575,
"step": 9330
},
{
"epoch": 0.96,
"learning_rate": 9.505980210740007e-08,
"loss": 3.273,
"step": 9335
},
{
"epoch": 0.96,
"learning_rate": 9.278653081678079e-08,
"loss": 3.2977,
"step": 9340
},
{
"epoch": 0.96,
"learning_rate": 9.054064444168564e-08,
"loss": 3.2649,
"step": 9345
},
{
"epoch": 0.96,
"learning_rate": 8.832214919005877e-08,
"loss": 3.2967,
"step": 9350
},
{
"epoch": 0.96,
"learning_rate": 8.613105119412712e-08,
"loss": 3.3035,
"step": 9355
},
{
"epoch": 0.96,
"learning_rate": 8.396735651039046e-08,
"loss": 3.2843,
"step": 9360
},
{
"epoch": 0.96,
"learning_rate": 8.183107111960353e-08,
"loss": 3.2812,
"step": 9365
},
{
"epoch": 0.96,
"learning_rate": 7.97222009267551e-08,
"loss": 3.2732,
"step": 9370
},
{
"epoch": 0.96,
"learning_rate": 7.764075176105445e-08,
"loss": 3.2867,
"step": 9375
},
{
"epoch": 0.96,
"learning_rate": 7.558672937591937e-08,
"loss": 3.2719,
"step": 9380
},
{
"epoch": 0.96,
"learning_rate": 7.356013944895046e-08,
"loss": 3.279,
"step": 9385
},
{
"epoch": 0.96,
"learning_rate": 7.156098758192453e-08,
"loss": 3.2891,
"step": 9390
},
{
"epoch": 0.96,
"learning_rate": 6.958927930077685e-08,
"loss": 3.2454,
"step": 9395
},
{
"epoch": 0.96,
"learning_rate": 6.764502005558115e-08,
"loss": 3.2557,
"step": 9400
},
{
"epoch": 0.97,
"learning_rate": 6.572821522054295e-08,
"loss": 3.2519,
"step": 9405
},
{
"epoch": 0.97,
"learning_rate": 6.383887009397515e-08,
"loss": 3.2945,
"step": 9410
},
{
"epoch": 0.97,
"learning_rate": 6.197698989829026e-08,
"loss": 3.2533,
"step": 9415
},
{
"epoch": 0.97,
"learning_rate": 6.014257977998594e-08,
"loss": 3.2706,
"step": 9420
},
{
"epoch": 0.97,
"learning_rate": 5.833564480962617e-08,
"loss": 3.2836,
"step": 9425
},
{
"epoch": 0.97,
"learning_rate": 5.655618998182899e-08,
"loss": 3.2431,
"step": 9430
},
{
"epoch": 0.97,
"learning_rate": 5.480422021525655e-08,
"loss": 3.2896,
"step": 9435
},
{
"epoch": 0.97,
"learning_rate": 5.307974035259511e-08,
"loss": 3.2848,
"step": 9440
},
{
"epoch": 0.97,
"learning_rate": 5.1382755160549466e-08,
"loss": 3.2868,
"step": 9445
},
{
"epoch": 0.97,
"learning_rate": 4.971326932981968e-08,
"loss": 3.2602,
"step": 9450
},
{
"epoch": 0.97,
"learning_rate": 4.807128747509882e-08,
"loss": 3.2547,
"step": 9455
},
{
"epoch": 0.97,
"learning_rate": 4.645681413505299e-08,
"loss": 3.289,
"step": 9460
},
{
"epoch": 0.97,
"learning_rate": 4.4869853772310235e-08,
"loss": 3.2911,
"step": 9465
},
{
"epoch": 0.97,
"learning_rate": 4.331041077344944e-08,
"loss": 3.2862,
"step": 9470
},
{
"epoch": 0.97,
"learning_rate": 4.177848944898699e-08,
"loss": 3.2793,
"step": 9475
},
{
"epoch": 0.97,
"learning_rate": 4.027409403336901e-08,
"loss": 3.2741,
"step": 9480
},
{
"epoch": 0.97,
"learning_rate": 3.8797228684952504e-08,
"loss": 3.2785,
"step": 9485
},
{
"epoch": 0.97,
"learning_rate": 3.734789748599754e-08,
"loss": 3.2867,
"step": 9490
},
{
"epoch": 0.97,
"learning_rate": 3.5926104442658426e-08,
"loss": 3.2456,
"step": 9495
},
{
"epoch": 0.98,
"learning_rate": 3.453185348496702e-08,
"loss": 3.2583,
"step": 9500
},
{
"epoch": 0.98,
"learning_rate": 3.316514846682939e-08,
"loss": 3.2788,
"step": 9505
},
{
"epoch": 0.98,
"learning_rate": 3.182599316600699e-08,
"loss": 3.2592,
"step": 9510
},
{
"epoch": 0.98,
"learning_rate": 3.0514391284111043e-08,
"loss": 3.2706,
"step": 9515
},
{
"epoch": 0.98,
"learning_rate": 2.9230346446591506e-08,
"loss": 3.259,
"step": 9520
},
{
"epoch": 0.98,
"learning_rate": 2.797386220272702e-08,
"loss": 3.2798,
"step": 9525
},
{
"epoch": 0.98,
"learning_rate": 2.674494202561384e-08,
"loss": 3.259,
"step": 9530
},
{
"epoch": 0.98,
"learning_rate": 2.5543589312160276e-08,
"loss": 3.2784,
"step": 9535
},
{
"epoch": 0.98,
"learning_rate": 2.4369807383071154e-08,
"loss": 3.2779,
"step": 9540
},
{
"epoch": 0.98,
"learning_rate": 2.3223599482842252e-08,
"loss": 3.2632,
"step": 9545
},
{
"epoch": 0.98,
"learning_rate": 2.2104968779752546e-08,
"loss": 3.2426,
"step": 9550
},
{
"epoch": 0.98,
"learning_rate": 2.1013918365851982e-08,
"loss": 3.263,
"step": 9555
},
{
"epoch": 0.98,
"learning_rate": 1.9950451256957048e-08,
"loss": 3.2811,
"step": 9560
},
{
"epoch": 0.98,
"learning_rate": 1.8914570392636332e-08,
"loss": 3.2586,
"step": 9565
},
{
"epoch": 0.98,
"learning_rate": 1.7906278636210527e-08,
"loss": 3.2595,
"step": 9570
},
{
"epoch": 0.98,
"learning_rate": 1.6925578774737994e-08,
"loss": 3.2839,
"step": 9575
},
{
"epoch": 0.98,
"learning_rate": 1.5972473519009212e-08,
"loss": 3.2604,
"step": 9580
},
{
"epoch": 0.98,
"learning_rate": 1.5046965503540124e-08,
"loss": 3.2561,
"step": 9585
},
{
"epoch": 0.98,
"learning_rate": 1.4149057286562128e-08,
"loss": 3.2574,
"step": 9590
},
{
"epoch": 0.98,
"learning_rate": 1.327875135001988e-08,
"loss": 3.2554,
"step": 9595
},
{
"epoch": 0.99,
"learning_rate": 1.243605009955906e-08,
"loss": 3.2622,
"step": 9600
},
{
"epoch": 0.99,
"learning_rate": 1.1620955864523053e-08,
"loss": 3.2632,
"step": 9605
},
{
"epoch": 0.99,
"learning_rate": 1.0833470897947396e-08,
"loss": 3.2642,
"step": 9610
},
{
"epoch": 0.99,
"learning_rate": 1.007359737654756e-08,
"loss": 3.284,
"step": 9615
},
{
"epoch": 0.99,
"learning_rate": 9.341337400721185e-09,
"loss": 3.2716,
"step": 9620
},
{
"epoch": 0.99,
"learning_rate": 8.636692994535845e-09,
"loss": 3.3286,
"step": 9625
},
{
"epoch": 0.99,
"learning_rate": 7.959666105727959e-09,
"loss": 3.2922,
"step": 9630
},
{
"epoch": 0.99,
"learning_rate": 7.310258605691678e-09,
"loss": 3.2543,
"step": 9635
},
{
"epoch": 0.99,
"learning_rate": 6.6884722894822174e-09,
"loss": 3.2923,
"step": 9640
},
{
"epoch": 0.99,
"learning_rate": 6.094308875801425e-09,
"loss": 3.2795,
"step": 9645
},
{
"epoch": 0.99,
"learning_rate": 5.52777000700111e-09,
"loss": 3.2784,
"step": 9650
},
{
"epoch": 0.99,
"learning_rate": 4.988857249071943e-09,
"loss": 3.242,
"step": 9655
},
{
"epoch": 0.99,
"learning_rate": 4.4775720916445665e-09,
"loss": 3.2848,
"step": 9660
},
{
"epoch": 0.99,
"learning_rate": 3.993915947982929e-09,
"loss": 3.2318,
"step": 9665
},
{
"epoch": 0.99,
"learning_rate": 3.53789015497763e-09,
"loss": 3.2863,
"step": 9670
},
{
"epoch": 0.99,
"learning_rate": 3.109495973150356e-09,
"loss": 3.2526,
"step": 9675
},
{
"epoch": 0.99,
"learning_rate": 2.7087345866394497e-09,
"loss": 3.2524,
"step": 9680
},
{
"epoch": 0.99,
"learning_rate": 2.335607103207682e-09,
"loss": 3.2777,
"step": 9685
},
{
"epoch": 0.99,
"learning_rate": 1.990114554228928e-09,
"loss": 3.278,
"step": 9690
},
{
"epoch": 1.0,
"learning_rate": 1.6722578946937184e-09,
"loss": 3.2891,
"step": 9695
},
{
"epoch": 1.0,
"learning_rate": 1.3820380032025794e-09,
"loss": 3.2557,
"step": 9700
},
{
"epoch": 1.0,
"learning_rate": 1.1194556819627e-09,
"loss": 3.2817,
"step": 9705
},
{
"epoch": 1.0,
"learning_rate": 8.845116567879342e-10,
"loss": 3.2412,
"step": 9710
},
{
"epoch": 1.0,
"learning_rate": 6.772065770976888e-10,
"loss": 3.2796,
"step": 9715
},
{
"epoch": 1.0,
"learning_rate": 4.975410159102634e-10,
"loss": 3.2618,
"step": 9720
},
{
"epoch": 1.0,
"learning_rate": 3.4551546984729067e-10,
"loss": 3.2715,
"step": 9725
},
{
"epoch": 1.0,
"learning_rate": 2.211303591292957e-10,
"loss": 3.3119,
"step": 9730
},
{
"epoch": 1.0,
"learning_rate": 1.2438602757125495e-10,
"loss": 3.2758,
"step": 9735
},
{
"epoch": 1.0,
"learning_rate": 5.528274259147814e-11,
"loss": 3.2601,
"step": 9740
},
{
"epoch": 1.0,
"step": 9741,
"total_flos": 2.4313337791056445e+18,
"train_loss": 3.456989575382033,
"train_runtime": 78610.9062,
"train_samples_per_second": 15.862,
"train_steps_per_second": 0.124
}
],
"max_steps": 9741,
"num_train_epochs": 1,
"total_flos": 2.4313337791056445e+18,
"trial_name": null,
"trial_params": null
}