{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9802476562891664, "global_step": 39500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.627, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6212, "step": 20 }, { "epoch": 0.0, "learning_rate": 3e-06, "loss": 0.5906, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 0.542, "step": 40 }, { "epoch": 0.0, "learning_rate": 5e-06, "loss": 0.5064, "step": 50 }, { "epoch": 0.0, "learning_rate": 6e-06, "loss": 0.4598, "step": 60 }, { "epoch": 0.0, "learning_rate": 7.000000000000001e-06, "loss": 0.3761, "step": 70 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-06, "loss": 0.3409, "step": 80 }, { "epoch": 0.0, "learning_rate": 9e-06, "loss": 0.3374, "step": 90 }, { "epoch": 0.01, "learning_rate": 1e-05, "loss": 0.2803, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.1000000000000001e-05, "loss": 0.2415, "step": 110 }, { "epoch": 0.01, "learning_rate": 1.2e-05, "loss": 0.2212, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.3000000000000001e-05, "loss": 0.1497, "step": 130 }, { "epoch": 0.01, "learning_rate": 1.4000000000000001e-05, "loss": 0.1694, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.5e-05, "loss": 0.1512, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.6000000000000003e-05, "loss": 0.1523, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.7000000000000003e-05, "loss": 0.1044, "step": 170 }, { "epoch": 0.01, "learning_rate": 1.8e-05, "loss": 0.1025, "step": 180 }, { "epoch": 0.01, "learning_rate": 1.9e-05, "loss": 0.1075, "step": 190 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.085, "step": 200 }, { "epoch": 0.01, "learning_rate": 2.1e-05, "loss": 0.0883, "step": 210 }, { "epoch": 0.01, "learning_rate": 2.2000000000000003e-05, "loss": 0.0546, "step": 220 }, { "epoch": 0.01, "learning_rate": 2.3000000000000003e-05, "loss": 0.1068, "step": 230 }, { "epoch": 0.01, "learning_rate": 2.4e-05, "loss": 0.0824, "step": 240 }, { "epoch": 0.01, "learning_rate": 2.5e-05, "loss": 0.0863, "step": 250 }, { "epoch": 0.01, "learning_rate": 2.6000000000000002e-05, "loss": 0.1005, "step": 260 }, { "epoch": 0.01, "learning_rate": 2.7000000000000002e-05, "loss": 0.113, "step": 270 }, { "epoch": 0.01, "learning_rate": 2.8000000000000003e-05, "loss": 0.0989, "step": 280 }, { "epoch": 0.01, "learning_rate": 2.9e-05, "loss": 0.0959, "step": 290 }, { "epoch": 0.02, "learning_rate": 3e-05, "loss": 0.0529, "step": 300 }, { "epoch": 0.02, "learning_rate": 3.1e-05, "loss": 0.0689, "step": 310 }, { "epoch": 0.02, "learning_rate": 3.2000000000000005e-05, "loss": 0.0797, "step": 320 }, { "epoch": 0.02, "learning_rate": 3.3e-05, "loss": 0.0628, "step": 330 }, { "epoch": 0.02, "learning_rate": 3.4000000000000007e-05, "loss": 0.0309, "step": 340 }, { "epoch": 0.02, "learning_rate": 3.5e-05, "loss": 0.1104, "step": 350 }, { "epoch": 0.02, "learning_rate": 3.6e-05, "loss": 0.0691, "step": 360 }, { "epoch": 0.02, "learning_rate": 3.7e-05, "loss": 0.0876, "step": 370 }, { "epoch": 0.02, "learning_rate": 3.8e-05, "loss": 0.0534, "step": 380 }, { "epoch": 0.02, "learning_rate": 3.9000000000000006e-05, "loss": 0.0618, "step": 390 }, { "epoch": 0.02, "learning_rate": 4e-05, "loss": 0.1438, "step": 400 }, { "epoch": 0.02, "learning_rate": 4.1e-05, "loss": 0.0504, "step": 410 }, { "epoch": 0.02, "learning_rate": 4.2e-05, "loss": 0.098, "step": 420 }, { "epoch": 0.02, "learning_rate": 4.3e-05, "loss": 0.0448, "step": 430 }, { "epoch": 0.02, "learning_rate": 4.4000000000000006e-05, "loss": 0.0951, "step": 440 }, { "epoch": 0.02, "learning_rate": 4.5e-05, "loss": 0.1086, "step": 450 }, { "epoch": 0.02, "learning_rate": 4.600000000000001e-05, "loss": 0.0434, "step": 460 }, { "epoch": 0.02, "learning_rate": 4.7e-05, "loss": 0.0603, "step": 470 }, { "epoch": 0.02, "learning_rate": 4.8e-05, "loss": 0.0898, "step": 480 }, { "epoch": 0.02, "learning_rate": 4.9e-05, "loss": 0.1291, "step": 490 }, { "epoch": 0.03, "learning_rate": 5e-05, "loss": 0.0306, "step": 500 }, { "epoch": 0.03, "learning_rate": 4.998730771183429e-05, "loss": 0.0977, "step": 510 }, { "epoch": 0.03, "learning_rate": 4.997461542366858e-05, "loss": 0.058, "step": 520 }, { "epoch": 0.03, "learning_rate": 4.996192313550287e-05, "loss": 0.0504, "step": 530 }, { "epoch": 0.03, "learning_rate": 4.9949230847337156e-05, "loss": 0.0767, "step": 540 }, { "epoch": 0.03, "learning_rate": 4.9936538559171446e-05, "loss": 0.0566, "step": 550 }, { "epoch": 0.03, "learning_rate": 4.992384627100574e-05, "loss": 0.0603, "step": 560 }, { "epoch": 0.03, "learning_rate": 4.991115398284003e-05, "loss": 0.0826, "step": 570 }, { "epoch": 0.03, "learning_rate": 4.989846169467432e-05, "loss": 0.047, "step": 580 }, { "epoch": 0.03, "learning_rate": 4.988576940650861e-05, "loss": 0.1339, "step": 590 }, { "epoch": 0.03, "learning_rate": 4.9873077118342896e-05, "loss": 0.0571, "step": 600 }, { "epoch": 0.03, "learning_rate": 4.9860384830177186e-05, "loss": 0.0292, "step": 610 }, { "epoch": 0.03, "learning_rate": 4.9847692542011476e-05, "loss": 0.0647, "step": 620 }, { "epoch": 0.03, "learning_rate": 4.9835000253845765e-05, "loss": 0.076, "step": 630 }, { "epoch": 0.03, "learning_rate": 4.9822307965680055e-05, "loss": 0.0845, "step": 640 }, { "epoch": 0.03, "learning_rate": 4.9809615677514345e-05, "loss": 0.043, "step": 650 }, { "epoch": 0.03, "learning_rate": 4.979692338934863e-05, "loss": 0.0575, "step": 660 }, { "epoch": 0.03, "learning_rate": 4.978423110118292e-05, "loss": 0.0749, "step": 670 }, { "epoch": 0.03, "learning_rate": 4.9771538813017215e-05, "loss": 0.0663, "step": 680 }, { "epoch": 0.03, "learning_rate": 4.9758846524851505e-05, "loss": 0.0853, "step": 690 }, { "epoch": 0.04, "learning_rate": 4.9746154236685795e-05, "loss": 0.0742, "step": 700 }, { "epoch": 0.04, "learning_rate": 4.9733461948520085e-05, "loss": 0.0544, "step": 710 }, { "epoch": 0.04, "learning_rate": 4.972076966035437e-05, "loss": 0.0577, "step": 720 }, { "epoch": 0.04, "learning_rate": 4.970807737218866e-05, "loss": 0.0873, "step": 730 }, { "epoch": 0.04, "learning_rate": 4.969538508402295e-05, "loss": 0.0574, "step": 740 }, { "epoch": 0.04, "learning_rate": 4.968269279585724e-05, "loss": 0.0617, "step": 750 }, { "epoch": 0.04, "learning_rate": 4.967000050769153e-05, "loss": 0.0853, "step": 760 }, { "epoch": 0.04, "learning_rate": 4.965730821952582e-05, "loss": 0.0453, "step": 770 }, { "epoch": 0.04, "learning_rate": 4.964461593136011e-05, "loss": 0.0413, "step": 780 }, { "epoch": 0.04, "learning_rate": 4.96319236431944e-05, "loss": 0.0426, "step": 790 }, { "epoch": 0.04, "learning_rate": 4.961923135502869e-05, "loss": 0.0858, "step": 800 }, { "epoch": 0.04, "learning_rate": 4.960653906686298e-05, "loss": 0.0502, "step": 810 }, { "epoch": 0.04, "learning_rate": 4.959384677869727e-05, "loss": 0.0633, "step": 820 }, { "epoch": 0.04, "learning_rate": 4.958115449053156e-05, "loss": 0.0716, "step": 830 }, { "epoch": 0.04, "learning_rate": 4.956846220236585e-05, "loss": 0.1308, "step": 840 }, { "epoch": 0.04, "learning_rate": 4.955576991420013e-05, "loss": 0.0818, "step": 850 }, { "epoch": 0.04, "learning_rate": 4.954307762603442e-05, "loss": 0.0784, "step": 860 }, { "epoch": 0.04, "learning_rate": 4.953038533786871e-05, "loss": 0.0507, "step": 870 }, { "epoch": 0.04, "learning_rate": 4.9517693049703e-05, "loss": 0.0241, "step": 880 }, { "epoch": 0.04, "learning_rate": 4.950500076153729e-05, "loss": 0.1688, "step": 890 }, { "epoch": 0.05, "learning_rate": 4.949230847337158e-05, "loss": 0.0458, "step": 900 }, { "epoch": 0.05, "learning_rate": 4.947961618520587e-05, "loss": 0.0707, "step": 910 }, { "epoch": 0.05, "learning_rate": 4.946692389704016e-05, "loss": 0.0492, "step": 920 }, { "epoch": 0.05, "learning_rate": 4.945423160887445e-05, "loss": 0.112, "step": 930 }, { "epoch": 0.05, "learning_rate": 4.944153932070874e-05, "loss": 0.0898, "step": 940 }, { "epoch": 0.05, "learning_rate": 4.942884703254303e-05, "loss": 0.0269, "step": 950 }, { "epoch": 0.05, "learning_rate": 4.941615474437732e-05, "loss": 0.1038, "step": 960 }, { "epoch": 0.05, "learning_rate": 4.9403462456211605e-05, "loss": 0.0438, "step": 970 }, { "epoch": 0.05, "learning_rate": 4.9390770168045895e-05, "loss": 0.0816, "step": 980 }, { "epoch": 0.05, "learning_rate": 4.9378077879880185e-05, "loss": 0.1172, "step": 990 }, { "epoch": 0.05, "learning_rate": 4.9365385591714475e-05, "loss": 0.0871, "step": 1000 }, { "epoch": 0.05, "learning_rate": 4.9352693303548765e-05, "loss": 0.0254, "step": 1010 }, { "epoch": 0.05, "learning_rate": 4.934000101538306e-05, "loss": 0.1241, "step": 1020 }, { "epoch": 0.05, "learning_rate": 4.9327308727217345e-05, "loss": 0.0486, "step": 1030 }, { "epoch": 0.05, "learning_rate": 4.9314616439051635e-05, "loss": 0.0821, "step": 1040 }, { "epoch": 0.05, "learning_rate": 4.9301924150885925e-05, "loss": 0.0154, "step": 1050 }, { "epoch": 0.05, "learning_rate": 4.9289231862720215e-05, "loss": 0.0555, "step": 1060 }, { "epoch": 0.05, "learning_rate": 4.9276539574554505e-05, "loss": 0.0309, "step": 1070 }, { "epoch": 0.05, "learning_rate": 4.9263847286388795e-05, "loss": 0.0867, "step": 1080 }, { "epoch": 0.05, "learning_rate": 4.9251154998223085e-05, "loss": 0.0935, "step": 1090 }, { "epoch": 0.06, "learning_rate": 4.923846271005737e-05, "loss": 0.063, "step": 1100 }, { "epoch": 0.06, "learning_rate": 4.922577042189166e-05, "loss": 0.0786, "step": 1110 }, { "epoch": 0.06, "learning_rate": 4.921307813372595e-05, "loss": 0.0768, "step": 1120 }, { "epoch": 0.06, "learning_rate": 4.920038584556024e-05, "loss": 0.0738, "step": 1130 }, { "epoch": 0.06, "learning_rate": 4.9187693557394535e-05, "loss": 0.1275, "step": 1140 }, { "epoch": 0.06, "learning_rate": 4.917500126922882e-05, "loss": 0.0466, "step": 1150 }, { "epoch": 0.06, "learning_rate": 4.916230898106311e-05, "loss": 0.0239, "step": 1160 }, { "epoch": 0.06, "learning_rate": 4.91496166928974e-05, "loss": 0.0673, "step": 1170 }, { "epoch": 0.06, "learning_rate": 4.913692440473169e-05, "loss": 0.0706, "step": 1180 }, { "epoch": 0.06, "learning_rate": 4.912423211656598e-05, "loss": 0.056, "step": 1190 }, { "epoch": 0.06, "learning_rate": 4.911153982840027e-05, "loss": 0.1112, "step": 1200 }, { "epoch": 0.06, "learning_rate": 4.909884754023456e-05, "loss": 0.0453, "step": 1210 }, { "epoch": 0.06, "learning_rate": 4.908615525206884e-05, "loss": 0.0624, "step": 1220 }, { "epoch": 0.06, "learning_rate": 4.907346296390313e-05, "loss": 0.0962, "step": 1230 }, { "epoch": 0.06, "learning_rate": 4.906077067573742e-05, "loss": 0.0533, "step": 1240 }, { "epoch": 0.06, "learning_rate": 4.904807838757172e-05, "loss": 0.0605, "step": 1250 }, { "epoch": 0.06, "learning_rate": 4.903538609940601e-05, "loss": 0.0793, "step": 1260 }, { "epoch": 0.06, "learning_rate": 4.90226938112403e-05, "loss": 0.0849, "step": 1270 }, { "epoch": 0.06, "learning_rate": 4.901000152307458e-05, "loss": 0.0498, "step": 1280 }, { "epoch": 0.06, "learning_rate": 4.899730923490887e-05, "loss": 0.0738, "step": 1290 }, { "epoch": 0.07, "learning_rate": 4.898461694674316e-05, "loss": 0.0392, "step": 1300 }, { "epoch": 0.07, "learning_rate": 4.897192465857745e-05, "loss": 0.0664, "step": 1310 }, { "epoch": 0.07, "learning_rate": 4.895923237041174e-05, "loss": 0.068, "step": 1320 }, { "epoch": 0.07, "learning_rate": 4.894654008224603e-05, "loss": 0.0686, "step": 1330 }, { "epoch": 0.07, "learning_rate": 4.8933847794080314e-05, "loss": 0.0666, "step": 1340 }, { "epoch": 0.07, "learning_rate": 4.8921155505914604e-05, "loss": 0.0572, "step": 1350 }, { "epoch": 0.07, "learning_rate": 4.8908463217748894e-05, "loss": 0.0661, "step": 1360 }, { "epoch": 0.07, "learning_rate": 4.889577092958319e-05, "loss": 0.0515, "step": 1370 }, { "epoch": 0.07, "learning_rate": 4.888307864141748e-05, "loss": 0.038, "step": 1380 }, { "epoch": 0.07, "learning_rate": 4.887038635325177e-05, "loss": 0.0481, "step": 1390 }, { "epoch": 0.07, "learning_rate": 4.8857694065086054e-05, "loss": 0.0793, "step": 1400 }, { "epoch": 0.07, "learning_rate": 4.8845001776920344e-05, "loss": 0.0786, "step": 1410 }, { "epoch": 0.07, "learning_rate": 4.8832309488754634e-05, "loss": 0.034, "step": 1420 }, { "epoch": 0.07, "learning_rate": 4.8819617200588924e-05, "loss": 0.0773, "step": 1430 }, { "epoch": 0.07, "learning_rate": 4.8806924912423214e-05, "loss": 0.0738, "step": 1440 }, { "epoch": 0.07, "learning_rate": 4.8794232624257504e-05, "loss": 0.0452, "step": 1450 }, { "epoch": 0.07, "learning_rate": 4.8781540336091794e-05, "loss": 0.103, "step": 1460 }, { "epoch": 0.07, "learning_rate": 4.876884804792608e-05, "loss": 0.1, "step": 1470 }, { "epoch": 0.07, "learning_rate": 4.8756155759760374e-05, "loss": 0.0612, "step": 1480 }, { "epoch": 0.07, "learning_rate": 4.8743463471594664e-05, "loss": 0.0821, "step": 1490 }, { "epoch": 0.08, "learning_rate": 4.8730771183428954e-05, "loss": 0.0387, "step": 1500 }, { "epoch": 0.08, "learning_rate": 4.8718078895263244e-05, "loss": 0.1322, "step": 1510 }, { "epoch": 0.08, "learning_rate": 4.8705386607097534e-05, "loss": 0.0313, "step": 1520 }, { "epoch": 0.08, "learning_rate": 4.869269431893182e-05, "loss": 0.0442, "step": 1530 }, { "epoch": 0.08, "learning_rate": 4.868000203076611e-05, "loss": 0.0385, "step": 1540 }, { "epoch": 0.08, "learning_rate": 4.86673097426004e-05, "loss": 0.0571, "step": 1550 }, { "epoch": 0.08, "learning_rate": 4.865461745443469e-05, "loss": 0.0315, "step": 1560 }, { "epoch": 0.08, "learning_rate": 4.864192516626898e-05, "loss": 0.0634, "step": 1570 }, { "epoch": 0.08, "learning_rate": 4.862923287810327e-05, "loss": 0.0749, "step": 1580 }, { "epoch": 0.08, "learning_rate": 4.861654058993755e-05, "loss": 0.0743, "step": 1590 }, { "epoch": 0.08, "learning_rate": 4.860384830177185e-05, "loss": 0.0832, "step": 1600 }, { "epoch": 0.08, "learning_rate": 4.859115601360614e-05, "loss": 0.0266, "step": 1610 }, { "epoch": 0.08, "learning_rate": 4.857846372544043e-05, "loss": 0.0712, "step": 1620 }, { "epoch": 0.08, "learning_rate": 4.856577143727472e-05, "loss": 0.0801, "step": 1630 }, { "epoch": 0.08, "learning_rate": 4.855307914910901e-05, "loss": 0.0537, "step": 1640 }, { "epoch": 0.08, "learning_rate": 4.854038686094329e-05, "loss": 0.0521, "step": 1650 }, { "epoch": 0.08, "learning_rate": 4.852769457277758e-05, "loss": 0.0773, "step": 1660 }, { "epoch": 0.08, "learning_rate": 4.851500228461187e-05, "loss": 0.0462, "step": 1670 }, { "epoch": 0.08, "learning_rate": 4.850230999644616e-05, "loss": 0.0616, "step": 1680 }, { "epoch": 0.08, "learning_rate": 4.848961770828045e-05, "loss": 0.0463, "step": 1690 }, { "epoch": 0.09, "learning_rate": 4.847692542011474e-05, "loss": 0.1289, "step": 1700 }, { "epoch": 0.09, "learning_rate": 4.846423313194903e-05, "loss": 0.0542, "step": 1710 }, { "epoch": 0.09, "learning_rate": 4.845154084378332e-05, "loss": 0.0151, "step": 1720 }, { "epoch": 0.09, "learning_rate": 4.843884855561761e-05, "loss": 0.1101, "step": 1730 }, { "epoch": 0.09, "learning_rate": 4.84261562674519e-05, "loss": 0.0583, "step": 1740 }, { "epoch": 0.09, "learning_rate": 4.841346397928619e-05, "loss": 0.057, "step": 1750 }, { "epoch": 0.09, "learning_rate": 4.840077169112048e-05, "loss": 0.0411, "step": 1760 }, { "epoch": 0.09, "learning_rate": 4.838807940295477e-05, "loss": 0.0399, "step": 1770 }, { "epoch": 0.09, "learning_rate": 4.837538711478905e-05, "loss": 0.0448, "step": 1780 }, { "epoch": 0.09, "learning_rate": 4.836269482662334e-05, "loss": 0.0519, "step": 1790 }, { "epoch": 0.09, "learning_rate": 4.835000253845763e-05, "loss": 0.0689, "step": 1800 }, { "epoch": 0.09, "learning_rate": 4.833731025029192e-05, "loss": 0.0896, "step": 1810 }, { "epoch": 0.09, "learning_rate": 4.832461796212621e-05, "loss": 0.0334, "step": 1820 }, { "epoch": 0.09, "learning_rate": 4.83119256739605e-05, "loss": 0.0932, "step": 1830 }, { "epoch": 0.09, "learning_rate": 4.829923338579479e-05, "loss": 0.0767, "step": 1840 }, { "epoch": 0.09, "learning_rate": 4.828654109762908e-05, "loss": 0.1134, "step": 1850 }, { "epoch": 0.09, "learning_rate": 4.827384880946337e-05, "loss": 0.0453, "step": 1860 }, { "epoch": 0.09, "learning_rate": 4.826115652129766e-05, "loss": 0.0461, "step": 1870 }, { "epoch": 0.09, "learning_rate": 4.824846423313195e-05, "loss": 0.0322, "step": 1880 }, { "epoch": 0.09, "learning_rate": 4.823577194496624e-05, "loss": 0.0591, "step": 1890 }, { "epoch": 0.1, "learning_rate": 4.8223079656800526e-05, "loss": 0.0539, "step": 1900 }, { "epoch": 0.1, "learning_rate": 4.8210387368634816e-05, "loss": 0.081, "step": 1910 }, { "epoch": 0.1, "learning_rate": 4.8197695080469106e-05, "loss": 0.0268, "step": 1920 }, { "epoch": 0.1, "learning_rate": 4.8185002792303396e-05, "loss": 0.0919, "step": 1930 }, { "epoch": 0.1, "learning_rate": 4.817231050413769e-05, "loss": 0.0389, "step": 1940 }, { "epoch": 0.1, "learning_rate": 4.815961821597198e-05, "loss": 0.067, "step": 1950 }, { "epoch": 0.1, "learning_rate": 4.8146925927806266e-05, "loss": 0.0538, "step": 1960 }, { "epoch": 0.1, "learning_rate": 4.8134233639640556e-05, "loss": 0.0593, "step": 1970 }, { "epoch": 0.1, "learning_rate": 4.8121541351474846e-05, "loss": 0.121, "step": 1980 }, { "epoch": 0.1, "learning_rate": 4.8108849063309136e-05, "loss": 0.065, "step": 1990 }, { "epoch": 0.1, "learning_rate": 4.8096156775143426e-05, "loss": 0.038, "step": 2000 }, { "epoch": 0.1, "learning_rate": 4.8083464486977716e-05, "loss": 0.0791, "step": 2010 }, { "epoch": 0.1, "learning_rate": 4.8070772198812006e-05, "loss": 0.0836, "step": 2020 }, { "epoch": 0.1, "learning_rate": 4.805807991064629e-05, "loss": 0.056, "step": 2030 }, { "epoch": 0.1, "learning_rate": 4.804538762248058e-05, "loss": 0.0482, "step": 2040 }, { "epoch": 0.1, "learning_rate": 4.803269533431487e-05, "loss": 0.0451, "step": 2050 }, { "epoch": 0.1, "learning_rate": 4.8020003046149166e-05, "loss": 0.0849, "step": 2060 }, { "epoch": 0.1, "learning_rate": 4.8007310757983456e-05, "loss": 0.0521, "step": 2070 }, { "epoch": 0.1, "learning_rate": 4.799461846981774e-05, "loss": 0.0551, "step": 2080 }, { "epoch": 0.1, "learning_rate": 4.798192618165203e-05, "loss": 0.0276, "step": 2090 }, { "epoch": 0.11, "learning_rate": 4.796923389348632e-05, "loss": 0.0653, "step": 2100 }, { "epoch": 0.11, "learning_rate": 4.795654160532061e-05, "loss": 0.0652, "step": 2110 }, { "epoch": 0.11, "learning_rate": 4.79438493171549e-05, "loss": 0.044, "step": 2120 }, { "epoch": 0.11, "learning_rate": 4.793115702898919e-05, "loss": 0.0832, "step": 2130 }, { "epoch": 0.11, "learning_rate": 4.791846474082348e-05, "loss": 0.0534, "step": 2140 }, { "epoch": 0.11, "learning_rate": 4.790577245265776e-05, "loss": 0.0555, "step": 2150 }, { "epoch": 0.11, "learning_rate": 4.789308016449205e-05, "loss": 0.0587, "step": 2160 }, { "epoch": 0.11, "learning_rate": 4.788038787632635e-05, "loss": 0.0659, "step": 2170 }, { "epoch": 0.11, "learning_rate": 4.786769558816064e-05, "loss": 0.0591, "step": 2180 }, { "epoch": 0.11, "learning_rate": 4.785500329999493e-05, "loss": 0.0571, "step": 2190 }, { "epoch": 0.11, "learning_rate": 4.784231101182922e-05, "loss": 0.0464, "step": 2200 }, { "epoch": 0.11, "learning_rate": 4.78296187236635e-05, "loss": 0.093, "step": 2210 }, { "epoch": 0.11, "learning_rate": 4.781692643549779e-05, "loss": 0.0929, "step": 2220 }, { "epoch": 0.11, "learning_rate": 4.780423414733208e-05, "loss": 0.0714, "step": 2230 }, { "epoch": 0.11, "learning_rate": 4.779154185916637e-05, "loss": 0.0778, "step": 2240 }, { "epoch": 0.11, "learning_rate": 4.777884957100066e-05, "loss": 0.041, "step": 2250 }, { "epoch": 0.11, "learning_rate": 4.776615728283495e-05, "loss": 0.0449, "step": 2260 }, { "epoch": 0.11, "learning_rate": 4.775346499466924e-05, "loss": 0.0479, "step": 2270 }, { "epoch": 0.11, "learning_rate": 4.7740772706503526e-05, "loss": 0.0239, "step": 2280 }, { "epoch": 0.11, "learning_rate": 4.772808041833782e-05, "loss": 0.0405, "step": 2290 }, { "epoch": 0.12, "learning_rate": 4.771538813017211e-05, "loss": 0.0515, "step": 2300 }, { "epoch": 0.12, "learning_rate": 4.77026958420064e-05, "loss": 0.0731, "step": 2310 }, { "epoch": 0.12, "learning_rate": 4.769000355384069e-05, "loss": 0.0336, "step": 2320 }, { "epoch": 0.12, "learning_rate": 4.7677311265674976e-05, "loss": 0.0683, "step": 2330 }, { "epoch": 0.12, "learning_rate": 4.7664618977509266e-05, "loss": 0.0317, "step": 2340 }, { "epoch": 0.12, "learning_rate": 4.7651926689343556e-05, "loss": 0.0402, "step": 2350 }, { "epoch": 0.12, "learning_rate": 4.7639234401177846e-05, "loss": 0.0323, "step": 2360 }, { "epoch": 0.12, "learning_rate": 4.7626542113012136e-05, "loss": 0.0814, "step": 2370 }, { "epoch": 0.12, "learning_rate": 4.7613849824846426e-05, "loss": 0.117, "step": 2380 }, { "epoch": 0.12, "learning_rate": 4.7601157536680716e-05, "loss": 0.0371, "step": 2390 }, { "epoch": 0.12, "learning_rate": 4.7588465248515006e-05, "loss": 0.0507, "step": 2400 }, { "epoch": 0.12, "learning_rate": 4.7575772960349296e-05, "loss": 0.1098, "step": 2410 }, { "epoch": 0.12, "learning_rate": 4.7563080672183586e-05, "loss": 0.026, "step": 2420 }, { "epoch": 0.12, "learning_rate": 4.7550388384017876e-05, "loss": 0.0418, "step": 2430 }, { "epoch": 0.12, "learning_rate": 4.7537696095852166e-05, "loss": 0.0543, "step": 2440 }, { "epoch": 0.12, "learning_rate": 4.7525003807686456e-05, "loss": 0.0426, "step": 2450 }, { "epoch": 0.12, "learning_rate": 4.751231151952074e-05, "loss": 0.0545, "step": 2460 }, { "epoch": 0.12, "learning_rate": 4.749961923135503e-05, "loss": 0.0787, "step": 2470 }, { "epoch": 0.12, "learning_rate": 4.748692694318932e-05, "loss": 0.0359, "step": 2480 }, { "epoch": 0.12, "learning_rate": 4.747423465502361e-05, "loss": 0.0319, "step": 2490 }, { "epoch": 0.13, "learning_rate": 4.74615423668579e-05, "loss": 0.0897, "step": 2500 }, { "epoch": 0.13, "learning_rate": 4.744885007869219e-05, "loss": 0.0679, "step": 2510 }, { "epoch": 0.13, "learning_rate": 4.743615779052648e-05, "loss": 0.0381, "step": 2520 }, { "epoch": 0.13, "learning_rate": 4.742346550236077e-05, "loss": 0.0466, "step": 2530 }, { "epoch": 0.13, "learning_rate": 4.741077321419506e-05, "loss": 0.0474, "step": 2540 }, { "epoch": 0.13, "learning_rate": 4.739808092602935e-05, "loss": 0.0404, "step": 2550 }, { "epoch": 0.13, "learning_rate": 4.738538863786364e-05, "loss": 0.1523, "step": 2560 }, { "epoch": 0.13, "learning_rate": 4.737269634969793e-05, "loss": 0.0634, "step": 2570 }, { "epoch": 0.13, "learning_rate": 4.736000406153221e-05, "loss": 0.0437, "step": 2580 }, { "epoch": 0.13, "learning_rate": 4.73473117733665e-05, "loss": 0.0231, "step": 2590 }, { "epoch": 0.13, "learning_rate": 4.733461948520079e-05, "loss": 0.0388, "step": 2600 }, { "epoch": 0.13, "learning_rate": 4.732192719703508e-05, "loss": 0.0779, "step": 2610 }, { "epoch": 0.13, "learning_rate": 4.730923490886937e-05, "loss": 0.0452, "step": 2620 }, { "epoch": 0.13, "learning_rate": 4.729654262070367e-05, "loss": 0.1071, "step": 2630 }, { "epoch": 0.13, "learning_rate": 4.728385033253795e-05, "loss": 0.049, "step": 2640 }, { "epoch": 0.13, "learning_rate": 4.727115804437224e-05, "loss": 0.052, "step": 2650 }, { "epoch": 0.13, "learning_rate": 4.725846575620653e-05, "loss": 0.0745, "step": 2660 }, { "epoch": 0.13, "learning_rate": 4.724577346804082e-05, "loss": 0.0397, "step": 2670 }, { "epoch": 0.13, "learning_rate": 4.723308117987511e-05, "loss": 0.0649, "step": 2680 }, { "epoch": 0.13, "learning_rate": 4.72203888917094e-05, "loss": 0.0266, "step": 2690 }, { "epoch": 0.14, "learning_rate": 4.720769660354369e-05, "loss": 0.0247, "step": 2700 }, { "epoch": 0.14, "learning_rate": 4.7195004315377975e-05, "loss": 0.0783, "step": 2710 }, { "epoch": 0.14, "learning_rate": 4.7182312027212265e-05, "loss": 0.0444, "step": 2720 }, { "epoch": 0.14, "learning_rate": 4.7169619739046555e-05, "loss": 0.0918, "step": 2730 }, { "epoch": 0.14, "learning_rate": 4.7156927450880845e-05, "loss": 0.0518, "step": 2740 }, { "epoch": 0.14, "learning_rate": 4.714423516271514e-05, "loss": 0.0486, "step": 2750 }, { "epoch": 0.14, "learning_rate": 4.7131542874549425e-05, "loss": 0.0695, "step": 2760 }, { "epoch": 0.14, "learning_rate": 4.7118850586383715e-05, "loss": 0.0451, "step": 2770 }, { "epoch": 0.14, "learning_rate": 4.7106158298218005e-05, "loss": 0.0591, "step": 2780 }, { "epoch": 0.14, "learning_rate": 4.7093466010052295e-05, "loss": 0.0098, "step": 2790 }, { "epoch": 0.14, "learning_rate": 4.7080773721886585e-05, "loss": 0.0286, "step": 2800 }, { "epoch": 0.14, "learning_rate": 4.7068081433720875e-05, "loss": 0.0286, "step": 2810 }, { "epoch": 0.14, "learning_rate": 4.7055389145555165e-05, "loss": 0.0256, "step": 2820 }, { "epoch": 0.14, "learning_rate": 4.704269685738945e-05, "loss": 0.0833, "step": 2830 }, { "epoch": 0.14, "learning_rate": 4.703000456922374e-05, "loss": 0.0469, "step": 2840 }, { "epoch": 0.14, "learning_rate": 4.701731228105803e-05, "loss": 0.0436, "step": 2850 }, { "epoch": 0.14, "learning_rate": 4.7004619992892325e-05, "loss": 0.0301, "step": 2860 }, { "epoch": 0.14, "learning_rate": 4.6991927704726615e-05, "loss": 0.0615, "step": 2870 }, { "epoch": 0.14, "learning_rate": 4.6979235416560905e-05, "loss": 0.0371, "step": 2880 }, { "epoch": 0.14, "learning_rate": 4.696654312839519e-05, "loss": 0.0259, "step": 2890 }, { "epoch": 0.15, "learning_rate": 4.695385084022948e-05, "loss": 0.0843, "step": 2900 }, { "epoch": 0.15, "learning_rate": 4.694115855206377e-05, "loss": 0.0261, "step": 2910 }, { "epoch": 0.15, "learning_rate": 4.692846626389806e-05, "loss": 0.0831, "step": 2920 }, { "epoch": 0.15, "learning_rate": 4.691577397573235e-05, "loss": 0.066, "step": 2930 }, { "epoch": 0.15, "learning_rate": 4.690308168756664e-05, "loss": 0.0606, "step": 2940 }, { "epoch": 0.15, "learning_rate": 4.689038939940093e-05, "loss": 0.0649, "step": 2950 }, { "epoch": 0.15, "learning_rate": 4.687769711123521e-05, "loss": 0.0521, "step": 2960 }, { "epoch": 0.15, "learning_rate": 4.68650048230695e-05, "loss": 0.0588, "step": 2970 }, { "epoch": 0.15, "learning_rate": 4.68523125349038e-05, "loss": 0.0587, "step": 2980 }, { "epoch": 0.15, "learning_rate": 4.683962024673809e-05, "loss": 0.0486, "step": 2990 }, { "epoch": 0.15, "learning_rate": 4.682692795857238e-05, "loss": 0.0635, "step": 3000 }, { "epoch": 0.15, "learning_rate": 4.681423567040666e-05, "loss": 0.0664, "step": 3010 }, { "epoch": 0.15, "learning_rate": 4.680154338224095e-05, "loss": 0.0235, "step": 3020 }, { "epoch": 0.15, "learning_rate": 4.678885109407524e-05, "loss": 0.063, "step": 3030 }, { "epoch": 0.15, "learning_rate": 4.677615880590953e-05, "loss": 0.0151, "step": 3040 }, { "epoch": 0.15, "learning_rate": 4.676346651774382e-05, "loss": 0.0321, "step": 3050 }, { "epoch": 0.15, "learning_rate": 4.675077422957811e-05, "loss": 0.0766, "step": 3060 }, { "epoch": 0.15, "learning_rate": 4.67380819414124e-05, "loss": 0.0721, "step": 3070 }, { "epoch": 0.15, "learning_rate": 4.6725389653246684e-05, "loss": 0.0532, "step": 3080 }, { "epoch": 0.15, "learning_rate": 4.6712697365080974e-05, "loss": 0.0341, "step": 3090 }, { "epoch": 0.16, "learning_rate": 4.670000507691527e-05, "loss": 0.088, "step": 3100 }, { "epoch": 0.16, "learning_rate": 4.668731278874956e-05, "loss": 0.0409, "step": 3110 }, { "epoch": 0.16, "learning_rate": 4.667462050058385e-05, "loss": 0.0939, "step": 3120 }, { "epoch": 0.16, "learning_rate": 4.666192821241814e-05, "loss": 0.0241, "step": 3130 }, { "epoch": 0.16, "learning_rate": 4.6649235924252424e-05, "loss": 0.0585, "step": 3140 }, { "epoch": 0.16, "learning_rate": 4.6636543636086714e-05, "loss": 0.051, "step": 3150 }, { "epoch": 0.16, "learning_rate": 4.6623851347921004e-05, "loss": 0.0621, "step": 3160 }, { "epoch": 0.16, "learning_rate": 4.6611159059755294e-05, "loss": 0.0674, "step": 3170 }, { "epoch": 0.16, "learning_rate": 4.6598466771589584e-05, "loss": 0.1069, "step": 3180 }, { "epoch": 0.16, "learning_rate": 4.6585774483423874e-05, "loss": 0.0732, "step": 3190 }, { "epoch": 0.16, "learning_rate": 4.6573082195258164e-05, "loss": 0.067, "step": 3200 }, { "epoch": 0.16, "learning_rate": 4.6560389907092454e-05, "loss": 0.05, "step": 3210 }, { "epoch": 0.16, "learning_rate": 4.6547697618926744e-05, "loss": 0.0678, "step": 3220 }, { "epoch": 0.16, "learning_rate": 4.6535005330761034e-05, "loss": 0.1148, "step": 3230 }, { "epoch": 0.16, "learning_rate": 4.6522313042595324e-05, "loss": 0.0725, "step": 3240 }, { "epoch": 0.16, "learning_rate": 4.6509620754429614e-05, "loss": 0.0571, "step": 3250 }, { "epoch": 0.16, "learning_rate": 4.64969284662639e-05, "loss": 0.0323, "step": 3260 }, { "epoch": 0.16, "learning_rate": 4.648423617809819e-05, "loss": 0.0507, "step": 3270 }, { "epoch": 0.16, "learning_rate": 4.647154388993248e-05, "loss": 0.0492, "step": 3280 }, { "epoch": 0.16, "learning_rate": 4.645885160176677e-05, "loss": 0.0533, "step": 3290 }, { "epoch": 0.17, "learning_rate": 4.644615931360106e-05, "loss": 0.0589, "step": 3300 }, { "epoch": 0.17, "learning_rate": 4.643346702543535e-05, "loss": 0.0255, "step": 3310 }, { "epoch": 0.17, "learning_rate": 4.642077473726964e-05, "loss": 0.0232, "step": 3320 }, { "epoch": 0.17, "learning_rate": 4.640808244910393e-05, "loss": 0.0831, "step": 3330 }, { "epoch": 0.17, "learning_rate": 4.639539016093822e-05, "loss": 0.0478, "step": 3340 }, { "epoch": 0.17, "learning_rate": 4.638269787277251e-05, "loss": 0.0353, "step": 3350 }, { "epoch": 0.17, "learning_rate": 4.63700055846068e-05, "loss": 0.0703, "step": 3360 }, { "epoch": 0.17, "learning_rate": 4.635731329644109e-05, "loss": 0.0364, "step": 3370 }, { "epoch": 0.17, "learning_rate": 4.634462100827538e-05, "loss": 0.0534, "step": 3380 }, { "epoch": 0.17, "learning_rate": 4.633192872010966e-05, "loss": 0.0635, "step": 3390 }, { "epoch": 0.17, "learning_rate": 4.631923643194395e-05, "loss": 0.0355, "step": 3400 }, { "epoch": 0.17, "learning_rate": 4.630654414377824e-05, "loss": 0.0299, "step": 3410 }, { "epoch": 0.17, "learning_rate": 4.629385185561253e-05, "loss": 0.0447, "step": 3420 }, { "epoch": 0.17, "learning_rate": 4.628115956744682e-05, "loss": 0.0556, "step": 3430 }, { "epoch": 0.17, "learning_rate": 4.626846727928111e-05, "loss": 0.0537, "step": 3440 }, { "epoch": 0.17, "learning_rate": 4.62557749911154e-05, "loss": 0.0789, "step": 3450 }, { "epoch": 0.17, "learning_rate": 4.624308270294969e-05, "loss": 0.0444, "step": 3460 }, { "epoch": 0.17, "learning_rate": 4.623039041478398e-05, "loss": 0.0563, "step": 3470 }, { "epoch": 0.17, "learning_rate": 4.621769812661827e-05, "loss": 0.043, "step": 3480 }, { "epoch": 0.17, "learning_rate": 4.620500583845256e-05, "loss": 0.0596, "step": 3490 }, { "epoch": 0.18, "learning_rate": 4.619231355028685e-05, "loss": 0.0499, "step": 3500 }, { "epoch": 0.18, "learning_rate": 4.6179621262121134e-05, "loss": 0.0303, "step": 3510 }, { "epoch": 0.18, "learning_rate": 4.6166928973955424e-05, "loss": 0.1042, "step": 3520 }, { "epoch": 0.18, "learning_rate": 4.6154236685789714e-05, "loss": 0.0577, "step": 3530 }, { "epoch": 0.18, "learning_rate": 4.6141544397624004e-05, "loss": 0.0993, "step": 3540 }, { "epoch": 0.18, "learning_rate": 4.6128852109458294e-05, "loss": 0.0242, "step": 3550 }, { "epoch": 0.18, "learning_rate": 4.611615982129259e-05, "loss": 0.0457, "step": 3560 }, { "epoch": 0.18, "learning_rate": 4.6103467533126874e-05, "loss": 0.0664, "step": 3570 }, { "epoch": 0.18, "learning_rate": 4.6090775244961164e-05, "loss": 0.0674, "step": 3580 }, { "epoch": 0.18, "learning_rate": 4.6078082956795453e-05, "loss": 0.045, "step": 3590 }, { "epoch": 0.18, "learning_rate": 4.6065390668629743e-05, "loss": 0.0489, "step": 3600 }, { "epoch": 0.18, "learning_rate": 4.6052698380464033e-05, "loss": 0.0669, "step": 3610 }, { "epoch": 0.18, "learning_rate": 4.6040006092298323e-05, "loss": 0.0801, "step": 3620 }, { "epoch": 0.18, "learning_rate": 4.6027313804132613e-05, "loss": 0.0664, "step": 3630 }, { "epoch": 0.18, "learning_rate": 4.60146215159669e-05, "loss": 0.0531, "step": 3640 }, { "epoch": 0.18, "learning_rate": 4.600192922780119e-05, "loss": 0.0837, "step": 3650 }, { "epoch": 0.18, "learning_rate": 4.598923693963548e-05, "loss": 0.0938, "step": 3660 }, { "epoch": 0.18, "learning_rate": 4.5976544651469773e-05, "loss": 0.0514, "step": 3670 }, { "epoch": 0.18, "learning_rate": 4.5963852363304063e-05, "loss": 0.0486, "step": 3680 }, { "epoch": 0.18, "learning_rate": 4.595116007513835e-05, "loss": 0.048, "step": 3690 }, { "epoch": 0.19, "learning_rate": 4.5938467786972637e-05, "loss": 0.0338, "step": 3700 }, { "epoch": 0.19, "learning_rate": 4.5925775498806927e-05, "loss": 0.0493, "step": 3710 }, { "epoch": 0.19, "learning_rate": 4.5913083210641217e-05, "loss": 0.0335, "step": 3720 }, { "epoch": 0.19, "learning_rate": 4.5900390922475507e-05, "loss": 0.0321, "step": 3730 }, { "epoch": 0.19, "learning_rate": 4.5887698634309797e-05, "loss": 0.0442, "step": 3740 }, { "epoch": 0.19, "learning_rate": 4.5875006346144087e-05, "loss": 0.0431, "step": 3750 }, { "epoch": 0.19, "learning_rate": 4.586231405797837e-05, "loss": 0.0551, "step": 3760 }, { "epoch": 0.19, "learning_rate": 4.584962176981266e-05, "loss": 0.0694, "step": 3770 }, { "epoch": 0.19, "learning_rate": 4.583692948164695e-05, "loss": 0.0783, "step": 3780 }, { "epoch": 0.19, "learning_rate": 4.5824237193481247e-05, "loss": 0.0728, "step": 3790 }, { "epoch": 0.19, "learning_rate": 4.5811544905315537e-05, "loss": 0.0319, "step": 3800 }, { "epoch": 0.19, "learning_rate": 4.5798852617149826e-05, "loss": 0.0947, "step": 3810 }, { "epoch": 0.19, "learning_rate": 4.578616032898411e-05, "loss": 0.071, "step": 3820 }, { "epoch": 0.19, "learning_rate": 4.57734680408184e-05, "loss": 0.0699, "step": 3830 }, { "epoch": 0.19, "learning_rate": 4.576077575265269e-05, "loss": 0.0565, "step": 3840 }, { "epoch": 0.19, "learning_rate": 4.574808346448698e-05, "loss": 0.0703, "step": 3850 }, { "epoch": 0.19, "learning_rate": 4.573539117632127e-05, "loss": 0.0757, "step": 3860 }, { "epoch": 0.19, "learning_rate": 4.572269888815556e-05, "loss": 0.0884, "step": 3870 }, { "epoch": 0.19, "learning_rate": 4.571000659998985e-05, "loss": 0.0512, "step": 3880 }, { "epoch": 0.2, "learning_rate": 4.569731431182413e-05, "loss": 0.0551, "step": 3890 }, { "epoch": 0.2, "learning_rate": 4.568462202365843e-05, "loss": 0.023, "step": 3900 }, { "epoch": 0.2, "learning_rate": 4.567192973549272e-05, "loss": 0.061, "step": 3910 }, { "epoch": 0.2, "learning_rate": 4.565923744732701e-05, "loss": 0.0421, "step": 3920 }, { "epoch": 0.2, "learning_rate": 4.56465451591613e-05, "loss": 0.0295, "step": 3930 }, { "epoch": 0.2, "learning_rate": 4.563385287099558e-05, "loss": 0.163, "step": 3940 }, { "epoch": 0.2, "learning_rate": 4.562116058282987e-05, "loss": 0.0348, "step": 3950 }, { "epoch": 0.2, "learning_rate": 4.560846829466416e-05, "loss": 0.0604, "step": 3960 }, { "epoch": 0.2, "learning_rate": 4.559577600649845e-05, "loss": 0.0617, "step": 3970 }, { "epoch": 0.2, "learning_rate": 4.558308371833274e-05, "loss": 0.0616, "step": 3980 }, { "epoch": 0.2, "learning_rate": 4.557039143016703e-05, "loss": 0.0621, "step": 3990 }, { "epoch": 0.2, "learning_rate": 4.555769914200132e-05, "loss": 0.0839, "step": 4000 }, { "epoch": 0.2, "learning_rate": 4.5545006853835606e-05, "loss": 0.0716, "step": 4010 }, { "epoch": 0.2, "learning_rate": 4.55323145656699e-05, "loss": 0.0583, "step": 4020 }, { "epoch": 0.2, "learning_rate": 4.551962227750419e-05, "loss": 0.0902, "step": 4030 }, { "epoch": 0.2, "learning_rate": 4.550692998933848e-05, "loss": 0.0479, "step": 4040 }, { "epoch": 0.2, "learning_rate": 4.549423770117277e-05, "loss": 0.0712, "step": 4050 }, { "epoch": 0.2, "learning_rate": 4.548154541300706e-05, "loss": 0.0474, "step": 4060 }, { "epoch": 0.2, "learning_rate": 4.5468853124841346e-05, "loss": 0.0618, "step": 4070 }, { "epoch": 0.2, "learning_rate": 4.5456160836675636e-05, "loss": 0.036, "step": 4080 }, { "epoch": 0.21, "learning_rate": 4.5443468548509926e-05, "loss": 0.0811, "step": 4090 }, { "epoch": 0.21, "learning_rate": 4.5430776260344216e-05, "loss": 0.0497, "step": 4100 }, { "epoch": 0.21, "learning_rate": 4.5418083972178506e-05, "loss": 0.043, "step": 4110 }, { "epoch": 0.21, "learning_rate": 4.5405391684012796e-05, "loss": 0.0855, "step": 4120 }, { "epoch": 0.21, "learning_rate": 4.5392699395847086e-05, "loss": 0.0398, "step": 4130 }, { "epoch": 0.21, "learning_rate": 4.5380007107681376e-05, "loss": 0.1331, "step": 4140 }, { "epoch": 0.21, "learning_rate": 4.5367314819515666e-05, "loss": 0.0476, "step": 4150 }, { "epoch": 0.21, "learning_rate": 4.5354622531349956e-05, "loss": 0.0653, "step": 4160 }, { "epoch": 0.21, "learning_rate": 4.5341930243184246e-05, "loss": 0.0507, "step": 4170 }, { "epoch": 0.21, "learning_rate": 4.5329237955018536e-05, "loss": 0.0802, "step": 4180 }, { "epoch": 0.21, "learning_rate": 4.531654566685282e-05, "loss": 0.0223, "step": 4190 }, { "epoch": 0.21, "learning_rate": 4.530385337868711e-05, "loss": 0.0801, "step": 4200 }, { "epoch": 0.21, "learning_rate": 4.52911610905214e-05, "loss": 0.0678, "step": 4210 }, { "epoch": 0.21, "learning_rate": 4.527846880235569e-05, "loss": 0.0336, "step": 4220 }, { "epoch": 0.21, "learning_rate": 4.526577651418998e-05, "loss": 0.0194, "step": 4230 }, { "epoch": 0.21, "learning_rate": 4.525308422602427e-05, "loss": 0.0537, "step": 4240 }, { "epoch": 0.21, "learning_rate": 4.524039193785856e-05, "loss": 0.0425, "step": 4250 }, { "epoch": 0.21, "learning_rate": 4.522769964969285e-05, "loss": 0.0648, "step": 4260 }, { "epoch": 0.21, "learning_rate": 4.521500736152714e-05, "loss": 0.0449, "step": 4270 }, { "epoch": 0.21, "learning_rate": 4.520231507336143e-05, "loss": 0.0807, "step": 4280 }, { "epoch": 0.22, "learning_rate": 4.518962278519572e-05, "loss": 0.0248, "step": 4290 }, { "epoch": 0.22, "learning_rate": 4.517693049703001e-05, "loss": 0.0414, "step": 4300 }, { "epoch": 0.22, "learning_rate": 4.51642382088643e-05, "loss": 0.1042, "step": 4310 }, { "epoch": 0.22, "learning_rate": 4.515154592069858e-05, "loss": 0.0342, "step": 4320 }, { "epoch": 0.22, "learning_rate": 4.513885363253287e-05, "loss": 0.0506, "step": 4330 }, { "epoch": 0.22, "learning_rate": 4.512616134436716e-05, "loss": 0.0535, "step": 4340 }, { "epoch": 0.22, "learning_rate": 4.511346905620145e-05, "loss": 0.0705, "step": 4350 }, { "epoch": 0.22, "learning_rate": 4.510077676803575e-05, "loss": 0.0536, "step": 4360 }, { "epoch": 0.22, "learning_rate": 4.508808447987003e-05, "loss": 0.0766, "step": 4370 }, { "epoch": 0.22, "learning_rate": 4.507539219170432e-05, "loss": 0.071, "step": 4380 }, { "epoch": 0.22, "learning_rate": 4.506269990353861e-05, "loss": 0.0393, "step": 4390 }, { "epoch": 0.22, "learning_rate": 4.50500076153729e-05, "loss": 0.0713, "step": 4400 }, { "epoch": 0.22, "learning_rate": 4.503731532720719e-05, "loss": 0.0762, "step": 4410 }, { "epoch": 0.22, "learning_rate": 4.502462303904148e-05, "loss": 0.0434, "step": 4420 }, { "epoch": 0.22, "learning_rate": 4.501193075087577e-05, "loss": 0.0604, "step": 4430 }, { "epoch": 0.22, "learning_rate": 4.4999238462710055e-05, "loss": 0.0461, "step": 4440 }, { "epoch": 0.22, "learning_rate": 4.4986546174544345e-05, "loss": 0.0398, "step": 4450 }, { "epoch": 0.22, "learning_rate": 4.4973853886378635e-05, "loss": 0.0781, "step": 4460 }, { "epoch": 0.22, "learning_rate": 4.4961161598212925e-05, "loss": 0.0215, "step": 4470 }, { "epoch": 0.22, "learning_rate": 4.494846931004722e-05, "loss": 0.0268, "step": 4480 }, { "epoch": 0.23, "learning_rate": 4.493577702188151e-05, "loss": 0.0956, "step": 4490 }, { "epoch": 0.23, "learning_rate": 4.4923084733715795e-05, "loss": 0.042, "step": 4500 }, { "epoch": 0.23, "learning_rate": 4.4910392445550085e-05, "loss": 0.0803, "step": 4510 }, { "epoch": 0.23, "learning_rate": 4.4897700157384375e-05, "loss": 0.0338, "step": 4520 }, { "epoch": 0.23, "learning_rate": 4.4885007869218665e-05, "loss": 0.0621, "step": 4530 }, { "epoch": 0.23, "learning_rate": 4.4872315581052955e-05, "loss": 0.0907, "step": 4540 }, { "epoch": 0.23, "learning_rate": 4.4859623292887245e-05, "loss": 0.0317, "step": 4550 }, { "epoch": 0.23, "learning_rate": 4.4846931004721535e-05, "loss": 0.0484, "step": 4560 }, { "epoch": 0.23, "learning_rate": 4.483423871655582e-05, "loss": 0.0379, "step": 4570 }, { "epoch": 0.23, "learning_rate": 4.482154642839011e-05, "loss": 0.0407, "step": 4580 }, { "epoch": 0.23, "learning_rate": 4.4808854140224405e-05, "loss": 0.0787, "step": 4590 }, { "epoch": 0.23, "learning_rate": 4.4796161852058695e-05, "loss": 0.0306, "step": 4600 }, { "epoch": 0.23, "learning_rate": 4.4783469563892985e-05, "loss": 0.0752, "step": 4610 }, { "epoch": 0.23, "learning_rate": 4.477077727572727e-05, "loss": 0.0463, "step": 4620 }, { "epoch": 0.23, "learning_rate": 4.475808498756156e-05, "loss": 0.069, "step": 4630 }, { "epoch": 0.23, "learning_rate": 4.474539269939585e-05, "loss": 0.0449, "step": 4640 }, { "epoch": 0.23, "learning_rate": 4.473270041123014e-05, "loss": 0.0256, "step": 4650 }, { "epoch": 0.23, "learning_rate": 4.472000812306443e-05, "loss": 0.0321, "step": 4660 }, { "epoch": 0.23, "learning_rate": 4.470731583489872e-05, "loss": 0.0272, "step": 4670 }, { "epoch": 0.23, "learning_rate": 4.469462354673301e-05, "loss": 0.0134, "step": 4680 }, { "epoch": 0.24, "learning_rate": 4.468193125856729e-05, "loss": 0.0315, "step": 4690 }, { "epoch": 0.24, "learning_rate": 4.466923897040158e-05, "loss": 0.0347, "step": 4700 }, { "epoch": 0.24, "learning_rate": 4.465654668223588e-05, "loss": 0.0248, "step": 4710 }, { "epoch": 0.24, "learning_rate": 4.464385439407017e-05, "loss": 0.0449, "step": 4720 }, { "epoch": 0.24, "learning_rate": 4.463116210590446e-05, "loss": 0.0544, "step": 4730 }, { "epoch": 0.24, "learning_rate": 4.461846981773875e-05, "loss": 0.0619, "step": 4740 }, { "epoch": 0.24, "learning_rate": 4.460577752957303e-05, "loss": 0.06, "step": 4750 }, { "epoch": 0.24, "learning_rate": 4.459308524140732e-05, "loss": 0.0226, "step": 4760 }, { "epoch": 0.24, "learning_rate": 4.458039295324161e-05, "loss": 0.0651, "step": 4770 }, { "epoch": 0.24, "learning_rate": 4.45677006650759e-05, "loss": 0.0275, "step": 4780 }, { "epoch": 0.24, "learning_rate": 4.455500837691019e-05, "loss": 0.0665, "step": 4790 }, { "epoch": 0.24, "learning_rate": 4.454231608874448e-05, "loss": 0.0418, "step": 4800 }, { "epoch": 0.24, "learning_rate": 4.452962380057877e-05, "loss": 0.0617, "step": 4810 }, { "epoch": 0.24, "learning_rate": 4.451693151241306e-05, "loss": 0.0461, "step": 4820 }, { "epoch": 0.24, "learning_rate": 4.450423922424735e-05, "loss": 0.0817, "step": 4830 }, { "epoch": 0.24, "learning_rate": 4.449154693608164e-05, "loss": 0.0585, "step": 4840 }, { "epoch": 0.24, "learning_rate": 4.447885464791593e-05, "loss": 0.0177, "step": 4850 }, { "epoch": 0.24, "learning_rate": 4.446616235975022e-05, "loss": 0.0258, "step": 4860 }, { "epoch": 0.24, "learning_rate": 4.4453470071584504e-05, "loss": 0.0249, "step": 4870 }, { "epoch": 0.24, "learning_rate": 4.4440777783418794e-05, "loss": 0.0623, "step": 4880 }, { "epoch": 0.25, "learning_rate": 4.4428085495253084e-05, "loss": 0.0806, "step": 4890 }, { "epoch": 0.25, "learning_rate": 4.4415393207087374e-05, "loss": 0.0514, "step": 4900 }, { "epoch": 0.25, "learning_rate": 4.4402700918921664e-05, "loss": 0.0534, "step": 4910 }, { "epoch": 0.25, "learning_rate": 4.4390008630755954e-05, "loss": 0.0334, "step": 4920 }, { "epoch": 0.25, "learning_rate": 4.4377316342590244e-05, "loss": 0.0726, "step": 4930 }, { "epoch": 0.25, "learning_rate": 4.4364624054424534e-05, "loss": 0.0617, "step": 4940 }, { "epoch": 0.25, "learning_rate": 4.4351931766258824e-05, "loss": 0.0827, "step": 4950 }, { "epoch": 0.25, "learning_rate": 4.4339239478093114e-05, "loss": 0.0374, "step": 4960 }, { "epoch": 0.25, "learning_rate": 4.4326547189927404e-05, "loss": 0.034, "step": 4970 }, { "epoch": 0.25, "learning_rate": 4.4313854901761694e-05, "loss": 0.0657, "step": 4980 }, { "epoch": 0.25, "learning_rate": 4.4301162613595984e-05, "loss": 0.0664, "step": 4990 }, { "epoch": 0.25, "learning_rate": 4.428847032543027e-05, "loss": 0.0383, "step": 5000 }, { "epoch": 0.25, "learning_rate": 4.427577803726456e-05, "loss": 0.0387, "step": 5010 }, { "epoch": 0.25, "learning_rate": 4.426308574909885e-05, "loss": 0.0573, "step": 5020 }, { "epoch": 0.25, "learning_rate": 4.425039346093314e-05, "loss": 0.0384, "step": 5030 }, { "epoch": 0.25, "learning_rate": 4.423770117276743e-05, "loss": 0.1072, "step": 5040 }, { "epoch": 0.25, "learning_rate": 4.422500888460172e-05, "loss": 0.0324, "step": 5050 }, { "epoch": 0.25, "learning_rate": 4.421231659643601e-05, "loss": 0.0226, "step": 5060 }, { "epoch": 0.25, "learning_rate": 4.41996243082703e-05, "loss": 0.0413, "step": 5070 }, { "epoch": 0.25, "learning_rate": 4.418693202010459e-05, "loss": 0.0464, "step": 5080 }, { "epoch": 0.26, "learning_rate": 4.417423973193888e-05, "loss": 0.0459, "step": 5090 }, { "epoch": 0.26, "learning_rate": 4.416154744377317e-05, "loss": 0.0607, "step": 5100 }, { "epoch": 0.26, "learning_rate": 4.414885515560746e-05, "loss": 0.0343, "step": 5110 }, { "epoch": 0.26, "learning_rate": 4.413616286744174e-05, "loss": 0.0496, "step": 5120 }, { "epoch": 0.26, "learning_rate": 4.412347057927603e-05, "loss": 0.0667, "step": 5130 }, { "epoch": 0.26, "learning_rate": 4.411077829111032e-05, "loss": 0.0549, "step": 5140 }, { "epoch": 0.26, "learning_rate": 4.409808600294461e-05, "loss": 0.0651, "step": 5150 }, { "epoch": 0.26, "learning_rate": 4.40853937147789e-05, "loss": 0.046, "step": 5160 }, { "epoch": 0.26, "learning_rate": 4.40727014266132e-05, "loss": 0.028, "step": 5170 }, { "epoch": 0.26, "learning_rate": 4.406000913844748e-05, "loss": 0.0406, "step": 5180 }, { "epoch": 0.26, "learning_rate": 4.404731685028177e-05, "loss": 0.0238, "step": 5190 }, { "epoch": 0.26, "learning_rate": 4.403462456211606e-05, "loss": 0.0672, "step": 5200 }, { "epoch": 0.26, "learning_rate": 4.402193227395035e-05, "loss": 0.0941, "step": 5210 }, { "epoch": 0.26, "learning_rate": 4.400923998578464e-05, "loss": 0.0484, "step": 5220 }, { "epoch": 0.26, "learning_rate": 4.399654769761893e-05, "loss": 0.057, "step": 5230 }, { "epoch": 0.26, "learning_rate": 4.398385540945322e-05, "loss": 0.0862, "step": 5240 }, { "epoch": 0.26, "learning_rate": 4.3971163121287504e-05, "loss": 0.0786, "step": 5250 }, { "epoch": 0.26, "learning_rate": 4.3958470833121794e-05, "loss": 0.0525, "step": 5260 }, { "epoch": 0.26, "learning_rate": 4.3945778544956084e-05, "loss": 0.0586, "step": 5270 }, { "epoch": 0.26, "learning_rate": 4.393308625679038e-05, "loss": 0.0186, "step": 5280 }, { "epoch": 0.27, "learning_rate": 4.392039396862467e-05, "loss": 0.0526, "step": 5290 }, { "epoch": 0.27, "learning_rate": 4.3907701680458954e-05, "loss": 0.0918, "step": 5300 }, { "epoch": 0.27, "learning_rate": 4.3895009392293244e-05, "loss": 0.0737, "step": 5310 }, { "epoch": 0.27, "learning_rate": 4.3882317104127534e-05, "loss": 0.0385, "step": 5320 }, { "epoch": 0.27, "learning_rate": 4.3869624815961824e-05, "loss": 0.041, "step": 5330 }, { "epoch": 0.27, "learning_rate": 4.3856932527796114e-05, "loss": 0.0596, "step": 5340 }, { "epoch": 0.27, "learning_rate": 4.3844240239630404e-05, "loss": 0.0368, "step": 5350 }, { "epoch": 0.27, "learning_rate": 4.3831547951464694e-05, "loss": 0.0417, "step": 5360 }, { "epoch": 0.27, "learning_rate": 4.381885566329898e-05, "loss": 0.0337, "step": 5370 }, { "epoch": 0.27, "learning_rate": 4.380616337513327e-05, "loss": 0.0754, "step": 5380 }, { "epoch": 0.27, "learning_rate": 4.379347108696756e-05, "loss": 0.0135, "step": 5390 }, { "epoch": 0.27, "learning_rate": 4.3780778798801854e-05, "loss": 0.0486, "step": 5400 }, { "epoch": 0.27, "learning_rate": 4.3768086510636144e-05, "loss": 0.0552, "step": 5410 }, { "epoch": 0.27, "learning_rate": 4.3755394222470434e-05, "loss": 0.0768, "step": 5420 }, { "epoch": 0.27, "learning_rate": 4.374270193430472e-05, "loss": 0.1063, "step": 5430 }, { "epoch": 0.27, "learning_rate": 4.373000964613901e-05, "loss": 0.0439, "step": 5440 }, { "epoch": 0.27, "learning_rate": 4.37173173579733e-05, "loss": 0.0545, "step": 5450 }, { "epoch": 0.27, "learning_rate": 4.370462506980759e-05, "loss": 0.048, "step": 5460 }, { "epoch": 0.27, "learning_rate": 4.369193278164188e-05, "loss": 0.0587, "step": 5470 }, { "epoch": 0.27, "learning_rate": 4.367924049347617e-05, "loss": 0.0513, "step": 5480 }, { "epoch": 0.28, "learning_rate": 4.366654820531046e-05, "loss": 0.0899, "step": 5490 }, { "epoch": 0.28, "learning_rate": 4.365385591714474e-05, "loss": 0.034, "step": 5500 }, { "epoch": 0.28, "learning_rate": 4.364116362897904e-05, "loss": 0.0246, "step": 5510 }, { "epoch": 0.28, "learning_rate": 4.362847134081333e-05, "loss": 0.0923, "step": 5520 }, { "epoch": 0.28, "learning_rate": 4.361577905264762e-05, "loss": 0.1321, "step": 5530 }, { "epoch": 0.28, "learning_rate": 4.360308676448191e-05, "loss": 0.0365, "step": 5540 }, { "epoch": 0.28, "learning_rate": 4.359039447631619e-05, "loss": 0.0376, "step": 5550 }, { "epoch": 0.28, "learning_rate": 4.357770218815048e-05, "loss": 0.0116, "step": 5560 }, { "epoch": 0.28, "learning_rate": 4.356500989998477e-05, "loss": 0.0684, "step": 5570 }, { "epoch": 0.28, "learning_rate": 4.355231761181906e-05, "loss": 0.0593, "step": 5580 }, { "epoch": 0.28, "learning_rate": 4.353962532365335e-05, "loss": 0.0549, "step": 5590 }, { "epoch": 0.28, "learning_rate": 4.352693303548764e-05, "loss": 0.0818, "step": 5600 }, { "epoch": 0.28, "learning_rate": 4.351424074732193e-05, "loss": 0.0816, "step": 5610 }, { "epoch": 0.28, "learning_rate": 4.350154845915621e-05, "loss": 0.0523, "step": 5620 }, { "epoch": 0.28, "learning_rate": 4.348885617099051e-05, "loss": 0.0738, "step": 5630 }, { "epoch": 0.28, "learning_rate": 4.34761638828248e-05, "loss": 0.0442, "step": 5640 }, { "epoch": 0.28, "learning_rate": 4.346347159465909e-05, "loss": 0.0461, "step": 5650 }, { "epoch": 0.28, "learning_rate": 4.345077930649338e-05, "loss": 0.0304, "step": 5660 }, { "epoch": 0.28, "learning_rate": 4.343808701832767e-05, "loss": 0.0612, "step": 5670 }, { "epoch": 0.28, "learning_rate": 4.342539473016195e-05, "loss": 0.0366, "step": 5680 }, { "epoch": 0.29, "learning_rate": 4.341270244199624e-05, "loss": 0.1129, "step": 5690 }, { "epoch": 0.29, "learning_rate": 4.340001015383053e-05, "loss": 0.1273, "step": 5700 }, { "epoch": 0.29, "learning_rate": 4.338731786566482e-05, "loss": 0.0445, "step": 5710 }, { "epoch": 0.29, "learning_rate": 4.337462557749911e-05, "loss": 0.0301, "step": 5720 }, { "epoch": 0.29, "learning_rate": 4.33619332893334e-05, "loss": 0.0534, "step": 5730 }, { "epoch": 0.29, "learning_rate": 4.334924100116769e-05, "loss": 0.0431, "step": 5740 }, { "epoch": 0.29, "learning_rate": 4.333654871300198e-05, "loss": 0.0478, "step": 5750 }, { "epoch": 0.29, "learning_rate": 4.332385642483627e-05, "loss": 0.0302, "step": 5760 }, { "epoch": 0.29, "learning_rate": 4.331116413667056e-05, "loss": 0.0484, "step": 5770 }, { "epoch": 0.29, "learning_rate": 4.329847184850485e-05, "loss": 0.0779, "step": 5780 }, { "epoch": 0.29, "learning_rate": 4.328577956033914e-05, "loss": 0.0443, "step": 5790 }, { "epoch": 0.29, "learning_rate": 4.3273087272173426e-05, "loss": 0.0607, "step": 5800 }, { "epoch": 0.29, "learning_rate": 4.3260394984007716e-05, "loss": 0.0507, "step": 5810 }, { "epoch": 0.29, "learning_rate": 4.3247702695842006e-05, "loss": 0.0695, "step": 5820 }, { "epoch": 0.29, "learning_rate": 4.3235010407676296e-05, "loss": 0.0342, "step": 5830 }, { "epoch": 0.29, "learning_rate": 4.3222318119510586e-05, "loss": 0.0854, "step": 5840 }, { "epoch": 0.29, "learning_rate": 4.3209625831344876e-05, "loss": 0.0566, "step": 5850 }, { "epoch": 0.29, "learning_rate": 4.3196933543179166e-05, "loss": 0.074, "step": 5860 }, { "epoch": 0.29, "learning_rate": 4.3184241255013456e-05, "loss": 0.0512, "step": 5870 }, { "epoch": 0.29, "learning_rate": 4.3171548966847746e-05, "loss": 0.0682, "step": 5880 }, { "epoch": 0.3, "learning_rate": 4.3158856678682036e-05, "loss": 0.0641, "step": 5890 }, { "epoch": 0.3, "learning_rate": 4.3146164390516326e-05, "loss": 0.0693, "step": 5900 }, { "epoch": 0.3, "learning_rate": 4.3133472102350616e-05, "loss": 0.0264, "step": 5910 }, { "epoch": 0.3, "learning_rate": 4.3120779814184906e-05, "loss": 0.0634, "step": 5920 }, { "epoch": 0.3, "learning_rate": 4.310808752601919e-05, "loss": 0.0941, "step": 5930 }, { "epoch": 0.3, "learning_rate": 4.309539523785348e-05, "loss": 0.1128, "step": 5940 }, { "epoch": 0.3, "learning_rate": 4.308270294968777e-05, "loss": 0.0247, "step": 5950 }, { "epoch": 0.3, "learning_rate": 4.307001066152206e-05, "loss": 0.0643, "step": 5960 }, { "epoch": 0.3, "learning_rate": 4.3057318373356356e-05, "loss": 0.035, "step": 5970 }, { "epoch": 0.3, "learning_rate": 4.304462608519064e-05, "loss": 0.0409, "step": 5980 }, { "epoch": 0.3, "learning_rate": 4.303193379702493e-05, "loss": 0.0485, "step": 5990 }, { "epoch": 0.3, "learning_rate": 4.301924150885922e-05, "loss": 0.0445, "step": 6000 }, { "epoch": 0.3, "learning_rate": 4.300654922069351e-05, "loss": 0.0217, "step": 6010 }, { "epoch": 0.3, "learning_rate": 4.29938569325278e-05, "loss": 0.0441, "step": 6020 }, { "epoch": 0.3, "learning_rate": 4.298116464436209e-05, "loss": 0.0427, "step": 6030 }, { "epoch": 0.3, "learning_rate": 4.296847235619638e-05, "loss": 0.0559, "step": 6040 }, { "epoch": 0.3, "learning_rate": 4.295578006803066e-05, "loss": 0.0963, "step": 6050 }, { "epoch": 0.3, "learning_rate": 4.294308777986495e-05, "loss": 0.0734, "step": 6060 }, { "epoch": 0.3, "learning_rate": 4.293039549169924e-05, "loss": 0.1637, "step": 6070 }, { "epoch": 0.3, "learning_rate": 4.291770320353353e-05, "loss": 0.0197, "step": 6080 }, { "epoch": 0.31, "learning_rate": 4.290501091536783e-05, "loss": 0.0442, "step": 6090 }, { "epoch": 0.31, "learning_rate": 4.289231862720212e-05, "loss": 0.059, "step": 6100 }, { "epoch": 0.31, "learning_rate": 4.28796263390364e-05, "loss": 0.0589, "step": 6110 }, { "epoch": 0.31, "learning_rate": 4.286693405087069e-05, "loss": 0.0413, "step": 6120 }, { "epoch": 0.31, "learning_rate": 4.285424176270498e-05, "loss": 0.0732, "step": 6130 }, { "epoch": 0.31, "learning_rate": 4.284154947453927e-05, "loss": 0.055, "step": 6140 }, { "epoch": 0.31, "learning_rate": 4.282885718637356e-05, "loss": 0.0998, "step": 6150 }, { "epoch": 0.31, "learning_rate": 4.281616489820785e-05, "loss": 0.0628, "step": 6160 }, { "epoch": 0.31, "learning_rate": 4.280347261004214e-05, "loss": 0.0633, "step": 6170 }, { "epoch": 0.31, "learning_rate": 4.2790780321876425e-05, "loss": 0.0511, "step": 6180 }, { "epoch": 0.31, "learning_rate": 4.2778088033710715e-05, "loss": 0.049, "step": 6190 }, { "epoch": 0.31, "learning_rate": 4.276539574554501e-05, "loss": 0.0466, "step": 6200 }, { "epoch": 0.31, "learning_rate": 4.27527034573793e-05, "loss": 0.086, "step": 6210 }, { "epoch": 0.31, "learning_rate": 4.274001116921359e-05, "loss": 0.0484, "step": 6220 }, { "epoch": 0.31, "learning_rate": 4.2727318881047875e-05, "loss": 0.0407, "step": 6230 }, { "epoch": 0.31, "learning_rate": 4.2714626592882165e-05, "loss": 0.0903, "step": 6240 }, { "epoch": 0.31, "learning_rate": 4.2701934304716455e-05, "loss": 0.0604, "step": 6250 }, { "epoch": 0.31, "learning_rate": 4.2689242016550745e-05, "loss": 0.0496, "step": 6260 }, { "epoch": 0.31, "learning_rate": 4.2676549728385035e-05, "loss": 0.048, "step": 6270 }, { "epoch": 0.31, "learning_rate": 4.2663857440219325e-05, "loss": 0.0465, "step": 6280 }, { "epoch": 0.32, "learning_rate": 4.2651165152053615e-05, "loss": 0.0075, "step": 6290 }, { "epoch": 0.32, "learning_rate": 4.26384728638879e-05, "loss": 0.0551, "step": 6300 }, { "epoch": 0.32, "learning_rate": 4.262578057572219e-05, "loss": 0.0965, "step": 6310 }, { "epoch": 0.32, "learning_rate": 4.2613088287556485e-05, "loss": 0.0368, "step": 6320 }, { "epoch": 0.32, "learning_rate": 4.2600395999390775e-05, "loss": 0.0919, "step": 6330 }, { "epoch": 0.32, "learning_rate": 4.2587703711225065e-05, "loss": 0.0525, "step": 6340 }, { "epoch": 0.32, "learning_rate": 4.2575011423059355e-05, "loss": 0.0894, "step": 6350 }, { "epoch": 0.32, "learning_rate": 4.256231913489364e-05, "loss": 0.0428, "step": 6360 }, { "epoch": 0.32, "learning_rate": 4.254962684672793e-05, "loss": 0.0742, "step": 6370 }, { "epoch": 0.32, "learning_rate": 4.253693455856222e-05, "loss": 0.106, "step": 6380 }, { "epoch": 0.32, "learning_rate": 4.252424227039651e-05, "loss": 0.0664, "step": 6390 }, { "epoch": 0.32, "learning_rate": 4.25115499822308e-05, "loss": 0.0473, "step": 6400 }, { "epoch": 0.32, "learning_rate": 4.249885769406509e-05, "loss": 0.0706, "step": 6410 }, { "epoch": 0.32, "learning_rate": 4.248616540589938e-05, "loss": 0.044, "step": 6420 }, { "epoch": 0.32, "learning_rate": 4.247347311773367e-05, "loss": 0.1, "step": 6430 }, { "epoch": 0.32, "learning_rate": 4.246078082956796e-05, "loss": 0.0769, "step": 6440 }, { "epoch": 0.32, "learning_rate": 4.244808854140225e-05, "loss": 0.0773, "step": 6450 }, { "epoch": 0.32, "learning_rate": 4.243539625323654e-05, "loss": 0.0303, "step": 6460 }, { "epoch": 0.32, "learning_rate": 4.242270396507083e-05, "loss": 0.0567, "step": 6470 }, { "epoch": 0.32, "learning_rate": 4.241001167690511e-05, "loss": 0.0934, "step": 6480 }, { "epoch": 0.33, "learning_rate": 4.23973193887394e-05, "loss": 0.028, "step": 6490 }, { "epoch": 0.33, "learning_rate": 4.238462710057369e-05, "loss": 0.0463, "step": 6500 }, { "epoch": 0.33, "learning_rate": 4.237193481240798e-05, "loss": 0.063, "step": 6510 }, { "epoch": 0.33, "learning_rate": 4.235924252424227e-05, "loss": 0.0774, "step": 6520 }, { "epoch": 0.33, "learning_rate": 4.234655023607656e-05, "loss": 0.0518, "step": 6530 }, { "epoch": 0.33, "learning_rate": 4.233385794791085e-05, "loss": 0.045, "step": 6540 }, { "epoch": 0.33, "learning_rate": 4.232116565974514e-05, "loss": 0.0248, "step": 6550 }, { "epoch": 0.33, "learning_rate": 4.230847337157943e-05, "loss": 0.0407, "step": 6560 }, { "epoch": 0.33, "learning_rate": 4.229578108341372e-05, "loss": 0.0305, "step": 6570 }, { "epoch": 0.33, "learning_rate": 4.228308879524801e-05, "loss": 0.0884, "step": 6580 }, { "epoch": 0.33, "learning_rate": 4.22703965070823e-05, "loss": 0.0597, "step": 6590 }, { "epoch": 0.33, "learning_rate": 4.225770421891659e-05, "loss": 0.0548, "step": 6600 }, { "epoch": 0.33, "learning_rate": 4.2245011930750875e-05, "loss": 0.0454, "step": 6610 }, { "epoch": 0.33, "learning_rate": 4.2232319642585165e-05, "loss": 0.0458, "step": 6620 }, { "epoch": 0.33, "learning_rate": 4.2219627354419455e-05, "loss": 0.0549, "step": 6630 }, { "epoch": 0.33, "learning_rate": 4.2206935066253745e-05, "loss": 0.025, "step": 6640 }, { "epoch": 0.33, "learning_rate": 4.2194242778088035e-05, "loss": 0.0649, "step": 6650 }, { "epoch": 0.33, "learning_rate": 4.2181550489922325e-05, "loss": 0.0346, "step": 6660 }, { "epoch": 0.33, "learning_rate": 4.2168858201756615e-05, "loss": 0.0848, "step": 6670 }, { "epoch": 0.33, "learning_rate": 4.2156165913590905e-05, "loss": 0.0521, "step": 6680 }, { "epoch": 0.34, "learning_rate": 4.2143473625425195e-05, "loss": 0.0424, "step": 6690 }, { "epoch": 0.34, "learning_rate": 4.2130781337259485e-05, "loss": 0.024, "step": 6700 }, { "epoch": 0.34, "learning_rate": 4.2118089049093775e-05, "loss": 0.0483, "step": 6710 }, { "epoch": 0.34, "learning_rate": 4.2105396760928065e-05, "loss": 0.0839, "step": 6720 }, { "epoch": 0.34, "learning_rate": 4.209270447276235e-05, "loss": 0.1072, "step": 6730 }, { "epoch": 0.34, "learning_rate": 4.208001218459664e-05, "loss": 0.0456, "step": 6740 }, { "epoch": 0.34, "learning_rate": 4.206731989643093e-05, "loss": 0.0645, "step": 6750 }, { "epoch": 0.34, "learning_rate": 4.205462760826522e-05, "loss": 0.0133, "step": 6760 }, { "epoch": 0.34, "learning_rate": 4.204193532009951e-05, "loss": 0.0419, "step": 6770 }, { "epoch": 0.34, "learning_rate": 4.2029243031933804e-05, "loss": 0.0677, "step": 6780 }, { "epoch": 0.34, "learning_rate": 4.201655074376809e-05, "loss": 0.0494, "step": 6790 }, { "epoch": 0.34, "learning_rate": 4.200385845560238e-05, "loss": 0.0329, "step": 6800 }, { "epoch": 0.34, "learning_rate": 4.199116616743667e-05, "loss": 0.0448, "step": 6810 }, { "epoch": 0.34, "learning_rate": 4.197847387927096e-05, "loss": 0.0823, "step": 6820 }, { "epoch": 0.34, "learning_rate": 4.196578159110525e-05, "loss": 0.0489, "step": 6830 }, { "epoch": 0.34, "learning_rate": 4.195308930293954e-05, "loss": 0.0464, "step": 6840 }, { "epoch": 0.34, "learning_rate": 4.194039701477383e-05, "loss": 0.0439, "step": 6850 }, { "epoch": 0.34, "learning_rate": 4.192770472660811e-05, "loss": 0.0269, "step": 6860 }, { "epoch": 0.34, "learning_rate": 4.19150124384424e-05, "loss": 0.0406, "step": 6870 }, { "epoch": 0.34, "learning_rate": 4.190232015027669e-05, "loss": 0.0722, "step": 6880 }, { "epoch": 0.35, "learning_rate": 4.188962786211099e-05, "loss": 0.0895, "step": 6890 }, { "epoch": 0.35, "learning_rate": 4.187693557394528e-05, "loss": 0.064, "step": 6900 }, { "epoch": 0.35, "learning_rate": 4.186424328577956e-05, "loss": 0.0432, "step": 6910 }, { "epoch": 0.35, "learning_rate": 4.185155099761385e-05, "loss": 0.044, "step": 6920 }, { "epoch": 0.35, "learning_rate": 4.183885870944814e-05, "loss": 0.0416, "step": 6930 }, { "epoch": 0.35, "learning_rate": 4.182616642128243e-05, "loss": 0.0629, "step": 6940 }, { "epoch": 0.35, "learning_rate": 4.181347413311672e-05, "loss": 0.0494, "step": 6950 }, { "epoch": 0.35, "learning_rate": 4.180078184495101e-05, "loss": 0.0526, "step": 6960 }, { "epoch": 0.35, "learning_rate": 4.17880895567853e-05, "loss": 0.038, "step": 6970 }, { "epoch": 0.35, "learning_rate": 4.1775397268619584e-05, "loss": 0.09, "step": 6980 }, { "epoch": 0.35, "learning_rate": 4.1762704980453874e-05, "loss": 0.051, "step": 6990 }, { "epoch": 0.35, "learning_rate": 4.1750012692288164e-05, "loss": 0.0675, "step": 7000 }, { "epoch": 0.35, "learning_rate": 4.173732040412246e-05, "loss": 0.0673, "step": 7010 }, { "epoch": 0.35, "learning_rate": 4.172462811595675e-05, "loss": 0.0889, "step": 7020 }, { "epoch": 0.35, "learning_rate": 4.171193582779104e-05, "loss": 0.0366, "step": 7030 }, { "epoch": 0.35, "learning_rate": 4.1699243539625324e-05, "loss": 0.06, "step": 7040 }, { "epoch": 0.35, "learning_rate": 4.1686551251459614e-05, "loss": 0.1069, "step": 7050 }, { "epoch": 0.35, "learning_rate": 4.1673858963293904e-05, "loss": 0.0763, "step": 7060 }, { "epoch": 0.35, "learning_rate": 4.1661166675128194e-05, "loss": 0.0309, "step": 7070 }, { "epoch": 0.35, "learning_rate": 4.1648474386962484e-05, "loss": 0.0807, "step": 7080 }, { "epoch": 0.36, "learning_rate": 4.1635782098796774e-05, "loss": 0.1136, "step": 7090 }, { "epoch": 0.36, "learning_rate": 4.1623089810631064e-05, "loss": 0.0559, "step": 7100 }, { "epoch": 0.36, "learning_rate": 4.161039752246535e-05, "loss": 0.0338, "step": 7110 }, { "epoch": 0.36, "learning_rate": 4.1597705234299644e-05, "loss": 0.0598, "step": 7120 }, { "epoch": 0.36, "learning_rate": 4.1585012946133934e-05, "loss": 0.0523, "step": 7130 }, { "epoch": 0.36, "learning_rate": 4.1572320657968224e-05, "loss": 0.1101, "step": 7140 }, { "epoch": 0.36, "learning_rate": 4.1559628369802514e-05, "loss": 0.108, "step": 7150 }, { "epoch": 0.36, "learning_rate": 4.15469360816368e-05, "loss": 0.0207, "step": 7160 }, { "epoch": 0.36, "learning_rate": 4.153424379347109e-05, "loss": 0.0296, "step": 7170 }, { "epoch": 0.36, "learning_rate": 4.152155150530538e-05, "loss": 0.0698, "step": 7180 }, { "epoch": 0.36, "learning_rate": 4.150885921713967e-05, "loss": 0.0352, "step": 7190 }, { "epoch": 0.36, "learning_rate": 4.149616692897396e-05, "loss": 0.0704, "step": 7200 }, { "epoch": 0.36, "learning_rate": 4.148347464080825e-05, "loss": 0.0496, "step": 7210 }, { "epoch": 0.36, "learning_rate": 4.147078235264254e-05, "loss": 0.0324, "step": 7220 }, { "epoch": 0.36, "learning_rate": 4.145809006447682e-05, "loss": 0.0546, "step": 7230 }, { "epoch": 0.36, "learning_rate": 4.144539777631112e-05, "loss": 0.052, "step": 7240 }, { "epoch": 0.36, "learning_rate": 4.143270548814541e-05, "loss": 0.0706, "step": 7250 }, { "epoch": 0.36, "learning_rate": 4.14200131999797e-05, "loss": 0.0602, "step": 7260 }, { "epoch": 0.36, "learning_rate": 4.140732091181399e-05, "loss": 0.0346, "step": 7270 }, { "epoch": 0.36, "learning_rate": 4.139462862364828e-05, "loss": 0.0459, "step": 7280 }, { "epoch": 0.37, "learning_rate": 4.138193633548256e-05, "loss": 0.0514, "step": 7290 }, { "epoch": 0.37, "learning_rate": 4.136924404731685e-05, "loss": 0.0706, "step": 7300 }, { "epoch": 0.37, "learning_rate": 4.135655175915114e-05, "loss": 0.0422, "step": 7310 }, { "epoch": 0.37, "learning_rate": 4.134385947098543e-05, "loss": 0.0575, "step": 7320 }, { "epoch": 0.37, "learning_rate": 4.133116718281972e-05, "loss": 0.0588, "step": 7330 }, { "epoch": 0.37, "learning_rate": 4.131847489465401e-05, "loss": 0.033, "step": 7340 }, { "epoch": 0.37, "learning_rate": 4.13057826064883e-05, "loss": 0.0754, "step": 7350 }, { "epoch": 0.37, "learning_rate": 4.129309031832259e-05, "loss": 0.0533, "step": 7360 }, { "epoch": 0.37, "learning_rate": 4.128039803015688e-05, "loss": 0.0434, "step": 7370 }, { "epoch": 0.37, "learning_rate": 4.126770574199117e-05, "loss": 0.066, "step": 7380 }, { "epoch": 0.37, "learning_rate": 4.125501345382546e-05, "loss": 0.0674, "step": 7390 }, { "epoch": 0.37, "learning_rate": 4.124232116565975e-05, "loss": 0.0449, "step": 7400 }, { "epoch": 0.37, "learning_rate": 4.122962887749403e-05, "loss": 0.04, "step": 7410 }, { "epoch": 0.37, "learning_rate": 4.121693658932832e-05, "loss": 0.1155, "step": 7420 }, { "epoch": 0.37, "learning_rate": 4.120424430116261e-05, "loss": 0.0373, "step": 7430 }, { "epoch": 0.37, "learning_rate": 4.11915520129969e-05, "loss": 0.0631, "step": 7440 }, { "epoch": 0.37, "learning_rate": 4.117885972483119e-05, "loss": 0.066, "step": 7450 }, { "epoch": 0.37, "learning_rate": 4.116616743666548e-05, "loss": 0.0641, "step": 7460 }, { "epoch": 0.37, "learning_rate": 4.115347514849977e-05, "loss": 0.0427, "step": 7470 }, { "epoch": 0.37, "learning_rate": 4.114078286033406e-05, "loss": 0.0277, "step": 7480 }, { "epoch": 0.38, "learning_rate": 4.112809057216835e-05, "loss": 0.0499, "step": 7490 }, { "epoch": 0.38, "learning_rate": 4.111539828400264e-05, "loss": 0.0403, "step": 7500 }, { "epoch": 0.38, "learning_rate": 4.110270599583693e-05, "loss": 0.0886, "step": 7510 }, { "epoch": 0.38, "learning_rate": 4.109001370767122e-05, "loss": 0.0266, "step": 7520 }, { "epoch": 0.38, "learning_rate": 4.107732141950551e-05, "loss": 0.0563, "step": 7530 }, { "epoch": 0.38, "learning_rate": 4.1064629131339796e-05, "loss": 0.0566, "step": 7540 }, { "epoch": 0.38, "learning_rate": 4.1051936843174086e-05, "loss": 0.0538, "step": 7550 }, { "epoch": 0.38, "learning_rate": 4.1039244555008376e-05, "loss": 0.0871, "step": 7560 }, { "epoch": 0.38, "learning_rate": 4.1026552266842666e-05, "loss": 0.0531, "step": 7570 }, { "epoch": 0.38, "learning_rate": 4.101385997867696e-05, "loss": 0.0581, "step": 7580 }, { "epoch": 0.38, "learning_rate": 4.1001167690511246e-05, "loss": 0.038, "step": 7590 }, { "epoch": 0.38, "learning_rate": 4.0988475402345536e-05, "loss": 0.0907, "step": 7600 }, { "epoch": 0.38, "learning_rate": 4.0975783114179826e-05, "loss": 0.0345, "step": 7610 }, { "epoch": 0.38, "learning_rate": 4.0963090826014116e-05, "loss": 0.0547, "step": 7620 }, { "epoch": 0.38, "learning_rate": 4.0950398537848406e-05, "loss": 0.0409, "step": 7630 }, { "epoch": 0.38, "learning_rate": 4.0937706249682696e-05, "loss": 0.0437, "step": 7640 }, { "epoch": 0.38, "learning_rate": 4.0925013961516986e-05, "loss": 0.0572, "step": 7650 }, { "epoch": 0.38, "learning_rate": 4.091232167335127e-05, "loss": 0.0988, "step": 7660 }, { "epoch": 0.38, "learning_rate": 4.089962938518556e-05, "loss": 0.062, "step": 7670 }, { "epoch": 0.39, "learning_rate": 4.088693709701985e-05, "loss": 0.0369, "step": 7680 }, { "epoch": 0.39, "learning_rate": 4.087424480885414e-05, "loss": 0.0681, "step": 7690 }, { "epoch": 0.39, "learning_rate": 4.0861552520688436e-05, "loss": 0.0633, "step": 7700 }, { "epoch": 0.39, "learning_rate": 4.0848860232522726e-05, "loss": 0.0435, "step": 7710 }, { "epoch": 0.39, "learning_rate": 4.083616794435701e-05, "loss": 0.0444, "step": 7720 }, { "epoch": 0.39, "learning_rate": 4.08234756561913e-05, "loss": 0.0587, "step": 7730 }, { "epoch": 0.39, "learning_rate": 4.081078336802559e-05, "loss": 0.119, "step": 7740 }, { "epoch": 0.39, "learning_rate": 4.079809107985988e-05, "loss": 0.0973, "step": 7750 }, { "epoch": 0.39, "learning_rate": 4.078539879169417e-05, "loss": 0.0589, "step": 7760 }, { "epoch": 0.39, "learning_rate": 4.077270650352846e-05, "loss": 0.0909, "step": 7770 }, { "epoch": 0.39, "learning_rate": 4.076001421536275e-05, "loss": 0.0605, "step": 7780 }, { "epoch": 0.39, "learning_rate": 4.074732192719703e-05, "loss": 0.0478, "step": 7790 }, { "epoch": 0.39, "learning_rate": 4.073462963903132e-05, "loss": 0.0539, "step": 7800 }, { "epoch": 0.39, "learning_rate": 4.072193735086561e-05, "loss": 0.048, "step": 7810 }, { "epoch": 0.39, "learning_rate": 4.070924506269991e-05, "loss": 0.0628, "step": 7820 }, { "epoch": 0.39, "learning_rate": 4.06965527745342e-05, "loss": 0.0679, "step": 7830 }, { "epoch": 0.39, "learning_rate": 4.068386048636848e-05, "loss": 0.0792, "step": 7840 }, { "epoch": 0.39, "learning_rate": 4.067116819820277e-05, "loss": 0.07, "step": 7850 }, { "epoch": 0.39, "learning_rate": 4.065847591003706e-05, "loss": 0.0344, "step": 7860 }, { "epoch": 0.39, "learning_rate": 4.064578362187135e-05, "loss": 0.0388, "step": 7870 }, { "epoch": 0.4, "learning_rate": 4.063309133370564e-05, "loss": 0.0687, "step": 7880 }, { "epoch": 0.4, "learning_rate": 4.062039904553993e-05, "loss": 0.0533, "step": 7890 }, { "epoch": 0.4, "learning_rate": 4.060770675737422e-05, "loss": 0.0344, "step": 7900 }, { "epoch": 0.4, "learning_rate": 4.0595014469208506e-05, "loss": 0.083, "step": 7910 }, { "epoch": 0.4, "learning_rate": 4.0582322181042796e-05, "loss": 0.0425, "step": 7920 }, { "epoch": 0.4, "learning_rate": 4.056962989287709e-05, "loss": 0.1469, "step": 7930 }, { "epoch": 0.4, "learning_rate": 4.055693760471138e-05, "loss": 0.0704, "step": 7940 }, { "epoch": 0.4, "learning_rate": 4.054424531654567e-05, "loss": 0.0652, "step": 7950 }, { "epoch": 0.4, "learning_rate": 4.053155302837996e-05, "loss": 0.0712, "step": 7960 }, { "epoch": 0.4, "learning_rate": 4.0518860740214246e-05, "loss": 0.0508, "step": 7970 }, { "epoch": 0.4, "learning_rate": 4.0506168452048536e-05, "loss": 0.0514, "step": 7980 }, { "epoch": 0.4, "learning_rate": 4.0493476163882826e-05, "loss": 0.0987, "step": 7990 }, { "epoch": 0.4, "learning_rate": 4.0480783875717116e-05, "loss": 0.0881, "step": 8000 }, { "epoch": 0.4, "learning_rate": 4.0468091587551406e-05, "loss": 0.0912, "step": 8010 }, { "epoch": 0.4, "learning_rate": 4.0455399299385696e-05, "loss": 0.0985, "step": 8020 }, { "epoch": 0.4, "learning_rate": 4.0442707011219985e-05, "loss": 0.0719, "step": 8030 }, { "epoch": 0.4, "learning_rate": 4.043001472305427e-05, "loss": 0.0423, "step": 8040 }, { "epoch": 0.4, "learning_rate": 4.0417322434888565e-05, "loss": 0.0827, "step": 8050 }, { "epoch": 0.4, "learning_rate": 4.0404630146722855e-05, "loss": 0.0658, "step": 8060 }, { "epoch": 0.4, "learning_rate": 4.0391937858557145e-05, "loss": 0.0504, "step": 8070 }, { "epoch": 0.41, "learning_rate": 4.0379245570391435e-05, "loss": 0.0499, "step": 8080 }, { "epoch": 0.41, "learning_rate": 4.036655328222572e-05, "loss": 0.1088, "step": 8090 }, { "epoch": 0.41, "learning_rate": 4.035386099406001e-05, "loss": 0.0507, "step": 8100 }, { "epoch": 0.41, "learning_rate": 4.03411687058943e-05, "loss": 0.068, "step": 8110 }, { "epoch": 0.41, "learning_rate": 4.032847641772859e-05, "loss": 0.0659, "step": 8120 }, { "epoch": 0.41, "learning_rate": 4.031578412956288e-05, "loss": 0.0732, "step": 8130 }, { "epoch": 0.41, "learning_rate": 4.030309184139717e-05, "loss": 0.0438, "step": 8140 }, { "epoch": 0.41, "learning_rate": 4.029039955323146e-05, "loss": 0.0274, "step": 8150 }, { "epoch": 0.41, "learning_rate": 4.027770726506575e-05, "loss": 0.0523, "step": 8160 }, { "epoch": 0.41, "learning_rate": 4.026501497690004e-05, "loss": 0.0277, "step": 8170 }, { "epoch": 0.41, "learning_rate": 4.025232268873433e-05, "loss": 0.0569, "step": 8180 }, { "epoch": 0.41, "learning_rate": 4.023963040056862e-05, "loss": 0.0719, "step": 8190 }, { "epoch": 0.41, "learning_rate": 4.022693811240291e-05, "loss": 0.0585, "step": 8200 }, { "epoch": 0.41, "learning_rate": 4.02142458242372e-05, "loss": 0.0677, "step": 8210 }, { "epoch": 0.41, "learning_rate": 4.020155353607148e-05, "loss": 0.0404, "step": 8220 }, { "epoch": 0.41, "learning_rate": 4.018886124790577e-05, "loss": 0.0542, "step": 8230 }, { "epoch": 0.41, "learning_rate": 4.017616895974006e-05, "loss": 0.0484, "step": 8240 }, { "epoch": 0.41, "learning_rate": 4.016347667157435e-05, "loss": 0.0281, "step": 8250 }, { "epoch": 0.41, "learning_rate": 4.015078438340864e-05, "loss": 0.0218, "step": 8260 }, { "epoch": 0.41, "learning_rate": 4.013809209524293e-05, "loss": 0.0828, "step": 8270 }, { "epoch": 0.42, "learning_rate": 4.012539980707722e-05, "loss": 0.0302, "step": 8280 }, { "epoch": 0.42, "learning_rate": 4.011270751891151e-05, "loss": 0.088, "step": 8290 }, { "epoch": 0.42, "learning_rate": 4.01000152307458e-05, "loss": 0.0617, "step": 8300 }, { "epoch": 0.42, "learning_rate": 4.008732294258009e-05, "loss": 0.0234, "step": 8310 }, { "epoch": 0.42, "learning_rate": 4.007463065441438e-05, "loss": 0.0598, "step": 8320 }, { "epoch": 0.42, "learning_rate": 4.006193836624867e-05, "loss": 0.0211, "step": 8330 }, { "epoch": 0.42, "learning_rate": 4.0049246078082955e-05, "loss": 0.049, "step": 8340 }, { "epoch": 0.42, "learning_rate": 4.0036553789917245e-05, "loss": 0.0994, "step": 8350 }, { "epoch": 0.42, "learning_rate": 4.0023861501751535e-05, "loss": 0.055, "step": 8360 }, { "epoch": 0.42, "learning_rate": 4.0011169213585825e-05, "loss": 0.0585, "step": 8370 }, { "epoch": 0.42, "learning_rate": 3.9998476925420115e-05, "loss": 0.0596, "step": 8380 }, { "epoch": 0.42, "learning_rate": 3.998578463725441e-05, "loss": 0.0301, "step": 8390 }, { "epoch": 0.42, "learning_rate": 3.9973092349088695e-05, "loss": 0.0463, "step": 8400 }, { "epoch": 0.42, "learning_rate": 3.9960400060922985e-05, "loss": 0.0421, "step": 8410 }, { "epoch": 0.42, "learning_rate": 3.9947707772757275e-05, "loss": 0.0227, "step": 8420 }, { "epoch": 0.42, "learning_rate": 3.9935015484591565e-05, "loss": 0.0291, "step": 8430 }, { "epoch": 0.42, "learning_rate": 3.9922323196425855e-05, "loss": 0.1088, "step": 8440 }, { "epoch": 0.42, "learning_rate": 3.9909630908260145e-05, "loss": 0.0349, "step": 8450 }, { "epoch": 0.42, "learning_rate": 3.9896938620094435e-05, "loss": 0.0334, "step": 8460 }, { "epoch": 0.42, "learning_rate": 3.988424633192872e-05, "loss": 0.0572, "step": 8470 }, { "epoch": 0.43, "learning_rate": 3.987155404376301e-05, "loss": 0.0863, "step": 8480 }, { "epoch": 0.43, "learning_rate": 3.98588617555973e-05, "loss": 0.0345, "step": 8490 }, { "epoch": 0.43, "learning_rate": 3.984616946743159e-05, "loss": 0.0319, "step": 8500 }, { "epoch": 0.43, "learning_rate": 3.9833477179265885e-05, "loss": 0.0303, "step": 8510 }, { "epoch": 0.43, "learning_rate": 3.9820784891100175e-05, "loss": 0.0342, "step": 8520 }, { "epoch": 0.43, "learning_rate": 3.980809260293446e-05, "loss": 0.1098, "step": 8530 }, { "epoch": 0.43, "learning_rate": 3.979540031476875e-05, "loss": 0.0595, "step": 8540 }, { "epoch": 0.43, "learning_rate": 3.978270802660304e-05, "loss": 0.0292, "step": 8550 }, { "epoch": 0.43, "learning_rate": 3.977001573843733e-05, "loss": 0.0415, "step": 8560 }, { "epoch": 0.43, "learning_rate": 3.975732345027162e-05, "loss": 0.0376, "step": 8570 }, { "epoch": 0.43, "learning_rate": 3.974463116210591e-05, "loss": 0.0414, "step": 8580 }, { "epoch": 0.43, "learning_rate": 3.973193887394019e-05, "loss": 0.0492, "step": 8590 }, { "epoch": 0.43, "learning_rate": 3.971924658577448e-05, "loss": 0.0633, "step": 8600 }, { "epoch": 0.43, "learning_rate": 3.970655429760877e-05, "loss": 0.0461, "step": 8610 }, { "epoch": 0.43, "learning_rate": 3.969386200944307e-05, "loss": 0.0519, "step": 8620 }, { "epoch": 0.43, "learning_rate": 3.968116972127736e-05, "loss": 0.0316, "step": 8630 }, { "epoch": 0.43, "learning_rate": 3.966847743311165e-05, "loss": 0.0574, "step": 8640 }, { "epoch": 0.43, "learning_rate": 3.965578514494593e-05, "loss": 0.0215, "step": 8650 }, { "epoch": 0.43, "learning_rate": 3.964309285678022e-05, "loss": 0.0398, "step": 8660 }, { "epoch": 0.43, "learning_rate": 3.963040056861451e-05, "loss": 0.0663, "step": 8670 }, { "epoch": 0.44, "learning_rate": 3.96177082804488e-05, "loss": 0.0671, "step": 8680 }, { "epoch": 0.44, "learning_rate": 3.960501599228309e-05, "loss": 0.0755, "step": 8690 }, { "epoch": 0.44, "learning_rate": 3.959232370411738e-05, "loss": 0.0739, "step": 8700 }, { "epoch": 0.44, "learning_rate": 3.957963141595167e-05, "loss": 0.0404, "step": 8710 }, { "epoch": 0.44, "learning_rate": 3.9566939127785954e-05, "loss": 0.056, "step": 8720 }, { "epoch": 0.44, "learning_rate": 3.9554246839620244e-05, "loss": 0.0451, "step": 8730 }, { "epoch": 0.44, "learning_rate": 3.954155455145454e-05, "loss": 0.0819, "step": 8740 }, { "epoch": 0.44, "learning_rate": 3.952886226328883e-05, "loss": 0.0799, "step": 8750 }, { "epoch": 0.44, "learning_rate": 3.951616997512312e-05, "loss": 0.0514, "step": 8760 }, { "epoch": 0.44, "learning_rate": 3.9503477686957404e-05, "loss": 0.0504, "step": 8770 }, { "epoch": 0.44, "learning_rate": 3.9490785398791694e-05, "loss": 0.0786, "step": 8780 }, { "epoch": 0.44, "learning_rate": 3.9478093110625984e-05, "loss": 0.0347, "step": 8790 }, { "epoch": 0.44, "learning_rate": 3.9465400822460274e-05, "loss": 0.0353, "step": 8800 }, { "epoch": 0.44, "learning_rate": 3.9452708534294564e-05, "loss": 0.0484, "step": 8810 }, { "epoch": 0.44, "learning_rate": 3.9440016246128854e-05, "loss": 0.0349, "step": 8820 }, { "epoch": 0.44, "learning_rate": 3.9427323957963144e-05, "loss": 0.0717, "step": 8830 }, { "epoch": 0.44, "learning_rate": 3.941463166979743e-05, "loss": 0.051, "step": 8840 }, { "epoch": 0.44, "learning_rate": 3.9401939381631724e-05, "loss": 0.0665, "step": 8850 }, { "epoch": 0.44, "learning_rate": 3.9389247093466014e-05, "loss": 0.0755, "step": 8860 }, { "epoch": 0.44, "learning_rate": 3.9376554805300304e-05, "loss": 0.0897, "step": 8870 }, { "epoch": 0.45, "learning_rate": 3.9363862517134594e-05, "loss": 0.0243, "step": 8880 }, { "epoch": 0.45, "learning_rate": 3.9351170228968884e-05, "loss": 0.0326, "step": 8890 }, { "epoch": 0.45, "learning_rate": 3.933847794080317e-05, "loss": 0.0967, "step": 8900 }, { "epoch": 0.45, "learning_rate": 3.932578565263746e-05, "loss": 0.0474, "step": 8910 }, { "epoch": 0.45, "learning_rate": 3.931309336447175e-05, "loss": 0.0353, "step": 8920 }, { "epoch": 0.45, "learning_rate": 3.930040107630604e-05, "loss": 0.0599, "step": 8930 }, { "epoch": 0.45, "learning_rate": 3.928770878814033e-05, "loss": 0.077, "step": 8940 }, { "epoch": 0.45, "learning_rate": 3.927501649997462e-05, "loss": 0.0149, "step": 8950 }, { "epoch": 0.45, "learning_rate": 3.926232421180891e-05, "loss": 0.0534, "step": 8960 }, { "epoch": 0.45, "learning_rate": 3.92496319236432e-05, "loss": 0.0598, "step": 8970 }, { "epoch": 0.45, "learning_rate": 3.923693963547749e-05, "loss": 0.0491, "step": 8980 }, { "epoch": 0.45, "learning_rate": 3.922424734731178e-05, "loss": 0.0566, "step": 8990 }, { "epoch": 0.45, "learning_rate": 3.921155505914607e-05, "loss": 0.0339, "step": 9000 }, { "epoch": 0.45, "learning_rate": 3.919886277098036e-05, "loss": 0.0872, "step": 9010 }, { "epoch": 0.45, "learning_rate": 3.918617048281464e-05, "loss": 0.1197, "step": 9020 }, { "epoch": 0.45, "learning_rate": 3.917347819464893e-05, "loss": 0.0526, "step": 9030 }, { "epoch": 0.45, "learning_rate": 3.916078590648322e-05, "loss": 0.0919, "step": 9040 }, { "epoch": 0.45, "learning_rate": 3.914809361831751e-05, "loss": 0.0552, "step": 9050 }, { "epoch": 0.45, "learning_rate": 3.91354013301518e-05, "loss": 0.1246, "step": 9060 }, { "epoch": 0.45, "learning_rate": 3.912270904198609e-05, "loss": 0.1218, "step": 9070 }, { "epoch": 0.46, "learning_rate": 3.911001675382038e-05, "loss": 0.0394, "step": 9080 }, { "epoch": 0.46, "learning_rate": 3.909732446565467e-05, "loss": 0.0297, "step": 9090 }, { "epoch": 0.46, "learning_rate": 3.908463217748896e-05, "loss": 0.149, "step": 9100 }, { "epoch": 0.46, "learning_rate": 3.907193988932325e-05, "loss": 0.0838, "step": 9110 }, { "epoch": 0.46, "learning_rate": 3.905924760115754e-05, "loss": 0.0736, "step": 9120 }, { "epoch": 0.46, "learning_rate": 3.904655531299183e-05, "loss": 0.0615, "step": 9130 }, { "epoch": 0.46, "learning_rate": 3.903386302482612e-05, "loss": 0.0439, "step": 9140 }, { "epoch": 0.46, "learning_rate": 3.9021170736660403e-05, "loss": 0.0838, "step": 9150 }, { "epoch": 0.46, "learning_rate": 3.9008478448494693e-05, "loss": 0.0322, "step": 9160 }, { "epoch": 0.46, "learning_rate": 3.899578616032898e-05, "loss": 0.0519, "step": 9170 }, { "epoch": 0.46, "learning_rate": 3.898309387216327e-05, "loss": 0.0357, "step": 9180 }, { "epoch": 0.46, "learning_rate": 3.897040158399756e-05, "loss": 0.0555, "step": 9190 }, { "epoch": 0.46, "learning_rate": 3.895770929583186e-05, "loss": 0.0929, "step": 9200 }, { "epoch": 0.46, "learning_rate": 3.894501700766614e-05, "loss": 0.0766, "step": 9210 }, { "epoch": 0.46, "learning_rate": 3.893232471950043e-05, "loss": 0.1127, "step": 9220 }, { "epoch": 0.46, "learning_rate": 3.891963243133472e-05, "loss": 0.0685, "step": 9230 }, { "epoch": 0.46, "learning_rate": 3.890694014316901e-05, "loss": 0.0444, "step": 9240 }, { "epoch": 0.46, "learning_rate": 3.88942478550033e-05, "loss": 0.1337, "step": 9250 }, { "epoch": 0.46, "learning_rate": 3.888155556683759e-05, "loss": 0.0415, "step": 9260 }, { "epoch": 0.46, "learning_rate": 3.8868863278671877e-05, "loss": 0.1125, "step": 9270 }, { "epoch": 0.47, "learning_rate": 3.8856170990506166e-05, "loss": 0.0459, "step": 9280 }, { "epoch": 0.47, "learning_rate": 3.8843478702340456e-05, "loss": 0.0523, "step": 9290 }, { "epoch": 0.47, "learning_rate": 3.8830786414174746e-05, "loss": 0.0511, "step": 9300 }, { "epoch": 0.47, "learning_rate": 3.881809412600904e-05, "loss": 0.0431, "step": 9310 }, { "epoch": 0.47, "learning_rate": 3.880540183784333e-05, "loss": 0.104, "step": 9320 }, { "epoch": 0.47, "learning_rate": 3.8792709549677616e-05, "loss": 0.066, "step": 9330 }, { "epoch": 0.47, "learning_rate": 3.8780017261511906e-05, "loss": 0.0304, "step": 9340 }, { "epoch": 0.47, "learning_rate": 3.8767324973346196e-05, "loss": 0.0357, "step": 9350 }, { "epoch": 0.47, "learning_rate": 3.8754632685180486e-05, "loss": 0.0456, "step": 9360 }, { "epoch": 0.47, "learning_rate": 3.8741940397014776e-05, "loss": 0.0524, "step": 9370 }, { "epoch": 0.47, "learning_rate": 3.8729248108849066e-05, "loss": 0.0379, "step": 9380 }, { "epoch": 0.47, "learning_rate": 3.8716555820683356e-05, "loss": 0.0633, "step": 9390 }, { "epoch": 0.47, "learning_rate": 3.870386353251764e-05, "loss": 0.0395, "step": 9400 }, { "epoch": 0.47, "learning_rate": 3.869117124435193e-05, "loss": 0.0639, "step": 9410 }, { "epoch": 0.47, "learning_rate": 3.867847895618622e-05, "loss": 0.0544, "step": 9420 }, { "epoch": 0.47, "learning_rate": 3.8665786668020516e-05, "loss": 0.0394, "step": 9430 }, { "epoch": 0.47, "learning_rate": 3.8653094379854806e-05, "loss": 0.0328, "step": 9440 }, { "epoch": 0.47, "learning_rate": 3.8640402091689096e-05, "loss": 0.028, "step": 9450 }, { "epoch": 0.47, "learning_rate": 3.862770980352338e-05, "loss": 0.0387, "step": 9460 }, { "epoch": 0.47, "learning_rate": 3.861501751535767e-05, "loss": 0.0619, "step": 9470 }, { "epoch": 0.48, "learning_rate": 3.860232522719196e-05, "loss": 0.0406, "step": 9480 }, { "epoch": 0.48, "learning_rate": 3.858963293902625e-05, "loss": 0.0521, "step": 9490 }, { "epoch": 0.48, "learning_rate": 3.857694065086054e-05, "loss": 0.0738, "step": 9500 }, { "epoch": 0.48, "learning_rate": 3.856424836269483e-05, "loss": 0.0929, "step": 9510 }, { "epoch": 0.48, "learning_rate": 3.855155607452911e-05, "loss": 0.0768, "step": 9520 }, { "epoch": 0.48, "learning_rate": 3.85388637863634e-05, "loss": 0.1154, "step": 9530 }, { "epoch": 0.48, "learning_rate": 3.85261714981977e-05, "loss": 0.0675, "step": 9540 }, { "epoch": 0.48, "learning_rate": 3.851347921003199e-05, "loss": 0.0553, "step": 9550 }, { "epoch": 0.48, "learning_rate": 3.850078692186628e-05, "loss": 0.102, "step": 9560 }, { "epoch": 0.48, "learning_rate": 3.848809463370057e-05, "loss": 0.0591, "step": 9570 }, { "epoch": 0.48, "learning_rate": 3.847540234553485e-05, "loss": 0.0297, "step": 9580 }, { "epoch": 0.48, "learning_rate": 3.846271005736914e-05, "loss": 0.0541, "step": 9590 }, { "epoch": 0.48, "learning_rate": 3.845001776920343e-05, "loss": 0.0418, "step": 9600 }, { "epoch": 0.48, "learning_rate": 3.843732548103772e-05, "loss": 0.0769, "step": 9610 }, { "epoch": 0.48, "learning_rate": 3.842463319287201e-05, "loss": 0.0314, "step": 9620 }, { "epoch": 0.48, "learning_rate": 3.84119409047063e-05, "loss": 0.0215, "step": 9630 }, { "epoch": 0.48, "learning_rate": 3.839924861654059e-05, "loss": 0.0581, "step": 9640 }, { "epoch": 0.48, "learning_rate": 3.8386556328374876e-05, "loss": 0.0231, "step": 9650 }, { "epoch": 0.48, "learning_rate": 3.837386404020917e-05, "loss": 0.0259, "step": 9660 }, { "epoch": 0.48, "learning_rate": 3.836117175204346e-05, "loss": 0.0196, "step": 9670 }, { "epoch": 0.49, "learning_rate": 3.834847946387775e-05, "loss": 0.086, "step": 9680 }, { "epoch": 0.49, "learning_rate": 3.833578717571204e-05, "loss": 0.0475, "step": 9690 }, { "epoch": 0.49, "learning_rate": 3.832309488754633e-05, "loss": 0.0588, "step": 9700 }, { "epoch": 0.49, "learning_rate": 3.8310402599380616e-05, "loss": 0.0555, "step": 9710 }, { "epoch": 0.49, "learning_rate": 3.8297710311214906e-05, "loss": 0.118, "step": 9720 }, { "epoch": 0.49, "learning_rate": 3.8285018023049196e-05, "loss": 0.0863, "step": 9730 }, { "epoch": 0.49, "learning_rate": 3.8272325734883486e-05, "loss": 0.0431, "step": 9740 }, { "epoch": 0.49, "learning_rate": 3.8259633446717776e-05, "loss": 0.0699, "step": 9750 }, { "epoch": 0.49, "learning_rate": 3.8246941158552066e-05, "loss": 0.0489, "step": 9760 }, { "epoch": 0.49, "learning_rate": 3.8234248870386356e-05, "loss": 0.0783, "step": 9770 }, { "epoch": 0.49, "learning_rate": 3.8221556582220646e-05, "loss": 0.0844, "step": 9780 }, { "epoch": 0.49, "learning_rate": 3.8208864294054936e-05, "loss": 0.0659, "step": 9790 }, { "epoch": 0.49, "learning_rate": 3.8196172005889226e-05, "loss": 0.0492, "step": 9800 }, { "epoch": 0.49, "learning_rate": 3.8183479717723516e-05, "loss": 0.059, "step": 9810 }, { "epoch": 0.49, "learning_rate": 3.8170787429557806e-05, "loss": 0.0315, "step": 9820 }, { "epoch": 0.49, "learning_rate": 3.815809514139209e-05, "loss": 0.0546, "step": 9830 }, { "epoch": 0.49, "learning_rate": 3.814540285322638e-05, "loss": 0.0735, "step": 9840 }, { "epoch": 0.49, "learning_rate": 3.813271056506067e-05, "loss": 0.0703, "step": 9850 }, { "epoch": 0.49, "learning_rate": 3.812001827689496e-05, "loss": 0.088, "step": 9860 }, { "epoch": 0.49, "learning_rate": 3.810732598872925e-05, "loss": 0.0796, "step": 9870 }, { "epoch": 0.5, "learning_rate": 3.809463370056354e-05, "loss": 0.0753, "step": 9880 }, { "epoch": 0.5, "learning_rate": 3.808194141239783e-05, "loss": 0.073, "step": 9890 }, { "epoch": 0.5, "learning_rate": 3.806924912423212e-05, "loss": 0.08, "step": 9900 }, { "epoch": 0.5, "learning_rate": 3.805655683606641e-05, "loss": 0.0792, "step": 9910 }, { "epoch": 0.5, "learning_rate": 3.80438645479007e-05, "loss": 0.0876, "step": 9920 }, { "epoch": 0.5, "learning_rate": 3.803117225973499e-05, "loss": 0.0625, "step": 9930 }, { "epoch": 0.5, "learning_rate": 3.801847997156928e-05, "loss": 0.0297, "step": 9940 }, { "epoch": 0.5, "learning_rate": 3.800578768340356e-05, "loss": 0.0649, "step": 9950 }, { "epoch": 0.5, "learning_rate": 3.799309539523785e-05, "loss": 0.0381, "step": 9960 }, { "epoch": 0.5, "learning_rate": 3.798040310707214e-05, "loss": 0.0721, "step": 9970 }, { "epoch": 0.5, "learning_rate": 3.796771081890643e-05, "loss": 0.0565, "step": 9980 }, { "epoch": 0.5, "learning_rate": 3.795501853074072e-05, "loss": 0.0795, "step": 9990 }, { "epoch": 0.5, "learning_rate": 3.794232624257502e-05, "loss": 0.0514, "step": 10000 }, { "epoch": 0.5, "learning_rate": 3.79296339544093e-05, "loss": 0.0599, "step": 10010 }, { "epoch": 0.5, "learning_rate": 3.791694166624359e-05, "loss": 0.1074, "step": 10020 }, { "epoch": 0.5, "learning_rate": 3.790424937807788e-05, "loss": 0.0994, "step": 10030 }, { "epoch": 0.5, "learning_rate": 3.789155708991217e-05, "loss": 0.0205, "step": 10040 }, { "epoch": 0.5, "learning_rate": 3.787886480174646e-05, "loss": 0.0472, "step": 10050 }, { "epoch": 0.5, "learning_rate": 3.786617251358075e-05, "loss": 0.0703, "step": 10060 }, { "epoch": 0.5, "learning_rate": 3.785348022541504e-05, "loss": 0.0573, "step": 10070 }, { "epoch": 0.51, "learning_rate": 3.7840787937249325e-05, "loss": 0.0725, "step": 10080 }, { "epoch": 0.51, "learning_rate": 3.7828095649083615e-05, "loss": 0.0332, "step": 10090 }, { "epoch": 0.51, "learning_rate": 3.7815403360917905e-05, "loss": 0.0616, "step": 10100 }, { "epoch": 0.51, "learning_rate": 3.7802711072752195e-05, "loss": 0.0536, "step": 10110 }, { "epoch": 0.51, "learning_rate": 3.779001878458649e-05, "loss": 0.0517, "step": 10120 }, { "epoch": 0.51, "learning_rate": 3.777732649642078e-05, "loss": 0.0501, "step": 10130 }, { "epoch": 0.51, "learning_rate": 3.7764634208255065e-05, "loss": 0.0242, "step": 10140 }, { "epoch": 0.51, "learning_rate": 3.7751941920089355e-05, "loss": 0.0688, "step": 10150 }, { "epoch": 0.51, "learning_rate": 3.7739249631923645e-05, "loss": 0.0311, "step": 10160 }, { "epoch": 0.51, "learning_rate": 3.7726557343757935e-05, "loss": 0.0858, "step": 10170 }, { "epoch": 0.51, "learning_rate": 3.7713865055592225e-05, "loss": 0.0365, "step": 10180 }, { "epoch": 0.51, "learning_rate": 3.7701172767426515e-05, "loss": 0.0429, "step": 10190 }, { "epoch": 0.51, "learning_rate": 3.76884804792608e-05, "loss": 0.0739, "step": 10200 }, { "epoch": 0.51, "learning_rate": 3.767578819109509e-05, "loss": 0.0586, "step": 10210 }, { "epoch": 0.51, "learning_rate": 3.766309590292938e-05, "loss": 0.081, "step": 10220 }, { "epoch": 0.51, "learning_rate": 3.7650403614763675e-05, "loss": 0.0306, "step": 10230 }, { "epoch": 0.51, "learning_rate": 3.7637711326597965e-05, "loss": 0.0536, "step": 10240 }, { "epoch": 0.51, "learning_rate": 3.7625019038432255e-05, "loss": 0.126, "step": 10250 }, { "epoch": 0.51, "learning_rate": 3.761232675026654e-05, "loss": 0.0765, "step": 10260 }, { "epoch": 0.51, "learning_rate": 3.759963446210083e-05, "loss": 0.0353, "step": 10270 }, { "epoch": 0.52, "learning_rate": 3.758694217393512e-05, "loss": 0.0876, "step": 10280 }, { "epoch": 0.52, "learning_rate": 3.757424988576941e-05, "loss": 0.042, "step": 10290 }, { "epoch": 0.52, "learning_rate": 3.75615575976037e-05, "loss": 0.0904, "step": 10300 }, { "epoch": 0.52, "learning_rate": 3.754886530943799e-05, "loss": 0.055, "step": 10310 }, { "epoch": 0.52, "learning_rate": 3.753617302127228e-05, "loss": 0.0548, "step": 10320 }, { "epoch": 0.52, "learning_rate": 3.752348073310656e-05, "loss": 0.0724, "step": 10330 }, { "epoch": 0.52, "learning_rate": 3.751078844494085e-05, "loss": 0.0483, "step": 10340 }, { "epoch": 0.52, "learning_rate": 3.749809615677515e-05, "loss": 0.0586, "step": 10350 }, { "epoch": 0.52, "learning_rate": 3.748540386860944e-05, "loss": 0.0622, "step": 10360 }, { "epoch": 0.52, "learning_rate": 3.747271158044373e-05, "loss": 0.0592, "step": 10370 }, { "epoch": 0.52, "learning_rate": 3.746001929227802e-05, "loss": 0.0437, "step": 10380 }, { "epoch": 0.52, "learning_rate": 3.74473270041123e-05, "loss": 0.0624, "step": 10390 }, { "epoch": 0.52, "learning_rate": 3.743463471594659e-05, "loss": 0.0338, "step": 10400 }, { "epoch": 0.52, "learning_rate": 3.742194242778088e-05, "loss": 0.0274, "step": 10410 }, { "epoch": 0.52, "learning_rate": 3.740925013961517e-05, "loss": 0.0139, "step": 10420 }, { "epoch": 0.52, "learning_rate": 3.739655785144946e-05, "loss": 0.0856, "step": 10430 }, { "epoch": 0.52, "learning_rate": 3.738386556328375e-05, "loss": 0.0663, "step": 10440 }, { "epoch": 0.52, "learning_rate": 3.7371173275118034e-05, "loss": 0.0851, "step": 10450 }, { "epoch": 0.52, "learning_rate": 3.735848098695233e-05, "loss": 0.0619, "step": 10460 }, { "epoch": 0.52, "learning_rate": 3.734578869878662e-05, "loss": 0.0591, "step": 10470 }, { "epoch": 0.53, "learning_rate": 3.733309641062091e-05, "loss": 0.0792, "step": 10480 }, { "epoch": 0.53, "learning_rate": 3.73204041224552e-05, "loss": 0.0432, "step": 10490 }, { "epoch": 0.53, "learning_rate": 3.730771183428949e-05, "loss": 0.0529, "step": 10500 }, { "epoch": 0.53, "learning_rate": 3.7295019546123774e-05, "loss": 0.0519, "step": 10510 }, { "epoch": 0.53, "learning_rate": 3.7282327257958064e-05, "loss": 0.0194, "step": 10520 }, { "epoch": 0.53, "learning_rate": 3.7269634969792354e-05, "loss": 0.0398, "step": 10530 }, { "epoch": 0.53, "learning_rate": 3.7256942681626644e-05, "loss": 0.096, "step": 10540 }, { "epoch": 0.53, "learning_rate": 3.7244250393460934e-05, "loss": 0.0213, "step": 10550 }, { "epoch": 0.53, "learning_rate": 3.7231558105295224e-05, "loss": 0.0514, "step": 10560 }, { "epoch": 0.53, "learning_rate": 3.7218865817129514e-05, "loss": 0.0534, "step": 10570 }, { "epoch": 0.53, "learning_rate": 3.7206173528963804e-05, "loss": 0.091, "step": 10580 }, { "epoch": 0.53, "learning_rate": 3.7193481240798094e-05, "loss": 0.0445, "step": 10590 }, { "epoch": 0.53, "learning_rate": 3.7180788952632384e-05, "loss": 0.0456, "step": 10600 }, { "epoch": 0.53, "learning_rate": 3.7168096664466674e-05, "loss": 0.0394, "step": 10610 }, { "epoch": 0.53, "learning_rate": 3.7155404376300964e-05, "loss": 0.113, "step": 10620 }, { "epoch": 0.53, "learning_rate": 3.7142712088135254e-05, "loss": 0.0246, "step": 10630 }, { "epoch": 0.53, "learning_rate": 3.713001979996954e-05, "loss": 0.0531, "step": 10640 }, { "epoch": 0.53, "learning_rate": 3.711732751180383e-05, "loss": 0.065, "step": 10650 }, { "epoch": 0.53, "learning_rate": 3.710463522363812e-05, "loss": 0.074, "step": 10660 }, { "epoch": 0.53, "learning_rate": 3.709194293547241e-05, "loss": 0.0637, "step": 10670 }, { "epoch": 0.54, "learning_rate": 3.70792506473067e-05, "loss": 0.0385, "step": 10680 }, { "epoch": 0.54, "learning_rate": 3.706655835914099e-05, "loss": 0.0791, "step": 10690 }, { "epoch": 0.54, "learning_rate": 3.705386607097528e-05, "loss": 0.0517, "step": 10700 }, { "epoch": 0.54, "learning_rate": 3.704117378280957e-05, "loss": 0.0329, "step": 10710 }, { "epoch": 0.54, "learning_rate": 3.702848149464386e-05, "loss": 0.1081, "step": 10720 }, { "epoch": 0.54, "learning_rate": 3.701578920647815e-05, "loss": 0.0684, "step": 10730 }, { "epoch": 0.54, "learning_rate": 3.700309691831244e-05, "loss": 0.0204, "step": 10740 }, { "epoch": 0.54, "learning_rate": 3.699040463014673e-05, "loss": 0.0469, "step": 10750 }, { "epoch": 0.54, "learning_rate": 3.697771234198101e-05, "loss": 0.057, "step": 10760 }, { "epoch": 0.54, "learning_rate": 3.69650200538153e-05, "loss": 0.0412, "step": 10770 }, { "epoch": 0.54, "learning_rate": 3.695232776564959e-05, "loss": 0.0455, "step": 10780 }, { "epoch": 0.54, "learning_rate": 3.693963547748388e-05, "loss": 0.0474, "step": 10790 }, { "epoch": 0.54, "learning_rate": 3.692694318931817e-05, "loss": 0.0426, "step": 10800 }, { "epoch": 0.54, "learning_rate": 3.691425090115247e-05, "loss": 0.0846, "step": 10810 }, { "epoch": 0.54, "learning_rate": 3.690155861298675e-05, "loss": 0.0591, "step": 10820 }, { "epoch": 0.54, "learning_rate": 3.688886632482104e-05, "loss": 0.0396, "step": 10830 }, { "epoch": 0.54, "learning_rate": 3.687617403665533e-05, "loss": 0.0612, "step": 10840 }, { "epoch": 0.54, "learning_rate": 3.686348174848962e-05, "loss": 0.0429, "step": 10850 }, { "epoch": 0.54, "learning_rate": 3.685078946032391e-05, "loss": 0.0641, "step": 10860 }, { "epoch": 0.54, "learning_rate": 3.68380971721582e-05, "loss": 0.0916, "step": 10870 }, { "epoch": 0.55, "learning_rate": 3.682540488399249e-05, "loss": 0.0995, "step": 10880 }, { "epoch": 0.55, "learning_rate": 3.6812712595826774e-05, "loss": 0.0473, "step": 10890 }, { "epoch": 0.55, "learning_rate": 3.6800020307661064e-05, "loss": 0.0633, "step": 10900 }, { "epoch": 0.55, "learning_rate": 3.6787328019495354e-05, "loss": 0.0515, "step": 10910 }, { "epoch": 0.55, "learning_rate": 3.677463573132965e-05, "loss": 0.0491, "step": 10920 }, { "epoch": 0.55, "learning_rate": 3.676194344316394e-05, "loss": 0.0593, "step": 10930 }, { "epoch": 0.55, "learning_rate": 3.6749251154998224e-05, "loss": 0.1076, "step": 10940 }, { "epoch": 0.55, "learning_rate": 3.6736558866832514e-05, "loss": 0.0379, "step": 10950 }, { "epoch": 0.55, "learning_rate": 3.6723866578666804e-05, "loss": 0.084, "step": 10960 }, { "epoch": 0.55, "learning_rate": 3.6711174290501094e-05, "loss": 0.0344, "step": 10970 }, { "epoch": 0.55, "learning_rate": 3.6698482002335384e-05, "loss": 0.0498, "step": 10980 }, { "epoch": 0.55, "learning_rate": 3.6685789714169673e-05, "loss": 0.0595, "step": 10990 }, { "epoch": 0.55, "learning_rate": 3.6673097426003963e-05, "loss": 0.0346, "step": 11000 }, { "epoch": 0.55, "learning_rate": 3.666040513783825e-05, "loss": 0.0494, "step": 11010 }, { "epoch": 0.55, "learning_rate": 3.664771284967254e-05, "loss": 0.0359, "step": 11020 }, { "epoch": 0.55, "learning_rate": 3.663502056150683e-05, "loss": 0.0622, "step": 11030 }, { "epoch": 0.55, "learning_rate": 3.6622328273341123e-05, "loss": 0.0454, "step": 11040 }, { "epoch": 0.55, "learning_rate": 3.6609635985175413e-05, "loss": 0.0974, "step": 11050 }, { "epoch": 0.55, "learning_rate": 3.6596943697009703e-05, "loss": 0.0797, "step": 11060 }, { "epoch": 0.55, "learning_rate": 3.658425140884399e-05, "loss": 0.0236, "step": 11070 }, { "epoch": 0.56, "learning_rate": 3.657155912067828e-05, "loss": 0.0718, "step": 11080 }, { "epoch": 0.56, "learning_rate": 3.655886683251257e-05, "loss": 0.0464, "step": 11090 }, { "epoch": 0.56, "learning_rate": 3.6546174544346857e-05, "loss": 0.0397, "step": 11100 }, { "epoch": 0.56, "learning_rate": 3.6533482256181147e-05, "loss": 0.0474, "step": 11110 }, { "epoch": 0.56, "learning_rate": 3.6520789968015437e-05, "loss": 0.0472, "step": 11120 }, { "epoch": 0.56, "learning_rate": 3.650809767984972e-05, "loss": 0.0389, "step": 11130 }, { "epoch": 0.56, "learning_rate": 3.649540539168401e-05, "loss": 0.0491, "step": 11140 }, { "epoch": 0.56, "learning_rate": 3.6482713103518307e-05, "loss": 0.0483, "step": 11150 }, { "epoch": 0.56, "learning_rate": 3.6470020815352597e-05, "loss": 0.0643, "step": 11160 }, { "epoch": 0.56, "learning_rate": 3.6457328527186887e-05, "loss": 0.0622, "step": 11170 }, { "epoch": 0.56, "learning_rate": 3.6444636239021177e-05, "loss": 0.0358, "step": 11180 }, { "epoch": 0.56, "learning_rate": 3.643194395085546e-05, "loss": 0.1044, "step": 11190 }, { "epoch": 0.56, "learning_rate": 3.641925166268975e-05, "loss": 0.0718, "step": 11200 }, { "epoch": 0.56, "learning_rate": 3.640655937452404e-05, "loss": 0.0625, "step": 11210 }, { "epoch": 0.56, "learning_rate": 3.639386708635833e-05, "loss": 0.0358, "step": 11220 }, { "epoch": 0.56, "learning_rate": 3.638117479819262e-05, "loss": 0.0269, "step": 11230 }, { "epoch": 0.56, "learning_rate": 3.636848251002691e-05, "loss": 0.0407, "step": 11240 }, { "epoch": 0.56, "learning_rate": 3.63557902218612e-05, "loss": 0.1307, "step": 11250 }, { "epoch": 0.56, "learning_rate": 3.634309793369548e-05, "loss": 0.0453, "step": 11260 }, { "epoch": 0.56, "learning_rate": 3.633040564552978e-05, "loss": 0.0945, "step": 11270 }, { "epoch": 0.57, "learning_rate": 3.631771335736407e-05, "loss": 0.0244, "step": 11280 }, { "epoch": 0.57, "learning_rate": 3.630502106919836e-05, "loss": 0.0668, "step": 11290 }, { "epoch": 0.57, "learning_rate": 3.629232878103265e-05, "loss": 0.0518, "step": 11300 }, { "epoch": 0.57, "learning_rate": 3.627963649286694e-05, "loss": 0.1052, "step": 11310 }, { "epoch": 0.57, "learning_rate": 3.626694420470122e-05, "loss": 0.0678, "step": 11320 }, { "epoch": 0.57, "learning_rate": 3.625425191653551e-05, "loss": 0.0723, "step": 11330 }, { "epoch": 0.57, "learning_rate": 3.62415596283698e-05, "loss": 0.0714, "step": 11340 }, { "epoch": 0.57, "learning_rate": 3.622886734020409e-05, "loss": 0.0579, "step": 11350 }, { "epoch": 0.57, "learning_rate": 3.621617505203838e-05, "loss": 0.0583, "step": 11360 }, { "epoch": 0.57, "learning_rate": 3.620348276387267e-05, "loss": 0.0805, "step": 11370 }, { "epoch": 0.57, "learning_rate": 3.619079047570696e-05, "loss": 0.0753, "step": 11380 }, { "epoch": 0.57, "learning_rate": 3.617809818754125e-05, "loss": 0.052, "step": 11390 }, { "epoch": 0.57, "learning_rate": 3.616540589937554e-05, "loss": 0.0985, "step": 11400 }, { "epoch": 0.57, "learning_rate": 3.615271361120983e-05, "loss": 0.1048, "step": 11410 }, { "epoch": 0.57, "learning_rate": 3.614002132304412e-05, "loss": 0.0624, "step": 11420 }, { "epoch": 0.57, "learning_rate": 3.612732903487841e-05, "loss": 0.036, "step": 11430 }, { "epoch": 0.57, "learning_rate": 3.6114636746712696e-05, "loss": 0.048, "step": 11440 }, { "epoch": 0.57, "learning_rate": 3.6101944458546986e-05, "loss": 0.0648, "step": 11450 }, { "epoch": 0.57, "learning_rate": 3.6089252170381276e-05, "loss": 0.0514, "step": 11460 }, { "epoch": 0.58, "learning_rate": 3.6076559882215566e-05, "loss": 0.0454, "step": 11470 }, { "epoch": 0.58, "learning_rate": 3.6063867594049856e-05, "loss": 0.0456, "step": 11480 }, { "epoch": 0.58, "learning_rate": 3.6051175305884146e-05, "loss": 0.0625, "step": 11490 }, { "epoch": 0.58, "learning_rate": 3.6038483017718436e-05, "loss": 0.0269, "step": 11500 }, { "epoch": 0.58, "learning_rate": 3.6025790729552726e-05, "loss": 0.1019, "step": 11510 }, { "epoch": 0.58, "learning_rate": 3.6013098441387016e-05, "loss": 0.0263, "step": 11520 }, { "epoch": 0.58, "learning_rate": 3.6000406153221306e-05, "loss": 0.0542, "step": 11530 }, { "epoch": 0.58, "learning_rate": 3.5987713865055596e-05, "loss": 0.0328, "step": 11540 }, { "epoch": 0.58, "learning_rate": 3.5975021576889886e-05, "loss": 0.0445, "step": 11550 }, { "epoch": 0.58, "learning_rate": 3.5962329288724176e-05, "loss": 0.0836, "step": 11560 }, { "epoch": 0.58, "learning_rate": 3.594963700055846e-05, "loss": 0.1183, "step": 11570 }, { "epoch": 0.58, "learning_rate": 3.593694471239275e-05, "loss": 0.0452, "step": 11580 }, { "epoch": 0.58, "learning_rate": 3.592425242422704e-05, "loss": 0.0262, "step": 11590 }, { "epoch": 0.58, "learning_rate": 3.591156013606133e-05, "loss": 0.1283, "step": 11600 }, { "epoch": 0.58, "learning_rate": 3.5898867847895626e-05, "loss": 0.0662, "step": 11610 }, { "epoch": 0.58, "learning_rate": 3.588617555972991e-05, "loss": 0.0322, "step": 11620 }, { "epoch": 0.58, "learning_rate": 3.58734832715642e-05, "loss": 0.0532, "step": 11630 }, { "epoch": 0.58, "learning_rate": 3.586079098339849e-05, "loss": 0.0734, "step": 11640 }, { "epoch": 0.58, "learning_rate": 3.584809869523278e-05, "loss": 0.0849, "step": 11650 }, { "epoch": 0.58, "learning_rate": 3.583540640706707e-05, "loss": 0.0389, "step": 11660 }, { "epoch": 0.59, "learning_rate": 3.582271411890136e-05, "loss": 0.0622, "step": 11670 }, { "epoch": 0.59, "learning_rate": 3.581002183073565e-05, "loss": 0.0404, "step": 11680 }, { "epoch": 0.59, "learning_rate": 3.579732954256993e-05, "loss": 0.0382, "step": 11690 }, { "epoch": 0.59, "learning_rate": 3.578463725440422e-05, "loss": 0.0933, "step": 11700 }, { "epoch": 0.59, "learning_rate": 3.577194496623851e-05, "loss": 0.0444, "step": 11710 }, { "epoch": 0.59, "learning_rate": 3.57592526780728e-05, "loss": 0.0722, "step": 11720 }, { "epoch": 0.59, "learning_rate": 3.57465603899071e-05, "loss": 0.0398, "step": 11730 }, { "epoch": 0.59, "learning_rate": 3.573386810174139e-05, "loss": 0.0658, "step": 11740 }, { "epoch": 0.59, "learning_rate": 3.572117581357567e-05, "loss": 0.0336, "step": 11750 }, { "epoch": 0.59, "learning_rate": 3.570848352540996e-05, "loss": 0.1392, "step": 11760 }, { "epoch": 0.59, "learning_rate": 3.569579123724425e-05, "loss": 0.0551, "step": 11770 }, { "epoch": 0.59, "learning_rate": 3.568309894907854e-05, "loss": 0.0504, "step": 11780 }, { "epoch": 0.59, "learning_rate": 3.567040666091283e-05, "loss": 0.0451, "step": 11790 }, { "epoch": 0.59, "learning_rate": 3.565771437274712e-05, "loss": 0.0744, "step": 11800 }, { "epoch": 0.59, "learning_rate": 3.564502208458141e-05, "loss": 0.1097, "step": 11810 }, { "epoch": 0.59, "learning_rate": 3.5632329796415695e-05, "loss": 0.0512, "step": 11820 }, { "epoch": 0.59, "learning_rate": 3.5619637508249985e-05, "loss": 0.0169, "step": 11830 }, { "epoch": 0.59, "learning_rate": 3.560694522008428e-05, "loss": 0.0607, "step": 11840 }, { "epoch": 0.59, "learning_rate": 3.559425293191857e-05, "loss": 0.0361, "step": 11850 }, { "epoch": 0.59, "learning_rate": 3.558156064375286e-05, "loss": 0.049, "step": 11860 }, { "epoch": 0.6, "learning_rate": 3.5568868355587145e-05, "loss": 0.0669, "step": 11870 }, { "epoch": 0.6, "learning_rate": 3.5556176067421435e-05, "loss": 0.0611, "step": 11880 }, { "epoch": 0.6, "learning_rate": 3.5543483779255725e-05, "loss": 0.0387, "step": 11890 }, { "epoch": 0.6, "learning_rate": 3.5530791491090015e-05, "loss": 0.0427, "step": 11900 }, { "epoch": 0.6, "learning_rate": 3.5518099202924305e-05, "loss": 0.0365, "step": 11910 }, { "epoch": 0.6, "learning_rate": 3.5505406914758595e-05, "loss": 0.0917, "step": 11920 }, { "epoch": 0.6, "learning_rate": 3.5492714626592885e-05, "loss": 0.0338, "step": 11930 }, { "epoch": 0.6, "learning_rate": 3.548002233842717e-05, "loss": 0.0711, "step": 11940 }, { "epoch": 0.6, "learning_rate": 3.546733005026146e-05, "loss": 0.043, "step": 11950 }, { "epoch": 0.6, "learning_rate": 3.5454637762095755e-05, "loss": 0.0667, "step": 11960 }, { "epoch": 0.6, "learning_rate": 3.5441945473930045e-05, "loss": 0.0682, "step": 11970 }, { "epoch": 0.6, "learning_rate": 3.5429253185764335e-05, "loss": 0.0599, "step": 11980 }, { "epoch": 0.6, "learning_rate": 3.5416560897598625e-05, "loss": 0.1187, "step": 11990 }, { "epoch": 0.6, "learning_rate": 3.540386860943291e-05, "loss": 0.065, "step": 12000 }, { "epoch": 0.6, "learning_rate": 3.53911763212672e-05, "loss": 0.0591, "step": 12010 }, { "epoch": 0.6, "learning_rate": 3.537848403310149e-05, "loss": 0.0264, "step": 12020 }, { "epoch": 0.6, "learning_rate": 3.536579174493578e-05, "loss": 0.0718, "step": 12030 }, { "epoch": 0.6, "learning_rate": 3.535309945677007e-05, "loss": 0.0476, "step": 12040 }, { "epoch": 0.6, "learning_rate": 3.534040716860436e-05, "loss": 0.0322, "step": 12050 }, { "epoch": 0.6, "learning_rate": 3.532771488043865e-05, "loss": 0.0414, "step": 12060 }, { "epoch": 0.61, "learning_rate": 3.531502259227294e-05, "loss": 0.0893, "step": 12070 }, { "epoch": 0.61, "learning_rate": 3.530233030410723e-05, "loss": 0.0366, "step": 12080 }, { "epoch": 0.61, "learning_rate": 3.528963801594152e-05, "loss": 0.0674, "step": 12090 }, { "epoch": 0.61, "learning_rate": 3.527694572777581e-05, "loss": 0.0791, "step": 12100 }, { "epoch": 0.61, "learning_rate": 3.52642534396101e-05, "loss": 0.0673, "step": 12110 }, { "epoch": 0.61, "learning_rate": 3.525156115144438e-05, "loss": 0.08, "step": 12120 }, { "epoch": 0.61, "learning_rate": 3.523886886327867e-05, "loss": 0.1036, "step": 12130 }, { "epoch": 0.61, "learning_rate": 3.522617657511296e-05, "loss": 0.0816, "step": 12140 }, { "epoch": 0.61, "learning_rate": 3.521348428694725e-05, "loss": 0.0767, "step": 12150 }, { "epoch": 0.61, "learning_rate": 3.520079199878154e-05, "loss": 0.0753, "step": 12160 }, { "epoch": 0.61, "learning_rate": 3.518809971061583e-05, "loss": 0.0805, "step": 12170 }, { "epoch": 0.61, "learning_rate": 3.517540742245012e-05, "loss": 0.0336, "step": 12180 }, { "epoch": 0.61, "learning_rate": 3.516271513428441e-05, "loss": 0.0445, "step": 12190 }, { "epoch": 0.61, "learning_rate": 3.51500228461187e-05, "loss": 0.039, "step": 12200 }, { "epoch": 0.61, "learning_rate": 3.513733055795299e-05, "loss": 0.0373, "step": 12210 }, { "epoch": 0.61, "learning_rate": 3.512463826978728e-05, "loss": 0.0695, "step": 12220 }, { "epoch": 0.61, "learning_rate": 3.511194598162157e-05, "loss": 0.0534, "step": 12230 }, { "epoch": 0.61, "learning_rate": 3.509925369345586e-05, "loss": 0.0656, "step": 12240 }, { "epoch": 0.61, "learning_rate": 3.5086561405290144e-05, "loss": 0.1346, "step": 12250 }, { "epoch": 0.61, "learning_rate": 3.5073869117124434e-05, "loss": 0.1275, "step": 12260 }, { "epoch": 0.62, "learning_rate": 3.5061176828958724e-05, "loss": 0.0566, "step": 12270 }, { "epoch": 0.62, "learning_rate": 3.5048484540793014e-05, "loss": 0.0682, "step": 12280 }, { "epoch": 0.62, "learning_rate": 3.5035792252627304e-05, "loss": 0.0487, "step": 12290 }, { "epoch": 0.62, "learning_rate": 3.5023099964461594e-05, "loss": 0.1314, "step": 12300 }, { "epoch": 0.62, "learning_rate": 3.5010407676295884e-05, "loss": 0.0531, "step": 12310 }, { "epoch": 0.62, "learning_rate": 3.4997715388130174e-05, "loss": 0.0319, "step": 12320 }, { "epoch": 0.62, "learning_rate": 3.4985023099964464e-05, "loss": 0.0933, "step": 12330 }, { "epoch": 0.62, "learning_rate": 3.4972330811798754e-05, "loss": 0.1114, "step": 12340 }, { "epoch": 0.62, "learning_rate": 3.4959638523633044e-05, "loss": 0.0601, "step": 12350 }, { "epoch": 0.62, "learning_rate": 3.4946946235467334e-05, "loss": 0.068, "step": 12360 }, { "epoch": 0.62, "learning_rate": 3.493425394730162e-05, "loss": 0.0416, "step": 12370 }, { "epoch": 0.62, "learning_rate": 3.492156165913591e-05, "loss": 0.1079, "step": 12380 }, { "epoch": 0.62, "learning_rate": 3.49088693709702e-05, "loss": 0.0427, "step": 12390 }, { "epoch": 0.62, "learning_rate": 3.489617708280449e-05, "loss": 0.0646, "step": 12400 }, { "epoch": 0.62, "learning_rate": 3.488348479463878e-05, "loss": 0.0367, "step": 12410 }, { "epoch": 0.62, "learning_rate": 3.4870792506473074e-05, "loss": 0.076, "step": 12420 }, { "epoch": 0.62, "learning_rate": 3.485810021830736e-05, "loss": 0.0896, "step": 12430 }, { "epoch": 0.62, "learning_rate": 3.484540793014165e-05, "loss": 0.0337, "step": 12440 }, { "epoch": 0.62, "learning_rate": 3.483271564197594e-05, "loss": 0.1039, "step": 12450 }, { "epoch": 0.62, "learning_rate": 3.482002335381023e-05, "loss": 0.0449, "step": 12460 }, { "epoch": 0.63, "learning_rate": 3.480733106564452e-05, "loss": 0.0543, "step": 12470 }, { "epoch": 0.63, "learning_rate": 3.479463877747881e-05, "loss": 0.0599, "step": 12480 }, { "epoch": 0.63, "learning_rate": 3.47819464893131e-05, "loss": 0.0772, "step": 12490 }, { "epoch": 0.63, "learning_rate": 3.476925420114738e-05, "loss": 0.0511, "step": 12500 }, { "epoch": 0.63, "learning_rate": 3.475656191298167e-05, "loss": 0.0314, "step": 12510 }, { "epoch": 0.63, "learning_rate": 3.474386962481596e-05, "loss": 0.1348, "step": 12520 }, { "epoch": 0.63, "learning_rate": 3.473117733665025e-05, "loss": 0.0598, "step": 12530 }, { "epoch": 0.63, "learning_rate": 3.471848504848455e-05, "loss": 0.0744, "step": 12540 }, { "epoch": 0.63, "learning_rate": 3.470579276031883e-05, "loss": 0.0802, "step": 12550 }, { "epoch": 0.63, "learning_rate": 3.469310047215312e-05, "loss": 0.0303, "step": 12560 }, { "epoch": 0.63, "learning_rate": 3.468040818398741e-05, "loss": 0.0457, "step": 12570 }, { "epoch": 0.63, "learning_rate": 3.46677158958217e-05, "loss": 0.1063, "step": 12580 }, { "epoch": 0.63, "learning_rate": 3.465502360765599e-05, "loss": 0.1652, "step": 12590 }, { "epoch": 0.63, "learning_rate": 3.464233131949028e-05, "loss": 0.0664, "step": 12600 }, { "epoch": 0.63, "learning_rate": 3.462963903132457e-05, "loss": 0.1067, "step": 12610 }, { "epoch": 0.63, "learning_rate": 3.4616946743158854e-05, "loss": 0.1758, "step": 12620 }, { "epoch": 0.63, "learning_rate": 3.4604254454993144e-05, "loss": 0.066, "step": 12630 }, { "epoch": 0.63, "learning_rate": 3.4591562166827434e-05, "loss": 0.0311, "step": 12640 }, { "epoch": 0.63, "learning_rate": 3.457886987866173e-05, "loss": 0.0382, "step": 12650 }, { "epoch": 0.63, "learning_rate": 3.456617759049602e-05, "loss": 0.0997, "step": 12660 }, { "epoch": 0.64, "learning_rate": 3.455348530233031e-05, "loss": 0.0356, "step": 12670 }, { "epoch": 0.64, "learning_rate": 3.4540793014164594e-05, "loss": 0.0717, "step": 12680 }, { "epoch": 0.64, "learning_rate": 3.4528100725998884e-05, "loss": 0.0162, "step": 12690 }, { "epoch": 0.64, "learning_rate": 3.4515408437833174e-05, "loss": 0.011, "step": 12700 }, { "epoch": 0.64, "learning_rate": 3.4502716149667464e-05, "loss": 0.0545, "step": 12710 }, { "epoch": 0.64, "learning_rate": 3.4490023861501754e-05, "loss": 0.063, "step": 12720 }, { "epoch": 0.64, "learning_rate": 3.4477331573336044e-05, "loss": 0.0934, "step": 12730 }, { "epoch": 0.64, "learning_rate": 3.4464639285170334e-05, "loss": 0.0831, "step": 12740 }, { "epoch": 0.64, "learning_rate": 3.445194699700462e-05, "loss": 0.0735, "step": 12750 }, { "epoch": 0.64, "learning_rate": 3.443925470883891e-05, "loss": 0.1312, "step": 12760 }, { "epoch": 0.64, "learning_rate": 3.4426562420673204e-05, "loss": 0.0655, "step": 12770 }, { "epoch": 0.64, "learning_rate": 3.4413870132507494e-05, "loss": 0.0659, "step": 12780 }, { "epoch": 0.64, "learning_rate": 3.4401177844341784e-05, "loss": 0.0821, "step": 12790 }, { "epoch": 0.64, "learning_rate": 3.438848555617607e-05, "loss": 0.0602, "step": 12800 }, { "epoch": 0.64, "learning_rate": 3.437579326801036e-05, "loss": 0.0439, "step": 12810 }, { "epoch": 0.64, "learning_rate": 3.436310097984465e-05, "loss": 0.0255, "step": 12820 }, { "epoch": 0.64, "learning_rate": 3.435040869167894e-05, "loss": 0.0626, "step": 12830 }, { "epoch": 0.64, "learning_rate": 3.433771640351323e-05, "loss": 0.0519, "step": 12840 }, { "epoch": 0.64, "learning_rate": 3.432502411534752e-05, "loss": 0.078, "step": 12850 }, { "epoch": 0.64, "learning_rate": 3.431233182718181e-05, "loss": 0.0683, "step": 12860 }, { "epoch": 0.65, "learning_rate": 3.429963953901609e-05, "loss": 0.0505, "step": 12870 }, { "epoch": 0.65, "learning_rate": 3.428694725085039e-05, "loss": 0.1128, "step": 12880 }, { "epoch": 0.65, "learning_rate": 3.427425496268468e-05, "loss": 0.0332, "step": 12890 }, { "epoch": 0.65, "learning_rate": 3.426156267451897e-05, "loss": 0.0516, "step": 12900 }, { "epoch": 0.65, "learning_rate": 3.424887038635326e-05, "loss": 0.0334, "step": 12910 }, { "epoch": 0.65, "learning_rate": 3.423617809818755e-05, "loss": 0.0805, "step": 12920 }, { "epoch": 0.65, "learning_rate": 3.422348581002183e-05, "loss": 0.0803, "step": 12930 }, { "epoch": 0.65, "learning_rate": 3.421079352185612e-05, "loss": 0.0275, "step": 12940 }, { "epoch": 0.65, "learning_rate": 3.419810123369041e-05, "loss": 0.0393, "step": 12950 }, { "epoch": 0.65, "learning_rate": 3.41854089455247e-05, "loss": 0.1332, "step": 12960 }, { "epoch": 0.65, "learning_rate": 3.417271665735899e-05, "loss": 0.014, "step": 12970 }, { "epoch": 0.65, "learning_rate": 3.416002436919328e-05, "loss": 0.074, "step": 12980 }, { "epoch": 0.65, "learning_rate": 3.414733208102757e-05, "loss": 0.0609, "step": 12990 }, { "epoch": 0.65, "learning_rate": 3.413463979286186e-05, "loss": 0.0527, "step": 13000 }, { "epoch": 0.65, "learning_rate": 3.412194750469615e-05, "loss": 0.1107, "step": 13010 }, { "epoch": 0.65, "learning_rate": 3.410925521653044e-05, "loss": 0.0669, "step": 13020 }, { "epoch": 0.65, "learning_rate": 3.409656292836473e-05, "loss": 0.0541, "step": 13030 }, { "epoch": 0.65, "learning_rate": 3.408387064019902e-05, "loss": 0.0696, "step": 13040 }, { "epoch": 0.65, "learning_rate": 3.40711783520333e-05, "loss": 0.0626, "step": 13050 }, { "epoch": 0.65, "learning_rate": 3.405848606386759e-05, "loss": 0.0728, "step": 13060 }, { "epoch": 0.66, "learning_rate": 3.404579377570188e-05, "loss": 0.0667, "step": 13070 }, { "epoch": 0.66, "learning_rate": 3.403310148753617e-05, "loss": 0.0497, "step": 13080 }, { "epoch": 0.66, "learning_rate": 3.402040919937046e-05, "loss": 0.0609, "step": 13090 }, { "epoch": 0.66, "learning_rate": 3.400771691120475e-05, "loss": 0.0695, "step": 13100 }, { "epoch": 0.66, "learning_rate": 3.399502462303904e-05, "loss": 0.0655, "step": 13110 }, { "epoch": 0.66, "learning_rate": 3.398233233487333e-05, "loss": 0.0602, "step": 13120 }, { "epoch": 0.66, "learning_rate": 3.396964004670762e-05, "loss": 0.0496, "step": 13130 }, { "epoch": 0.66, "learning_rate": 3.395694775854191e-05, "loss": 0.085, "step": 13140 }, { "epoch": 0.66, "learning_rate": 3.39442554703762e-05, "loss": 0.0739, "step": 13150 }, { "epoch": 0.66, "learning_rate": 3.393156318221049e-05, "loss": 0.0593, "step": 13160 }, { "epoch": 0.66, "learning_rate": 3.391887089404478e-05, "loss": 0.0927, "step": 13170 }, { "epoch": 0.66, "learning_rate": 3.3906178605879066e-05, "loss": 0.1023, "step": 13180 }, { "epoch": 0.66, "learning_rate": 3.3893486317713356e-05, "loss": 0.0749, "step": 13190 }, { "epoch": 0.66, "learning_rate": 3.3880794029547646e-05, "loss": 0.0547, "step": 13200 }, { "epoch": 0.66, "learning_rate": 3.3868101741381936e-05, "loss": 0.0829, "step": 13210 }, { "epoch": 0.66, "learning_rate": 3.3855409453216226e-05, "loss": 0.0702, "step": 13220 }, { "epoch": 0.66, "learning_rate": 3.3842717165050516e-05, "loss": 0.0537, "step": 13230 }, { "epoch": 0.66, "learning_rate": 3.3830024876884806e-05, "loss": 0.0753, "step": 13240 }, { "epoch": 0.66, "learning_rate": 3.3817332588719096e-05, "loss": 0.0255, "step": 13250 }, { "epoch": 0.66, "learning_rate": 3.3804640300553386e-05, "loss": 0.0433, "step": 13260 }, { "epoch": 0.67, "learning_rate": 3.3791948012387676e-05, "loss": 0.0652, "step": 13270 }, { "epoch": 0.67, "learning_rate": 3.3779255724221966e-05, "loss": 0.0999, "step": 13280 }, { "epoch": 0.67, "learning_rate": 3.3766563436056256e-05, "loss": 0.0608, "step": 13290 }, { "epoch": 0.67, "learning_rate": 3.375387114789054e-05, "loss": 0.1031, "step": 13300 }, { "epoch": 0.67, "learning_rate": 3.374117885972483e-05, "loss": 0.0499, "step": 13310 }, { "epoch": 0.67, "learning_rate": 3.372848657155912e-05, "loss": 0.0753, "step": 13320 }, { "epoch": 0.67, "learning_rate": 3.371579428339341e-05, "loss": 0.0765, "step": 13330 }, { "epoch": 0.67, "learning_rate": 3.3703101995227706e-05, "loss": 0.0978, "step": 13340 }, { "epoch": 0.67, "learning_rate": 3.3690409707061996e-05, "loss": 0.1033, "step": 13350 }, { "epoch": 0.67, "learning_rate": 3.367771741889628e-05, "loss": 0.0373, "step": 13360 }, { "epoch": 0.67, "learning_rate": 3.366502513073057e-05, "loss": 0.0586, "step": 13370 }, { "epoch": 0.67, "learning_rate": 3.365233284256486e-05, "loss": 0.0447, "step": 13380 }, { "epoch": 0.67, "learning_rate": 3.363964055439915e-05, "loss": 0.0255, "step": 13390 }, { "epoch": 0.67, "learning_rate": 3.362694826623344e-05, "loss": 0.0617, "step": 13400 }, { "epoch": 0.67, "learning_rate": 3.361425597806773e-05, "loss": 0.0437, "step": 13410 }, { "epoch": 0.67, "learning_rate": 3.360156368990202e-05, "loss": 0.0283, "step": 13420 }, { "epoch": 0.67, "learning_rate": 3.35888714017363e-05, "loss": 0.0656, "step": 13430 }, { "epoch": 0.67, "learning_rate": 3.357617911357059e-05, "loss": 0.0791, "step": 13440 }, { "epoch": 0.67, "learning_rate": 3.356348682540488e-05, "loss": 0.0707, "step": 13450 }, { "epoch": 0.67, "learning_rate": 3.355079453723918e-05, "loss": 0.0643, "step": 13460 }, { "epoch": 0.68, "learning_rate": 3.353810224907347e-05, "loss": 0.1148, "step": 13470 }, { "epoch": 0.68, "learning_rate": 3.352540996090775e-05, "loss": 0.0451, "step": 13480 }, { "epoch": 0.68, "learning_rate": 3.351271767274204e-05, "loss": 0.0791, "step": 13490 }, { "epoch": 0.68, "learning_rate": 3.350002538457633e-05, "loss": 0.0688, "step": 13500 }, { "epoch": 0.68, "learning_rate": 3.348733309641062e-05, "loss": 0.0848, "step": 13510 }, { "epoch": 0.68, "learning_rate": 3.347464080824491e-05, "loss": 0.0911, "step": 13520 }, { "epoch": 0.68, "learning_rate": 3.34619485200792e-05, "loss": 0.075, "step": 13530 }, { "epoch": 0.68, "learning_rate": 3.344925623191349e-05, "loss": 0.1152, "step": 13540 }, { "epoch": 0.68, "learning_rate": 3.3436563943747775e-05, "loss": 0.0351, "step": 13550 }, { "epoch": 0.68, "learning_rate": 3.3423871655582065e-05, "loss": 0.0234, "step": 13560 }, { "epoch": 0.68, "learning_rate": 3.341117936741636e-05, "loss": 0.1, "step": 13570 }, { "epoch": 0.68, "learning_rate": 3.339848707925065e-05, "loss": 0.044, "step": 13580 }, { "epoch": 0.68, "learning_rate": 3.338579479108494e-05, "loss": 0.0704, "step": 13590 }, { "epoch": 0.68, "learning_rate": 3.337310250291923e-05, "loss": 0.0578, "step": 13600 }, { "epoch": 0.68, "learning_rate": 3.3360410214753515e-05, "loss": 0.043, "step": 13610 }, { "epoch": 0.68, "learning_rate": 3.3347717926587805e-05, "loss": 0.0659, "step": 13620 }, { "epoch": 0.68, "learning_rate": 3.3335025638422095e-05, "loss": 0.0553, "step": 13630 }, { "epoch": 0.68, "learning_rate": 3.3322333350256385e-05, "loss": 0.0415, "step": 13640 }, { "epoch": 0.68, "learning_rate": 3.3309641062090675e-05, "loss": 0.0386, "step": 13650 }, { "epoch": 0.68, "learning_rate": 3.3296948773924965e-05, "loss": 0.0281, "step": 13660 }, { "epoch": 0.69, "learning_rate": 3.3284256485759255e-05, "loss": 0.056, "step": 13670 }, { "epoch": 0.69, "learning_rate": 3.327156419759354e-05, "loss": 0.0481, "step": 13680 }, { "epoch": 0.69, "learning_rate": 3.3258871909427835e-05, "loss": 0.0801, "step": 13690 }, { "epoch": 0.69, "learning_rate": 3.3246179621262125e-05, "loss": 0.0479, "step": 13700 }, { "epoch": 0.69, "learning_rate": 3.3233487333096415e-05, "loss": 0.0469, "step": 13710 }, { "epoch": 0.69, "learning_rate": 3.3220795044930705e-05, "loss": 0.0639, "step": 13720 }, { "epoch": 0.69, "learning_rate": 3.320810275676499e-05, "loss": 0.1236, "step": 13730 }, { "epoch": 0.69, "learning_rate": 3.319541046859928e-05, "loss": 0.0216, "step": 13740 }, { "epoch": 0.69, "learning_rate": 3.318271818043357e-05, "loss": 0.0819, "step": 13750 }, { "epoch": 0.69, "learning_rate": 3.317002589226786e-05, "loss": 0.1108, "step": 13760 }, { "epoch": 0.69, "learning_rate": 3.315733360410215e-05, "loss": 0.1345, "step": 13770 }, { "epoch": 0.69, "learning_rate": 3.314464131593644e-05, "loss": 0.0509, "step": 13780 }, { "epoch": 0.69, "learning_rate": 3.313194902777073e-05, "loss": 0.0589, "step": 13790 }, { "epoch": 0.69, "learning_rate": 3.311925673960502e-05, "loss": 0.0779, "step": 13800 }, { "epoch": 0.69, "learning_rate": 3.310656445143931e-05, "loss": 0.0966, "step": 13810 }, { "epoch": 0.69, "learning_rate": 3.30938721632736e-05, "loss": 0.0834, "step": 13820 }, { "epoch": 0.69, "learning_rate": 3.308117987510789e-05, "loss": 0.0382, "step": 13830 }, { "epoch": 0.69, "learning_rate": 3.306848758694218e-05, "loss": 0.0607, "step": 13840 }, { "epoch": 0.69, "learning_rate": 3.305579529877647e-05, "loss": 0.1165, "step": 13850 }, { "epoch": 0.69, "learning_rate": 3.304310301061075e-05, "loss": 0.0976, "step": 13860 }, { "epoch": 0.7, "learning_rate": 3.303041072244504e-05, "loss": 0.0441, "step": 13870 }, { "epoch": 0.7, "learning_rate": 3.301771843427933e-05, "loss": 0.051, "step": 13880 }, { "epoch": 0.7, "learning_rate": 3.300502614611362e-05, "loss": 0.0288, "step": 13890 }, { "epoch": 0.7, "learning_rate": 3.299233385794791e-05, "loss": 0.0881, "step": 13900 }, { "epoch": 0.7, "learning_rate": 3.29796415697822e-05, "loss": 0.069, "step": 13910 }, { "epoch": 0.7, "learning_rate": 3.296694928161649e-05, "loss": 0.09, "step": 13920 }, { "epoch": 0.7, "learning_rate": 3.295425699345078e-05, "loss": 0.028, "step": 13930 }, { "epoch": 0.7, "learning_rate": 3.294156470528507e-05, "loss": 0.0649, "step": 13940 }, { "epoch": 0.7, "learning_rate": 3.292887241711936e-05, "loss": 0.0524, "step": 13950 }, { "epoch": 0.7, "learning_rate": 3.291618012895365e-05, "loss": 0.0501, "step": 13960 }, { "epoch": 0.7, "learning_rate": 3.290348784078794e-05, "loss": 0.0465, "step": 13970 }, { "epoch": 0.7, "learning_rate": 3.2890795552622225e-05, "loss": 0.0259, "step": 13980 }, { "epoch": 0.7, "learning_rate": 3.2878103264456515e-05, "loss": 0.0345, "step": 13990 }, { "epoch": 0.7, "learning_rate": 3.2865410976290805e-05, "loss": 0.0658, "step": 14000 }, { "epoch": 0.7, "learning_rate": 3.2852718688125095e-05, "loss": 0.0399, "step": 14010 }, { "epoch": 0.7, "learning_rate": 3.2840026399959385e-05, "loss": 0.0403, "step": 14020 }, { "epoch": 0.7, "learning_rate": 3.282733411179368e-05, "loss": 0.0569, "step": 14030 }, { "epoch": 0.7, "learning_rate": 3.2814641823627965e-05, "loss": 0.0759, "step": 14040 }, { "epoch": 0.7, "learning_rate": 3.2801949535462255e-05, "loss": 0.1051, "step": 14050 }, { "epoch": 0.7, "learning_rate": 3.2789257247296545e-05, "loss": 0.0891, "step": 14060 }, { "epoch": 0.71, "learning_rate": 3.2776564959130835e-05, "loss": 0.0292, "step": 14070 }, { "epoch": 0.71, "learning_rate": 3.2763872670965125e-05, "loss": 0.0383, "step": 14080 }, { "epoch": 0.71, "learning_rate": 3.2751180382799415e-05, "loss": 0.0507, "step": 14090 }, { "epoch": 0.71, "learning_rate": 3.2738488094633705e-05, "loss": 0.0728, "step": 14100 }, { "epoch": 0.71, "learning_rate": 3.272579580646799e-05, "loss": 0.0591, "step": 14110 }, { "epoch": 0.71, "learning_rate": 3.271310351830228e-05, "loss": 0.038, "step": 14120 }, { "epoch": 0.71, "learning_rate": 3.270041123013657e-05, "loss": 0.0785, "step": 14130 }, { "epoch": 0.71, "learning_rate": 3.268771894197086e-05, "loss": 0.0738, "step": 14140 }, { "epoch": 0.71, "learning_rate": 3.2675026653805155e-05, "loss": 0.0393, "step": 14150 }, { "epoch": 0.71, "learning_rate": 3.266233436563944e-05, "loss": 0.0722, "step": 14160 }, { "epoch": 0.71, "learning_rate": 3.264964207747373e-05, "loss": 0.0437, "step": 14170 }, { "epoch": 0.71, "learning_rate": 3.263694978930802e-05, "loss": 0.0442, "step": 14180 }, { "epoch": 0.71, "learning_rate": 3.262425750114231e-05, "loss": 0.0409, "step": 14190 }, { "epoch": 0.71, "learning_rate": 3.26115652129766e-05, "loss": 0.0772, "step": 14200 }, { "epoch": 0.71, "learning_rate": 3.259887292481089e-05, "loss": 0.053, "step": 14210 }, { "epoch": 0.71, "learning_rate": 3.258618063664518e-05, "loss": 0.0514, "step": 14220 }, { "epoch": 0.71, "learning_rate": 3.257348834847946e-05, "loss": 0.0463, "step": 14230 }, { "epoch": 0.71, "learning_rate": 3.256079606031375e-05, "loss": 0.0533, "step": 14240 }, { "epoch": 0.71, "learning_rate": 3.254810377214804e-05, "loss": 0.043, "step": 14250 }, { "epoch": 0.71, "learning_rate": 3.253541148398234e-05, "loss": 0.0602, "step": 14260 }, { "epoch": 0.72, "learning_rate": 3.252271919581663e-05, "loss": 0.0467, "step": 14270 }, { "epoch": 0.72, "learning_rate": 3.251002690765092e-05, "loss": 0.0454, "step": 14280 }, { "epoch": 0.72, "learning_rate": 3.24973346194852e-05, "loss": 0.066, "step": 14290 }, { "epoch": 0.72, "learning_rate": 3.248464233131949e-05, "loss": 0.0406, "step": 14300 }, { "epoch": 0.72, "learning_rate": 3.247195004315378e-05, "loss": 0.0363, "step": 14310 }, { "epoch": 0.72, "learning_rate": 3.245925775498807e-05, "loss": 0.0492, "step": 14320 }, { "epoch": 0.72, "learning_rate": 3.244656546682236e-05, "loss": 0.0644, "step": 14330 }, { "epoch": 0.72, "learning_rate": 3.243387317865665e-05, "loss": 0.0595, "step": 14340 }, { "epoch": 0.72, "learning_rate": 3.242118089049094e-05, "loss": 0.0873, "step": 14350 }, { "epoch": 0.72, "learning_rate": 3.2408488602325224e-05, "loss": 0.0587, "step": 14360 }, { "epoch": 0.72, "learning_rate": 3.2395796314159514e-05, "loss": 0.0223, "step": 14370 }, { "epoch": 0.72, "learning_rate": 3.238310402599381e-05, "loss": 0.1567, "step": 14380 }, { "epoch": 0.72, "learning_rate": 3.23704117378281e-05, "loss": 0.0443, "step": 14390 }, { "epoch": 0.72, "learning_rate": 3.235771944966239e-05, "loss": 0.0591, "step": 14400 }, { "epoch": 0.72, "learning_rate": 3.2345027161496674e-05, "loss": 0.0621, "step": 14410 }, { "epoch": 0.72, "learning_rate": 3.2332334873330964e-05, "loss": 0.0536, "step": 14420 }, { "epoch": 0.72, "learning_rate": 3.2319642585165254e-05, "loss": 0.0676, "step": 14430 }, { "epoch": 0.72, "learning_rate": 3.2306950296999544e-05, "loss": 0.04, "step": 14440 }, { "epoch": 0.72, "learning_rate": 3.2294258008833834e-05, "loss": 0.06, "step": 14450 }, { "epoch": 0.72, "learning_rate": 3.2281565720668124e-05, "loss": 0.0496, "step": 14460 }, { "epoch": 0.73, "learning_rate": 3.2268873432502414e-05, "loss": 0.0741, "step": 14470 }, { "epoch": 0.73, "learning_rate": 3.22561811443367e-05, "loss": 0.0481, "step": 14480 }, { "epoch": 0.73, "learning_rate": 3.2243488856170994e-05, "loss": 0.0566, "step": 14490 }, { "epoch": 0.73, "learning_rate": 3.2230796568005284e-05, "loss": 0.0569, "step": 14500 }, { "epoch": 0.73, "learning_rate": 3.2218104279839574e-05, "loss": 0.0625, "step": 14510 }, { "epoch": 0.73, "learning_rate": 3.2205411991673864e-05, "loss": 0.0481, "step": 14520 }, { "epoch": 0.73, "learning_rate": 3.2192719703508154e-05, "loss": 0.018, "step": 14530 }, { "epoch": 0.73, "learning_rate": 3.218002741534244e-05, "loss": 0.0656, "step": 14540 }, { "epoch": 0.73, "learning_rate": 3.216733512717673e-05, "loss": 0.069, "step": 14550 }, { "epoch": 0.73, "learning_rate": 3.215464283901102e-05, "loss": 0.0416, "step": 14560 }, { "epoch": 0.73, "learning_rate": 3.214195055084531e-05, "loss": 0.0962, "step": 14570 }, { "epoch": 0.73, "learning_rate": 3.21292582626796e-05, "loss": 0.0328, "step": 14580 }, { "epoch": 0.73, "learning_rate": 3.211656597451389e-05, "loss": 0.017, "step": 14590 }, { "epoch": 0.73, "learning_rate": 3.210387368634818e-05, "loss": 0.0403, "step": 14600 }, { "epoch": 0.73, "learning_rate": 3.209118139818247e-05, "loss": 0.0787, "step": 14610 }, { "epoch": 0.73, "learning_rate": 3.207848911001676e-05, "loss": 0.0464, "step": 14620 }, { "epoch": 0.73, "learning_rate": 3.206579682185105e-05, "loss": 0.0672, "step": 14630 }, { "epoch": 0.73, "learning_rate": 3.205310453368534e-05, "loss": 0.0594, "step": 14640 }, { "epoch": 0.73, "learning_rate": 3.204041224551963e-05, "loss": 0.0787, "step": 14650 }, { "epoch": 0.73, "learning_rate": 3.202771995735391e-05, "loss": 0.0398, "step": 14660 }, { "epoch": 0.74, "learning_rate": 3.20150276691882e-05, "loss": 0.041, "step": 14670 }, { "epoch": 0.74, "learning_rate": 3.200233538102249e-05, "loss": 0.0725, "step": 14680 }, { "epoch": 0.74, "learning_rate": 3.198964309285678e-05, "loss": 0.0829, "step": 14690 }, { "epoch": 0.74, "learning_rate": 3.197695080469107e-05, "loss": 0.0592, "step": 14700 }, { "epoch": 0.74, "learning_rate": 3.196425851652536e-05, "loss": 0.0446, "step": 14710 }, { "epoch": 0.74, "learning_rate": 3.195156622835965e-05, "loss": 0.034, "step": 14720 }, { "epoch": 0.74, "learning_rate": 3.193887394019394e-05, "loss": 0.0674, "step": 14730 }, { "epoch": 0.74, "learning_rate": 3.192618165202823e-05, "loss": 0.0101, "step": 14740 }, { "epoch": 0.74, "learning_rate": 3.191348936386252e-05, "loss": 0.0525, "step": 14750 }, { "epoch": 0.74, "learning_rate": 3.190079707569681e-05, "loss": 0.04, "step": 14760 }, { "epoch": 0.74, "learning_rate": 3.18881047875311e-05, "loss": 0.0376, "step": 14770 }, { "epoch": 0.74, "learning_rate": 3.187541249936539e-05, "loss": 0.0779, "step": 14780 }, { "epoch": 0.74, "learning_rate": 3.186272021119967e-05, "loss": 0.0857, "step": 14790 }, { "epoch": 0.74, "learning_rate": 3.185002792303396e-05, "loss": 0.0414, "step": 14800 }, { "epoch": 0.74, "learning_rate": 3.183733563486825e-05, "loss": 0.1115, "step": 14810 }, { "epoch": 0.74, "learning_rate": 3.182464334670254e-05, "loss": 0.0822, "step": 14820 }, { "epoch": 0.74, "learning_rate": 3.181195105853683e-05, "loss": 0.0591, "step": 14830 }, { "epoch": 0.74, "learning_rate": 3.179925877037112e-05, "loss": 0.069, "step": 14840 }, { "epoch": 0.74, "learning_rate": 3.178656648220541e-05, "loss": 0.1431, "step": 14850 }, { "epoch": 0.74, "learning_rate": 3.17738741940397e-05, "loss": 0.0519, "step": 14860 }, { "epoch": 0.75, "learning_rate": 3.176118190587399e-05, "loss": 0.1094, "step": 14870 }, { "epoch": 0.75, "learning_rate": 3.174848961770828e-05, "loss": 0.0665, "step": 14880 }, { "epoch": 0.75, "learning_rate": 3.173579732954257e-05, "loss": 0.0621, "step": 14890 }, { "epoch": 0.75, "learning_rate": 3.172310504137686e-05, "loss": 0.0498, "step": 14900 }, { "epoch": 0.75, "learning_rate": 3.1710412753211146e-05, "loss": 0.0599, "step": 14910 }, { "epoch": 0.75, "learning_rate": 3.1697720465045436e-05, "loss": 0.0962, "step": 14920 }, { "epoch": 0.75, "learning_rate": 3.1685028176879726e-05, "loss": 0.0399, "step": 14930 }, { "epoch": 0.75, "learning_rate": 3.1672335888714016e-05, "loss": 0.02, "step": 14940 }, { "epoch": 0.75, "learning_rate": 3.165964360054831e-05, "loss": 0.0655, "step": 14950 }, { "epoch": 0.75, "learning_rate": 3.16469513123826e-05, "loss": 0.0651, "step": 14960 }, { "epoch": 0.75, "learning_rate": 3.1634259024216886e-05, "loss": 0.052, "step": 14970 }, { "epoch": 0.75, "learning_rate": 3.1621566736051176e-05, "loss": 0.1316, "step": 14980 }, { "epoch": 0.75, "learning_rate": 3.1608874447885466e-05, "loss": 0.0632, "step": 14990 }, { "epoch": 0.75, "learning_rate": 3.1596182159719756e-05, "loss": 0.0936, "step": 15000 }, { "epoch": 0.75, "learning_rate": 3.1583489871554046e-05, "loss": 0.3442, "step": 15010 }, { "epoch": 0.75, "learning_rate": 3.1570797583388336e-05, "loss": 1.1646, "step": 15020 }, { "epoch": 0.75, "learning_rate": 3.1558105295222626e-05, "loss": 0.2148, "step": 15030 }, { "epoch": 0.75, "learning_rate": 3.154541300705691e-05, "loss": 0.0949, "step": 15040 }, { "epoch": 0.75, "learning_rate": 3.15327207188912e-05, "loss": 0.1596, "step": 15050 }, { "epoch": 0.76, "learning_rate": 3.152002843072549e-05, "loss": 0.0598, "step": 15060 }, { "epoch": 0.76, "learning_rate": 3.1507336142559786e-05, "loss": 0.0844, "step": 15070 }, { "epoch": 0.76, "learning_rate": 3.1494643854394076e-05, "loss": 0.0708, "step": 15080 }, { "epoch": 0.76, "learning_rate": 3.148195156622836e-05, "loss": 0.0628, "step": 15090 }, { "epoch": 0.76, "learning_rate": 3.146925927806265e-05, "loss": 0.0696, "step": 15100 }, { "epoch": 0.76, "learning_rate": 3.145656698989694e-05, "loss": 0.0614, "step": 15110 }, { "epoch": 0.76, "learning_rate": 3.144387470173123e-05, "loss": 0.0377, "step": 15120 }, { "epoch": 0.76, "learning_rate": 3.143118241356552e-05, "loss": 0.15, "step": 15130 }, { "epoch": 0.76, "learning_rate": 3.141849012539981e-05, "loss": 0.0767, "step": 15140 }, { "epoch": 0.76, "learning_rate": 3.14057978372341e-05, "loss": 0.0501, "step": 15150 }, { "epoch": 0.76, "learning_rate": 3.139310554906838e-05, "loss": 0.0601, "step": 15160 }, { "epoch": 0.76, "learning_rate": 3.138041326090267e-05, "loss": 0.0287, "step": 15170 }, { "epoch": 0.76, "learning_rate": 3.136772097273697e-05, "loss": 0.1037, "step": 15180 }, { "epoch": 0.76, "learning_rate": 3.135502868457126e-05, "loss": 0.069, "step": 15190 }, { "epoch": 0.76, "learning_rate": 3.134233639640555e-05, "loss": 0.0316, "step": 15200 }, { "epoch": 0.76, "learning_rate": 3.132964410823984e-05, "loss": 0.049, "step": 15210 }, { "epoch": 0.76, "learning_rate": 3.131695182007412e-05, "loss": 0.0506, "step": 15220 }, { "epoch": 0.76, "learning_rate": 3.130425953190841e-05, "loss": 0.0355, "step": 15230 }, { "epoch": 0.76, "learning_rate": 3.12915672437427e-05, "loss": 0.0516, "step": 15240 }, { "epoch": 0.76, "learning_rate": 3.127887495557699e-05, "loss": 0.0344, "step": 15250 }, { "epoch": 0.77, "learning_rate": 3.126618266741128e-05, "loss": 0.0708, "step": 15260 }, { "epoch": 0.77, "learning_rate": 3.125349037924557e-05, "loss": 0.0893, "step": 15270 }, { "epoch": 0.77, "learning_rate": 3.124079809107986e-05, "loss": 0.0717, "step": 15280 }, { "epoch": 0.77, "learning_rate": 3.1228105802914146e-05, "loss": 0.0725, "step": 15290 }, { "epoch": 0.77, "learning_rate": 3.121541351474844e-05, "loss": 0.082, "step": 15300 }, { "epoch": 0.77, "learning_rate": 3.120272122658273e-05, "loss": 0.0852, "step": 15310 }, { "epoch": 0.77, "learning_rate": 3.119002893841702e-05, "loss": 0.057, "step": 15320 }, { "epoch": 0.77, "learning_rate": 3.117733665025131e-05, "loss": 0.0741, "step": 15330 }, { "epoch": 0.77, "learning_rate": 3.1164644362085596e-05, "loss": 0.064, "step": 15340 }, { "epoch": 0.77, "learning_rate": 3.1151952073919886e-05, "loss": 0.0653, "step": 15350 }, { "epoch": 0.77, "learning_rate": 3.1139259785754176e-05, "loss": 0.0559, "step": 15360 }, { "epoch": 0.77, "learning_rate": 3.1126567497588466e-05, "loss": 0.0752, "step": 15370 }, { "epoch": 0.77, "learning_rate": 3.1113875209422756e-05, "loss": 0.0683, "step": 15380 }, { "epoch": 0.77, "learning_rate": 3.1101182921257046e-05, "loss": 0.0569, "step": 15390 }, { "epoch": 0.77, "learning_rate": 3.1088490633091336e-05, "loss": 0.0486, "step": 15400 }, { "epoch": 0.77, "learning_rate": 3.1075798344925626e-05, "loss": 0.0268, "step": 15410 }, { "epoch": 0.77, "learning_rate": 3.1063106056759916e-05, "loss": 0.0539, "step": 15420 }, { "epoch": 0.77, "learning_rate": 3.1050413768594205e-05, "loss": 0.0359, "step": 15430 }, { "epoch": 0.77, "learning_rate": 3.1037721480428495e-05, "loss": 0.0876, "step": 15440 }, { "epoch": 0.77, "learning_rate": 3.1025029192262785e-05, "loss": 0.0473, "step": 15450 }, { "epoch": 0.78, "learning_rate": 3.1012336904097075e-05, "loss": 0.0351, "step": 15460 }, { "epoch": 0.78, "learning_rate": 3.099964461593136e-05, "loss": 0.0301, "step": 15470 }, { "epoch": 0.78, "learning_rate": 3.098695232776565e-05, "loss": 0.0685, "step": 15480 }, { "epoch": 0.78, "learning_rate": 3.097426003959994e-05, "loss": 0.0762, "step": 15490 }, { "epoch": 0.78, "learning_rate": 3.096156775143423e-05, "loss": 0.1065, "step": 15500 }, { "epoch": 0.78, "learning_rate": 3.094887546326852e-05, "loss": 0.0578, "step": 15510 }, { "epoch": 0.78, "learning_rate": 3.093618317510281e-05, "loss": 0.0612, "step": 15520 }, { "epoch": 0.78, "learning_rate": 3.09234908869371e-05, "loss": 0.0364, "step": 15530 }, { "epoch": 0.78, "learning_rate": 3.091079859877139e-05, "loss": 0.0373, "step": 15540 }, { "epoch": 0.78, "learning_rate": 3.089810631060568e-05, "loss": 0.0778, "step": 15550 }, { "epoch": 0.78, "learning_rate": 3.088541402243997e-05, "loss": 0.06, "step": 15560 }, { "epoch": 0.78, "learning_rate": 3.087272173427426e-05, "loss": 0.0325, "step": 15570 }, { "epoch": 0.78, "learning_rate": 3.086002944610855e-05, "loss": 0.0642, "step": 15580 }, { "epoch": 0.78, "learning_rate": 3.084733715794283e-05, "loss": 0.0481, "step": 15590 }, { "epoch": 0.78, "learning_rate": 3.083464486977712e-05, "loss": 0.0266, "step": 15600 }, { "epoch": 0.78, "learning_rate": 3.082195258161141e-05, "loss": 0.0761, "step": 15610 }, { "epoch": 0.78, "learning_rate": 3.08092602934457e-05, "loss": 0.029, "step": 15620 }, { "epoch": 0.78, "learning_rate": 3.079656800527999e-05, "loss": 0.0638, "step": 15630 }, { "epoch": 0.78, "learning_rate": 3.078387571711429e-05, "loss": 0.0782, "step": 15640 }, { "epoch": 0.78, "learning_rate": 3.077118342894857e-05, "loss": 0.017, "step": 15650 }, { "epoch": 0.79, "learning_rate": 3.075849114078286e-05, "loss": 0.0539, "step": 15660 }, { "epoch": 0.79, "learning_rate": 3.074579885261715e-05, "loss": 0.0354, "step": 15670 }, { "epoch": 0.79, "learning_rate": 3.073310656445144e-05, "loss": 0.0384, "step": 15680 }, { "epoch": 0.79, "learning_rate": 3.072041427628573e-05, "loss": 0.0495, "step": 15690 }, { "epoch": 0.79, "learning_rate": 3.070772198812002e-05, "loss": 0.034, "step": 15700 }, { "epoch": 0.79, "learning_rate": 3.069502969995431e-05, "loss": 0.0975, "step": 15710 }, { "epoch": 0.79, "learning_rate": 3.0682337411788595e-05, "loss": 0.0282, "step": 15720 }, { "epoch": 0.79, "learning_rate": 3.0669645123622885e-05, "loss": 0.0839, "step": 15730 }, { "epoch": 0.79, "learning_rate": 3.0656952835457175e-05, "loss": 0.0649, "step": 15740 }, { "epoch": 0.79, "learning_rate": 3.0644260547291465e-05, "loss": 0.1045, "step": 15750 }, { "epoch": 0.79, "learning_rate": 3.063156825912576e-05, "loss": 0.0463, "step": 15760 }, { "epoch": 0.79, "learning_rate": 3.0618875970960045e-05, "loss": 0.114, "step": 15770 }, { "epoch": 0.79, "learning_rate": 3.0606183682794335e-05, "loss": 0.1338, "step": 15780 }, { "epoch": 0.79, "learning_rate": 3.0593491394628625e-05, "loss": 0.0479, "step": 15790 }, { "epoch": 0.79, "learning_rate": 3.0580799106462915e-05, "loss": 0.0255, "step": 15800 }, { "epoch": 0.79, "learning_rate": 3.0568106818297205e-05, "loss": 0.0386, "step": 15810 }, { "epoch": 0.79, "learning_rate": 3.0555414530131495e-05, "loss": 0.0218, "step": 15820 }, { "epoch": 0.79, "learning_rate": 3.0542722241965785e-05, "loss": 0.1023, "step": 15830 }, { "epoch": 0.79, "learning_rate": 3.053002995380007e-05, "loss": 0.0725, "step": 15840 }, { "epoch": 0.79, "learning_rate": 3.051733766563436e-05, "loss": 0.0427, "step": 15850 }, { "epoch": 0.8, "learning_rate": 3.0504645377468648e-05, "loss": 0.0495, "step": 15860 }, { "epoch": 0.8, "learning_rate": 3.0491953089302945e-05, "loss": 0.0775, "step": 15870 }, { "epoch": 0.8, "learning_rate": 3.047926080113723e-05, "loss": 0.0252, "step": 15880 }, { "epoch": 0.8, "learning_rate": 3.046656851297152e-05, "loss": 0.052, "step": 15890 }, { "epoch": 0.8, "learning_rate": 3.045387622480581e-05, "loss": 0.0772, "step": 15900 }, { "epoch": 0.8, "learning_rate": 3.04411839366401e-05, "loss": 0.0278, "step": 15910 }, { "epoch": 0.8, "learning_rate": 3.0428491648474388e-05, "loss": 0.0828, "step": 15920 }, { "epoch": 0.8, "learning_rate": 3.0415799360308678e-05, "loss": 0.0436, "step": 15930 }, { "epoch": 0.8, "learning_rate": 3.0403107072142968e-05, "loss": 0.0809, "step": 15940 }, { "epoch": 0.8, "learning_rate": 3.0390414783977254e-05, "loss": 0.0461, "step": 15950 }, { "epoch": 0.8, "learning_rate": 3.0377722495811544e-05, "loss": 0.1274, "step": 15960 }, { "epoch": 0.8, "learning_rate": 3.0365030207645834e-05, "loss": 0.048, "step": 15970 }, { "epoch": 0.8, "learning_rate": 3.035233791948012e-05, "loss": 0.0935, "step": 15980 }, { "epoch": 0.8, "learning_rate": 3.0339645631314418e-05, "loss": 0.0496, "step": 15990 }, { "epoch": 0.8, "learning_rate": 3.0326953343148708e-05, "loss": 0.0392, "step": 16000 }, { "epoch": 0.8, "learning_rate": 3.0314261054982994e-05, "loss": 0.0878, "step": 16010 }, { "epoch": 0.8, "learning_rate": 3.0301568766817284e-05, "loss": 0.0409, "step": 16020 }, { "epoch": 0.8, "learning_rate": 3.0288876478651574e-05, "loss": 0.0356, "step": 16030 }, { "epoch": 0.8, "learning_rate": 3.027618419048586e-05, "loss": 0.061, "step": 16040 }, { "epoch": 0.8, "learning_rate": 3.026349190232015e-05, "loss": 0.0442, "step": 16050 }, { "epoch": 0.81, "learning_rate": 3.025079961415444e-05, "loss": 0.0673, "step": 16060 }, { "epoch": 0.81, "learning_rate": 3.023810732598873e-05, "loss": 0.0205, "step": 16070 }, { "epoch": 0.81, "learning_rate": 3.0225415037823018e-05, "loss": 0.0435, "step": 16080 }, { "epoch": 0.81, "learning_rate": 3.0212722749657308e-05, "loss": 0.0628, "step": 16090 }, { "epoch": 0.81, "learning_rate": 3.02000304614916e-05, "loss": 0.0501, "step": 16100 }, { "epoch": 0.81, "learning_rate": 3.018733817332589e-05, "loss": 0.0549, "step": 16110 }, { "epoch": 0.81, "learning_rate": 3.017464588516018e-05, "loss": 0.1053, "step": 16120 }, { "epoch": 0.81, "learning_rate": 3.0161953596994468e-05, "loss": 0.1015, "step": 16130 }, { "epoch": 0.81, "learning_rate": 3.0149261308828758e-05, "loss": 0.033, "step": 16140 }, { "epoch": 0.81, "learning_rate": 3.0136569020663048e-05, "loss": 0.1144, "step": 16150 }, { "epoch": 0.81, "learning_rate": 3.0123876732497338e-05, "loss": 0.0901, "step": 16160 }, { "epoch": 0.81, "learning_rate": 3.0111184444331624e-05, "loss": 0.0519, "step": 16170 }, { "epoch": 0.81, "learning_rate": 3.0098492156165914e-05, "loss": 0.1012, "step": 16180 }, { "epoch": 0.81, "learning_rate": 3.0085799868000204e-05, "loss": 0.0401, "step": 16190 }, { "epoch": 0.81, "learning_rate": 3.007310757983449e-05, "loss": 0.0613, "step": 16200 }, { "epoch": 0.81, "learning_rate": 3.006041529166878e-05, "loss": 0.057, "step": 16210 }, { "epoch": 0.81, "learning_rate": 3.0047723003503074e-05, "loss": 0.0727, "step": 16220 }, { "epoch": 0.81, "learning_rate": 3.0035030715337364e-05, "loss": 0.0619, "step": 16230 }, { "epoch": 0.81, "learning_rate": 3.0022338427171654e-05, "loss": 0.1002, "step": 16240 }, { "epoch": 0.81, "learning_rate": 3.0009646139005944e-05, "loss": 0.0538, "step": 16250 }, { "epoch": 0.82, "learning_rate": 2.999695385084023e-05, "loss": 0.0507, "step": 16260 }, { "epoch": 0.82, "learning_rate": 2.998426156267452e-05, "loss": 0.0668, "step": 16270 }, { "epoch": 0.82, "learning_rate": 2.997156927450881e-05, "loss": 0.0746, "step": 16280 }, { "epoch": 0.82, "learning_rate": 2.9958876986343097e-05, "loss": 0.0921, "step": 16290 }, { "epoch": 0.82, "learning_rate": 2.9946184698177387e-05, "loss": 0.0514, "step": 16300 }, { "epoch": 0.82, "learning_rate": 2.9933492410011677e-05, "loss": 0.0563, "step": 16310 }, { "epoch": 0.82, "learning_rate": 2.9920800121845964e-05, "loss": 0.0882, "step": 16320 }, { "epoch": 0.82, "learning_rate": 2.990810783368026e-05, "loss": 0.0536, "step": 16330 }, { "epoch": 0.82, "learning_rate": 2.989541554551455e-05, "loss": 0.0796, "step": 16340 }, { "epoch": 0.82, "learning_rate": 2.9882723257348837e-05, "loss": 0.0919, "step": 16350 }, { "epoch": 0.82, "learning_rate": 2.9870030969183127e-05, "loss": 0.0439, "step": 16360 }, { "epoch": 0.82, "learning_rate": 2.9857338681017417e-05, "loss": 0.0337, "step": 16370 }, { "epoch": 0.82, "learning_rate": 2.9844646392851704e-05, "loss": 0.058, "step": 16380 }, { "epoch": 0.82, "learning_rate": 2.9831954104685994e-05, "loss": 0.086, "step": 16390 }, { "epoch": 0.82, "learning_rate": 2.9819261816520284e-05, "loss": 0.0325, "step": 16400 }, { "epoch": 0.82, "learning_rate": 2.9806569528354574e-05, "loss": 0.0604, "step": 16410 }, { "epoch": 0.82, "learning_rate": 2.979387724018886e-05, "loss": 0.0789, "step": 16420 }, { "epoch": 0.82, "learning_rate": 2.978118495202315e-05, "loss": 0.0426, "step": 16430 }, { "epoch": 0.82, "learning_rate": 2.976849266385744e-05, "loss": 0.039, "step": 16440 }, { "epoch": 0.82, "learning_rate": 2.9755800375691734e-05, "loss": 0.0481, "step": 16450 }, { "epoch": 0.83, "learning_rate": 2.9743108087526024e-05, "loss": 0.082, "step": 16460 }, { "epoch": 0.83, "learning_rate": 2.973041579936031e-05, "loss": 0.0317, "step": 16470 }, { "epoch": 0.83, "learning_rate": 2.97177235111946e-05, "loss": 0.0251, "step": 16480 }, { "epoch": 0.83, "learning_rate": 2.970503122302889e-05, "loss": 0.0352, "step": 16490 }, { "epoch": 0.83, "learning_rate": 2.969233893486318e-05, "loss": 0.0547, "step": 16500 }, { "epoch": 0.83, "learning_rate": 2.9679646646697467e-05, "loss": 0.0895, "step": 16510 }, { "epoch": 0.83, "learning_rate": 2.9666954358531757e-05, "loss": 0.0993, "step": 16520 }, { "epoch": 0.83, "learning_rate": 2.9654262070366047e-05, "loss": 0.0424, "step": 16530 }, { "epoch": 0.83, "learning_rate": 2.9641569782200333e-05, "loss": 0.046, "step": 16540 }, { "epoch": 0.83, "learning_rate": 2.9628877494034623e-05, "loss": 0.0339, "step": 16550 }, { "epoch": 0.83, "learning_rate": 2.9616185205868917e-05, "loss": 0.0604, "step": 16560 }, { "epoch": 0.83, "learning_rate": 2.9603492917703207e-05, "loss": 0.0806, "step": 16570 }, { "epoch": 0.83, "learning_rate": 2.9590800629537497e-05, "loss": 0.0616, "step": 16580 }, { "epoch": 0.83, "learning_rate": 2.9578108341371787e-05, "loss": 0.0827, "step": 16590 }, { "epoch": 0.83, "learning_rate": 2.9565416053206073e-05, "loss": 0.0562, "step": 16600 }, { "epoch": 0.83, "learning_rate": 2.9552723765040363e-05, "loss": 0.0466, "step": 16610 }, { "epoch": 0.83, "learning_rate": 2.9540031476874653e-05, "loss": 0.0592, "step": 16620 }, { "epoch": 0.83, "learning_rate": 2.952733918870894e-05, "loss": 0.0317, "step": 16630 }, { "epoch": 0.83, "learning_rate": 2.951464690054323e-05, "loss": 0.0747, "step": 16640 }, { "epoch": 0.83, "learning_rate": 2.950195461237752e-05, "loss": 0.0535, "step": 16650 }, { "epoch": 0.84, "learning_rate": 2.948926232421181e-05, "loss": 0.026, "step": 16660 }, { "epoch": 0.84, "learning_rate": 2.9476570036046097e-05, "loss": 0.0394, "step": 16670 }, { "epoch": 0.84, "learning_rate": 2.9463877747880393e-05, "loss": 0.0687, "step": 16680 }, { "epoch": 0.84, "learning_rate": 2.945118545971468e-05, "loss": 0.0313, "step": 16690 }, { "epoch": 0.84, "learning_rate": 2.943849317154897e-05, "loss": 0.0349, "step": 16700 }, { "epoch": 0.84, "learning_rate": 2.942580088338326e-05, "loss": 0.0454, "step": 16710 }, { "epoch": 0.84, "learning_rate": 2.9413108595217546e-05, "loss": 0.0313, "step": 16720 }, { "epoch": 0.84, "learning_rate": 2.9400416307051836e-05, "loss": 0.0726, "step": 16730 }, { "epoch": 0.84, "learning_rate": 2.9387724018886126e-05, "loss": 0.0752, "step": 16740 }, { "epoch": 0.84, "learning_rate": 2.9375031730720416e-05, "loss": 0.0598, "step": 16750 }, { "epoch": 0.84, "learning_rate": 2.9362339442554703e-05, "loss": 0.0336, "step": 16760 }, { "epoch": 0.84, "learning_rate": 2.9349647154388993e-05, "loss": 0.078, "step": 16770 }, { "epoch": 0.84, "learning_rate": 2.9336954866223283e-05, "loss": 0.0585, "step": 16780 }, { "epoch": 0.84, "learning_rate": 2.9324262578057576e-05, "loss": 0.0601, "step": 16790 }, { "epoch": 0.84, "learning_rate": 2.9311570289891866e-05, "loss": 0.0228, "step": 16800 }, { "epoch": 0.84, "learning_rate": 2.9298878001726153e-05, "loss": 0.0265, "step": 16810 }, { "epoch": 0.84, "learning_rate": 2.9286185713560443e-05, "loss": 0.0425, "step": 16820 }, { "epoch": 0.84, "learning_rate": 2.9273493425394733e-05, "loss": 0.052, "step": 16830 }, { "epoch": 0.84, "learning_rate": 2.9260801137229023e-05, "loss": 0.0323, "step": 16840 }, { "epoch": 0.84, "learning_rate": 2.924810884906331e-05, "loss": 0.0857, "step": 16850 }, { "epoch": 0.85, "learning_rate": 2.92354165608976e-05, "loss": 0.0755, "step": 16860 }, { "epoch": 0.85, "learning_rate": 2.922272427273189e-05, "loss": 0.0751, "step": 16870 }, { "epoch": 0.85, "learning_rate": 2.9210031984566176e-05, "loss": 0.0553, "step": 16880 }, { "epoch": 0.85, "learning_rate": 2.9197339696400466e-05, "loss": 0.0557, "step": 16890 }, { "epoch": 0.85, "learning_rate": 2.9184647408234756e-05, "loss": 0.0634, "step": 16900 }, { "epoch": 0.85, "learning_rate": 2.917195512006905e-05, "loss": 0.0532, "step": 16910 }, { "epoch": 0.85, "learning_rate": 2.915926283190334e-05, "loss": 0.0857, "step": 16920 }, { "epoch": 0.85, "learning_rate": 2.914657054373763e-05, "loss": 0.1036, "step": 16930 }, { "epoch": 0.85, "learning_rate": 2.9133878255571916e-05, "loss": 0.1134, "step": 16940 }, { "epoch": 0.85, "learning_rate": 2.9121185967406206e-05, "loss": 0.086, "step": 16950 }, { "epoch": 0.85, "learning_rate": 2.9108493679240496e-05, "loss": 0.0355, "step": 16960 }, { "epoch": 0.85, "learning_rate": 2.9095801391074783e-05, "loss": 0.0568, "step": 16970 }, { "epoch": 0.85, "learning_rate": 2.9083109102909073e-05, "loss": 0.0595, "step": 16980 }, { "epoch": 0.85, "learning_rate": 2.9070416814743363e-05, "loss": 0.1058, "step": 16990 }, { "epoch": 0.85, "learning_rate": 2.9057724526577653e-05, "loss": 0.0847, "step": 17000 }, { "epoch": 0.85, "learning_rate": 2.904503223841194e-05, "loss": 0.088, "step": 17010 }, { "epoch": 0.85, "learning_rate": 2.9032339950246236e-05, "loss": 0.0701, "step": 17020 }, { "epoch": 0.85, "learning_rate": 2.9019647662080523e-05, "loss": 0.0694, "step": 17030 }, { "epoch": 0.85, "learning_rate": 2.9006955373914813e-05, "loss": 0.0574, "step": 17040 }, { "epoch": 0.85, "learning_rate": 2.8994263085749103e-05, "loss": 0.0664, "step": 17050 }, { "epoch": 0.86, "learning_rate": 2.898157079758339e-05, "loss": 0.0653, "step": 17060 }, { "epoch": 0.86, "learning_rate": 2.896887850941768e-05, "loss": 0.078, "step": 17070 }, { "epoch": 0.86, "learning_rate": 2.895618622125197e-05, "loss": 0.0778, "step": 17080 }, { "epoch": 0.86, "learning_rate": 2.894349393308626e-05, "loss": 0.0736, "step": 17090 }, { "epoch": 0.86, "learning_rate": 2.8930801644920546e-05, "loss": 0.063, "step": 17100 }, { "epoch": 0.86, "learning_rate": 2.8918109356754836e-05, "loss": 0.0427, "step": 17110 }, { "epoch": 0.86, "learning_rate": 2.8905417068589126e-05, "loss": 0.0371, "step": 17120 }, { "epoch": 0.86, "learning_rate": 2.8892724780423412e-05, "loss": 0.0746, "step": 17130 }, { "epoch": 0.86, "learning_rate": 2.888003249225771e-05, "loss": 0.0578, "step": 17140 }, { "epoch": 0.86, "learning_rate": 2.8867340204091996e-05, "loss": 0.0961, "step": 17150 }, { "epoch": 0.86, "learning_rate": 2.8854647915926286e-05, "loss": 0.0481, "step": 17160 }, { "epoch": 0.86, "learning_rate": 2.8841955627760576e-05, "loss": 0.036, "step": 17170 }, { "epoch": 0.86, "learning_rate": 2.8829263339594866e-05, "loss": 0.0501, "step": 17180 }, { "epoch": 0.86, "learning_rate": 2.8816571051429152e-05, "loss": 0.0408, "step": 17190 }, { "epoch": 0.86, "learning_rate": 2.8803878763263442e-05, "loss": 0.0623, "step": 17200 }, { "epoch": 0.86, "learning_rate": 2.8791186475097732e-05, "loss": 0.0569, "step": 17210 }, { "epoch": 0.86, "learning_rate": 2.877849418693202e-05, "loss": 0.1149, "step": 17220 }, { "epoch": 0.86, "learning_rate": 2.876580189876631e-05, "loss": 0.0589, "step": 17230 }, { "epoch": 0.86, "learning_rate": 2.87531096106006e-05, "loss": 0.062, "step": 17240 }, { "epoch": 0.86, "learning_rate": 2.8740417322434892e-05, "loss": 0.0307, "step": 17250 }, { "epoch": 0.87, "learning_rate": 2.8727725034269182e-05, "loss": 0.0572, "step": 17260 }, { "epoch": 0.87, "learning_rate": 2.8715032746103472e-05, "loss": 0.0694, "step": 17270 }, { "epoch": 0.87, "learning_rate": 2.870234045793776e-05, "loss": 0.0951, "step": 17280 }, { "epoch": 0.87, "learning_rate": 2.868964816977205e-05, "loss": 0.0687, "step": 17290 }, { "epoch": 0.87, "learning_rate": 2.867695588160634e-05, "loss": 0.0738, "step": 17300 }, { "epoch": 0.87, "learning_rate": 2.8664263593440625e-05, "loss": 0.0745, "step": 17310 }, { "epoch": 0.87, "learning_rate": 2.8651571305274915e-05, "loss": 0.1048, "step": 17320 }, { "epoch": 0.87, "learning_rate": 2.8638879017109205e-05, "loss": 0.0224, "step": 17330 }, { "epoch": 0.87, "learning_rate": 2.8626186728943495e-05, "loss": 0.0431, "step": 17340 }, { "epoch": 0.87, "learning_rate": 2.8613494440777782e-05, "loss": 0.0482, "step": 17350 }, { "epoch": 0.87, "learning_rate": 2.8600802152612072e-05, "loss": 0.0589, "step": 17360 }, { "epoch": 0.87, "learning_rate": 2.8588109864446365e-05, "loss": 0.0495, "step": 17370 }, { "epoch": 0.87, "learning_rate": 2.8575417576280655e-05, "loss": 0.038, "step": 17380 }, { "epoch": 0.87, "learning_rate": 2.8562725288114945e-05, "loss": 0.0535, "step": 17390 }, { "epoch": 0.87, "learning_rate": 2.8550032999949232e-05, "loss": 0.0671, "step": 17400 }, { "epoch": 0.87, "learning_rate": 2.8537340711783522e-05, "loss": 0.0588, "step": 17410 }, { "epoch": 0.87, "learning_rate": 2.8524648423617812e-05, "loss": 0.0457, "step": 17420 }, { "epoch": 0.87, "learning_rate": 2.8511956135452102e-05, "loss": 0.0579, "step": 17430 }, { "epoch": 0.87, "learning_rate": 2.849926384728639e-05, "loss": 0.0309, "step": 17440 }, { "epoch": 0.87, "learning_rate": 2.848657155912068e-05, "loss": 0.0689, "step": 17450 }, { "epoch": 0.88, "learning_rate": 2.847387927095497e-05, "loss": 0.0595, "step": 17460 }, { "epoch": 0.88, "learning_rate": 2.8461186982789255e-05, "loss": 0.0607, "step": 17470 }, { "epoch": 0.88, "learning_rate": 2.8448494694623545e-05, "loss": 0.0391, "step": 17480 }, { "epoch": 0.88, "learning_rate": 2.843580240645784e-05, "loss": 0.0371, "step": 17490 }, { "epoch": 0.88, "learning_rate": 2.842311011829213e-05, "loss": 0.0596, "step": 17500 }, { "epoch": 0.88, "learning_rate": 2.841041783012642e-05, "loss": 0.0692, "step": 17510 }, { "epoch": 0.88, "learning_rate": 2.839772554196071e-05, "loss": 0.08, "step": 17520 }, { "epoch": 0.88, "learning_rate": 2.8385033253794995e-05, "loss": 0.0669, "step": 17530 }, { "epoch": 0.88, "learning_rate": 2.8372340965629285e-05, "loss": 0.1356, "step": 17540 }, { "epoch": 0.88, "learning_rate": 2.8359648677463575e-05, "loss": 0.0382, "step": 17550 }, { "epoch": 0.88, "learning_rate": 2.834695638929786e-05, "loss": 0.0422, "step": 17560 }, { "epoch": 0.88, "learning_rate": 2.833426410113215e-05, "loss": 0.0129, "step": 17570 }, { "epoch": 0.88, "learning_rate": 2.832157181296644e-05, "loss": 0.0265, "step": 17580 }, { "epoch": 0.88, "learning_rate": 2.830887952480073e-05, "loss": 0.1166, "step": 17590 }, { "epoch": 0.88, "learning_rate": 2.8296187236635025e-05, "loss": 0.0378, "step": 17600 }, { "epoch": 0.88, "learning_rate": 2.8283494948469315e-05, "loss": 0.0269, "step": 17610 }, { "epoch": 0.88, "learning_rate": 2.82708026603036e-05, "loss": 0.0838, "step": 17620 }, { "epoch": 0.88, "learning_rate": 2.825811037213789e-05, "loss": 0.0409, "step": 17630 }, { "epoch": 0.88, "learning_rate": 2.824541808397218e-05, "loss": 0.1202, "step": 17640 }, { "epoch": 0.88, "learning_rate": 2.8232725795806468e-05, "loss": 0.0406, "step": 17650 }, { "epoch": 0.89, "learning_rate": 2.8220033507640758e-05, "loss": 0.0261, "step": 17660 }, { "epoch": 0.89, "learning_rate": 2.8207341219475048e-05, "loss": 0.1117, "step": 17670 }, { "epoch": 0.89, "learning_rate": 2.8194648931309338e-05, "loss": 0.0519, "step": 17680 }, { "epoch": 0.89, "learning_rate": 2.8181956643143625e-05, "loss": 0.0554, "step": 17690 }, { "epoch": 0.89, "learning_rate": 2.8169264354977915e-05, "loss": 0.0472, "step": 17700 }, { "epoch": 0.89, "learning_rate": 2.8156572066812205e-05, "loss": 0.0619, "step": 17710 }, { "epoch": 0.89, "learning_rate": 2.8143879778646498e-05, "loss": 0.0507, "step": 17720 }, { "epoch": 0.89, "learning_rate": 2.8131187490480788e-05, "loss": 0.0719, "step": 17730 }, { "epoch": 0.89, "learning_rate": 2.8118495202315075e-05, "loss": 0.036, "step": 17740 }, { "epoch": 0.89, "learning_rate": 2.8105802914149365e-05, "loss": 0.0687, "step": 17750 }, { "epoch": 0.89, "learning_rate": 2.8093110625983655e-05, "loss": 0.0498, "step": 17760 }, { "epoch": 0.89, "learning_rate": 2.8080418337817945e-05, "loss": 0.062, "step": 17770 }, { "epoch": 0.89, "learning_rate": 2.806772604965223e-05, "loss": 0.0614, "step": 17780 }, { "epoch": 0.89, "learning_rate": 2.805503376148652e-05, "loss": 0.0645, "step": 17790 }, { "epoch": 0.89, "learning_rate": 2.804234147332081e-05, "loss": 0.0691, "step": 17800 }, { "epoch": 0.89, "learning_rate": 2.8029649185155098e-05, "loss": 0.0856, "step": 17810 }, { "epoch": 0.89, "learning_rate": 2.8016956896989388e-05, "loss": 0.0545, "step": 17820 }, { "epoch": 0.89, "learning_rate": 2.800426460882368e-05, "loss": 0.1081, "step": 17830 }, { "epoch": 0.89, "learning_rate": 2.799157232065797e-05, "loss": 0.0541, "step": 17840 }, { "epoch": 0.89, "learning_rate": 2.797888003249226e-05, "loss": 0.0655, "step": 17850 }, { "epoch": 0.9, "learning_rate": 2.796618774432655e-05, "loss": 0.067, "step": 17860 }, { "epoch": 0.9, "learning_rate": 2.7953495456160838e-05, "loss": 0.0424, "step": 17870 }, { "epoch": 0.9, "learning_rate": 2.7940803167995128e-05, "loss": 0.0522, "step": 17880 }, { "epoch": 0.9, "learning_rate": 2.7928110879829418e-05, "loss": 0.1039, "step": 17890 }, { "epoch": 0.9, "learning_rate": 2.7915418591663704e-05, "loss": 0.0572, "step": 17900 }, { "epoch": 0.9, "learning_rate": 2.7902726303497994e-05, "loss": 0.086, "step": 17910 }, { "epoch": 0.9, "learning_rate": 2.7890034015332284e-05, "loss": 0.0393, "step": 17920 }, { "epoch": 0.9, "learning_rate": 2.7877341727166574e-05, "loss": 0.1095, "step": 17930 }, { "epoch": 0.9, "learning_rate": 2.786464943900086e-05, "loss": 0.1186, "step": 17940 }, { "epoch": 0.9, "learning_rate": 2.7851957150835158e-05, "loss": 0.0698, "step": 17950 }, { "epoch": 0.9, "learning_rate": 2.7839264862669444e-05, "loss": 0.0493, "step": 17960 }, { "epoch": 0.9, "learning_rate": 2.7826572574503734e-05, "loss": 0.0855, "step": 17970 }, { "epoch": 0.9, "learning_rate": 2.7813880286338024e-05, "loss": 0.051, "step": 17980 }, { "epoch": 0.9, "learning_rate": 2.780118799817231e-05, "loss": 0.0388, "step": 17990 }, { "epoch": 0.9, "learning_rate": 2.77884957100066e-05, "loss": 0.0539, "step": 18000 }, { "epoch": 0.9, "learning_rate": 2.777580342184089e-05, "loss": 0.0231, "step": 18010 }, { "epoch": 0.9, "learning_rate": 2.776311113367518e-05, "loss": 0.0357, "step": 18020 }, { "epoch": 0.9, "learning_rate": 2.7750418845509467e-05, "loss": 0.0527, "step": 18030 }, { "epoch": 0.9, "learning_rate": 2.7737726557343757e-05, "loss": 0.0536, "step": 18040 }, { "epoch": 0.9, "learning_rate": 2.7725034269178047e-05, "loss": 0.0759, "step": 18050 }, { "epoch": 0.91, "learning_rate": 2.771234198101234e-05, "loss": 0.0484, "step": 18060 }, { "epoch": 0.91, "learning_rate": 2.769964969284663e-05, "loss": 0.039, "step": 18070 }, { "epoch": 0.91, "learning_rate": 2.7686957404680917e-05, "loss": 0.1022, "step": 18080 }, { "epoch": 0.91, "learning_rate": 2.7674265116515207e-05, "loss": 0.0766, "step": 18090 }, { "epoch": 0.91, "learning_rate": 2.7661572828349497e-05, "loss": 0.0717, "step": 18100 }, { "epoch": 0.91, "learning_rate": 2.7648880540183787e-05, "loss": 0.0412, "step": 18110 }, { "epoch": 0.91, "learning_rate": 2.7636188252018074e-05, "loss": 0.0314, "step": 18120 }, { "epoch": 0.91, "learning_rate": 2.7623495963852364e-05, "loss": 0.1084, "step": 18130 }, { "epoch": 0.91, "learning_rate": 2.7610803675686654e-05, "loss": 0.0898, "step": 18140 }, { "epoch": 0.91, "learning_rate": 2.759811138752094e-05, "loss": 0.0155, "step": 18150 }, { "epoch": 0.91, "learning_rate": 2.758541909935523e-05, "loss": 0.0429, "step": 18160 }, { "epoch": 0.91, "learning_rate": 2.757272681118952e-05, "loss": 0.1057, "step": 18170 }, { "epoch": 0.91, "learning_rate": 2.7560034523023814e-05, "loss": 0.13, "step": 18180 }, { "epoch": 0.91, "learning_rate": 2.7547342234858104e-05, "loss": 0.0794, "step": 18190 }, { "epoch": 0.91, "learning_rate": 2.7534649946692394e-05, "loss": 0.0333, "step": 18200 }, { "epoch": 0.91, "learning_rate": 2.752195765852668e-05, "loss": 0.0459, "step": 18210 }, { "epoch": 0.91, "learning_rate": 2.750926537036097e-05, "loss": 0.0896, "step": 18220 }, { "epoch": 0.91, "learning_rate": 2.749657308219526e-05, "loss": 0.0471, "step": 18230 }, { "epoch": 0.91, "learning_rate": 2.7483880794029547e-05, "loss": 0.0651, "step": 18240 }, { "epoch": 0.91, "learning_rate": 2.7471188505863837e-05, "loss": 0.0543, "step": 18250 }, { "epoch": 0.92, "learning_rate": 2.7458496217698127e-05, "loss": 0.0644, "step": 18260 }, { "epoch": 0.92, "learning_rate": 2.7445803929532417e-05, "loss": 0.0499, "step": 18270 }, { "epoch": 0.92, "learning_rate": 2.7433111641366704e-05, "loss": 0.0353, "step": 18280 }, { "epoch": 0.92, "learning_rate": 2.7420419353201e-05, "loss": 0.0628, "step": 18290 }, { "epoch": 0.92, "learning_rate": 2.7407727065035287e-05, "loss": 0.0568, "step": 18300 }, { "epoch": 0.92, "learning_rate": 2.7395034776869577e-05, "loss": 0.0366, "step": 18310 }, { "epoch": 0.92, "learning_rate": 2.7382342488703867e-05, "loss": 0.0607, "step": 18320 }, { "epoch": 0.92, "learning_rate": 2.7369650200538154e-05, "loss": 0.1289, "step": 18330 }, { "epoch": 0.92, "learning_rate": 2.7356957912372444e-05, "loss": 0.0244, "step": 18340 }, { "epoch": 0.92, "learning_rate": 2.7344265624206734e-05, "loss": 0.031, "step": 18350 }, { "epoch": 0.92, "learning_rate": 2.7331573336041024e-05, "loss": 0.0128, "step": 18360 }, { "epoch": 0.92, "learning_rate": 2.731888104787531e-05, "loss": 0.1098, "step": 18370 }, { "epoch": 0.92, "learning_rate": 2.73061887597096e-05, "loss": 0.0764, "step": 18380 }, { "epoch": 0.92, "learning_rate": 2.729349647154389e-05, "loss": 0.1127, "step": 18390 }, { "epoch": 0.92, "learning_rate": 2.7280804183378177e-05, "loss": 0.0353, "step": 18400 }, { "epoch": 0.92, "learning_rate": 2.7268111895212473e-05, "loss": 0.0282, "step": 18410 }, { "epoch": 0.92, "learning_rate": 2.725541960704676e-05, "loss": 0.0751, "step": 18420 }, { "epoch": 0.92, "learning_rate": 2.724272731888105e-05, "loss": 0.0449, "step": 18430 }, { "epoch": 0.92, "learning_rate": 2.723003503071534e-05, "loss": 0.0539, "step": 18440 }, { "epoch": 0.92, "learning_rate": 2.721734274254963e-05, "loss": 0.0962, "step": 18450 }, { "epoch": 0.93, "learning_rate": 2.7204650454383917e-05, "loss": 0.0982, "step": 18460 }, { "epoch": 0.93, "learning_rate": 2.7191958166218207e-05, "loss": 0.0567, "step": 18470 }, { "epoch": 0.93, "learning_rate": 2.7179265878052497e-05, "loss": 0.0973, "step": 18480 }, { "epoch": 0.93, "learning_rate": 2.7166573589886783e-05, "loss": 0.0715, "step": 18490 }, { "epoch": 0.93, "learning_rate": 2.7153881301721073e-05, "loss": 0.0306, "step": 18500 }, { "epoch": 0.93, "learning_rate": 2.7141189013555363e-05, "loss": 0.0369, "step": 18510 }, { "epoch": 0.93, "learning_rate": 2.7128496725389657e-05, "loss": 0.0478, "step": 18520 }, { "epoch": 0.93, "learning_rate": 2.7115804437223947e-05, "loss": 0.0308, "step": 18530 }, { "epoch": 0.93, "learning_rate": 2.7103112149058237e-05, "loss": 0.0264, "step": 18540 }, { "epoch": 0.93, "learning_rate": 2.7090419860892523e-05, "loss": 0.0386, "step": 18550 }, { "epoch": 0.93, "learning_rate": 2.7077727572726813e-05, "loss": 0.0486, "step": 18560 }, { "epoch": 0.93, "learning_rate": 2.7065035284561103e-05, "loss": 0.0685, "step": 18570 }, { "epoch": 0.93, "learning_rate": 2.705234299639539e-05, "loss": 0.0729, "step": 18580 }, { "epoch": 0.93, "learning_rate": 2.703965070822968e-05, "loss": 0.0481, "step": 18590 }, { "epoch": 0.93, "learning_rate": 2.702695842006397e-05, "loss": 0.052, "step": 18600 }, { "epoch": 0.93, "learning_rate": 2.701426613189826e-05, "loss": 0.0499, "step": 18610 }, { "epoch": 0.93, "learning_rate": 2.7001573843732546e-05, "loss": 0.0666, "step": 18620 }, { "epoch": 0.93, "learning_rate": 2.6988881555566836e-05, "loss": 0.0743, "step": 18630 }, { "epoch": 0.93, "learning_rate": 2.697618926740113e-05, "loss": 0.0475, "step": 18640 }, { "epoch": 0.93, "learning_rate": 2.696349697923542e-05, "loss": 0.0607, "step": 18650 }, { "epoch": 0.94, "learning_rate": 2.695080469106971e-05, "loss": 0.0366, "step": 18660 }, { "epoch": 0.94, "learning_rate": 2.6938112402903996e-05, "loss": 0.0724, "step": 18670 }, { "epoch": 0.94, "learning_rate": 2.6925420114738286e-05, "loss": 0.0424, "step": 18680 }, { "epoch": 0.94, "learning_rate": 2.6912727826572576e-05, "loss": 0.0661, "step": 18690 }, { "epoch": 0.94, "learning_rate": 2.6900035538406866e-05, "loss": 0.0364, "step": 18700 }, { "epoch": 0.94, "learning_rate": 2.6887343250241153e-05, "loss": 0.0718, "step": 18710 }, { "epoch": 0.94, "learning_rate": 2.6874650962075443e-05, "loss": 0.07, "step": 18720 }, { "epoch": 0.94, "learning_rate": 2.6861958673909733e-05, "loss": 0.0955, "step": 18730 }, { "epoch": 0.94, "learning_rate": 2.684926638574402e-05, "loss": 0.0729, "step": 18740 }, { "epoch": 0.94, "learning_rate": 2.6836574097578316e-05, "loss": 0.0628, "step": 18750 }, { "epoch": 0.94, "learning_rate": 2.6823881809412603e-05, "loss": 0.0262, "step": 18760 }, { "epoch": 0.94, "learning_rate": 2.6811189521246893e-05, "loss": 0.0797, "step": 18770 }, { "epoch": 0.94, "learning_rate": 2.6798497233081183e-05, "loss": 0.0491, "step": 18780 }, { "epoch": 0.94, "learning_rate": 2.6785804944915473e-05, "loss": 0.0134, "step": 18790 }, { "epoch": 0.94, "learning_rate": 2.677311265674976e-05, "loss": 0.1239, "step": 18800 }, { "epoch": 0.94, "learning_rate": 2.676042036858405e-05, "loss": 0.1611, "step": 18810 }, { "epoch": 0.94, "learning_rate": 2.674772808041834e-05, "loss": 0.1042, "step": 18820 }, { "epoch": 0.94, "learning_rate": 2.6735035792252626e-05, "loss": 0.0412, "step": 18830 }, { "epoch": 0.94, "learning_rate": 2.6722343504086916e-05, "loss": 0.0696, "step": 18840 }, { "epoch": 0.95, "learning_rate": 2.6709651215921206e-05, "loss": 0.0682, "step": 18850 }, { "epoch": 0.95, "learning_rate": 2.6696958927755496e-05, "loss": 0.0696, "step": 18860 }, { "epoch": 0.95, "learning_rate": 2.668426663958979e-05, "loss": 0.0697, "step": 18870 }, { "epoch": 0.95, "learning_rate": 2.667157435142408e-05, "loss": 0.0872, "step": 18880 }, { "epoch": 0.95, "learning_rate": 2.6658882063258366e-05, "loss": 0.0649, "step": 18890 }, { "epoch": 0.95, "learning_rate": 2.6646189775092656e-05, "loss": 0.0633, "step": 18900 }, { "epoch": 0.95, "learning_rate": 2.6633497486926946e-05, "loss": 0.062, "step": 18910 }, { "epoch": 0.95, "learning_rate": 2.6620805198761232e-05, "loss": 0.0352, "step": 18920 }, { "epoch": 0.95, "learning_rate": 2.6608112910595522e-05, "loss": 0.0751, "step": 18930 }, { "epoch": 0.95, "learning_rate": 2.6595420622429812e-05, "loss": 0.0512, "step": 18940 }, { "epoch": 0.95, "learning_rate": 2.6582728334264102e-05, "loss": 0.0486, "step": 18950 }, { "epoch": 0.95, "learning_rate": 2.657003604609839e-05, "loss": 0.053, "step": 18960 }, { "epoch": 0.95, "learning_rate": 2.655734375793268e-05, "loss": 0.0451, "step": 18970 }, { "epoch": 0.95, "learning_rate": 2.6544651469766972e-05, "loss": 0.0577, "step": 18980 }, { "epoch": 0.95, "learning_rate": 2.6531959181601262e-05, "loss": 0.0856, "step": 18990 }, { "epoch": 0.95, "learning_rate": 2.6519266893435552e-05, "loss": 0.0916, "step": 19000 }, { "epoch": 0.95, "learning_rate": 2.650657460526984e-05, "loss": 0.0522, "step": 19010 }, { "epoch": 0.95, "learning_rate": 2.649388231710413e-05, "loss": 0.0524, "step": 19020 }, { "epoch": 0.95, "learning_rate": 2.648119002893842e-05, "loss": 0.088, "step": 19030 }, { "epoch": 0.95, "learning_rate": 2.646849774077271e-05, "loss": 0.0552, "step": 19040 }, { "epoch": 0.96, "learning_rate": 2.6455805452606996e-05, "loss": 0.1381, "step": 19050 }, { "epoch": 0.96, "learning_rate": 2.6443113164441286e-05, "loss": 0.0517, "step": 19060 }, { "epoch": 0.96, "learning_rate": 2.6430420876275576e-05, "loss": 0.088, "step": 19070 }, { "epoch": 0.96, "learning_rate": 2.6417728588109862e-05, "loss": 0.0457, "step": 19080 }, { "epoch": 0.96, "learning_rate": 2.6405036299944152e-05, "loss": 0.0554, "step": 19090 }, { "epoch": 0.96, "learning_rate": 2.6392344011778446e-05, "loss": 0.0762, "step": 19100 }, { "epoch": 0.96, "learning_rate": 2.6379651723612736e-05, "loss": 0.083, "step": 19110 }, { "epoch": 0.96, "learning_rate": 2.6366959435447026e-05, "loss": 0.1036, "step": 19120 }, { "epoch": 0.96, "learning_rate": 2.6354267147281315e-05, "loss": 0.0363, "step": 19130 }, { "epoch": 0.96, "learning_rate": 2.6341574859115602e-05, "loss": 0.0766, "step": 19140 }, { "epoch": 0.96, "learning_rate": 2.6328882570949892e-05, "loss": 0.1042, "step": 19150 }, { "epoch": 0.96, "learning_rate": 2.6316190282784182e-05, "loss": 0.0513, "step": 19160 }, { "epoch": 0.96, "learning_rate": 2.630349799461847e-05, "loss": 0.0479, "step": 19170 }, { "epoch": 0.96, "learning_rate": 2.629080570645276e-05, "loss": 0.0704, "step": 19180 }, { "epoch": 0.96, "learning_rate": 2.627811341828705e-05, "loss": 0.0481, "step": 19190 }, { "epoch": 0.96, "learning_rate": 2.626542113012134e-05, "loss": 0.0875, "step": 19200 }, { "epoch": 0.96, "learning_rate": 2.6252728841955632e-05, "loss": 0.0717, "step": 19210 }, { "epoch": 0.96, "learning_rate": 2.6240036553789922e-05, "loss": 0.1084, "step": 19220 }, { "epoch": 0.96, "learning_rate": 2.622734426562421e-05, "loss": 0.0749, "step": 19230 }, { "epoch": 0.96, "learning_rate": 2.62146519774585e-05, "loss": 0.0746, "step": 19240 }, { "epoch": 0.97, "learning_rate": 2.620195968929279e-05, "loss": 0.1212, "step": 19250 }, { "epoch": 0.97, "learning_rate": 2.6189267401127075e-05, "loss": 0.0754, "step": 19260 }, { "epoch": 0.97, "learning_rate": 2.6176575112961365e-05, "loss": 0.0871, "step": 19270 }, { "epoch": 0.97, "learning_rate": 2.6163882824795655e-05, "loss": 0.0617, "step": 19280 }, { "epoch": 0.97, "learning_rate": 2.6151190536629945e-05, "loss": 0.1063, "step": 19290 }, { "epoch": 0.97, "learning_rate": 2.6138498248464232e-05, "loss": 0.0854, "step": 19300 }, { "epoch": 0.97, "learning_rate": 2.6125805960298522e-05, "loss": 0.1131, "step": 19310 }, { "epoch": 0.97, "learning_rate": 2.6113113672132812e-05, "loss": 0.1135, "step": 19320 }, { "epoch": 0.97, "learning_rate": 2.6100421383967105e-05, "loss": 0.0501, "step": 19330 }, { "epoch": 0.97, "learning_rate": 2.6087729095801395e-05, "loss": 0.0834, "step": 19340 }, { "epoch": 0.97, "learning_rate": 2.6075036807635682e-05, "loss": 0.0908, "step": 19350 }, { "epoch": 0.97, "learning_rate": 2.6062344519469972e-05, "loss": 0.0333, "step": 19360 }, { "epoch": 0.97, "learning_rate": 2.6049652231304262e-05, "loss": 0.0662, "step": 19370 }, { "epoch": 0.97, "learning_rate": 2.6036959943138552e-05, "loss": 0.1015, "step": 19380 }, { "epoch": 0.97, "learning_rate": 2.6024267654972838e-05, "loss": 0.0539, "step": 19390 }, { "epoch": 0.97, "learning_rate": 2.6011575366807128e-05, "loss": 0.0584, "step": 19400 }, { "epoch": 0.97, "learning_rate": 2.5998883078641418e-05, "loss": 0.0732, "step": 19410 }, { "epoch": 0.97, "learning_rate": 2.5986190790475705e-05, "loss": 0.0921, "step": 19420 }, { "epoch": 0.97, "learning_rate": 2.5973498502309995e-05, "loss": 0.0837, "step": 19430 }, { "epoch": 0.97, "learning_rate": 2.5960806214144288e-05, "loss": 0.0709, "step": 19440 }, { "epoch": 0.98, "learning_rate": 2.5948113925978578e-05, "loss": 0.0998, "step": 19450 }, { "epoch": 0.98, "learning_rate": 2.5935421637812868e-05, "loss": 0.1104, "step": 19460 }, { "epoch": 0.98, "learning_rate": 2.5922729349647158e-05, "loss": 0.0351, "step": 19470 }, { "epoch": 0.98, "learning_rate": 2.5910037061481445e-05, "loss": 0.0669, "step": 19480 }, { "epoch": 0.98, "learning_rate": 2.5897344773315735e-05, "loss": 0.0686, "step": 19490 }, { "epoch": 0.98, "learning_rate": 2.5884652485150025e-05, "loss": 0.0736, "step": 19500 }, { "epoch": 0.98, "learning_rate": 2.587196019698431e-05, "loss": 0.0511, "step": 19510 }, { "epoch": 0.98, "learning_rate": 2.58592679088186e-05, "loss": 0.0678, "step": 19520 }, { "epoch": 0.98, "learning_rate": 2.584657562065289e-05, "loss": 0.1051, "step": 19530 }, { "epoch": 0.98, "learning_rate": 2.583388333248718e-05, "loss": 0.0457, "step": 19540 }, { "epoch": 0.98, "learning_rate": 2.5821191044321468e-05, "loss": 0.0919, "step": 19550 }, { "epoch": 0.98, "learning_rate": 2.5808498756155765e-05, "loss": 0.0924, "step": 19560 }, { "epoch": 0.98, "learning_rate": 2.579580646799005e-05, "loss": 0.098, "step": 19570 }, { "epoch": 0.98, "learning_rate": 2.578311417982434e-05, "loss": 0.0727, "step": 19580 }, { "epoch": 0.98, "learning_rate": 2.577042189165863e-05, "loss": 0.148, "step": 19590 }, { "epoch": 0.98, "learning_rate": 2.5757729603492918e-05, "loss": 0.0729, "step": 19600 }, { "epoch": 0.98, "learning_rate": 2.5745037315327208e-05, "loss": 0.0379, "step": 19610 }, { "epoch": 0.98, "learning_rate": 2.5732345027161498e-05, "loss": 0.1091, "step": 19620 }, { "epoch": 0.98, "learning_rate": 2.5719652738995788e-05, "loss": 0.0551, "step": 19630 }, { "epoch": 0.98, "learning_rate": 2.5706960450830074e-05, "loss": 0.0676, "step": 19640 }, { "epoch": 0.99, "learning_rate": 2.5694268162664364e-05, "loss": 0.0974, "step": 19650 }, { "epoch": 0.99, "learning_rate": 2.5681575874498654e-05, "loss": 0.0407, "step": 19660 }, { "epoch": 0.99, "learning_rate": 2.5668883586332948e-05, "loss": 0.0388, "step": 19670 }, { "epoch": 0.99, "learning_rate": 2.5656191298167238e-05, "loss": 0.0602, "step": 19680 }, { "epoch": 0.99, "learning_rate": 2.5643499010001524e-05, "loss": 0.1309, "step": 19690 }, { "epoch": 0.99, "learning_rate": 2.5630806721835814e-05, "loss": 0.0807, "step": 19700 }, { "epoch": 0.99, "learning_rate": 2.5618114433670104e-05, "loss": 0.0886, "step": 19710 }, { "epoch": 0.99, "learning_rate": 2.5605422145504394e-05, "loss": 0.0715, "step": 19720 }, { "epoch": 0.99, "learning_rate": 2.559272985733868e-05, "loss": 0.0762, "step": 19730 }, { "epoch": 0.99, "learning_rate": 2.558003756917297e-05, "loss": 0.0784, "step": 19740 }, { "epoch": 0.99, "learning_rate": 2.556734528100726e-05, "loss": 0.0497, "step": 19750 }, { "epoch": 0.99, "learning_rate": 2.5554652992841548e-05, "loss": 0.095, "step": 19760 }, { "epoch": 0.99, "learning_rate": 2.5541960704675838e-05, "loss": 0.0773, "step": 19770 }, { "epoch": 0.99, "learning_rate": 2.5529268416510128e-05, "loss": 0.0616, "step": 19780 }, { "epoch": 0.99, "learning_rate": 2.551657612834442e-05, "loss": 0.0571, "step": 19790 }, { "epoch": 0.99, "learning_rate": 2.550388384017871e-05, "loss": 0.0494, "step": 19800 }, { "epoch": 0.99, "learning_rate": 2.5491191552013e-05, "loss": 0.0845, "step": 19810 }, { "epoch": 0.99, "learning_rate": 2.5478499263847288e-05, "loss": 0.0998, "step": 19820 }, { "epoch": 0.99, "learning_rate": 2.5465806975681578e-05, "loss": 0.0787, "step": 19830 }, { "epoch": 0.99, "learning_rate": 2.5453114687515868e-05, "loss": 0.0612, "step": 19840 }, { "epoch": 1.0, "learning_rate": 2.5440422399350154e-05, "loss": 0.0484, "step": 19850 }, { "epoch": 1.0, "learning_rate": 2.5427730111184444e-05, "loss": 0.061, "step": 19860 }, { "epoch": 1.0, "learning_rate": 2.5415037823018734e-05, "loss": 0.0904, "step": 19870 }, { "epoch": 1.0, "learning_rate": 2.5402345534853024e-05, "loss": 0.1476, "step": 19880 }, { "epoch": 1.0, "learning_rate": 2.538965324668731e-05, "loss": 0.1475, "step": 19890 }, { "epoch": 1.0, "learning_rate": 2.5376960958521607e-05, "loss": 0.0939, "step": 19900 }, { "epoch": 1.0, "learning_rate": 2.5364268670355894e-05, "loss": 0.1074, "step": 19910 }, { "epoch": 1.0, "learning_rate": 2.5351576382190184e-05, "loss": 0.0759, "step": 19920 }, { "epoch": 1.0, "learning_rate": 2.5338884094024474e-05, "loss": 0.0777, "step": 19930 }, { "epoch": 1.0, "learning_rate": 2.532619180585876e-05, "loss": 0.0748, "step": 19940 }, { "epoch": 1.0, "learning_rate": 2.531349951769305e-05, "loss": 0.1388, "step": 19950 }, { "epoch": 1.0, "learning_rate": 2.530080722952734e-05, "loss": 0.115, "step": 19960 }, { "epoch": 1.0, "learning_rate": 2.528811494136163e-05, "loss": 0.1078, "step": 19970 }, { "epoch": 1.0, "learning_rate": 2.5275422653195917e-05, "loss": 0.1471, "step": 19980 }, { "epoch": 1.0, "learning_rate": 2.5262730365030207e-05, "loss": 0.0583, "step": 19990 }, { "epoch": 1.0, "learning_rate": 2.5250038076864497e-05, "loss": 0.0807, "step": 20000 }, { "epoch": 1.0, "learning_rate": 2.5237345788698784e-05, "loss": 0.0697, "step": 20010 }, { "epoch": 1.0, "learning_rate": 2.522465350053308e-05, "loss": 0.0874, "step": 20020 }, { "epoch": 1.0, "learning_rate": 2.5211961212367367e-05, "loss": 0.1243, "step": 20030 }, { "epoch": 1.0, "learning_rate": 2.5199268924201657e-05, "loss": 0.1294, "step": 20040 }, { "epoch": 1.01, "learning_rate": 2.5186576636035947e-05, "loss": 0.1117, "step": 20050 }, { "epoch": 1.01, "learning_rate": 2.5173884347870237e-05, "loss": 0.0786, "step": 20060 }, { "epoch": 1.01, "learning_rate": 2.5161192059704524e-05, "loss": 0.1095, "step": 20070 }, { "epoch": 1.01, "learning_rate": 2.5148499771538814e-05, "loss": 0.1471, "step": 20080 }, { "epoch": 1.01, "learning_rate": 2.5135807483373104e-05, "loss": 0.1366, "step": 20090 }, { "epoch": 1.01, "learning_rate": 2.512311519520739e-05, "loss": 0.0929, "step": 20100 }, { "epoch": 1.01, "learning_rate": 2.511042290704168e-05, "loss": 0.0786, "step": 20110 }, { "epoch": 1.01, "learning_rate": 2.509773061887597e-05, "loss": 0.0852, "step": 20120 }, { "epoch": 1.01, "learning_rate": 2.5085038330710264e-05, "loss": 0.1051, "step": 20130 }, { "epoch": 1.01, "learning_rate": 2.5072346042544554e-05, "loss": 0.1413, "step": 20140 }, { "epoch": 1.01, "learning_rate": 2.5059653754378844e-05, "loss": 0.0502, "step": 20150 }, { "epoch": 1.01, "learning_rate": 2.504696146621313e-05, "loss": 0.0369, "step": 20160 }, { "epoch": 1.01, "learning_rate": 2.503426917804742e-05, "loss": 0.0818, "step": 20170 }, { "epoch": 1.01, "learning_rate": 2.502157688988171e-05, "loss": 0.05, "step": 20180 }, { "epoch": 1.01, "learning_rate": 2.5008884601715997e-05, "loss": 0.0868, "step": 20190 }, { "epoch": 1.01, "learning_rate": 2.4996192313550287e-05, "loss": 0.0497, "step": 20200 }, { "epoch": 1.01, "learning_rate": 2.4983500025384577e-05, "loss": 0.0413, "step": 20210 }, { "epoch": 1.01, "learning_rate": 2.4970807737218867e-05, "loss": 0.1064, "step": 20220 }, { "epoch": 1.01, "learning_rate": 2.4958115449053157e-05, "loss": 0.0419, "step": 20230 }, { "epoch": 1.01, "learning_rate": 2.4945423160887447e-05, "loss": 0.1047, "step": 20240 }, { "epoch": 1.02, "learning_rate": 2.4932730872721737e-05, "loss": 0.0547, "step": 20250 }, { "epoch": 1.02, "learning_rate": 2.4920038584556023e-05, "loss": 0.0823, "step": 20260 }, { "epoch": 1.02, "learning_rate": 2.4907346296390313e-05, "loss": 0.0785, "step": 20270 }, { "epoch": 1.02, "learning_rate": 2.4894654008224603e-05, "loss": 0.0903, "step": 20280 }, { "epoch": 1.02, "learning_rate": 2.4881961720058893e-05, "loss": 0.0443, "step": 20290 }, { "epoch": 1.02, "learning_rate": 2.4869269431893183e-05, "loss": 0.0516, "step": 20300 }, { "epoch": 1.02, "learning_rate": 2.4856577143727473e-05, "loss": 0.0208, "step": 20310 }, { "epoch": 1.02, "learning_rate": 2.484388485556176e-05, "loss": 0.0924, "step": 20320 }, { "epoch": 1.02, "learning_rate": 2.4831192567396053e-05, "loss": 0.0812, "step": 20330 }, { "epoch": 1.02, "learning_rate": 2.4818500279230343e-05, "loss": 0.0494, "step": 20340 }, { "epoch": 1.02, "learning_rate": 2.480580799106463e-05, "loss": 0.1161, "step": 20350 }, { "epoch": 1.02, "learning_rate": 2.479311570289892e-05, "loss": 0.0828, "step": 20360 }, { "epoch": 1.02, "learning_rate": 2.478042341473321e-05, "loss": 0.0899, "step": 20370 }, { "epoch": 1.02, "learning_rate": 2.4767731126567496e-05, "loss": 0.067, "step": 20380 }, { "epoch": 1.02, "learning_rate": 2.475503883840179e-05, "loss": 0.0549, "step": 20390 }, { "epoch": 1.02, "learning_rate": 2.474234655023608e-05, "loss": 0.0416, "step": 20400 }, { "epoch": 1.02, "learning_rate": 2.4729654262070366e-05, "loss": 0.0431, "step": 20410 }, { "epoch": 1.02, "learning_rate": 2.4716961973904656e-05, "loss": 0.0262, "step": 20420 }, { "epoch": 1.02, "learning_rate": 2.4704269685738946e-05, "loss": 0.0535, "step": 20430 }, { "epoch": 1.02, "learning_rate": 2.4691577397573233e-05, "loss": 0.0293, "step": 20440 }, { "epoch": 1.03, "learning_rate": 2.4678885109407526e-05, "loss": 0.0909, "step": 20450 }, { "epoch": 1.03, "learning_rate": 2.4666192821241816e-05, "loss": 0.1396, "step": 20460 }, { "epoch": 1.03, "learning_rate": 2.4653500533076103e-05, "loss": 0.0308, "step": 20470 }, { "epoch": 1.03, "learning_rate": 2.4640808244910393e-05, "loss": 0.0769, "step": 20480 }, { "epoch": 1.03, "learning_rate": 2.4628115956744683e-05, "loss": 0.0763, "step": 20490 }, { "epoch": 1.03, "learning_rate": 2.4615423668578973e-05, "loss": 0.1033, "step": 20500 }, { "epoch": 1.03, "learning_rate": 2.4602731380413263e-05, "loss": 0.1041, "step": 20510 }, { "epoch": 1.03, "learning_rate": 2.4590039092247553e-05, "loss": 0.1001, "step": 20520 }, { "epoch": 1.03, "learning_rate": 2.457734680408184e-05, "loss": 0.0763, "step": 20530 }, { "epoch": 1.03, "learning_rate": 2.456465451591613e-05, "loss": 0.0471, "step": 20540 }, { "epoch": 1.03, "learning_rate": 2.455196222775042e-05, "loss": 0.0571, "step": 20550 }, { "epoch": 1.03, "learning_rate": 2.453926993958471e-05, "loss": 0.0672, "step": 20560 }, { "epoch": 1.03, "learning_rate": 2.4526577651419e-05, "loss": 0.066, "step": 20570 }, { "epoch": 1.03, "learning_rate": 2.451388536325329e-05, "loss": 0.0942, "step": 20580 }, { "epoch": 1.03, "learning_rate": 2.450119307508758e-05, "loss": 0.0777, "step": 20590 }, { "epoch": 1.03, "learning_rate": 2.4488500786921866e-05, "loss": 0.071, "step": 20600 }, { "epoch": 1.03, "learning_rate": 2.4475808498756156e-05, "loss": 0.1014, "step": 20610 }, { "epoch": 1.03, "learning_rate": 2.4463116210590446e-05, "loss": 0.1055, "step": 20620 }, { "epoch": 1.03, "learning_rate": 2.4450423922424736e-05, "loss": 0.061, "step": 20630 }, { "epoch": 1.03, "learning_rate": 2.4437731634259026e-05, "loss": 0.1343, "step": 20640 }, { "epoch": 1.04, "learning_rate": 2.4425039346093316e-05, "loss": 0.0719, "step": 20650 }, { "epoch": 1.04, "learning_rate": 2.4412347057927603e-05, "loss": 0.15, "step": 20660 }, { "epoch": 1.04, "learning_rate": 2.4399654769761893e-05, "loss": 0.0762, "step": 20670 }, { "epoch": 1.04, "learning_rate": 2.4386962481596186e-05, "loss": 0.0553, "step": 20680 }, { "epoch": 1.04, "learning_rate": 2.4374270193430473e-05, "loss": 0.0745, "step": 20690 }, { "epoch": 1.04, "learning_rate": 2.4361577905264763e-05, "loss": 0.0943, "step": 20700 }, { "epoch": 1.04, "learning_rate": 2.4348885617099053e-05, "loss": 0.0436, "step": 20710 }, { "epoch": 1.04, "learning_rate": 2.433619332893334e-05, "loss": 0.0891, "step": 20720 }, { "epoch": 1.04, "learning_rate": 2.432350104076763e-05, "loss": 0.1015, "step": 20730 }, { "epoch": 1.04, "learning_rate": 2.4310808752601923e-05, "loss": 0.1043, "step": 20740 }, { "epoch": 1.04, "learning_rate": 2.429811646443621e-05, "loss": 0.0788, "step": 20750 }, { "epoch": 1.04, "learning_rate": 2.42854241762705e-05, "loss": 0.0909, "step": 20760 }, { "epoch": 1.04, "learning_rate": 2.427273188810479e-05, "loss": 0.1015, "step": 20770 }, { "epoch": 1.04, "learning_rate": 2.4260039599939076e-05, "loss": 0.1345, "step": 20780 }, { "epoch": 1.04, "learning_rate": 2.424734731177337e-05, "loss": 0.0676, "step": 20790 }, { "epoch": 1.04, "learning_rate": 2.423465502360766e-05, "loss": 0.068, "step": 20800 }, { "epoch": 1.04, "learning_rate": 2.4221962735441946e-05, "loss": 0.1361, "step": 20810 }, { "epoch": 1.04, "learning_rate": 2.4209270447276236e-05, "loss": 0.075, "step": 20820 }, { "epoch": 1.04, "learning_rate": 2.4196578159110526e-05, "loss": 0.0737, "step": 20830 }, { "epoch": 1.04, "learning_rate": 2.4183885870944816e-05, "loss": 0.1249, "step": 20840 }, { "epoch": 1.05, "learning_rate": 2.4171193582779106e-05, "loss": 0.094, "step": 20850 }, { "epoch": 1.05, "learning_rate": 2.4158501294613396e-05, "loss": 0.0785, "step": 20860 }, { "epoch": 1.05, "learning_rate": 2.4145809006447682e-05, "loss": 0.0472, "step": 20870 }, { "epoch": 1.05, "learning_rate": 2.4133116718281972e-05, "loss": 0.0479, "step": 20880 }, { "epoch": 1.05, "learning_rate": 2.4120424430116262e-05, "loss": 0.1103, "step": 20890 }, { "epoch": 1.05, "learning_rate": 2.4107732141950552e-05, "loss": 0.1232, "step": 20900 }, { "epoch": 1.05, "learning_rate": 2.4095039853784842e-05, "loss": 0.0504, "step": 20910 }, { "epoch": 1.05, "learning_rate": 2.4082347565619132e-05, "loss": 0.0668, "step": 20920 }, { "epoch": 1.05, "learning_rate": 2.4069655277453422e-05, "loss": 0.083, "step": 20930 }, { "epoch": 1.05, "learning_rate": 2.405696298928771e-05, "loss": 0.0606, "step": 20940 }, { "epoch": 1.05, "learning_rate": 2.4044270701122e-05, "loss": 0.041, "step": 20950 }, { "epoch": 1.05, "learning_rate": 2.403157841295629e-05, "loss": 0.0296, "step": 20960 }, { "epoch": 1.05, "learning_rate": 2.401888612479058e-05, "loss": 0.101, "step": 20970 }, { "epoch": 1.05, "learning_rate": 2.400619383662487e-05, "loss": 0.0513, "step": 20980 }, { "epoch": 1.05, "learning_rate": 2.399350154845916e-05, "loss": 0.0599, "step": 20990 }, { "epoch": 1.05, "learning_rate": 2.3980809260293445e-05, "loss": 0.0743, "step": 21000 }, { "epoch": 1.05, "learning_rate": 2.3968116972127735e-05, "loss": 0.1016, "step": 21010 }, { "epoch": 1.05, "learning_rate": 2.3955424683962025e-05, "loss": 0.085, "step": 21020 }, { "epoch": 1.05, "learning_rate": 2.3942732395796315e-05, "loss": 0.0994, "step": 21030 }, { "epoch": 1.05, "learning_rate": 2.3930040107630605e-05, "loss": 0.0642, "step": 21040 }, { "epoch": 1.06, "learning_rate": 2.3917347819464895e-05, "loss": 0.059, "step": 21050 }, { "epoch": 1.06, "learning_rate": 2.3904655531299182e-05, "loss": 0.0434, "step": 21060 }, { "epoch": 1.06, "learning_rate": 2.3891963243133472e-05, "loss": 0.058, "step": 21070 }, { "epoch": 1.06, "learning_rate": 2.3879270954967765e-05, "loss": 0.0642, "step": 21080 }, { "epoch": 1.06, "learning_rate": 2.3866578666802052e-05, "loss": 0.0767, "step": 21090 }, { "epoch": 1.06, "learning_rate": 2.3853886378636342e-05, "loss": 0.0609, "step": 21100 }, { "epoch": 1.06, "learning_rate": 2.3841194090470632e-05, "loss": 0.0635, "step": 21110 }, { "epoch": 1.06, "learning_rate": 2.382850180230492e-05, "loss": 0.0837, "step": 21120 }, { "epoch": 1.06, "learning_rate": 2.381580951413921e-05, "loss": 0.076, "step": 21130 }, { "epoch": 1.06, "learning_rate": 2.3803117225973502e-05, "loss": 0.0703, "step": 21140 }, { "epoch": 1.06, "learning_rate": 2.379042493780779e-05, "loss": 0.0825, "step": 21150 }, { "epoch": 1.06, "learning_rate": 2.377773264964208e-05, "loss": 0.0946, "step": 21160 }, { "epoch": 1.06, "learning_rate": 2.376504036147637e-05, "loss": 0.0492, "step": 21170 }, { "epoch": 1.06, "learning_rate": 2.375234807331066e-05, "loss": 0.0643, "step": 21180 }, { "epoch": 1.06, "learning_rate": 2.3739655785144945e-05, "loss": 0.0704, "step": 21190 }, { "epoch": 1.06, "learning_rate": 2.372696349697924e-05, "loss": 0.1192, "step": 21200 }, { "epoch": 1.06, "learning_rate": 2.3714271208813525e-05, "loss": 0.0746, "step": 21210 }, { "epoch": 1.06, "learning_rate": 2.3701578920647815e-05, "loss": 0.0948, "step": 21220 }, { "epoch": 1.06, "learning_rate": 2.3688886632482105e-05, "loss": 0.0858, "step": 21230 }, { "epoch": 1.06, "learning_rate": 2.3676194344316395e-05, "loss": 0.0362, "step": 21240 }, { "epoch": 1.07, "learning_rate": 2.366350205615068e-05, "loss": 0.0512, "step": 21250 }, { "epoch": 1.07, "learning_rate": 2.3650809767984975e-05, "loss": 0.0948, "step": 21260 }, { "epoch": 1.07, "learning_rate": 2.3638117479819265e-05, "loss": 0.1003, "step": 21270 }, { "epoch": 1.07, "learning_rate": 2.362542519165355e-05, "loss": 0.0972, "step": 21280 }, { "epoch": 1.07, "learning_rate": 2.361273290348784e-05, "loss": 0.1329, "step": 21290 }, { "epoch": 1.07, "learning_rate": 2.360004061532213e-05, "loss": 0.0771, "step": 21300 }, { "epoch": 1.07, "learning_rate": 2.358734832715642e-05, "loss": 0.0615, "step": 21310 }, { "epoch": 1.07, "learning_rate": 2.357465603899071e-05, "loss": 0.0654, "step": 21320 }, { "epoch": 1.07, "learning_rate": 2.3561963750825e-05, "loss": 0.0234, "step": 21330 }, { "epoch": 1.07, "learning_rate": 2.3549271462659288e-05, "loss": 0.0219, "step": 21340 }, { "epoch": 1.07, "learning_rate": 2.3536579174493578e-05, "loss": 0.0639, "step": 21350 }, { "epoch": 1.07, "learning_rate": 2.3523886886327868e-05, "loss": 0.1166, "step": 21360 }, { "epoch": 1.07, "learning_rate": 2.3511194598162158e-05, "loss": 0.0441, "step": 21370 }, { "epoch": 1.07, "learning_rate": 2.3498502309996448e-05, "loss": 0.0455, "step": 21380 }, { "epoch": 1.07, "learning_rate": 2.3485810021830738e-05, "loss": 0.0404, "step": 21390 }, { "epoch": 1.07, "learning_rate": 2.3473117733665025e-05, "loss": 0.043, "step": 21400 }, { "epoch": 1.07, "learning_rate": 2.3460425445499315e-05, "loss": 0.1103, "step": 21410 }, { "epoch": 1.07, "learning_rate": 2.3447733157333605e-05, "loss": 0.0508, "step": 21420 }, { "epoch": 1.07, "learning_rate": 2.3435040869167895e-05, "loss": 0.0814, "step": 21430 }, { "epoch": 1.07, "learning_rate": 2.3422348581002185e-05, "loss": 0.08, "step": 21440 }, { "epoch": 1.08, "learning_rate": 2.3409656292836475e-05, "loss": 0.0792, "step": 21450 }, { "epoch": 1.08, "learning_rate": 2.339696400467076e-05, "loss": 0.0902, "step": 21460 }, { "epoch": 1.08, "learning_rate": 2.338427171650505e-05, "loss": 0.0832, "step": 21470 }, { "epoch": 1.08, "learning_rate": 2.337157942833934e-05, "loss": 0.0719, "step": 21480 }, { "epoch": 1.08, "learning_rate": 2.335888714017363e-05, "loss": 0.0564, "step": 21490 }, { "epoch": 1.08, "learning_rate": 2.334619485200792e-05, "loss": 0.1654, "step": 21500 }, { "epoch": 1.08, "learning_rate": 2.333350256384221e-05, "loss": 0.0923, "step": 21510 }, { "epoch": 1.08, "learning_rate": 2.33208102756765e-05, "loss": 0.0584, "step": 21520 }, { "epoch": 1.08, "learning_rate": 2.3308117987510788e-05, "loss": 0.0667, "step": 21530 }, { "epoch": 1.08, "learning_rate": 2.329542569934508e-05, "loss": 0.0394, "step": 21540 }, { "epoch": 1.08, "learning_rate": 2.3282733411179368e-05, "loss": 0.0436, "step": 21550 }, { "epoch": 1.08, "learning_rate": 2.3270041123013658e-05, "loss": 0.0662, "step": 21560 }, { "epoch": 1.08, "learning_rate": 2.3257348834847948e-05, "loss": 0.0801, "step": 21570 }, { "epoch": 1.08, "learning_rate": 2.3244656546682238e-05, "loss": 0.1047, "step": 21580 }, { "epoch": 1.08, "learning_rate": 2.3231964258516524e-05, "loss": 0.0826, "step": 21590 }, { "epoch": 1.08, "learning_rate": 2.3219271970350818e-05, "loss": 0.055, "step": 21600 }, { "epoch": 1.08, "learning_rate": 2.3206579682185108e-05, "loss": 0.0594, "step": 21610 }, { "epoch": 1.08, "learning_rate": 2.3193887394019394e-05, "loss": 0.0468, "step": 21620 }, { "epoch": 1.08, "learning_rate": 2.3181195105853684e-05, "loss": 0.11, "step": 21630 }, { "epoch": 1.08, "learning_rate": 2.3168502817687974e-05, "loss": 0.0361, "step": 21640 }, { "epoch": 1.09, "learning_rate": 2.315581052952226e-05, "loss": 0.0981, "step": 21650 }, { "epoch": 1.09, "learning_rate": 2.3143118241356554e-05, "loss": 0.0621, "step": 21660 }, { "epoch": 1.09, "learning_rate": 2.3130425953190844e-05, "loss": 0.087, "step": 21670 }, { "epoch": 1.09, "learning_rate": 2.311773366502513e-05, "loss": 0.0619, "step": 21680 }, { "epoch": 1.09, "learning_rate": 2.310504137685942e-05, "loss": 0.1481, "step": 21690 }, { "epoch": 1.09, "learning_rate": 2.309234908869371e-05, "loss": 0.0951, "step": 21700 }, { "epoch": 1.09, "learning_rate": 2.3079656800527997e-05, "loss": 0.1046, "step": 21710 }, { "epoch": 1.09, "learning_rate": 2.306696451236229e-05, "loss": 0.0771, "step": 21720 }, { "epoch": 1.09, "learning_rate": 2.305427222419658e-05, "loss": 0.066, "step": 21730 }, { "epoch": 1.09, "learning_rate": 2.3041579936030867e-05, "loss": 0.0574, "step": 21740 }, { "epoch": 1.09, "learning_rate": 2.3028887647865157e-05, "loss": 0.0654, "step": 21750 }, { "epoch": 1.09, "learning_rate": 2.3016195359699447e-05, "loss": 0.0693, "step": 21760 }, { "epoch": 1.09, "learning_rate": 2.3003503071533737e-05, "loss": 0.024, "step": 21770 }, { "epoch": 1.09, "learning_rate": 2.2990810783368027e-05, "loss": 0.0561, "step": 21780 }, { "epoch": 1.09, "learning_rate": 2.2978118495202317e-05, "loss": 0.0481, "step": 21790 }, { "epoch": 1.09, "learning_rate": 2.2965426207036604e-05, "loss": 0.0901, "step": 21800 }, { "epoch": 1.09, "learning_rate": 2.2952733918870894e-05, "loss": 0.0896, "step": 21810 }, { "epoch": 1.09, "learning_rate": 2.2940041630705184e-05, "loss": 0.0645, "step": 21820 }, { "epoch": 1.09, "learning_rate": 2.2927349342539474e-05, "loss": 0.0398, "step": 21830 }, { "epoch": 1.09, "learning_rate": 2.2914657054373764e-05, "loss": 0.0555, "step": 21840 }, { "epoch": 1.1, "learning_rate": 2.2901964766208054e-05, "loss": 0.0258, "step": 21850 }, { "epoch": 1.1, "learning_rate": 2.2889272478042344e-05, "loss": 0.0641, "step": 21860 }, { "epoch": 1.1, "learning_rate": 2.287658018987663e-05, "loss": 0.0914, "step": 21870 }, { "epoch": 1.1, "learning_rate": 2.286388790171092e-05, "loss": 0.0391, "step": 21880 }, { "epoch": 1.1, "learning_rate": 2.285119561354521e-05, "loss": 0.1058, "step": 21890 }, { "epoch": 1.1, "learning_rate": 2.28385033253795e-05, "loss": 0.0624, "step": 21900 }, { "epoch": 1.1, "learning_rate": 2.282581103721379e-05, "loss": 0.0376, "step": 21910 }, { "epoch": 1.1, "learning_rate": 2.281311874904808e-05, "loss": 0.0711, "step": 21920 }, { "epoch": 1.1, "learning_rate": 2.2800426460882367e-05, "loss": 0.0937, "step": 21930 }, { "epoch": 1.1, "learning_rate": 2.2787734172716657e-05, "loss": 0.1126, "step": 21940 }, { "epoch": 1.1, "learning_rate": 2.277504188455095e-05, "loss": 0.0598, "step": 21950 }, { "epoch": 1.1, "learning_rate": 2.2762349596385237e-05, "loss": 0.0966, "step": 21960 }, { "epoch": 1.1, "learning_rate": 2.2749657308219527e-05, "loss": 0.0683, "step": 21970 }, { "epoch": 1.1, "learning_rate": 2.2736965020053817e-05, "loss": 0.0579, "step": 21980 }, { "epoch": 1.1, "learning_rate": 2.2724272731888104e-05, "loss": 0.0718, "step": 21990 }, { "epoch": 1.1, "learning_rate": 2.2711580443722397e-05, "loss": 0.1196, "step": 22000 }, { "epoch": 1.1, "learning_rate": 2.2698888155556687e-05, "loss": 0.0348, "step": 22010 }, { "epoch": 1.1, "learning_rate": 2.2686195867390974e-05, "loss": 0.067, "step": 22020 }, { "epoch": 1.1, "learning_rate": 2.2673503579225264e-05, "loss": 0.0493, "step": 22030 }, { "epoch": 1.1, "learning_rate": 2.2660811291059554e-05, "loss": 0.0769, "step": 22040 }, { "epoch": 1.11, "learning_rate": 2.264811900289384e-05, "loss": 0.0399, "step": 22050 }, { "epoch": 1.11, "learning_rate": 2.2635426714728134e-05, "loss": 0.0795, "step": 22060 }, { "epoch": 1.11, "learning_rate": 2.2622734426562424e-05, "loss": 0.0822, "step": 22070 }, { "epoch": 1.11, "learning_rate": 2.261004213839671e-05, "loss": 0.0417, "step": 22080 }, { "epoch": 1.11, "learning_rate": 2.2597349850231e-05, "loss": 0.0247, "step": 22090 }, { "epoch": 1.11, "learning_rate": 2.258465756206529e-05, "loss": 0.0787, "step": 22100 }, { "epoch": 1.11, "learning_rate": 2.257196527389958e-05, "loss": 0.0542, "step": 22110 }, { "epoch": 1.11, "learning_rate": 2.255927298573387e-05, "loss": 0.0508, "step": 22120 }, { "epoch": 1.11, "learning_rate": 2.254658069756816e-05, "loss": 0.0625, "step": 22130 }, { "epoch": 1.11, "learning_rate": 2.2533888409402447e-05, "loss": 0.0362, "step": 22140 }, { "epoch": 1.11, "learning_rate": 2.2521196121236737e-05, "loss": 0.0995, "step": 22150 }, { "epoch": 1.11, "learning_rate": 2.2508503833071027e-05, "loss": 0.0491, "step": 22160 }, { "epoch": 1.11, "learning_rate": 2.2495811544905317e-05, "loss": 0.0517, "step": 22170 }, { "epoch": 1.11, "learning_rate": 2.2483119256739607e-05, "loss": 0.0956, "step": 22180 }, { "epoch": 1.11, "learning_rate": 2.2470426968573897e-05, "loss": 0.0863, "step": 22190 }, { "epoch": 1.11, "learning_rate": 2.2457734680408187e-05, "loss": 0.0601, "step": 22200 }, { "epoch": 1.11, "learning_rate": 2.2445042392242473e-05, "loss": 0.0943, "step": 22210 }, { "epoch": 1.11, "learning_rate": 2.2432350104076763e-05, "loss": 0.0309, "step": 22220 }, { "epoch": 1.11, "learning_rate": 2.2419657815911053e-05, "loss": 0.1531, "step": 22230 }, { "epoch": 1.11, "learning_rate": 2.2406965527745343e-05, "loss": 0.0788, "step": 22240 }, { "epoch": 1.12, "learning_rate": 2.2394273239579633e-05, "loss": 0.0502, "step": 22250 }, { "epoch": 1.12, "learning_rate": 2.2381580951413923e-05, "loss": 0.0643, "step": 22260 }, { "epoch": 1.12, "learning_rate": 2.236888866324821e-05, "loss": 0.0503, "step": 22270 }, { "epoch": 1.12, "learning_rate": 2.23561963750825e-05, "loss": 0.0615, "step": 22280 }, { "epoch": 1.12, "learning_rate": 2.2343504086916793e-05, "loss": 0.0709, "step": 22290 }, { "epoch": 1.12, "learning_rate": 2.233081179875108e-05, "loss": 0.0691, "step": 22300 }, { "epoch": 1.12, "learning_rate": 2.231811951058537e-05, "loss": 0.0525, "step": 22310 }, { "epoch": 1.12, "learning_rate": 2.230542722241966e-05, "loss": 0.0287, "step": 22320 }, { "epoch": 1.12, "learning_rate": 2.2292734934253946e-05, "loss": 0.0632, "step": 22330 }, { "epoch": 1.12, "learning_rate": 2.2280042646088236e-05, "loss": 0.0613, "step": 22340 }, { "epoch": 1.12, "learning_rate": 2.226735035792253e-05, "loss": 0.0537, "step": 22350 }, { "epoch": 1.12, "learning_rate": 2.2254658069756816e-05, "loss": 0.0511, "step": 22360 }, { "epoch": 1.12, "learning_rate": 2.2241965781591106e-05, "loss": 0.038, "step": 22370 }, { "epoch": 1.12, "learning_rate": 2.2229273493425396e-05, "loss": 0.0986, "step": 22380 }, { "epoch": 1.12, "learning_rate": 2.2216581205259683e-05, "loss": 0.0564, "step": 22390 }, { "epoch": 1.12, "learning_rate": 2.2203888917093973e-05, "loss": 0.0495, "step": 22400 }, { "epoch": 1.12, "learning_rate": 2.2191196628928266e-05, "loss": 0.1161, "step": 22410 }, { "epoch": 1.12, "learning_rate": 2.2178504340762553e-05, "loss": 0.078, "step": 22420 }, { "epoch": 1.12, "learning_rate": 2.2165812052596843e-05, "loss": 0.0452, "step": 22430 }, { "epoch": 1.12, "learning_rate": 2.2153119764431133e-05, "loss": 0.0869, "step": 22440 }, { "epoch": 1.13, "learning_rate": 2.2140427476265423e-05, "loss": 0.0621, "step": 22450 }, { "epoch": 1.13, "learning_rate": 2.2127735188099713e-05, "loss": 0.0682, "step": 22460 }, { "epoch": 1.13, "learning_rate": 2.2115042899934003e-05, "loss": 0.0418, "step": 22470 }, { "epoch": 1.13, "learning_rate": 2.210235061176829e-05, "loss": 0.0799, "step": 22480 }, { "epoch": 1.13, "learning_rate": 2.208965832360258e-05, "loss": 0.0807, "step": 22490 }, { "epoch": 1.13, "learning_rate": 2.207696603543687e-05, "loss": 0.0454, "step": 22500 }, { "epoch": 1.13, "learning_rate": 2.206427374727116e-05, "loss": 0.0256, "step": 22510 }, { "epoch": 1.13, "learning_rate": 2.205158145910545e-05, "loss": 0.0456, "step": 22520 }, { "epoch": 1.13, "learning_rate": 2.203888917093974e-05, "loss": 0.0952, "step": 22530 }, { "epoch": 1.13, "learning_rate": 2.202619688277403e-05, "loss": 0.0527, "step": 22540 }, { "epoch": 1.13, "learning_rate": 2.2013504594608316e-05, "loss": 0.0191, "step": 22550 }, { "epoch": 1.13, "learning_rate": 2.2000812306442606e-05, "loss": 0.0211, "step": 22560 }, { "epoch": 1.13, "learning_rate": 2.1988120018276896e-05, "loss": 0.062, "step": 22570 }, { "epoch": 1.13, "learning_rate": 2.1975427730111186e-05, "loss": 0.0814, "step": 22580 }, { "epoch": 1.13, "learning_rate": 2.1962735441945476e-05, "loss": 0.0548, "step": 22590 }, { "epoch": 1.13, "learning_rate": 2.1950043153779766e-05, "loss": 0.0802, "step": 22600 }, { "epoch": 1.13, "learning_rate": 2.1937350865614052e-05, "loss": 0.0677, "step": 22610 }, { "epoch": 1.13, "learning_rate": 2.1924658577448342e-05, "loss": 0.0817, "step": 22620 }, { "epoch": 1.13, "learning_rate": 2.1911966289282632e-05, "loss": 0.071, "step": 22630 }, { "epoch": 1.14, "learning_rate": 2.1899274001116922e-05, "loss": 0.0466, "step": 22640 }, { "epoch": 1.14, "learning_rate": 2.1886581712951212e-05, "loss": 0.0298, "step": 22650 }, { "epoch": 1.14, "learning_rate": 2.1873889424785502e-05, "loss": 0.0381, "step": 22660 }, { "epoch": 1.14, "learning_rate": 2.186119713661979e-05, "loss": 0.0463, "step": 22670 }, { "epoch": 1.14, "learning_rate": 2.184850484845408e-05, "loss": 0.1219, "step": 22680 }, { "epoch": 1.14, "learning_rate": 2.1835812560288372e-05, "loss": 0.0601, "step": 22690 }, { "epoch": 1.14, "learning_rate": 2.182312027212266e-05, "loss": 0.076, "step": 22700 }, { "epoch": 1.14, "learning_rate": 2.181042798395695e-05, "loss": 0.0756, "step": 22710 }, { "epoch": 1.14, "learning_rate": 2.179773569579124e-05, "loss": 0.0962, "step": 22720 }, { "epoch": 1.14, "learning_rate": 2.1785043407625526e-05, "loss": 0.0546, "step": 22730 }, { "epoch": 1.14, "learning_rate": 2.1772351119459816e-05, "loss": 0.0383, "step": 22740 }, { "epoch": 1.14, "learning_rate": 2.175965883129411e-05, "loss": 0.0844, "step": 22750 }, { "epoch": 1.14, "learning_rate": 2.1746966543128396e-05, "loss": 0.0328, "step": 22760 }, { "epoch": 1.14, "learning_rate": 2.1734274254962686e-05, "loss": 0.0478, "step": 22770 }, { "epoch": 1.14, "learning_rate": 2.1721581966796976e-05, "loss": 0.0518, "step": 22780 }, { "epoch": 1.14, "learning_rate": 2.1708889678631266e-05, "loss": 0.0345, "step": 22790 }, { "epoch": 1.14, "learning_rate": 2.1696197390465552e-05, "loss": 0.0974, "step": 22800 }, { "epoch": 1.14, "learning_rate": 2.1683505102299846e-05, "loss": 0.0547, "step": 22810 }, { "epoch": 1.14, "learning_rate": 2.1670812814134132e-05, "loss": 0.0484, "step": 22820 }, { "epoch": 1.14, "learning_rate": 2.1658120525968422e-05, "loss": 0.0589, "step": 22830 }, { "epoch": 1.15, "learning_rate": 2.1645428237802712e-05, "loss": 0.0561, "step": 22840 }, { "epoch": 1.15, "learning_rate": 2.1632735949637002e-05, "loss": 0.0489, "step": 22850 }, { "epoch": 1.15, "learning_rate": 2.162004366147129e-05, "loss": 0.0713, "step": 22860 }, { "epoch": 1.15, "learning_rate": 2.1607351373305582e-05, "loss": 0.0368, "step": 22870 }, { "epoch": 1.15, "learning_rate": 2.1594659085139872e-05, "loss": 0.0819, "step": 22880 }, { "epoch": 1.15, "learning_rate": 2.158196679697416e-05, "loss": 0.0339, "step": 22890 }, { "epoch": 1.15, "learning_rate": 2.156927450880845e-05, "loss": 0.0506, "step": 22900 }, { "epoch": 1.15, "learning_rate": 2.155658222064274e-05, "loss": 0.0884, "step": 22910 }, { "epoch": 1.15, "learning_rate": 2.154388993247703e-05, "loss": 0.0173, "step": 22920 }, { "epoch": 1.15, "learning_rate": 2.153119764431132e-05, "loss": 0.0556, "step": 22930 }, { "epoch": 1.15, "learning_rate": 2.151850535614561e-05, "loss": 0.0496, "step": 22940 }, { "epoch": 1.15, "learning_rate": 2.1505813067979895e-05, "loss": 0.0799, "step": 22950 }, { "epoch": 1.15, "learning_rate": 2.1493120779814185e-05, "loss": 0.0695, "step": 22960 }, { "epoch": 1.15, "learning_rate": 2.1480428491648475e-05, "loss": 0.0462, "step": 22970 }, { "epoch": 1.15, "learning_rate": 2.1467736203482765e-05, "loss": 0.0448, "step": 22980 }, { "epoch": 1.15, "learning_rate": 2.1455043915317055e-05, "loss": 0.1078, "step": 22990 }, { "epoch": 1.15, "learning_rate": 2.1442351627151345e-05, "loss": 0.0692, "step": 23000 }, { "epoch": 1.15, "learning_rate": 2.1429659338985632e-05, "loss": 0.0538, "step": 23010 }, { "epoch": 1.15, "learning_rate": 2.1416967050819922e-05, "loss": 0.0605, "step": 23020 }, { "epoch": 1.15, "learning_rate": 2.1404274762654212e-05, "loss": 0.0434, "step": 23030 }, { "epoch": 1.16, "learning_rate": 2.1391582474488502e-05, "loss": 0.0277, "step": 23040 }, { "epoch": 1.16, "learning_rate": 2.1378890186322792e-05, "loss": 0.0656, "step": 23050 }, { "epoch": 1.16, "learning_rate": 2.1366197898157082e-05, "loss": 0.0802, "step": 23060 }, { "epoch": 1.16, "learning_rate": 2.135350560999137e-05, "loss": 0.0754, "step": 23070 }, { "epoch": 1.16, "learning_rate": 2.134081332182566e-05, "loss": 0.0631, "step": 23080 }, { "epoch": 1.16, "learning_rate": 2.1328121033659948e-05, "loss": 0.1201, "step": 23090 }, { "epoch": 1.16, "learning_rate": 2.1315428745494238e-05, "loss": 0.0828, "step": 23100 }, { "epoch": 1.16, "learning_rate": 2.1302736457328528e-05, "loss": 0.0428, "step": 23110 }, { "epoch": 1.16, "learning_rate": 2.1290044169162818e-05, "loss": 0.0352, "step": 23120 }, { "epoch": 1.16, "learning_rate": 2.1277351880997108e-05, "loss": 0.0716, "step": 23130 }, { "epoch": 1.16, "learning_rate": 2.1264659592831395e-05, "loss": 0.0583, "step": 23140 }, { "epoch": 1.16, "learning_rate": 2.1251967304665688e-05, "loss": 0.0471, "step": 23150 }, { "epoch": 1.16, "learning_rate": 2.1239275016499975e-05, "loss": 0.088, "step": 23160 }, { "epoch": 1.16, "learning_rate": 2.1226582728334265e-05, "loss": 0.0417, "step": 23170 }, { "epoch": 1.16, "learning_rate": 2.1213890440168555e-05, "loss": 0.1045, "step": 23180 }, { "epoch": 1.16, "learning_rate": 2.1201198152002845e-05, "loss": 0.1632, "step": 23190 }, { "epoch": 1.16, "learning_rate": 2.118850586383713e-05, "loss": 0.0819, "step": 23200 }, { "epoch": 1.16, "learning_rate": 2.1175813575671425e-05, "loss": 0.0822, "step": 23210 }, { "epoch": 1.16, "learning_rate": 2.1163121287505715e-05, "loss": 0.0322, "step": 23220 }, { "epoch": 1.16, "learning_rate": 2.115042899934e-05, "loss": 0.0669, "step": 23230 }, { "epoch": 1.17, "learning_rate": 2.113773671117429e-05, "loss": 0.0564, "step": 23240 }, { "epoch": 1.17, "learning_rate": 2.112504442300858e-05, "loss": 0.1186, "step": 23250 }, { "epoch": 1.17, "learning_rate": 2.1112352134842868e-05, "loss": 0.0753, "step": 23260 }, { "epoch": 1.17, "learning_rate": 2.109965984667716e-05, "loss": 0.0491, "step": 23270 }, { "epoch": 1.17, "learning_rate": 2.108696755851145e-05, "loss": 0.0334, "step": 23280 }, { "epoch": 1.17, "learning_rate": 2.1074275270345738e-05, "loss": 0.1088, "step": 23290 }, { "epoch": 1.17, "learning_rate": 2.1061582982180028e-05, "loss": 0.0903, "step": 23300 }, { "epoch": 1.17, "learning_rate": 2.1048890694014318e-05, "loss": 0.1167, "step": 23310 }, { "epoch": 1.17, "learning_rate": 2.1036198405848605e-05, "loss": 0.0524, "step": 23320 }, { "epoch": 1.17, "learning_rate": 2.1023506117682898e-05, "loss": 0.1588, "step": 23330 }, { "epoch": 1.17, "learning_rate": 2.1010813829517188e-05, "loss": 0.1295, "step": 23340 }, { "epoch": 1.17, "learning_rate": 2.0998121541351474e-05, "loss": 0.0616, "step": 23350 }, { "epoch": 1.17, "learning_rate": 2.0985429253185764e-05, "loss": 0.1786, "step": 23360 }, { "epoch": 1.17, "learning_rate": 2.0972736965020054e-05, "loss": 0.1484, "step": 23370 }, { "epoch": 1.17, "learning_rate": 2.0960044676854344e-05, "loss": 0.1794, "step": 23380 }, { "epoch": 1.17, "learning_rate": 2.0947352388688634e-05, "loss": 0.1774, "step": 23390 }, { "epoch": 1.17, "learning_rate": 2.0934660100522924e-05, "loss": 0.151, "step": 23400 }, { "epoch": 1.17, "learning_rate": 2.092196781235721e-05, "loss": 0.0903, "step": 23410 }, { "epoch": 1.17, "learning_rate": 2.09092755241915e-05, "loss": 0.0606, "step": 23420 }, { "epoch": 1.17, "learning_rate": 2.089658323602579e-05, "loss": 0.1149, "step": 23430 }, { "epoch": 1.18, "learning_rate": 2.088389094786008e-05, "loss": 0.0674, "step": 23440 }, { "epoch": 1.18, "learning_rate": 2.087119865969437e-05, "loss": 0.1451, "step": 23450 }, { "epoch": 1.18, "learning_rate": 2.085850637152866e-05, "loss": 0.1204, "step": 23460 }, { "epoch": 1.18, "learning_rate": 2.084581408336295e-05, "loss": 0.2073, "step": 23470 }, { "epoch": 1.18, "learning_rate": 2.0833121795197238e-05, "loss": 0.2041, "step": 23480 }, { "epoch": 1.18, "learning_rate": 2.0820429507031528e-05, "loss": 0.1754, "step": 23490 }, { "epoch": 1.18, "learning_rate": 2.080773721886582e-05, "loss": 0.122, "step": 23500 }, { "epoch": 1.18, "learning_rate": 2.0795044930700108e-05, "loss": 0.0885, "step": 23510 }, { "epoch": 1.18, "learning_rate": 2.0782352642534398e-05, "loss": 0.038, "step": 23520 }, { "epoch": 1.18, "learning_rate": 2.0769660354368688e-05, "loss": 0.1115, "step": 23530 }, { "epoch": 1.18, "learning_rate": 2.0756968066202974e-05, "loss": 0.1064, "step": 23540 }, { "epoch": 1.18, "learning_rate": 2.0744275778037264e-05, "loss": 0.1122, "step": 23550 }, { "epoch": 1.18, "learning_rate": 2.0731583489871558e-05, "loss": 0.149, "step": 23560 }, { "epoch": 1.18, "learning_rate": 2.0718891201705844e-05, "loss": 0.1803, "step": 23570 }, { "epoch": 1.18, "learning_rate": 2.0706198913540134e-05, "loss": 0.1009, "step": 23580 }, { "epoch": 1.18, "learning_rate": 2.0693506625374424e-05, "loss": 0.243, "step": 23590 }, { "epoch": 1.18, "learning_rate": 2.068081433720871e-05, "loss": 0.1525, "step": 23600 }, { "epoch": 1.18, "learning_rate": 2.0668122049043e-05, "loss": 0.1507, "step": 23610 }, { "epoch": 1.18, "learning_rate": 2.0655429760877294e-05, "loss": 0.1884, "step": 23620 }, { "epoch": 1.18, "learning_rate": 2.064273747271158e-05, "loss": 0.172, "step": 23630 }, { "epoch": 1.19, "learning_rate": 2.063004518454587e-05, "loss": 0.1484, "step": 23640 }, { "epoch": 1.19, "learning_rate": 2.061735289638016e-05, "loss": 0.091, "step": 23650 }, { "epoch": 1.19, "learning_rate": 2.0604660608214447e-05, "loss": 0.1062, "step": 23660 }, { "epoch": 1.19, "learning_rate": 2.059196832004874e-05, "loss": 0.2096, "step": 23670 }, { "epoch": 1.19, "learning_rate": 2.057927603188303e-05, "loss": 0.1261, "step": 23680 }, { "epoch": 1.19, "learning_rate": 2.0566583743717317e-05, "loss": 0.1085, "step": 23690 }, { "epoch": 1.19, "learning_rate": 2.0553891455551607e-05, "loss": 0.0881, "step": 23700 }, { "epoch": 1.19, "learning_rate": 2.0541199167385897e-05, "loss": 0.1067, "step": 23710 }, { "epoch": 1.19, "learning_rate": 2.0528506879220187e-05, "loss": 0.1442, "step": 23720 }, { "epoch": 1.19, "learning_rate": 2.0515814591054477e-05, "loss": 0.1226, "step": 23730 }, { "epoch": 1.19, "learning_rate": 2.0503122302888767e-05, "loss": 0.1433, "step": 23740 }, { "epoch": 1.19, "learning_rate": 2.0490430014723054e-05, "loss": 0.1023, "step": 23750 }, { "epoch": 1.19, "learning_rate": 2.0477737726557344e-05, "loss": 0.1446, "step": 23760 }, { "epoch": 1.19, "learning_rate": 2.0465045438391634e-05, "loss": 0.132, "step": 23770 }, { "epoch": 1.19, "learning_rate": 2.0452353150225924e-05, "loss": 0.201, "step": 23780 }, { "epoch": 1.19, "learning_rate": 2.0439660862060214e-05, "loss": 0.1413, "step": 23790 }, { "epoch": 1.19, "learning_rate": 2.0426968573894504e-05, "loss": 0.16, "step": 23800 }, { "epoch": 1.19, "learning_rate": 2.0414276285728794e-05, "loss": 0.2141, "step": 23810 }, { "epoch": 1.19, "learning_rate": 2.040158399756308e-05, "loss": 0.1474, "step": 23820 }, { "epoch": 1.19, "learning_rate": 2.038889170939737e-05, "loss": 0.086, "step": 23830 }, { "epoch": 1.2, "learning_rate": 2.037619942123166e-05, "loss": 0.1349, "step": 23840 }, { "epoch": 1.2, "learning_rate": 2.036350713306595e-05, "loss": 0.1261, "step": 23850 }, { "epoch": 1.2, "learning_rate": 2.035081484490024e-05, "loss": 0.1421, "step": 23860 }, { "epoch": 1.2, "learning_rate": 2.033812255673453e-05, "loss": 0.0954, "step": 23870 }, { "epoch": 1.2, "learning_rate": 2.0325430268568817e-05, "loss": 0.2254, "step": 23880 }, { "epoch": 1.2, "learning_rate": 2.0312737980403107e-05, "loss": 0.1136, "step": 23890 }, { "epoch": 1.2, "learning_rate": 2.03000456922374e-05, "loss": 0.1294, "step": 23900 }, { "epoch": 1.2, "learning_rate": 2.0287353404071687e-05, "loss": 0.111, "step": 23910 }, { "epoch": 1.2, "learning_rate": 2.0274661115905977e-05, "loss": 0.1404, "step": 23920 }, { "epoch": 1.2, "learning_rate": 2.0261968827740267e-05, "loss": 0.0983, "step": 23930 }, { "epoch": 1.2, "learning_rate": 2.0249276539574553e-05, "loss": 0.1219, "step": 23940 }, { "epoch": 1.2, "learning_rate": 2.0236584251408843e-05, "loss": 0.1413, "step": 23950 }, { "epoch": 1.2, "learning_rate": 2.0223891963243137e-05, "loss": 0.1717, "step": 23960 }, { "epoch": 1.2, "learning_rate": 2.0211199675077423e-05, "loss": 0.0742, "step": 23970 }, { "epoch": 1.2, "learning_rate": 2.0198507386911713e-05, "loss": 0.0684, "step": 23980 }, { "epoch": 1.2, "learning_rate": 2.0185815098746003e-05, "loss": 0.1238, "step": 23990 }, { "epoch": 1.2, "learning_rate": 2.017312281058029e-05, "loss": 0.1373, "step": 24000 }, { "epoch": 1.2, "learning_rate": 2.016043052241458e-05, "loss": 0.063, "step": 24010 }, { "epoch": 1.2, "learning_rate": 2.0147738234248873e-05, "loss": 0.1085, "step": 24020 }, { "epoch": 1.2, "learning_rate": 2.013504594608316e-05, "loss": 0.1976, "step": 24030 }, { "epoch": 1.21, "learning_rate": 2.012235365791745e-05, "loss": 0.1504, "step": 24040 }, { "epoch": 1.21, "learning_rate": 2.010966136975174e-05, "loss": 0.092, "step": 24050 }, { "epoch": 1.21, "learning_rate": 2.009696908158603e-05, "loss": 0.1638, "step": 24060 }, { "epoch": 1.21, "learning_rate": 2.0084276793420317e-05, "loss": 0.1244, "step": 24070 }, { "epoch": 1.21, "learning_rate": 2.007158450525461e-05, "loss": 0.0955, "step": 24080 }, { "epoch": 1.21, "learning_rate": 2.00588922170889e-05, "loss": 0.0742, "step": 24090 }, { "epoch": 1.21, "learning_rate": 2.0046199928923186e-05, "loss": 0.0859, "step": 24100 }, { "epoch": 1.21, "learning_rate": 2.0033507640757476e-05, "loss": 0.0942, "step": 24110 }, { "epoch": 1.21, "learning_rate": 2.0020815352591766e-05, "loss": 0.2104, "step": 24120 }, { "epoch": 1.21, "learning_rate": 2.0008123064426056e-05, "loss": 0.1695, "step": 24130 }, { "epoch": 1.21, "learning_rate": 1.9995430776260346e-05, "loss": 0.1402, "step": 24140 }, { "epoch": 1.21, "learning_rate": 1.9982738488094636e-05, "loss": 0.2342, "step": 24150 }, { "epoch": 1.21, "learning_rate": 1.9970046199928923e-05, "loss": 0.1006, "step": 24160 }, { "epoch": 1.21, "learning_rate": 1.9957353911763213e-05, "loss": 0.1452, "step": 24170 }, { "epoch": 1.21, "learning_rate": 1.9944661623597503e-05, "loss": 0.1497, "step": 24180 }, { "epoch": 1.21, "learning_rate": 1.9931969335431793e-05, "loss": 0.1079, "step": 24190 }, { "epoch": 1.21, "learning_rate": 1.9919277047266083e-05, "loss": 0.1037, "step": 24200 }, { "epoch": 1.21, "learning_rate": 1.9906584759100373e-05, "loss": 0.1188, "step": 24210 }, { "epoch": 1.21, "learning_rate": 1.989389247093466e-05, "loss": 0.1665, "step": 24220 }, { "epoch": 1.21, "learning_rate": 1.988120018276895e-05, "loss": 0.1266, "step": 24230 }, { "epoch": 1.22, "learning_rate": 1.986850789460324e-05, "loss": 0.061, "step": 24240 }, { "epoch": 1.22, "learning_rate": 1.985581560643753e-05, "loss": 0.0913, "step": 24250 }, { "epoch": 1.22, "learning_rate": 1.984312331827182e-05, "loss": 0.1361, "step": 24260 }, { "epoch": 1.22, "learning_rate": 1.983043103010611e-05, "loss": 0.1674, "step": 24270 }, { "epoch": 1.22, "learning_rate": 1.9817738741940396e-05, "loss": 0.0912, "step": 24280 }, { "epoch": 1.22, "learning_rate": 1.9805046453774686e-05, "loss": 0.0907, "step": 24290 }, { "epoch": 1.22, "learning_rate": 1.9792354165608976e-05, "loss": 0.1364, "step": 24300 }, { "epoch": 1.22, "learning_rate": 1.9779661877443266e-05, "loss": 0.0931, "step": 24310 }, { "epoch": 1.22, "learning_rate": 1.9766969589277556e-05, "loss": 0.1063, "step": 24320 }, { "epoch": 1.22, "learning_rate": 1.9754277301111846e-05, "loss": 0.0723, "step": 24330 }, { "epoch": 1.22, "learning_rate": 1.9741585012946136e-05, "loss": 0.1472, "step": 24340 }, { "epoch": 1.22, "learning_rate": 1.9728892724780423e-05, "loss": 0.1153, "step": 24350 }, { "epoch": 1.22, "learning_rate": 1.9716200436614716e-05, "loss": 0.1848, "step": 24360 }, { "epoch": 1.22, "learning_rate": 1.9703508148449003e-05, "loss": 0.0504, "step": 24370 }, { "epoch": 1.22, "learning_rate": 1.9690815860283293e-05, "loss": 0.1245, "step": 24380 }, { "epoch": 1.22, "learning_rate": 1.9678123572117583e-05, "loss": 0.1402, "step": 24390 }, { "epoch": 1.22, "learning_rate": 1.9665431283951873e-05, "loss": 0.0973, "step": 24400 }, { "epoch": 1.22, "learning_rate": 1.965273899578616e-05, "loss": 0.1202, "step": 24410 }, { "epoch": 1.22, "learning_rate": 1.9640046707620453e-05, "loss": 0.1605, "step": 24420 }, { "epoch": 1.22, "learning_rate": 1.9627354419454743e-05, "loss": 0.1234, "step": 24430 }, { "epoch": 1.23, "learning_rate": 1.961466213128903e-05, "loss": 0.0851, "step": 24440 }, { "epoch": 1.23, "learning_rate": 1.960196984312332e-05, "loss": 0.0849, "step": 24450 }, { "epoch": 1.23, "learning_rate": 1.958927755495761e-05, "loss": 0.1831, "step": 24460 }, { "epoch": 1.23, "learning_rate": 1.9576585266791896e-05, "loss": 0.1357, "step": 24470 }, { "epoch": 1.23, "learning_rate": 1.956389297862619e-05, "loss": 0.1314, "step": 24480 }, { "epoch": 1.23, "learning_rate": 1.955120069046048e-05, "loss": 0.0792, "step": 24490 }, { "epoch": 1.23, "learning_rate": 1.9538508402294766e-05, "loss": 0.1024, "step": 24500 }, { "epoch": 1.23, "learning_rate": 1.9525816114129056e-05, "loss": 0.1046, "step": 24510 }, { "epoch": 1.23, "learning_rate": 1.9513123825963346e-05, "loss": 0.1037, "step": 24520 }, { "epoch": 1.23, "learning_rate": 1.9500431537797632e-05, "loss": 0.2002, "step": 24530 }, { "epoch": 1.23, "learning_rate": 1.9487739249631926e-05, "loss": 0.2447, "step": 24540 }, { "epoch": 1.23, "learning_rate": 1.9475046961466216e-05, "loss": 0.2203, "step": 24550 }, { "epoch": 1.23, "learning_rate": 1.9462354673300502e-05, "loss": 0.1264, "step": 24560 }, { "epoch": 1.23, "learning_rate": 1.9449662385134792e-05, "loss": 0.121, "step": 24570 }, { "epoch": 1.23, "learning_rate": 1.9436970096969082e-05, "loss": 0.0901, "step": 24580 }, { "epoch": 1.23, "learning_rate": 1.9424277808803372e-05, "loss": 0.1897, "step": 24590 }, { "epoch": 1.23, "learning_rate": 1.9411585520637662e-05, "loss": 0.1099, "step": 24600 }, { "epoch": 1.23, "learning_rate": 1.9398893232471952e-05, "loss": 0.1338, "step": 24610 }, { "epoch": 1.23, "learning_rate": 1.938620094430624e-05, "loss": 0.1267, "step": 24620 }, { "epoch": 1.23, "learning_rate": 1.937350865614053e-05, "loss": 0.1253, "step": 24630 }, { "epoch": 1.24, "learning_rate": 1.936081636797482e-05, "loss": 0.1272, "step": 24640 }, { "epoch": 1.24, "learning_rate": 1.934812407980911e-05, "loss": 0.0553, "step": 24650 }, { "epoch": 1.24, "learning_rate": 1.93354317916434e-05, "loss": 0.1677, "step": 24660 }, { "epoch": 1.24, "learning_rate": 1.932273950347769e-05, "loss": 0.1266, "step": 24670 }, { "epoch": 1.24, "learning_rate": 1.931004721531198e-05, "loss": 0.0511, "step": 24680 }, { "epoch": 1.24, "learning_rate": 1.9297354927146265e-05, "loss": 0.2055, "step": 24690 }, { "epoch": 1.24, "learning_rate": 1.9284662638980555e-05, "loss": 0.1307, "step": 24700 }, { "epoch": 1.24, "learning_rate": 1.9271970350814845e-05, "loss": 0.109, "step": 24710 }, { "epoch": 1.24, "learning_rate": 1.9259278062649135e-05, "loss": 0.0727, "step": 24720 }, { "epoch": 1.24, "learning_rate": 1.9246585774483425e-05, "loss": 0.1057, "step": 24730 }, { "epoch": 1.24, "learning_rate": 1.9233893486317715e-05, "loss": 0.1051, "step": 24740 }, { "epoch": 1.24, "learning_rate": 1.9221201198152002e-05, "loss": 0.0906, "step": 24750 }, { "epoch": 1.24, "learning_rate": 1.9208508909986292e-05, "loss": 0.1528, "step": 24760 }, { "epoch": 1.24, "learning_rate": 1.9195816621820585e-05, "loss": 0.0972, "step": 24770 }, { "epoch": 1.24, "learning_rate": 1.9183124333654872e-05, "loss": 0.0551, "step": 24780 }, { "epoch": 1.24, "learning_rate": 1.9170432045489162e-05, "loss": 0.1489, "step": 24790 }, { "epoch": 1.24, "learning_rate": 1.9157739757323452e-05, "loss": 0.1938, "step": 24800 }, { "epoch": 1.24, "learning_rate": 1.914504746915774e-05, "loss": 0.0494, "step": 24810 }, { "epoch": 1.24, "learning_rate": 1.9132355180992032e-05, "loss": 0.1588, "step": 24820 }, { "epoch": 1.24, "learning_rate": 1.9119662892826322e-05, "loss": 0.1692, "step": 24830 }, { "epoch": 1.25, "learning_rate": 1.910697060466061e-05, "loss": 0.1623, "step": 24840 }, { "epoch": 1.25, "learning_rate": 1.90942783164949e-05, "loss": 0.0775, "step": 24850 }, { "epoch": 1.25, "learning_rate": 1.908158602832919e-05, "loss": 0.0906, "step": 24860 }, { "epoch": 1.25, "learning_rate": 1.9068893740163475e-05, "loss": 0.1153, "step": 24870 }, { "epoch": 1.25, "learning_rate": 1.905620145199777e-05, "loss": 0.1136, "step": 24880 }, { "epoch": 1.25, "learning_rate": 1.904350916383206e-05, "loss": 0.0745, "step": 24890 }, { "epoch": 1.25, "learning_rate": 1.9030816875666345e-05, "loss": 0.0694, "step": 24900 }, { "epoch": 1.25, "learning_rate": 1.9018124587500635e-05, "loss": 0.1344, "step": 24910 }, { "epoch": 1.25, "learning_rate": 1.9005432299334925e-05, "loss": 0.1841, "step": 24920 }, { "epoch": 1.25, "learning_rate": 1.8992740011169215e-05, "loss": 0.0797, "step": 24930 }, { "epoch": 1.25, "learning_rate": 1.8980047723003505e-05, "loss": 0.1417, "step": 24940 }, { "epoch": 1.25, "learning_rate": 1.8967355434837795e-05, "loss": 0.1726, "step": 24950 }, { "epoch": 1.25, "learning_rate": 1.895466314667208e-05, "loss": 0.147, "step": 24960 }, { "epoch": 1.25, "learning_rate": 1.894197085850637e-05, "loss": 0.1546, "step": 24970 }, { "epoch": 1.25, "learning_rate": 1.892927857034066e-05, "loss": 0.114, "step": 24980 }, { "epoch": 1.25, "learning_rate": 1.891658628217495e-05, "loss": 0.2145, "step": 24990 }, { "epoch": 1.25, "learning_rate": 1.890389399400924e-05, "loss": 0.1103, "step": 25000 }, { "epoch": 1.25, "learning_rate": 1.889120170584353e-05, "loss": 0.1412, "step": 25010 }, { "epoch": 1.25, "learning_rate": 1.887850941767782e-05, "loss": 0.1737, "step": 25020 }, { "epoch": 1.25, "learning_rate": 1.8865817129512108e-05, "loss": 0.057, "step": 25030 }, { "epoch": 1.26, "learning_rate": 1.8853124841346398e-05, "loss": 0.1096, "step": 25040 }, { "epoch": 1.26, "learning_rate": 1.8840432553180688e-05, "loss": 0.0952, "step": 25050 }, { "epoch": 1.26, "learning_rate": 1.8827740265014978e-05, "loss": 0.1225, "step": 25060 }, { "epoch": 1.26, "learning_rate": 1.8815047976849268e-05, "loss": 0.1286, "step": 25070 }, { "epoch": 1.26, "learning_rate": 1.8802355688683558e-05, "loss": 0.172, "step": 25080 }, { "epoch": 1.26, "learning_rate": 1.8789663400517845e-05, "loss": 0.0985, "step": 25090 }, { "epoch": 1.26, "learning_rate": 1.8776971112352135e-05, "loss": 0.1069, "step": 25100 }, { "epoch": 1.26, "learning_rate": 1.8764278824186428e-05, "loss": 0.1685, "step": 25110 }, { "epoch": 1.26, "learning_rate": 1.8751586536020715e-05, "loss": 0.0993, "step": 25120 }, { "epoch": 1.26, "learning_rate": 1.8738894247855005e-05, "loss": 0.0749, "step": 25130 }, { "epoch": 1.26, "learning_rate": 1.8726201959689295e-05, "loss": 0.0691, "step": 25140 }, { "epoch": 1.26, "learning_rate": 1.871350967152358e-05, "loss": 0.0591, "step": 25150 }, { "epoch": 1.26, "learning_rate": 1.870081738335787e-05, "loss": 0.0709, "step": 25160 }, { "epoch": 1.26, "learning_rate": 1.8688125095192165e-05, "loss": 0.0414, "step": 25170 }, { "epoch": 1.26, "learning_rate": 1.867543280702645e-05, "loss": 0.0692, "step": 25180 }, { "epoch": 1.26, "learning_rate": 1.866274051886074e-05, "loss": 0.0451, "step": 25190 }, { "epoch": 1.26, "learning_rate": 1.865004823069503e-05, "loss": 0.0631, "step": 25200 }, { "epoch": 1.26, "learning_rate": 1.8637355942529318e-05, "loss": 0.0652, "step": 25210 }, { "epoch": 1.26, "learning_rate": 1.8624663654363608e-05, "loss": 0.0819, "step": 25220 }, { "epoch": 1.26, "learning_rate": 1.86119713661979e-05, "loss": 0.0678, "step": 25230 }, { "epoch": 1.27, "learning_rate": 1.8599279078032188e-05, "loss": 0.0627, "step": 25240 }, { "epoch": 1.27, "learning_rate": 1.8586586789866478e-05, "loss": 0.0785, "step": 25250 }, { "epoch": 1.27, "learning_rate": 1.8573894501700768e-05, "loss": 0.0659, "step": 25260 }, { "epoch": 1.27, "learning_rate": 1.8561202213535058e-05, "loss": 0.0557, "step": 25270 }, { "epoch": 1.27, "learning_rate": 1.8548509925369348e-05, "loss": 0.0583, "step": 25280 }, { "epoch": 1.27, "learning_rate": 1.8535817637203638e-05, "loss": 0.0905, "step": 25290 }, { "epoch": 1.27, "learning_rate": 1.8523125349037924e-05, "loss": 0.0701, "step": 25300 }, { "epoch": 1.27, "learning_rate": 1.8510433060872214e-05, "loss": 0.0667, "step": 25310 }, { "epoch": 1.27, "learning_rate": 1.8497740772706504e-05, "loss": 0.0878, "step": 25320 }, { "epoch": 1.27, "learning_rate": 1.8485048484540794e-05, "loss": 0.0808, "step": 25330 }, { "epoch": 1.27, "learning_rate": 1.8472356196375084e-05, "loss": 0.0527, "step": 25340 }, { "epoch": 1.27, "learning_rate": 1.8459663908209374e-05, "loss": 0.0564, "step": 25350 }, { "epoch": 1.27, "learning_rate": 1.8446971620043664e-05, "loss": 0.0315, "step": 25360 }, { "epoch": 1.27, "learning_rate": 1.843427933187795e-05, "loss": 0.0557, "step": 25370 }, { "epoch": 1.27, "learning_rate": 1.842158704371224e-05, "loss": 0.0587, "step": 25380 }, { "epoch": 1.27, "learning_rate": 1.840889475554653e-05, "loss": 0.0539, "step": 25390 }, { "epoch": 1.27, "learning_rate": 1.839620246738082e-05, "loss": 0.0559, "step": 25400 }, { "epoch": 1.27, "learning_rate": 1.838351017921511e-05, "loss": 0.0471, "step": 25410 }, { "epoch": 1.27, "learning_rate": 1.83708178910494e-05, "loss": 0.0784, "step": 25420 }, { "epoch": 1.27, "learning_rate": 1.8358125602883687e-05, "loss": 0.0579, "step": 25430 }, { "epoch": 1.28, "learning_rate": 1.8345433314717977e-05, "loss": 0.0649, "step": 25440 }, { "epoch": 1.28, "learning_rate": 1.8332741026552267e-05, "loss": 0.0515, "step": 25450 }, { "epoch": 1.28, "learning_rate": 1.8320048738386557e-05, "loss": 0.1013, "step": 25460 }, { "epoch": 1.28, "learning_rate": 1.8307356450220847e-05, "loss": 0.0654, "step": 25470 }, { "epoch": 1.28, "learning_rate": 1.8294664162055137e-05, "loss": 0.0617, "step": 25480 }, { "epoch": 1.28, "learning_rate": 1.8281971873889424e-05, "loss": 0.0399, "step": 25490 }, { "epoch": 1.28, "learning_rate": 1.8269279585723714e-05, "loss": 0.0395, "step": 25500 }, { "epoch": 1.28, "learning_rate": 1.8256587297558007e-05, "loss": 0.0825, "step": 25510 }, { "epoch": 1.28, "learning_rate": 1.8243895009392294e-05, "loss": 0.0571, "step": 25520 }, { "epoch": 1.28, "learning_rate": 1.8231202721226584e-05, "loss": 0.0468, "step": 25530 }, { "epoch": 1.28, "learning_rate": 1.8218510433060874e-05, "loss": 0.088, "step": 25540 }, { "epoch": 1.28, "learning_rate": 1.820581814489516e-05, "loss": 0.0726, "step": 25550 }, { "epoch": 1.28, "learning_rate": 1.819312585672945e-05, "loss": 0.0616, "step": 25560 }, { "epoch": 1.28, "learning_rate": 1.8180433568563744e-05, "loss": 0.0693, "step": 25570 }, { "epoch": 1.28, "learning_rate": 1.816774128039803e-05, "loss": 0.0492, "step": 25580 }, { "epoch": 1.28, "learning_rate": 1.815504899223232e-05, "loss": 0.0422, "step": 25590 }, { "epoch": 1.28, "learning_rate": 1.814235670406661e-05, "loss": 0.0371, "step": 25600 }, { "epoch": 1.28, "learning_rate": 1.81296644159009e-05, "loss": 0.0559, "step": 25610 }, { "epoch": 1.28, "learning_rate": 1.8116972127735187e-05, "loss": 0.0385, "step": 25620 }, { "epoch": 1.28, "learning_rate": 1.810427983956948e-05, "loss": 0.0307, "step": 25630 }, { "epoch": 1.29, "learning_rate": 1.8091587551403767e-05, "loss": 0.0648, "step": 25640 }, { "epoch": 1.29, "learning_rate": 1.8078895263238057e-05, "loss": 0.034, "step": 25650 }, { "epoch": 1.29, "learning_rate": 1.8066202975072347e-05, "loss": 0.0911, "step": 25660 }, { "epoch": 1.29, "learning_rate": 1.8053510686906637e-05, "loss": 0.0425, "step": 25670 }, { "epoch": 1.29, "learning_rate": 1.8040818398740924e-05, "loss": 0.0683, "step": 25680 }, { "epoch": 1.29, "learning_rate": 1.8028126110575217e-05, "loss": 0.0748, "step": 25690 }, { "epoch": 1.29, "learning_rate": 1.8015433822409507e-05, "loss": 0.0667, "step": 25700 }, { "epoch": 1.29, "learning_rate": 1.8002741534243794e-05, "loss": 0.0408, "step": 25710 }, { "epoch": 1.29, "learning_rate": 1.7990049246078084e-05, "loss": 0.0576, "step": 25720 }, { "epoch": 1.29, "learning_rate": 1.7977356957912374e-05, "loss": 0.0559, "step": 25730 }, { "epoch": 1.29, "learning_rate": 1.796466466974666e-05, "loss": 0.0849, "step": 25740 }, { "epoch": 1.29, "learning_rate": 1.7951972381580954e-05, "loss": 0.0717, "step": 25750 }, { "epoch": 1.29, "learning_rate": 1.7939280093415244e-05, "loss": 0.0293, "step": 25760 }, { "epoch": 1.29, "learning_rate": 1.792658780524953e-05, "loss": 0.0709, "step": 25770 }, { "epoch": 1.29, "learning_rate": 1.791389551708382e-05, "loss": 0.0625, "step": 25780 }, { "epoch": 1.29, "learning_rate": 1.790120322891811e-05, "loss": 0.0667, "step": 25790 }, { "epoch": 1.29, "learning_rate": 1.78885109407524e-05, "loss": 0.0786, "step": 25800 }, { "epoch": 1.29, "learning_rate": 1.787581865258669e-05, "loss": 0.0708, "step": 25810 }, { "epoch": 1.29, "learning_rate": 1.786312636442098e-05, "loss": 0.0731, "step": 25820 }, { "epoch": 1.29, "learning_rate": 1.7850434076255267e-05, "loss": 0.0369, "step": 25830 }, { "epoch": 1.3, "learning_rate": 1.7837741788089557e-05, "loss": 0.0708, "step": 25840 }, { "epoch": 1.3, "learning_rate": 1.7825049499923847e-05, "loss": 0.029, "step": 25850 }, { "epoch": 1.3, "learning_rate": 1.7812357211758137e-05, "loss": 0.0807, "step": 25860 }, { "epoch": 1.3, "learning_rate": 1.7799664923592427e-05, "loss": 0.0523, "step": 25870 }, { "epoch": 1.3, "learning_rate": 1.7786972635426717e-05, "loss": 0.0905, "step": 25880 }, { "epoch": 1.3, "learning_rate": 1.7774280347261003e-05, "loss": 0.0602, "step": 25890 }, { "epoch": 1.3, "learning_rate": 1.7761588059095293e-05, "loss": 0.0428, "step": 25900 }, { "epoch": 1.3, "learning_rate": 1.7748895770929583e-05, "loss": 0.0605, "step": 25910 }, { "epoch": 1.3, "learning_rate": 1.7736203482763873e-05, "loss": 0.1772, "step": 25920 }, { "epoch": 1.3, "learning_rate": 1.7723511194598163e-05, "loss": 0.0571, "step": 25930 }, { "epoch": 1.3, "learning_rate": 1.7710818906432453e-05, "loss": 0.1101, "step": 25940 }, { "epoch": 1.3, "learning_rate": 1.7698126618266743e-05, "loss": 0.0639, "step": 25950 }, { "epoch": 1.3, "learning_rate": 1.768543433010103e-05, "loss": 0.0365, "step": 25960 }, { "epoch": 1.3, "learning_rate": 1.767274204193532e-05, "loss": 0.1097, "step": 25970 }, { "epoch": 1.3, "learning_rate": 1.766004975376961e-05, "loss": 0.0601, "step": 25980 }, { "epoch": 1.3, "learning_rate": 1.76473574656039e-05, "loss": 0.0116, "step": 25990 }, { "epoch": 1.3, "learning_rate": 1.763466517743819e-05, "loss": 0.0865, "step": 26000 }, { "epoch": 1.3, "learning_rate": 1.762197288927248e-05, "loss": 0.0308, "step": 26010 }, { "epoch": 1.3, "learning_rate": 1.7609280601106766e-05, "loss": 0.0784, "step": 26020 }, { "epoch": 1.3, "learning_rate": 1.759658831294106e-05, "loss": 0.0619, "step": 26030 }, { "epoch": 1.31, "learning_rate": 1.758389602477535e-05, "loss": 0.1242, "step": 26040 }, { "epoch": 1.31, "learning_rate": 1.7571203736609636e-05, "loss": 0.0818, "step": 26050 }, { "epoch": 1.31, "learning_rate": 1.7558511448443926e-05, "loss": 0.0826, "step": 26060 }, { "epoch": 1.31, "learning_rate": 1.7545819160278216e-05, "loss": 0.1291, "step": 26070 }, { "epoch": 1.31, "learning_rate": 1.7533126872112503e-05, "loss": 0.0366, "step": 26080 }, { "epoch": 1.31, "learning_rate": 1.7520434583946796e-05, "loss": 0.0498, "step": 26090 }, { "epoch": 1.31, "learning_rate": 1.7507742295781086e-05, "loss": 0.0461, "step": 26100 }, { "epoch": 1.31, "learning_rate": 1.7495050007615373e-05, "loss": 0.081, "step": 26110 }, { "epoch": 1.31, "learning_rate": 1.7482357719449663e-05, "loss": 0.0807, "step": 26120 }, { "epoch": 1.31, "learning_rate": 1.7469665431283953e-05, "loss": 0.089, "step": 26130 }, { "epoch": 1.31, "learning_rate": 1.745697314311824e-05, "loss": 0.0862, "step": 26140 }, { "epoch": 1.31, "learning_rate": 1.7444280854952533e-05, "loss": 0.0422, "step": 26150 }, { "epoch": 1.31, "learning_rate": 1.7431588566786823e-05, "loss": 0.0925, "step": 26160 }, { "epoch": 1.31, "learning_rate": 1.741889627862111e-05, "loss": 0.1323, "step": 26170 }, { "epoch": 1.31, "learning_rate": 1.74062039904554e-05, "loss": 0.0357, "step": 26180 }, { "epoch": 1.31, "learning_rate": 1.739351170228969e-05, "loss": 0.0338, "step": 26190 }, { "epoch": 1.31, "learning_rate": 1.738081941412398e-05, "loss": 0.0237, "step": 26200 }, { "epoch": 1.31, "learning_rate": 1.736812712595827e-05, "loss": 0.0776, "step": 26210 }, { "epoch": 1.31, "learning_rate": 1.735543483779256e-05, "loss": 0.0331, "step": 26220 }, { "epoch": 1.31, "learning_rate": 1.7342742549626846e-05, "loss": 0.0615, "step": 26230 }, { "epoch": 1.32, "learning_rate": 1.7330050261461136e-05, "loss": 0.1429, "step": 26240 }, { "epoch": 1.32, "learning_rate": 1.7317357973295426e-05, "loss": 0.0715, "step": 26250 }, { "epoch": 1.32, "learning_rate": 1.7304665685129716e-05, "loss": 0.0528, "step": 26260 }, { "epoch": 1.32, "learning_rate": 1.7291973396964006e-05, "loss": 0.0415, "step": 26270 }, { "epoch": 1.32, "learning_rate": 1.7279281108798296e-05, "loss": 0.1015, "step": 26280 }, { "epoch": 1.32, "learning_rate": 1.7266588820632586e-05, "loss": 0.0658, "step": 26290 }, { "epoch": 1.32, "learning_rate": 1.7253896532466872e-05, "loss": 0.0559, "step": 26300 }, { "epoch": 1.32, "learning_rate": 1.7241204244301162e-05, "loss": 0.0334, "step": 26310 }, { "epoch": 1.32, "learning_rate": 1.7228511956135452e-05, "loss": 0.0778, "step": 26320 }, { "epoch": 1.32, "learning_rate": 1.7215819667969742e-05, "loss": 0.0622, "step": 26330 }, { "epoch": 1.32, "learning_rate": 1.7203127379804032e-05, "loss": 0.074, "step": 26340 }, { "epoch": 1.32, "learning_rate": 1.7190435091638322e-05, "loss": 0.0987, "step": 26350 }, { "epoch": 1.32, "learning_rate": 1.717774280347261e-05, "loss": 0.1105, "step": 26360 }, { "epoch": 1.32, "learning_rate": 1.71650505153069e-05, "loss": 0.0685, "step": 26370 }, { "epoch": 1.32, "learning_rate": 1.7152358227141192e-05, "loss": 0.0777, "step": 26380 }, { "epoch": 1.32, "learning_rate": 1.713966593897548e-05, "loss": 0.0704, "step": 26390 }, { "epoch": 1.32, "learning_rate": 1.712697365080977e-05, "loss": 0.1226, "step": 26400 }, { "epoch": 1.32, "learning_rate": 1.711428136264406e-05, "loss": 0.0751, "step": 26410 }, { "epoch": 1.32, "learning_rate": 1.7101589074478346e-05, "loss": 0.154, "step": 26420 }, { "epoch": 1.33, "learning_rate": 1.7088896786312636e-05, "loss": 0.046, "step": 26430 }, { "epoch": 1.33, "learning_rate": 1.707620449814693e-05, "loss": 0.041, "step": 26440 }, { "epoch": 1.33, "learning_rate": 1.7063512209981216e-05, "loss": 0.0291, "step": 26450 }, { "epoch": 1.33, "learning_rate": 1.7050819921815506e-05, "loss": 0.0543, "step": 26460 }, { "epoch": 1.33, "learning_rate": 1.7038127633649796e-05, "loss": 0.088, "step": 26470 }, { "epoch": 1.33, "learning_rate": 1.7025435345484082e-05, "loss": 0.103, "step": 26480 }, { "epoch": 1.33, "learning_rate": 1.7012743057318376e-05, "loss": 0.0531, "step": 26490 }, { "epoch": 1.33, "learning_rate": 1.7000050769152666e-05, "loss": 0.0809, "step": 26500 }, { "epoch": 1.33, "learning_rate": 1.6987358480986952e-05, "loss": 0.0907, "step": 26510 }, { "epoch": 1.33, "learning_rate": 1.6974666192821242e-05, "loss": 0.0645, "step": 26520 }, { "epoch": 1.33, "learning_rate": 1.6961973904655532e-05, "loss": 0.0936, "step": 26530 }, { "epoch": 1.33, "learning_rate": 1.6949281616489822e-05, "loss": 0.0399, "step": 26540 }, { "epoch": 1.33, "learning_rate": 1.6936589328324112e-05, "loss": 0.0744, "step": 26550 }, { "epoch": 1.33, "learning_rate": 1.6923897040158402e-05, "loss": 0.0485, "step": 26560 }, { "epoch": 1.33, "learning_rate": 1.691120475199269e-05, "loss": 0.057, "step": 26570 }, { "epoch": 1.33, "learning_rate": 1.689851246382698e-05, "loss": 0.0799, "step": 26580 }, { "epoch": 1.33, "learning_rate": 1.688582017566127e-05, "loss": 0.0326, "step": 26590 }, { "epoch": 1.33, "learning_rate": 1.687312788749556e-05, "loss": 0.0617, "step": 26600 }, { "epoch": 1.33, "learning_rate": 1.686043559932985e-05, "loss": 0.0403, "step": 26610 }, { "epoch": 1.33, "learning_rate": 1.684774331116414e-05, "loss": 0.0734, "step": 26620 }, { "epoch": 1.34, "learning_rate": 1.683505102299843e-05, "loss": 0.1026, "step": 26630 }, { "epoch": 1.34, "learning_rate": 1.6822358734832715e-05, "loss": 0.0788, "step": 26640 }, { "epoch": 1.34, "learning_rate": 1.6809666446667005e-05, "loss": 0.0635, "step": 26650 }, { "epoch": 1.34, "learning_rate": 1.6796974158501295e-05, "loss": 0.0302, "step": 26660 }, { "epoch": 1.34, "learning_rate": 1.6784281870335585e-05, "loss": 0.0397, "step": 26670 }, { "epoch": 1.34, "learning_rate": 1.6771589582169875e-05, "loss": 0.0711, "step": 26680 }, { "epoch": 1.34, "learning_rate": 1.6758897294004165e-05, "loss": 0.0459, "step": 26690 }, { "epoch": 1.34, "learning_rate": 1.6746205005838452e-05, "loss": 0.0772, "step": 26700 }, { "epoch": 1.34, "learning_rate": 1.6733512717672742e-05, "loss": 0.0806, "step": 26710 }, { "epoch": 1.34, "learning_rate": 1.6720820429507035e-05, "loss": 0.0195, "step": 26720 }, { "epoch": 1.34, "learning_rate": 1.6708128141341322e-05, "loss": 0.0251, "step": 26730 }, { "epoch": 1.34, "learning_rate": 1.6695435853175612e-05, "loss": 0.028, "step": 26740 }, { "epoch": 1.34, "learning_rate": 1.6682743565009902e-05, "loss": 0.0437, "step": 26750 }, { "epoch": 1.34, "learning_rate": 1.667005127684419e-05, "loss": 0.0648, "step": 26760 }, { "epoch": 1.34, "learning_rate": 1.665735898867848e-05, "loss": 0.0144, "step": 26770 }, { "epoch": 1.34, "learning_rate": 1.6644666700512772e-05, "loss": 0.0415, "step": 26780 }, { "epoch": 1.34, "learning_rate": 1.6631974412347058e-05, "loss": 0.0505, "step": 26790 }, { "epoch": 1.34, "learning_rate": 1.6619282124181348e-05, "loss": 0.0438, "step": 26800 }, { "epoch": 1.34, "learning_rate": 1.6606589836015638e-05, "loss": 0.0471, "step": 26810 }, { "epoch": 1.34, "learning_rate": 1.6593897547849925e-05, "loss": 0.0573, "step": 26820 }, { "epoch": 1.35, "learning_rate": 1.6581205259684215e-05, "loss": 0.0355, "step": 26830 }, { "epoch": 1.35, "learning_rate": 1.6568512971518508e-05, "loss": 0.0387, "step": 26840 }, { "epoch": 1.35, "learning_rate": 1.6555820683352795e-05, "loss": 0.1009, "step": 26850 }, { "epoch": 1.35, "learning_rate": 1.6543128395187085e-05, "loss": 0.0445, "step": 26860 }, { "epoch": 1.35, "learning_rate": 1.6530436107021375e-05, "loss": 0.044, "step": 26870 }, { "epoch": 1.35, "learning_rate": 1.6517743818855665e-05, "loss": 0.0159, "step": 26880 }, { "epoch": 1.35, "learning_rate": 1.650505153068995e-05, "loss": 0.0324, "step": 26890 }, { "epoch": 1.35, "learning_rate": 1.6492359242524245e-05, "loss": 0.0537, "step": 26900 }, { "epoch": 1.35, "learning_rate": 1.647966695435853e-05, "loss": 0.0375, "step": 26910 }, { "epoch": 1.35, "learning_rate": 1.646697466619282e-05, "loss": 0.0317, "step": 26920 }, { "epoch": 1.35, "learning_rate": 1.645428237802711e-05, "loss": 0.0474, "step": 26930 }, { "epoch": 1.35, "learning_rate": 1.64415900898614e-05, "loss": 0.0398, "step": 26940 }, { "epoch": 1.35, "learning_rate": 1.642889780169569e-05, "loss": 0.0432, "step": 26950 }, { "epoch": 1.35, "learning_rate": 1.641620551352998e-05, "loss": 0.0694, "step": 26960 }, { "epoch": 1.35, "learning_rate": 1.640351322536427e-05, "loss": 0.0463, "step": 26970 }, { "epoch": 1.35, "learning_rate": 1.6390820937198558e-05, "loss": 0.0576, "step": 26980 }, { "epoch": 1.35, "learning_rate": 1.6378128649032848e-05, "loss": 0.05, "step": 26990 }, { "epoch": 1.35, "learning_rate": 1.6365436360867138e-05, "loss": 0.0698, "step": 27000 }, { "epoch": 1.35, "learning_rate": 1.6352744072701428e-05, "loss": 0.0515, "step": 27010 }, { "epoch": 1.35, "learning_rate": 1.6340051784535718e-05, "loss": 0.0729, "step": 27020 }, { "epoch": 1.36, "learning_rate": 1.6327359496370008e-05, "loss": 0.0553, "step": 27030 }, { "epoch": 1.36, "learning_rate": 1.6314667208204294e-05, "loss": 0.0489, "step": 27040 }, { "epoch": 1.36, "learning_rate": 1.6301974920038584e-05, "loss": 0.0313, "step": 27050 }, { "epoch": 1.36, "learning_rate": 1.6289282631872874e-05, "loss": 0.0858, "step": 27060 }, { "epoch": 1.36, "learning_rate": 1.6276590343707164e-05, "loss": 0.0715, "step": 27070 }, { "epoch": 1.36, "learning_rate": 1.6263898055541454e-05, "loss": 0.0408, "step": 27080 }, { "epoch": 1.36, "learning_rate": 1.6251205767375744e-05, "loss": 0.0505, "step": 27090 }, { "epoch": 1.36, "learning_rate": 1.623851347921003e-05, "loss": 0.0362, "step": 27100 }, { "epoch": 1.36, "learning_rate": 1.622582119104432e-05, "loss": 0.0257, "step": 27110 }, { "epoch": 1.36, "learning_rate": 1.621312890287861e-05, "loss": 0.0699, "step": 27120 }, { "epoch": 1.36, "learning_rate": 1.62004366147129e-05, "loss": 0.0522, "step": 27130 }, { "epoch": 1.36, "learning_rate": 1.618774432654719e-05, "loss": 0.0496, "step": 27140 }, { "epoch": 1.36, "learning_rate": 1.617505203838148e-05, "loss": 0.0632, "step": 27150 }, { "epoch": 1.36, "learning_rate": 1.6162359750215768e-05, "loss": 0.0538, "step": 27160 }, { "epoch": 1.36, "learning_rate": 1.6149667462050058e-05, "loss": 0.0393, "step": 27170 }, { "epoch": 1.36, "learning_rate": 1.613697517388435e-05, "loss": 0.0364, "step": 27180 }, { "epoch": 1.36, "learning_rate": 1.6124282885718638e-05, "loss": 0.0978, "step": 27190 }, { "epoch": 1.36, "learning_rate": 1.6111590597552928e-05, "loss": 0.0883, "step": 27200 }, { "epoch": 1.36, "learning_rate": 1.6098898309387218e-05, "loss": 0.0634, "step": 27210 }, { "epoch": 1.36, "learning_rate": 1.6086206021221508e-05, "loss": 0.0723, "step": 27220 }, { "epoch": 1.37, "learning_rate": 1.6073513733055794e-05, "loss": 0.0376, "step": 27230 }, { "epoch": 1.37, "learning_rate": 1.6060821444890088e-05, "loss": 0.0885, "step": 27240 }, { "epoch": 1.37, "learning_rate": 1.6048129156724374e-05, "loss": 0.0252, "step": 27250 }, { "epoch": 1.37, "learning_rate": 1.6035436868558664e-05, "loss": 0.0778, "step": 27260 }, { "epoch": 1.37, "learning_rate": 1.6022744580392954e-05, "loss": 0.0338, "step": 27270 }, { "epoch": 1.37, "learning_rate": 1.6010052292227244e-05, "loss": 0.0489, "step": 27280 }, { "epoch": 1.37, "learning_rate": 1.599736000406153e-05, "loss": 0.0358, "step": 27290 }, { "epoch": 1.37, "learning_rate": 1.5984667715895824e-05, "loss": 0.0318, "step": 27300 }, { "epoch": 1.37, "learning_rate": 1.5971975427730114e-05, "loss": 0.044, "step": 27310 }, { "epoch": 1.37, "learning_rate": 1.59592831395644e-05, "loss": 0.0545, "step": 27320 }, { "epoch": 1.37, "learning_rate": 1.594659085139869e-05, "loss": 0.0647, "step": 27330 }, { "epoch": 1.37, "learning_rate": 1.593389856323298e-05, "loss": 0.0529, "step": 27340 }, { "epoch": 1.37, "learning_rate": 1.5921206275067267e-05, "loss": 0.0675, "step": 27350 }, { "epoch": 1.37, "learning_rate": 1.590851398690156e-05, "loss": 0.0231, "step": 27360 }, { "epoch": 1.37, "learning_rate": 1.589582169873585e-05, "loss": 0.0399, "step": 27370 }, { "epoch": 1.37, "learning_rate": 1.5883129410570137e-05, "loss": 0.0849, "step": 27380 }, { "epoch": 1.37, "learning_rate": 1.5870437122404427e-05, "loss": 0.0387, "step": 27390 }, { "epoch": 1.37, "learning_rate": 1.5857744834238717e-05, "loss": 0.0724, "step": 27400 }, { "epoch": 1.37, "learning_rate": 1.5845052546073007e-05, "loss": 0.0532, "step": 27410 }, { "epoch": 1.37, "learning_rate": 1.5832360257907297e-05, "loss": 0.0808, "step": 27420 }, { "epoch": 1.38, "learning_rate": 1.5819667969741587e-05, "loss": 0.0262, "step": 27430 }, { "epoch": 1.38, "learning_rate": 1.5806975681575874e-05, "loss": 0.0276, "step": 27440 }, { "epoch": 1.38, "learning_rate": 1.5794283393410164e-05, "loss": 0.1126, "step": 27450 }, { "epoch": 1.38, "learning_rate": 1.5781591105244454e-05, "loss": 0.0323, "step": 27460 }, { "epoch": 1.38, "learning_rate": 1.5768898817078744e-05, "loss": 0.0718, "step": 27470 }, { "epoch": 1.38, "learning_rate": 1.5756206528913034e-05, "loss": 0.043, "step": 27480 }, { "epoch": 1.38, "learning_rate": 1.5743514240747324e-05, "loss": 0.0664, "step": 27490 }, { "epoch": 1.38, "learning_rate": 1.573082195258161e-05, "loss": 0.0421, "step": 27500 }, { "epoch": 1.38, "learning_rate": 1.57181296644159e-05, "loss": 0.0621, "step": 27510 }, { "epoch": 1.38, "learning_rate": 1.570543737625019e-05, "loss": 0.0644, "step": 27520 }, { "epoch": 1.38, "learning_rate": 1.569274508808448e-05, "loss": 0.0494, "step": 27530 }, { "epoch": 1.38, "learning_rate": 1.568005279991877e-05, "loss": 0.0727, "step": 27540 }, { "epoch": 1.38, "learning_rate": 1.566736051175306e-05, "loss": 0.0597, "step": 27550 }, { "epoch": 1.38, "learning_rate": 1.565466822358735e-05, "loss": 0.0568, "step": 27560 }, { "epoch": 1.38, "learning_rate": 1.5641975935421637e-05, "loss": 0.0803, "step": 27570 }, { "epoch": 1.38, "learning_rate": 1.5629283647255927e-05, "loss": 0.0499, "step": 27580 }, { "epoch": 1.38, "learning_rate": 1.5616591359090217e-05, "loss": 0.0932, "step": 27590 }, { "epoch": 1.38, "learning_rate": 1.5603899070924507e-05, "loss": 0.0618, "step": 27600 }, { "epoch": 1.38, "learning_rate": 1.5591206782758797e-05, "loss": 0.0462, "step": 27610 }, { "epoch": 1.38, "learning_rate": 1.5578514494593087e-05, "loss": 0.1213, "step": 27620 }, { "epoch": 1.39, "learning_rate": 1.5565822206427373e-05, "loss": 0.1099, "step": 27630 }, { "epoch": 1.39, "learning_rate": 1.5553129918261667e-05, "loss": 0.0279, "step": 27640 }, { "epoch": 1.39, "learning_rate": 1.5540437630095957e-05, "loss": 0.0723, "step": 27650 }, { "epoch": 1.39, "learning_rate": 1.5527745341930243e-05, "loss": 0.0542, "step": 27660 }, { "epoch": 1.39, "learning_rate": 1.5515053053764533e-05, "loss": 0.0801, "step": 27670 }, { "epoch": 1.39, "learning_rate": 1.5502360765598823e-05, "loss": 0.0555, "step": 27680 }, { "epoch": 1.39, "learning_rate": 1.548966847743311e-05, "loss": 0.0321, "step": 27690 }, { "epoch": 1.39, "learning_rate": 1.5476976189267403e-05, "loss": 0.0567, "step": 27700 }, { "epoch": 1.39, "learning_rate": 1.5464283901101693e-05, "loss": 0.0271, "step": 27710 }, { "epoch": 1.39, "learning_rate": 1.545159161293598e-05, "loss": 0.0702, "step": 27720 }, { "epoch": 1.39, "learning_rate": 1.543889932477027e-05, "loss": 0.0538, "step": 27730 }, { "epoch": 1.39, "learning_rate": 1.542620703660456e-05, "loss": 0.0435, "step": 27740 }, { "epoch": 1.39, "learning_rate": 1.5413514748438847e-05, "loss": 0.0473, "step": 27750 }, { "epoch": 1.39, "learning_rate": 1.540082246027314e-05, "loss": 0.0481, "step": 27760 }, { "epoch": 1.39, "learning_rate": 1.538813017210743e-05, "loss": 0.0704, "step": 27770 }, { "epoch": 1.39, "learning_rate": 1.5375437883941716e-05, "loss": 0.0253, "step": 27780 }, { "epoch": 1.39, "learning_rate": 1.5362745595776006e-05, "loss": 0.045, "step": 27790 }, { "epoch": 1.39, "learning_rate": 1.5350053307610296e-05, "loss": 0.0451, "step": 27800 }, { "epoch": 1.39, "learning_rate": 1.5337361019444586e-05, "loss": 0.0383, "step": 27810 }, { "epoch": 1.39, "learning_rate": 1.5324668731278876e-05, "loss": 0.0438, "step": 27820 }, { "epoch": 1.4, "learning_rate": 1.5311976443113166e-05, "loss": 0.075, "step": 27830 }, { "epoch": 1.4, "learning_rate": 1.5299284154947453e-05, "loss": 0.0276, "step": 27840 }, { "epoch": 1.4, "learning_rate": 1.5286591866781743e-05, "loss": 0.0754, "step": 27850 }, { "epoch": 1.4, "learning_rate": 1.5273899578616033e-05, "loss": 0.0623, "step": 27860 }, { "epoch": 1.4, "learning_rate": 1.5261207290450323e-05, "loss": 0.0653, "step": 27870 }, { "epoch": 1.4, "learning_rate": 1.5248515002284613e-05, "loss": 0.1209, "step": 27880 }, { "epoch": 1.4, "learning_rate": 1.5235822714118903e-05, "loss": 0.0315, "step": 27890 }, { "epoch": 1.4, "learning_rate": 1.5223130425953191e-05, "loss": 0.0359, "step": 27900 }, { "epoch": 1.4, "learning_rate": 1.5210438137787481e-05, "loss": 0.0442, "step": 27910 }, { "epoch": 1.4, "learning_rate": 1.519774584962177e-05, "loss": 0.0657, "step": 27920 }, { "epoch": 1.4, "learning_rate": 1.5185053561456061e-05, "loss": 0.0691, "step": 27930 }, { "epoch": 1.4, "learning_rate": 1.517236127329035e-05, "loss": 0.061, "step": 27940 }, { "epoch": 1.4, "learning_rate": 1.515966898512464e-05, "loss": 0.0716, "step": 27950 }, { "epoch": 1.4, "learning_rate": 1.5146976696958928e-05, "loss": 0.0567, "step": 27960 }, { "epoch": 1.4, "learning_rate": 1.5134284408793218e-05, "loss": 0.0351, "step": 27970 }, { "epoch": 1.4, "learning_rate": 1.5121592120627506e-05, "loss": 0.0315, "step": 27980 }, { "epoch": 1.4, "learning_rate": 1.5108899832461798e-05, "loss": 0.0451, "step": 27990 }, { "epoch": 1.4, "learning_rate": 1.5096207544296088e-05, "loss": 0.0461, "step": 28000 }, { "epoch": 1.4, "learning_rate": 1.5083515256130376e-05, "loss": 0.0753, "step": 28010 }, { "epoch": 1.4, "learning_rate": 1.5070822967964664e-05, "loss": 0.0463, "step": 28020 }, { "epoch": 1.41, "learning_rate": 1.5058130679798954e-05, "loss": 0.0321, "step": 28030 }, { "epoch": 1.41, "learning_rate": 1.5045438391633243e-05, "loss": 0.0388, "step": 28040 }, { "epoch": 1.41, "learning_rate": 1.5032746103467534e-05, "loss": 0.0288, "step": 28050 }, { "epoch": 1.41, "learning_rate": 1.5020053815301824e-05, "loss": 0.0893, "step": 28060 }, { "epoch": 1.41, "learning_rate": 1.5007361527136113e-05, "loss": 0.0639, "step": 28070 }, { "epoch": 1.41, "learning_rate": 1.4994669238970403e-05, "loss": 0.0573, "step": 28080 }, { "epoch": 1.41, "learning_rate": 1.4981976950804691e-05, "loss": 0.0559, "step": 28090 }, { "epoch": 1.41, "learning_rate": 1.4969284662638983e-05, "loss": 0.0553, "step": 28100 }, { "epoch": 1.41, "learning_rate": 1.4956592374473271e-05, "loss": 0.0475, "step": 28110 }, { "epoch": 1.41, "learning_rate": 1.4943900086307561e-05, "loss": 0.0446, "step": 28120 }, { "epoch": 1.41, "learning_rate": 1.493120779814185e-05, "loss": 0.0432, "step": 28130 }, { "epoch": 1.41, "learning_rate": 1.491851550997614e-05, "loss": 0.0531, "step": 28140 }, { "epoch": 1.41, "learning_rate": 1.4905823221810427e-05, "loss": 0.0815, "step": 28150 }, { "epoch": 1.41, "learning_rate": 1.489313093364472e-05, "loss": 0.0496, "step": 28160 }, { "epoch": 1.41, "learning_rate": 1.488043864547901e-05, "loss": 0.0408, "step": 28170 }, { "epoch": 1.41, "learning_rate": 1.4867746357313297e-05, "loss": 0.0624, "step": 28180 }, { "epoch": 1.41, "learning_rate": 1.4855054069147586e-05, "loss": 0.0353, "step": 28190 }, { "epoch": 1.41, "learning_rate": 1.4842361780981876e-05, "loss": 0.0409, "step": 28200 }, { "epoch": 1.41, "learning_rate": 1.4829669492816164e-05, "loss": 0.0524, "step": 28210 }, { "epoch": 1.41, "learning_rate": 1.4816977204650456e-05, "loss": 0.0628, "step": 28220 }, { "epoch": 1.42, "learning_rate": 1.4804284916484746e-05, "loss": 0.0624, "step": 28230 }, { "epoch": 1.42, "learning_rate": 1.4791592628319034e-05, "loss": 0.0483, "step": 28240 }, { "epoch": 1.42, "learning_rate": 1.4778900340153324e-05, "loss": 0.0167, "step": 28250 }, { "epoch": 1.42, "learning_rate": 1.4766208051987612e-05, "loss": 0.0592, "step": 28260 }, { "epoch": 1.42, "learning_rate": 1.47535157638219e-05, "loss": 0.0729, "step": 28270 }, { "epoch": 1.42, "learning_rate": 1.4740823475656192e-05, "loss": 0.0765, "step": 28280 }, { "epoch": 1.42, "learning_rate": 1.4728131187490482e-05, "loss": 0.0535, "step": 28290 }, { "epoch": 1.42, "learning_rate": 1.471543889932477e-05, "loss": 0.0239, "step": 28300 }, { "epoch": 1.42, "learning_rate": 1.470274661115906e-05, "loss": 0.1059, "step": 28310 }, { "epoch": 1.42, "learning_rate": 1.4690054322993349e-05, "loss": 0.1008, "step": 28320 }, { "epoch": 1.42, "learning_rate": 1.4677362034827639e-05, "loss": 0.0498, "step": 28330 }, { "epoch": 1.42, "learning_rate": 1.466466974666193e-05, "loss": 0.0543, "step": 28340 }, { "epoch": 1.42, "learning_rate": 1.4651977458496219e-05, "loss": 0.0184, "step": 28350 }, { "epoch": 1.42, "learning_rate": 1.4639285170330507e-05, "loss": 0.0768, "step": 28360 }, { "epoch": 1.42, "learning_rate": 1.4626592882164797e-05, "loss": 0.0465, "step": 28370 }, { "epoch": 1.42, "learning_rate": 1.4613900593999085e-05, "loss": 0.0612, "step": 28380 }, { "epoch": 1.42, "learning_rate": 1.4601208305833377e-05, "loss": 0.0516, "step": 28390 }, { "epoch": 1.42, "learning_rate": 1.4588516017667667e-05, "loss": 0.0758, "step": 28400 }, { "epoch": 1.42, "learning_rate": 1.4575823729501955e-05, "loss": 0.028, "step": 28410 }, { "epoch": 1.42, "learning_rate": 1.4563131441336245e-05, "loss": 0.0502, "step": 28420 }, { "epoch": 1.43, "learning_rate": 1.4550439153170534e-05, "loss": 0.0665, "step": 28430 }, { "epoch": 1.43, "learning_rate": 1.4537746865004822e-05, "loss": 0.0213, "step": 28440 }, { "epoch": 1.43, "learning_rate": 1.4525054576839114e-05, "loss": 0.0532, "step": 28450 }, { "epoch": 1.43, "learning_rate": 1.4512362288673404e-05, "loss": 0.0734, "step": 28460 }, { "epoch": 1.43, "learning_rate": 1.4499670000507692e-05, "loss": 0.0583, "step": 28470 }, { "epoch": 1.43, "learning_rate": 1.4486977712341982e-05, "loss": 0.0932, "step": 28480 }, { "epoch": 1.43, "learning_rate": 1.447428542417627e-05, "loss": 0.0471, "step": 28490 }, { "epoch": 1.43, "learning_rate": 1.446159313601056e-05, "loss": 0.1048, "step": 28500 }, { "epoch": 1.43, "learning_rate": 1.4448900847844852e-05, "loss": 0.0366, "step": 28510 }, { "epoch": 1.43, "learning_rate": 1.443620855967914e-05, "loss": 0.0797, "step": 28520 }, { "epoch": 1.43, "learning_rate": 1.4423516271513428e-05, "loss": 0.0614, "step": 28530 }, { "epoch": 1.43, "learning_rate": 1.4410823983347718e-05, "loss": 0.0742, "step": 28540 }, { "epoch": 1.43, "learning_rate": 1.4398131695182007e-05, "loss": 0.0493, "step": 28550 }, { "epoch": 1.43, "learning_rate": 1.4385439407016297e-05, "loss": 0.087, "step": 28560 }, { "epoch": 1.43, "learning_rate": 1.4372747118850588e-05, "loss": 0.0942, "step": 28570 }, { "epoch": 1.43, "learning_rate": 1.4360054830684877e-05, "loss": 0.0703, "step": 28580 }, { "epoch": 1.43, "learning_rate": 1.4347362542519167e-05, "loss": 0.0774, "step": 28590 }, { "epoch": 1.43, "learning_rate": 1.4334670254353455e-05, "loss": 0.0565, "step": 28600 }, { "epoch": 1.43, "learning_rate": 1.4321977966187743e-05, "loss": 0.0383, "step": 28610 }, { "epoch": 1.43, "learning_rate": 1.4309285678022035e-05, "loss": 0.0662, "step": 28620 }, { "epoch": 1.44, "learning_rate": 1.4296593389856325e-05, "loss": 0.0582, "step": 28630 }, { "epoch": 1.44, "learning_rate": 1.4283901101690613e-05, "loss": 0.0513, "step": 28640 }, { "epoch": 1.44, "learning_rate": 1.4271208813524903e-05, "loss": 0.0346, "step": 28650 }, { "epoch": 1.44, "learning_rate": 1.4258516525359192e-05, "loss": 0.0334, "step": 28660 }, { "epoch": 1.44, "learning_rate": 1.4245824237193482e-05, "loss": 0.0332, "step": 28670 }, { "epoch": 1.44, "learning_rate": 1.4233131949027773e-05, "loss": 0.0753, "step": 28680 }, { "epoch": 1.44, "learning_rate": 1.4220439660862062e-05, "loss": 0.0355, "step": 28690 }, { "epoch": 1.44, "learning_rate": 1.420774737269635e-05, "loss": 0.0538, "step": 28700 }, { "epoch": 1.44, "learning_rate": 1.419505508453064e-05, "loss": 0.0659, "step": 28710 }, { "epoch": 1.44, "learning_rate": 1.4182362796364928e-05, "loss": 0.0679, "step": 28720 }, { "epoch": 1.44, "learning_rate": 1.4169670508199218e-05, "loss": 0.0587, "step": 28730 }, { "epoch": 1.44, "learning_rate": 1.415697822003351e-05, "loss": 0.0383, "step": 28740 }, { "epoch": 1.44, "learning_rate": 1.4144285931867798e-05, "loss": 0.0442, "step": 28750 }, { "epoch": 1.44, "learning_rate": 1.4131593643702088e-05, "loss": 0.0347, "step": 28760 }, { "epoch": 1.44, "learning_rate": 1.4118901355536376e-05, "loss": 0.0626, "step": 28770 }, { "epoch": 1.44, "learning_rate": 1.4106209067370665e-05, "loss": 0.0562, "step": 28780 }, { "epoch": 1.44, "learning_rate": 1.4093516779204955e-05, "loss": 0.0391, "step": 28790 }, { "epoch": 1.44, "learning_rate": 1.4080824491039246e-05, "loss": 0.0895, "step": 28800 }, { "epoch": 1.44, "learning_rate": 1.4068132202873535e-05, "loss": 0.0702, "step": 28810 }, { "epoch": 1.44, "learning_rate": 1.4055439914707825e-05, "loss": 0.0805, "step": 28820 }, { "epoch": 1.45, "learning_rate": 1.4042747626542113e-05, "loss": 0.0546, "step": 28830 }, { "epoch": 1.45, "learning_rate": 1.4030055338376403e-05, "loss": 0.0544, "step": 28840 }, { "epoch": 1.45, "learning_rate": 1.4017363050210695e-05, "loss": 0.0251, "step": 28850 }, { "epoch": 1.45, "learning_rate": 1.4004670762044983e-05, "loss": 0.0779, "step": 28860 }, { "epoch": 1.45, "learning_rate": 1.3991978473879271e-05, "loss": 0.0656, "step": 28870 }, { "epoch": 1.45, "learning_rate": 1.3979286185713561e-05, "loss": 0.0435, "step": 28880 }, { "epoch": 1.45, "learning_rate": 1.396659389754785e-05, "loss": 0.0396, "step": 28890 }, { "epoch": 1.45, "learning_rate": 1.395390160938214e-05, "loss": 0.055, "step": 28900 }, { "epoch": 1.45, "learning_rate": 1.3941209321216431e-05, "loss": 0.0452, "step": 28910 }, { "epoch": 1.45, "learning_rate": 1.392851703305072e-05, "loss": 0.0559, "step": 28920 }, { "epoch": 1.45, "learning_rate": 1.391582474488501e-05, "loss": 0.0171, "step": 28930 }, { "epoch": 1.45, "learning_rate": 1.3903132456719298e-05, "loss": 0.0977, "step": 28940 }, { "epoch": 1.45, "learning_rate": 1.3890440168553586e-05, "loss": 0.0727, "step": 28950 }, { "epoch": 1.45, "learning_rate": 1.3877747880387876e-05, "loss": 0.0556, "step": 28960 }, { "epoch": 1.45, "learning_rate": 1.3865055592222168e-05, "loss": 0.0426, "step": 28970 }, { "epoch": 1.45, "learning_rate": 1.3852363304056456e-05, "loss": 0.0313, "step": 28980 }, { "epoch": 1.45, "learning_rate": 1.3839671015890746e-05, "loss": 0.0537, "step": 28990 }, { "epoch": 1.45, "learning_rate": 1.3826978727725034e-05, "loss": 0.0414, "step": 29000 }, { "epoch": 1.45, "learning_rate": 1.3814286439559324e-05, "loss": 0.0689, "step": 29010 }, { "epoch": 1.45, "learning_rate": 1.3801594151393613e-05, "loss": 0.0526, "step": 29020 }, { "epoch": 1.46, "learning_rate": 1.3788901863227904e-05, "loss": 0.0384, "step": 29030 }, { "epoch": 1.46, "learning_rate": 1.3776209575062193e-05, "loss": 0.0517, "step": 29040 }, { "epoch": 1.46, "learning_rate": 1.3763517286896483e-05, "loss": 0.0951, "step": 29050 }, { "epoch": 1.46, "learning_rate": 1.3750824998730771e-05, "loss": 0.0616, "step": 29060 }, { "epoch": 1.46, "learning_rate": 1.3738132710565061e-05, "loss": 0.1497, "step": 29070 }, { "epoch": 1.46, "learning_rate": 1.3725440422399353e-05, "loss": 0.064, "step": 29080 }, { "epoch": 1.46, "learning_rate": 1.371274813423364e-05, "loss": 0.0802, "step": 29090 }, { "epoch": 1.46, "learning_rate": 1.370005584606793e-05, "loss": 0.0281, "step": 29100 }, { "epoch": 1.46, "learning_rate": 1.3687363557902219e-05, "loss": 0.0385, "step": 29110 }, { "epoch": 1.46, "learning_rate": 1.3674671269736507e-05, "loss": 0.0799, "step": 29120 }, { "epoch": 1.46, "learning_rate": 1.3661978981570797e-05, "loss": 0.0319, "step": 29130 }, { "epoch": 1.46, "learning_rate": 1.3649286693405089e-05, "loss": 0.0318, "step": 29140 }, { "epoch": 1.46, "learning_rate": 1.3636594405239377e-05, "loss": 0.0693, "step": 29150 }, { "epoch": 1.46, "learning_rate": 1.3623902117073667e-05, "loss": 0.0465, "step": 29160 }, { "epoch": 1.46, "learning_rate": 1.3611209828907956e-05, "loss": 0.0445, "step": 29170 }, { "epoch": 1.46, "learning_rate": 1.3598517540742246e-05, "loss": 0.0486, "step": 29180 }, { "epoch": 1.46, "learning_rate": 1.3585825252576534e-05, "loss": 0.0221, "step": 29190 }, { "epoch": 1.46, "learning_rate": 1.3573132964410826e-05, "loss": 0.032, "step": 29200 }, { "epoch": 1.46, "learning_rate": 1.3560440676245114e-05, "loss": 0.0616, "step": 29210 }, { "epoch": 1.46, "learning_rate": 1.3547748388079404e-05, "loss": 0.0321, "step": 29220 }, { "epoch": 1.47, "learning_rate": 1.3535056099913692e-05, "loss": 0.0743, "step": 29230 }, { "epoch": 1.47, "learning_rate": 1.3522363811747982e-05, "loss": 0.0553, "step": 29240 }, { "epoch": 1.47, "learning_rate": 1.350967152358227e-05, "loss": 0.1142, "step": 29250 }, { "epoch": 1.47, "learning_rate": 1.3496979235416562e-05, "loss": 0.0848, "step": 29260 }, { "epoch": 1.47, "learning_rate": 1.3484286947250852e-05, "loss": 0.0611, "step": 29270 }, { "epoch": 1.47, "learning_rate": 1.347159465908514e-05, "loss": 0.1, "step": 29280 }, { "epoch": 1.47, "learning_rate": 1.3458902370919429e-05, "loss": 0.0697, "step": 29290 }, { "epoch": 1.47, "learning_rate": 1.3446210082753719e-05, "loss": 0.0355, "step": 29300 }, { "epoch": 1.47, "learning_rate": 1.343351779458801e-05, "loss": 0.0334, "step": 29310 }, { "epoch": 1.47, "learning_rate": 1.3420825506422299e-05, "loss": 0.0732, "step": 29320 }, { "epoch": 1.47, "learning_rate": 1.3408133218256589e-05, "loss": 0.044, "step": 29330 }, { "epoch": 1.47, "learning_rate": 1.3395440930090877e-05, "loss": 0.0783, "step": 29340 }, { "epoch": 1.47, "learning_rate": 1.3382748641925167e-05, "loss": 0.0303, "step": 29350 }, { "epoch": 1.47, "learning_rate": 1.3370056353759455e-05, "loss": 0.0476, "step": 29360 }, { "epoch": 1.47, "learning_rate": 1.3357364065593747e-05, "loss": 0.0503, "step": 29370 }, { "epoch": 1.47, "learning_rate": 1.3344671777428035e-05, "loss": 0.0413, "step": 29380 }, { "epoch": 1.47, "learning_rate": 1.3331979489262325e-05, "loss": 0.0653, "step": 29390 }, { "epoch": 1.47, "learning_rate": 1.3319287201096614e-05, "loss": 0.0338, "step": 29400 }, { "epoch": 1.47, "learning_rate": 1.3306594912930904e-05, "loss": 0.0657, "step": 29410 }, { "epoch": 1.47, "learning_rate": 1.3293902624765192e-05, "loss": 0.0285, "step": 29420 }, { "epoch": 1.48, "learning_rate": 1.3281210336599484e-05, "loss": 0.0829, "step": 29430 }, { "epoch": 1.48, "learning_rate": 1.3268518048433774e-05, "loss": 0.0411, "step": 29440 }, { "epoch": 1.48, "learning_rate": 1.3255825760268062e-05, "loss": 0.1142, "step": 29450 }, { "epoch": 1.48, "learning_rate": 1.324313347210235e-05, "loss": 0.1031, "step": 29460 }, { "epoch": 1.48, "learning_rate": 1.323044118393664e-05, "loss": 0.0845, "step": 29470 }, { "epoch": 1.48, "learning_rate": 1.3217748895770928e-05, "loss": 0.047, "step": 29480 }, { "epoch": 1.48, "learning_rate": 1.320505660760522e-05, "loss": 0.0503, "step": 29490 }, { "epoch": 1.48, "learning_rate": 1.319236431943951e-05, "loss": 0.0617, "step": 29500 }, { "epoch": 1.48, "learning_rate": 1.3179672031273798e-05, "loss": 0.0578, "step": 29510 }, { "epoch": 1.48, "learning_rate": 1.3166979743108088e-05, "loss": 0.0567, "step": 29520 }, { "epoch": 1.48, "learning_rate": 1.3154287454942377e-05, "loss": 0.0325, "step": 29530 }, { "epoch": 1.48, "learning_rate": 1.3141595166776668e-05, "loss": 0.0733, "step": 29540 }, { "epoch": 1.48, "learning_rate": 1.3128902878610957e-05, "loss": 0.0773, "step": 29550 }, { "epoch": 1.48, "learning_rate": 1.3116210590445247e-05, "loss": 0.0375, "step": 29560 }, { "epoch": 1.48, "learning_rate": 1.3103518302279535e-05, "loss": 0.0839, "step": 29570 }, { "epoch": 1.48, "learning_rate": 1.3090826014113825e-05, "loss": 0.0648, "step": 29580 }, { "epoch": 1.48, "learning_rate": 1.3078133725948113e-05, "loss": 0.0444, "step": 29590 }, { "epoch": 1.48, "learning_rate": 1.3065441437782405e-05, "loss": 0.0591, "step": 29600 }, { "epoch": 1.48, "learning_rate": 1.3052749149616695e-05, "loss": 0.0325, "step": 29610 }, { "epoch": 1.48, "learning_rate": 1.3040056861450983e-05, "loss": 0.0731, "step": 29620 }, { "epoch": 1.49, "learning_rate": 1.3027364573285271e-05, "loss": 0.0875, "step": 29630 }, { "epoch": 1.49, "learning_rate": 1.3014672285119561e-05, "loss": 0.038, "step": 29640 }, { "epoch": 1.49, "learning_rate": 1.300197999695385e-05, "loss": 0.0689, "step": 29650 }, { "epoch": 1.49, "learning_rate": 1.2989287708788141e-05, "loss": 0.0907, "step": 29660 }, { "epoch": 1.49, "learning_rate": 1.2976595420622431e-05, "loss": 0.0651, "step": 29670 }, { "epoch": 1.49, "learning_rate": 1.296390313245672e-05, "loss": 0.0464, "step": 29680 }, { "epoch": 1.49, "learning_rate": 1.295121084429101e-05, "loss": 0.0198, "step": 29690 }, { "epoch": 1.49, "learning_rate": 1.2938518556125298e-05, "loss": 0.0529, "step": 29700 }, { "epoch": 1.49, "learning_rate": 1.2925826267959586e-05, "loss": 0.0935, "step": 29710 }, { "epoch": 1.49, "learning_rate": 1.2913133979793878e-05, "loss": 0.0678, "step": 29720 }, { "epoch": 1.49, "learning_rate": 1.2900441691628168e-05, "loss": 0.019, "step": 29730 }, { "epoch": 1.49, "learning_rate": 1.2887749403462456e-05, "loss": 0.0439, "step": 29740 }, { "epoch": 1.49, "learning_rate": 1.2875057115296746e-05, "loss": 0.0797, "step": 29750 }, { "epoch": 1.49, "learning_rate": 1.2862364827131035e-05, "loss": 0.0417, "step": 29760 }, { "epoch": 1.49, "learning_rate": 1.2849672538965326e-05, "loss": 0.0366, "step": 29770 }, { "epoch": 1.49, "learning_rate": 1.2836980250799616e-05, "loss": 0.0346, "step": 29780 }, { "epoch": 1.49, "learning_rate": 1.2824287962633905e-05, "loss": 0.0216, "step": 29790 }, { "epoch": 1.49, "learning_rate": 1.2811595674468193e-05, "loss": 0.0463, "step": 29800 }, { "epoch": 1.49, "learning_rate": 1.2798903386302483e-05, "loss": 0.1157, "step": 29810 }, { "epoch": 1.49, "learning_rate": 1.2786211098136771e-05, "loss": 0.0508, "step": 29820 }, { "epoch": 1.5, "learning_rate": 1.2773518809971063e-05, "loss": 0.11, "step": 29830 }, { "epoch": 1.5, "learning_rate": 1.2760826521805353e-05, "loss": 0.0983, "step": 29840 }, { "epoch": 1.5, "learning_rate": 1.2748134233639641e-05, "loss": 0.0584, "step": 29850 }, { "epoch": 1.5, "learning_rate": 1.2735441945473931e-05, "loss": 0.0284, "step": 29860 }, { "epoch": 1.5, "learning_rate": 1.272274965730822e-05, "loss": 0.0467, "step": 29870 }, { "epoch": 1.5, "learning_rate": 1.2710057369142508e-05, "loss": 0.0372, "step": 29880 }, { "epoch": 1.5, "learning_rate": 1.26973650809768e-05, "loss": 0.0609, "step": 29890 }, { "epoch": 1.5, "learning_rate": 1.268467279281109e-05, "loss": 0.0312, "step": 29900 }, { "epoch": 1.5, "learning_rate": 1.2671980504645378e-05, "loss": 0.0405, "step": 29910 }, { "epoch": 1.5, "learning_rate": 1.2659288216479668e-05, "loss": 0.0696, "step": 29920 }, { "epoch": 1.5, "learning_rate": 1.2646595928313956e-05, "loss": 0.073, "step": 29930 }, { "epoch": 1.5, "learning_rate": 1.2633903640148246e-05, "loss": 0.0531, "step": 29940 }, { "epoch": 1.5, "learning_rate": 1.2621211351982538e-05, "loss": 0.0089, "step": 29950 }, { "epoch": 1.5, "learning_rate": 1.2608519063816826e-05, "loss": 0.0403, "step": 29960 }, { "epoch": 1.5, "learning_rate": 1.2595826775651114e-05, "loss": 0.0354, "step": 29970 }, { "epoch": 1.5, "learning_rate": 1.2583134487485404e-05, "loss": 0.053, "step": 29980 }, { "epoch": 1.5, "learning_rate": 1.2570442199319693e-05, "loss": 0.0523, "step": 29990 }, { "epoch": 1.5, "learning_rate": 1.2557749911153984e-05, "loss": 0.0496, "step": 30000 }, { "epoch": 1.5, "learning_rate": 1.2545057622988274e-05, "loss": 0.0518, "step": 30010 }, { "epoch": 1.5, "learning_rate": 1.2532365334822562e-05, "loss": 0.0598, "step": 30020 }, { "epoch": 1.51, "learning_rate": 1.2519673046656852e-05, "loss": 0.0592, "step": 30030 }, { "epoch": 1.51, "learning_rate": 1.250698075849114e-05, "loss": 0.0618, "step": 30040 }, { "epoch": 1.51, "learning_rate": 1.249428847032543e-05, "loss": 0.0588, "step": 30050 }, { "epoch": 1.51, "learning_rate": 1.248159618215972e-05, "loss": 0.0811, "step": 30060 }, { "epoch": 1.51, "learning_rate": 1.2468903893994009e-05, "loss": 0.0564, "step": 30070 }, { "epoch": 1.51, "learning_rate": 1.2456211605828299e-05, "loss": 0.0343, "step": 30080 }, { "epoch": 1.51, "learning_rate": 1.2443519317662589e-05, "loss": 0.0484, "step": 30090 }, { "epoch": 1.51, "learning_rate": 1.2430827029496877e-05, "loss": 0.0439, "step": 30100 }, { "epoch": 1.51, "learning_rate": 1.2418134741331167e-05, "loss": 0.0486, "step": 30110 }, { "epoch": 1.51, "learning_rate": 1.2405442453165457e-05, "loss": 0.0309, "step": 30120 }, { "epoch": 1.51, "learning_rate": 1.2392750164999747e-05, "loss": 0.0374, "step": 30130 }, { "epoch": 1.51, "learning_rate": 1.2380057876834036e-05, "loss": 0.0503, "step": 30140 }, { "epoch": 1.51, "learning_rate": 1.2367365588668326e-05, "loss": 0.0394, "step": 30150 }, { "epoch": 1.51, "learning_rate": 1.2354673300502616e-05, "loss": 0.0409, "step": 30160 }, { "epoch": 1.51, "learning_rate": 1.2341981012336906e-05, "loss": 0.1038, "step": 30170 }, { "epoch": 1.51, "learning_rate": 1.2329288724171194e-05, "loss": 0.0418, "step": 30180 }, { "epoch": 1.51, "learning_rate": 1.2316596436005484e-05, "loss": 0.0455, "step": 30190 }, { "epoch": 1.51, "learning_rate": 1.2303904147839774e-05, "loss": 0.0579, "step": 30200 }, { "epoch": 1.51, "learning_rate": 1.2291211859674062e-05, "loss": 0.0362, "step": 30210 }, { "epoch": 1.52, "learning_rate": 1.2278519571508352e-05, "loss": 0.044, "step": 30220 }, { "epoch": 1.52, "learning_rate": 1.2265827283342642e-05, "loss": 0.0564, "step": 30230 }, { "epoch": 1.52, "learning_rate": 1.225313499517693e-05, "loss": 0.0509, "step": 30240 }, { "epoch": 1.52, "learning_rate": 1.224044270701122e-05, "loss": 0.0425, "step": 30250 }, { "epoch": 1.52, "learning_rate": 1.222775041884551e-05, "loss": 0.0402, "step": 30260 }, { "epoch": 1.52, "learning_rate": 1.2215058130679799e-05, "loss": 0.0468, "step": 30270 }, { "epoch": 1.52, "learning_rate": 1.2202365842514089e-05, "loss": 0.0519, "step": 30280 }, { "epoch": 1.52, "learning_rate": 1.2189673554348379e-05, "loss": 0.0553, "step": 30290 }, { "epoch": 1.52, "learning_rate": 1.2176981266182667e-05, "loss": 0.0479, "step": 30300 }, { "epoch": 1.52, "learning_rate": 1.2164288978016957e-05, "loss": 0.0635, "step": 30310 }, { "epoch": 1.52, "learning_rate": 1.2151596689851247e-05, "loss": 0.0707, "step": 30320 }, { "epoch": 1.52, "learning_rate": 1.2138904401685535e-05, "loss": 0.0319, "step": 30330 }, { "epoch": 1.52, "learning_rate": 1.2126212113519827e-05, "loss": 0.0546, "step": 30340 }, { "epoch": 1.52, "learning_rate": 1.2113519825354115e-05, "loss": 0.0416, "step": 30350 }, { "epoch": 1.52, "learning_rate": 1.2100827537188405e-05, "loss": 0.0435, "step": 30360 }, { "epoch": 1.52, "learning_rate": 1.2088135249022695e-05, "loss": 0.0487, "step": 30370 }, { "epoch": 1.52, "learning_rate": 1.2075442960856983e-05, "loss": 0.0181, "step": 30380 }, { "epoch": 1.52, "learning_rate": 1.2062750672691273e-05, "loss": 0.0514, "step": 30390 }, { "epoch": 1.52, "learning_rate": 1.2050058384525563e-05, "loss": 0.0554, "step": 30400 }, { "epoch": 1.52, "learning_rate": 1.2037366096359852e-05, "loss": 0.0304, "step": 30410 }, { "epoch": 1.53, "learning_rate": 1.2024673808194142e-05, "loss": 0.0454, "step": 30420 }, { "epoch": 1.53, "learning_rate": 1.2011981520028432e-05, "loss": 0.0495, "step": 30430 }, { "epoch": 1.53, "learning_rate": 1.199928923186272e-05, "loss": 0.0369, "step": 30440 }, { "epoch": 1.53, "learning_rate": 1.198659694369701e-05, "loss": 0.0832, "step": 30450 }, { "epoch": 1.53, "learning_rate": 1.19739046555313e-05, "loss": 0.0516, "step": 30460 }, { "epoch": 1.53, "learning_rate": 1.1961212367365588e-05, "loss": 0.0413, "step": 30470 }, { "epoch": 1.53, "learning_rate": 1.1948520079199878e-05, "loss": 0.0522, "step": 30480 }, { "epoch": 1.53, "learning_rate": 1.1935827791034168e-05, "loss": 0.0411, "step": 30490 }, { "epoch": 1.53, "learning_rate": 1.1923135502868457e-05, "loss": 0.053, "step": 30500 }, { "epoch": 1.53, "learning_rate": 1.1910443214702748e-05, "loss": 0.0533, "step": 30510 }, { "epoch": 1.53, "learning_rate": 1.1897750926537037e-05, "loss": 0.0374, "step": 30520 }, { "epoch": 1.53, "learning_rate": 1.1885058638371325e-05, "loss": 0.038, "step": 30530 }, { "epoch": 1.53, "learning_rate": 1.1872366350205617e-05, "loss": 0.0803, "step": 30540 }, { "epoch": 1.53, "learning_rate": 1.1859674062039905e-05, "loss": 0.1071, "step": 30550 }, { "epoch": 1.53, "learning_rate": 1.1846981773874193e-05, "loss": 0.0878, "step": 30560 }, { "epoch": 1.53, "learning_rate": 1.1834289485708485e-05, "loss": 0.1034, "step": 30570 }, { "epoch": 1.53, "learning_rate": 1.1821597197542773e-05, "loss": 0.0328, "step": 30580 }, { "epoch": 1.53, "learning_rate": 1.1808904909377063e-05, "loss": 0.0328, "step": 30590 }, { "epoch": 1.53, "learning_rate": 1.1796212621211353e-05, "loss": 0.0615, "step": 30600 }, { "epoch": 1.53, "learning_rate": 1.1783520333045641e-05, "loss": 0.057, "step": 30610 }, { "epoch": 1.54, "learning_rate": 1.1770828044879931e-05, "loss": 0.0585, "step": 30620 }, { "epoch": 1.54, "learning_rate": 1.1758135756714221e-05, "loss": 0.0416, "step": 30630 }, { "epoch": 1.54, "learning_rate": 1.174544346854851e-05, "loss": 0.0497, "step": 30640 }, { "epoch": 1.54, "learning_rate": 1.1732751180382801e-05, "loss": 0.0552, "step": 30650 }, { "epoch": 1.54, "learning_rate": 1.172005889221709e-05, "loss": 0.0431, "step": 30660 }, { "epoch": 1.54, "learning_rate": 1.1707366604051378e-05, "loss": 0.0662, "step": 30670 }, { "epoch": 1.54, "learning_rate": 1.169467431588567e-05, "loss": 0.0406, "step": 30680 }, { "epoch": 1.54, "learning_rate": 1.1681982027719958e-05, "loss": 0.0763, "step": 30690 }, { "epoch": 1.54, "learning_rate": 1.1669289739554246e-05, "loss": 0.0317, "step": 30700 }, { "epoch": 1.54, "learning_rate": 1.1656597451388538e-05, "loss": 0.1215, "step": 30710 }, { "epoch": 1.54, "learning_rate": 1.1643905163222826e-05, "loss": 0.049, "step": 30720 }, { "epoch": 1.54, "learning_rate": 1.1631212875057115e-05, "loss": 0.0569, "step": 30730 }, { "epoch": 1.54, "learning_rate": 1.1618520586891406e-05, "loss": 0.0607, "step": 30740 }, { "epoch": 1.54, "learning_rate": 1.1605828298725694e-05, "loss": 0.0483, "step": 30750 }, { "epoch": 1.54, "learning_rate": 1.1593136010559984e-05, "loss": 0.0869, "step": 30760 }, { "epoch": 1.54, "learning_rate": 1.1580443722394274e-05, "loss": 0.0416, "step": 30770 }, { "epoch": 1.54, "learning_rate": 1.1567751434228563e-05, "loss": 0.0395, "step": 30780 }, { "epoch": 1.54, "learning_rate": 1.1555059146062853e-05, "loss": 0.0656, "step": 30790 }, { "epoch": 1.54, "learning_rate": 1.1542366857897143e-05, "loss": 0.0208, "step": 30800 }, { "epoch": 1.54, "learning_rate": 1.1529674569731431e-05, "loss": 0.0567, "step": 30810 }, { "epoch": 1.55, "learning_rate": 1.1516982281565723e-05, "loss": 0.0637, "step": 30820 }, { "epoch": 1.55, "learning_rate": 1.1504289993400011e-05, "loss": 0.0554, "step": 30830 }, { "epoch": 1.55, "learning_rate": 1.14915977052343e-05, "loss": 0.0935, "step": 30840 }, { "epoch": 1.55, "learning_rate": 1.1478905417068591e-05, "loss": 0.0523, "step": 30850 }, { "epoch": 1.55, "learning_rate": 1.146621312890288e-05, "loss": 0.0282, "step": 30860 }, { "epoch": 1.55, "learning_rate": 1.1453520840737168e-05, "loss": 0.0434, "step": 30870 }, { "epoch": 1.55, "learning_rate": 1.144082855257146e-05, "loss": 0.1221, "step": 30880 }, { "epoch": 1.55, "learning_rate": 1.1428136264405748e-05, "loss": 0.0458, "step": 30890 }, { "epoch": 1.55, "learning_rate": 1.1415443976240036e-05, "loss": 0.0242, "step": 30900 }, { "epoch": 1.55, "learning_rate": 1.1402751688074328e-05, "loss": 0.0606, "step": 30910 }, { "epoch": 1.55, "learning_rate": 1.1390059399908616e-05, "loss": 0.0392, "step": 30920 }, { "epoch": 1.55, "learning_rate": 1.1377367111742906e-05, "loss": 0.0628, "step": 30930 }, { "epoch": 1.55, "learning_rate": 1.1364674823577196e-05, "loss": 0.0847, "step": 30940 }, { "epoch": 1.55, "learning_rate": 1.1351982535411484e-05, "loss": 0.0344, "step": 30950 }, { "epoch": 1.55, "learning_rate": 1.1339290247245774e-05, "loss": 0.0779, "step": 30960 }, { "epoch": 1.55, "learning_rate": 1.1326597959080064e-05, "loss": 0.0342, "step": 30970 }, { "epoch": 1.55, "learning_rate": 1.1313905670914352e-05, "loss": 0.0823, "step": 30980 }, { "epoch": 1.55, "learning_rate": 1.1301213382748642e-05, "loss": 0.036, "step": 30990 }, { "epoch": 1.55, "learning_rate": 1.1288521094582932e-05, "loss": 0.0466, "step": 31000 }, { "epoch": 1.55, "learning_rate": 1.127582880641722e-05, "loss": 0.0272, "step": 31010 }, { "epoch": 1.56, "learning_rate": 1.126313651825151e-05, "loss": 0.045, "step": 31020 }, { "epoch": 1.56, "learning_rate": 1.12504442300858e-05, "loss": 0.0564, "step": 31030 }, { "epoch": 1.56, "learning_rate": 1.1237751941920089e-05, "loss": 0.0769, "step": 31040 }, { "epoch": 1.56, "learning_rate": 1.122505965375438e-05, "loss": 0.0464, "step": 31050 }, { "epoch": 1.56, "learning_rate": 1.1212367365588669e-05, "loss": 0.066, "step": 31060 }, { "epoch": 1.56, "learning_rate": 1.1199675077422957e-05, "loss": 0.0588, "step": 31070 }, { "epoch": 1.56, "learning_rate": 1.1186982789257249e-05, "loss": 0.0597, "step": 31080 }, { "epoch": 1.56, "learning_rate": 1.1174290501091537e-05, "loss": 0.0531, "step": 31090 }, { "epoch": 1.56, "learning_rate": 1.1161598212925827e-05, "loss": 0.0326, "step": 31100 }, { "epoch": 1.56, "learning_rate": 1.1148905924760117e-05, "loss": 0.0392, "step": 31110 }, { "epoch": 1.56, "learning_rate": 1.1136213636594405e-05, "loss": 0.0984, "step": 31120 }, { "epoch": 1.56, "learning_rate": 1.1123521348428695e-05, "loss": 0.0518, "step": 31130 }, { "epoch": 1.56, "learning_rate": 1.1110829060262985e-05, "loss": 0.0771, "step": 31140 }, { "epoch": 1.56, "learning_rate": 1.1098136772097274e-05, "loss": 0.0572, "step": 31150 }, { "epoch": 1.56, "learning_rate": 1.1085444483931564e-05, "loss": 0.0403, "step": 31160 }, { "epoch": 1.56, "learning_rate": 1.1072752195765854e-05, "loss": 0.0567, "step": 31170 }, { "epoch": 1.56, "learning_rate": 1.1060059907600142e-05, "loss": 0.0599, "step": 31180 }, { "epoch": 1.56, "learning_rate": 1.1047367619434432e-05, "loss": 0.051, "step": 31190 }, { "epoch": 1.56, "learning_rate": 1.1034675331268722e-05, "loss": 0.0484, "step": 31200 }, { "epoch": 1.56, "learning_rate": 1.102198304310301e-05, "loss": 0.0317, "step": 31210 }, { "epoch": 1.57, "learning_rate": 1.10092907549373e-05, "loss": 0.069, "step": 31220 }, { "epoch": 1.57, "learning_rate": 1.099659846677159e-05, "loss": 0.0423, "step": 31230 }, { "epoch": 1.57, "learning_rate": 1.098390617860588e-05, "loss": 0.0438, "step": 31240 }, { "epoch": 1.57, "learning_rate": 1.0971213890440169e-05, "loss": 0.0642, "step": 31250 }, { "epoch": 1.57, "learning_rate": 1.0958521602274459e-05, "loss": 0.0996, "step": 31260 }, { "epoch": 1.57, "learning_rate": 1.0945829314108749e-05, "loss": 0.0456, "step": 31270 }, { "epoch": 1.57, "learning_rate": 1.0933137025943037e-05, "loss": 0.0498, "step": 31280 }, { "epoch": 1.57, "learning_rate": 1.0920444737777327e-05, "loss": 0.0394, "step": 31290 }, { "epoch": 1.57, "learning_rate": 1.0907752449611617e-05, "loss": 0.0853, "step": 31300 }, { "epoch": 1.57, "learning_rate": 1.0895060161445907e-05, "loss": 0.035, "step": 31310 }, { "epoch": 1.57, "learning_rate": 1.0882367873280195e-05, "loss": 0.0369, "step": 31320 }, { "epoch": 1.57, "learning_rate": 1.0869675585114485e-05, "loss": 0.0422, "step": 31330 }, { "epoch": 1.57, "learning_rate": 1.0856983296948775e-05, "loss": 0.0652, "step": 31340 }, { "epoch": 1.57, "learning_rate": 1.0844291008783063e-05, "loss": 0.0557, "step": 31350 }, { "epoch": 1.57, "learning_rate": 1.0831598720617353e-05, "loss": 0.0601, "step": 31360 }, { "epoch": 1.57, "learning_rate": 1.0818906432451643e-05, "loss": 0.0708, "step": 31370 }, { "epoch": 1.57, "learning_rate": 1.0806214144285932e-05, "loss": 0.0666, "step": 31380 }, { "epoch": 1.57, "learning_rate": 1.0793521856120222e-05, "loss": 0.0408, "step": 31390 }, { "epoch": 1.57, "learning_rate": 1.0780829567954512e-05, "loss": 0.0416, "step": 31400 }, { "epoch": 1.57, "learning_rate": 1.0768137279788802e-05, "loss": 0.0432, "step": 31410 }, { "epoch": 1.58, "learning_rate": 1.075544499162309e-05, "loss": 0.0649, "step": 31420 }, { "epoch": 1.58, "learning_rate": 1.074275270345738e-05, "loss": 0.0598, "step": 31430 }, { "epoch": 1.58, "learning_rate": 1.073006041529167e-05, "loss": 0.0491, "step": 31440 }, { "epoch": 1.58, "learning_rate": 1.0717368127125958e-05, "loss": 0.0827, "step": 31450 }, { "epoch": 1.58, "learning_rate": 1.0704675838960248e-05, "loss": 0.0437, "step": 31460 }, { "epoch": 1.58, "learning_rate": 1.0691983550794538e-05, "loss": 0.0467, "step": 31470 }, { "epoch": 1.58, "learning_rate": 1.0679291262628826e-05, "loss": 0.023, "step": 31480 }, { "epoch": 1.58, "learning_rate": 1.0666598974463116e-05, "loss": 0.0476, "step": 31490 }, { "epoch": 1.58, "learning_rate": 1.0653906686297406e-05, "loss": 0.1045, "step": 31500 }, { "epoch": 1.58, "learning_rate": 1.0641214398131695e-05, "loss": 0.0675, "step": 31510 }, { "epoch": 1.58, "learning_rate": 1.0628522109965985e-05, "loss": 0.0526, "step": 31520 }, { "epoch": 1.58, "learning_rate": 1.0615829821800275e-05, "loss": 0.0656, "step": 31530 }, { "epoch": 1.58, "learning_rate": 1.0603137533634565e-05, "loss": 0.0236, "step": 31540 }, { "epoch": 1.58, "learning_rate": 1.0590445245468853e-05, "loss": 0.0503, "step": 31550 }, { "epoch": 1.58, "learning_rate": 1.0577752957303143e-05, "loss": 0.0339, "step": 31560 }, { "epoch": 1.58, "learning_rate": 1.0565060669137433e-05, "loss": 0.0422, "step": 31570 }, { "epoch": 1.58, "learning_rate": 1.0552368380971723e-05, "loss": 0.0675, "step": 31580 }, { "epoch": 1.58, "learning_rate": 1.0539676092806011e-05, "loss": 0.0215, "step": 31590 }, { "epoch": 1.58, "learning_rate": 1.0526983804640301e-05, "loss": 0.0264, "step": 31600 }, { "epoch": 1.58, "learning_rate": 1.0514291516474591e-05, "loss": 0.0555, "step": 31610 }, { "epoch": 1.59, "learning_rate": 1.050159922830888e-05, "loss": 0.0318, "step": 31620 }, { "epoch": 1.59, "learning_rate": 1.048890694014317e-05, "loss": 0.0347, "step": 31630 }, { "epoch": 1.59, "learning_rate": 1.047621465197746e-05, "loss": 0.0414, "step": 31640 }, { "epoch": 1.59, "learning_rate": 1.0463522363811748e-05, "loss": 0.0463, "step": 31650 }, { "epoch": 1.59, "learning_rate": 1.0450830075646038e-05, "loss": 0.0549, "step": 31660 }, { "epoch": 1.59, "learning_rate": 1.0438137787480328e-05, "loss": 0.0803, "step": 31670 }, { "epoch": 1.59, "learning_rate": 1.0425445499314616e-05, "loss": 0.0401, "step": 31680 }, { "epoch": 1.59, "learning_rate": 1.0412753211148906e-05, "loss": 0.0289, "step": 31690 }, { "epoch": 1.59, "learning_rate": 1.0400060922983196e-05, "loss": 0.0289, "step": 31700 }, { "epoch": 1.59, "learning_rate": 1.0387368634817484e-05, "loss": 0.0348, "step": 31710 }, { "epoch": 1.59, "learning_rate": 1.0374676346651774e-05, "loss": 0.037, "step": 31720 }, { "epoch": 1.59, "learning_rate": 1.0361984058486064e-05, "loss": 0.06, "step": 31730 }, { "epoch": 1.59, "learning_rate": 1.0349291770320353e-05, "loss": 0.0594, "step": 31740 }, { "epoch": 1.59, "learning_rate": 1.0336599482154644e-05, "loss": 0.0587, "step": 31750 }, { "epoch": 1.59, "learning_rate": 1.0323907193988933e-05, "loss": 0.0313, "step": 31760 }, { "epoch": 1.59, "learning_rate": 1.0311214905823223e-05, "loss": 0.0527, "step": 31770 }, { "epoch": 1.59, "learning_rate": 1.0298522617657513e-05, "loss": 0.0287, "step": 31780 }, { "epoch": 1.59, "learning_rate": 1.0285830329491801e-05, "loss": 0.086, "step": 31790 }, { "epoch": 1.59, "learning_rate": 1.0273138041326091e-05, "loss": 0.0364, "step": 31800 }, { "epoch": 1.59, "learning_rate": 1.0260445753160381e-05, "loss": 0.0974, "step": 31810 }, { "epoch": 1.6, "learning_rate": 1.024775346499467e-05, "loss": 0.0674, "step": 31820 }, { "epoch": 1.6, "learning_rate": 1.023506117682896e-05, "loss": 0.0671, "step": 31830 }, { "epoch": 1.6, "learning_rate": 1.022236888866325e-05, "loss": 0.0111, "step": 31840 }, { "epoch": 1.6, "learning_rate": 1.0209676600497537e-05, "loss": 0.0797, "step": 31850 }, { "epoch": 1.6, "learning_rate": 1.0196984312331827e-05, "loss": 0.0538, "step": 31860 }, { "epoch": 1.6, "learning_rate": 1.0184292024166117e-05, "loss": 0.03, "step": 31870 }, { "epoch": 1.6, "learning_rate": 1.0171599736000406e-05, "loss": 0.0632, "step": 31880 }, { "epoch": 1.6, "learning_rate": 1.0158907447834696e-05, "loss": 0.0724, "step": 31890 }, { "epoch": 1.6, "learning_rate": 1.0146215159668986e-05, "loss": 0.0777, "step": 31900 }, { "epoch": 1.6, "learning_rate": 1.0133522871503274e-05, "loss": 0.0797, "step": 31910 }, { "epoch": 1.6, "learning_rate": 1.0120830583337566e-05, "loss": 0.055, "step": 31920 }, { "epoch": 1.6, "learning_rate": 1.0108138295171854e-05, "loss": 0.0194, "step": 31930 }, { "epoch": 1.6, "learning_rate": 1.0095446007006142e-05, "loss": 0.0931, "step": 31940 }, { "epoch": 1.6, "learning_rate": 1.0082753718840434e-05, "loss": 0.0106, "step": 31950 }, { "epoch": 1.6, "learning_rate": 1.0070061430674722e-05, "loss": 0.0634, "step": 31960 }, { "epoch": 1.6, "learning_rate": 1.005736914250901e-05, "loss": 0.0348, "step": 31970 }, { "epoch": 1.6, "learning_rate": 1.0044676854343302e-05, "loss": 0.0758, "step": 31980 }, { "epoch": 1.6, "learning_rate": 1.003198456617759e-05, "loss": 0.0409, "step": 31990 }, { "epoch": 1.6, "learning_rate": 1.001929227801188e-05, "loss": 0.0444, "step": 32000 }, { "epoch": 1.6, "learning_rate": 1.000659998984617e-05, "loss": 0.0415, "step": 32010 }, { "epoch": 1.61, "learning_rate": 9.993907701680459e-06, "loss": 0.0602, "step": 32020 }, { "epoch": 1.61, "learning_rate": 9.981215413514749e-06, "loss": 0.0605, "step": 32030 }, { "epoch": 1.61, "learning_rate": 9.968523125349039e-06, "loss": 0.0464, "step": 32040 }, { "epoch": 1.61, "learning_rate": 9.955830837183327e-06, "loss": 0.0283, "step": 32050 }, { "epoch": 1.61, "learning_rate": 9.943138549017617e-06, "loss": 0.0495, "step": 32060 }, { "epoch": 1.61, "learning_rate": 9.930446260851907e-06, "loss": 0.066, "step": 32070 }, { "epoch": 1.61, "learning_rate": 9.917753972686195e-06, "loss": 0.0564, "step": 32080 }, { "epoch": 1.61, "learning_rate": 9.905061684520487e-06, "loss": 0.0646, "step": 32090 }, { "epoch": 1.61, "learning_rate": 9.892369396354775e-06, "loss": 0.0418, "step": 32100 }, { "epoch": 1.61, "learning_rate": 9.879677108189064e-06, "loss": 0.0349, "step": 32110 }, { "epoch": 1.61, "learning_rate": 9.866984820023355e-06, "loss": 0.0806, "step": 32120 }, { "epoch": 1.61, "learning_rate": 9.854292531857644e-06, "loss": 0.0466, "step": 32130 }, { "epoch": 1.61, "learning_rate": 9.841600243691932e-06, "loss": 0.0489, "step": 32140 }, { "epoch": 1.61, "learning_rate": 9.828907955526224e-06, "loss": 0.0662, "step": 32150 }, { "epoch": 1.61, "learning_rate": 9.816215667360512e-06, "loss": 0.0515, "step": 32160 }, { "epoch": 1.61, "learning_rate": 9.803523379194802e-06, "loss": 0.0972, "step": 32170 }, { "epoch": 1.61, "learning_rate": 9.790831091029092e-06, "loss": 0.0332, "step": 32180 }, { "epoch": 1.61, "learning_rate": 9.77813880286338e-06, "loss": 0.0583, "step": 32190 }, { "epoch": 1.61, "learning_rate": 9.76544651469767e-06, "loss": 0.0655, "step": 32200 }, { "epoch": 1.61, "learning_rate": 9.75275422653196e-06, "loss": 0.014, "step": 32210 }, { "epoch": 1.62, "learning_rate": 9.740061938366248e-06, "loss": 0.048, "step": 32220 }, { "epoch": 1.62, "learning_rate": 9.727369650200538e-06, "loss": 0.0414, "step": 32230 }, { "epoch": 1.62, "learning_rate": 9.714677362034828e-06, "loss": 0.0712, "step": 32240 }, { "epoch": 1.62, "learning_rate": 9.701985073869117e-06, "loss": 0.1092, "step": 32250 }, { "epoch": 1.62, "learning_rate": 9.689292785703408e-06, "loss": 0.0395, "step": 32260 }, { "epoch": 1.62, "learning_rate": 9.676600497537697e-06, "loss": 0.0487, "step": 32270 }, { "epoch": 1.62, "learning_rate": 9.663908209371985e-06, "loss": 0.0435, "step": 32280 }, { "epoch": 1.62, "learning_rate": 9.651215921206277e-06, "loss": 0.0262, "step": 32290 }, { "epoch": 1.62, "learning_rate": 9.638523633040565e-06, "loss": 0.073, "step": 32300 }, { "epoch": 1.62, "learning_rate": 9.625831344874853e-06, "loss": 0.0312, "step": 32310 }, { "epoch": 1.62, "learning_rate": 9.613139056709145e-06, "loss": 0.0349, "step": 32320 }, { "epoch": 1.62, "learning_rate": 9.600446768543433e-06, "loss": 0.0329, "step": 32330 }, { "epoch": 1.62, "learning_rate": 9.587754480377723e-06, "loss": 0.0265, "step": 32340 }, { "epoch": 1.62, "learning_rate": 9.575062192212013e-06, "loss": 0.0439, "step": 32350 }, { "epoch": 1.62, "learning_rate": 9.562369904046302e-06, "loss": 0.0483, "step": 32360 }, { "epoch": 1.62, "learning_rate": 9.549677615880592e-06, "loss": 0.094, "step": 32370 }, { "epoch": 1.62, "learning_rate": 9.536985327714882e-06, "loss": 0.0169, "step": 32380 }, { "epoch": 1.62, "learning_rate": 9.52429303954917e-06, "loss": 0.0354, "step": 32390 }, { "epoch": 1.62, "learning_rate": 9.51160075138346e-06, "loss": 0.0345, "step": 32400 }, { "epoch": 1.62, "learning_rate": 9.49890846321775e-06, "loss": 0.05, "step": 32410 }, { "epoch": 1.63, "learning_rate": 9.486216175052038e-06, "loss": 0.0758, "step": 32420 }, { "epoch": 1.63, "learning_rate": 9.473523886886328e-06, "loss": 0.0322, "step": 32430 }, { "epoch": 1.63, "learning_rate": 9.460831598720618e-06, "loss": 0.0214, "step": 32440 }, { "epoch": 1.63, "learning_rate": 9.448139310554906e-06, "loss": 0.0554, "step": 32450 }, { "epoch": 1.63, "learning_rate": 9.435447022389196e-06, "loss": 0.0844, "step": 32460 }, { "epoch": 1.63, "learning_rate": 9.422754734223486e-06, "loss": 0.0444, "step": 32470 }, { "epoch": 1.63, "learning_rate": 9.410062446057775e-06, "loss": 0.0264, "step": 32480 }, { "epoch": 1.63, "learning_rate": 9.397370157892066e-06, "loss": 0.0339, "step": 32490 }, { "epoch": 1.63, "learning_rate": 9.384677869726355e-06, "loss": 0.1181, "step": 32500 }, { "epoch": 1.63, "learning_rate": 9.371985581560645e-06, "loss": 0.0608, "step": 32510 }, { "epoch": 1.63, "learning_rate": 9.359293293394935e-06, "loss": 0.0467, "step": 32520 }, { "epoch": 1.63, "learning_rate": 9.346601005229223e-06, "loss": 0.0639, "step": 32530 }, { "epoch": 1.63, "learning_rate": 9.333908717063513e-06, "loss": 0.0507, "step": 32540 }, { "epoch": 1.63, "learning_rate": 9.321216428897803e-06, "loss": 0.066, "step": 32550 }, { "epoch": 1.63, "learning_rate": 9.308524140732091e-06, "loss": 0.0536, "step": 32560 }, { "epoch": 1.63, "learning_rate": 9.295831852566381e-06, "loss": 0.0556, "step": 32570 }, { "epoch": 1.63, "learning_rate": 9.283139564400671e-06, "loss": 0.0281, "step": 32580 }, { "epoch": 1.63, "learning_rate": 9.27044727623496e-06, "loss": 0.0574, "step": 32590 }, { "epoch": 1.63, "learning_rate": 9.25775498806925e-06, "loss": 0.0489, "step": 32600 }, { "epoch": 1.63, "learning_rate": 9.24506269990354e-06, "loss": 0.046, "step": 32610 }, { "epoch": 1.64, "learning_rate": 9.232370411737828e-06, "loss": 0.106, "step": 32620 }, { "epoch": 1.64, "learning_rate": 9.219678123572118e-06, "loss": 0.0237, "step": 32630 }, { "epoch": 1.64, "learning_rate": 9.206985835406408e-06, "loss": 0.0194, "step": 32640 }, { "epoch": 1.64, "learning_rate": 9.194293547240696e-06, "loss": 0.0382, "step": 32650 }, { "epoch": 1.64, "learning_rate": 9.181601259074986e-06, "loss": 0.0385, "step": 32660 }, { "epoch": 1.64, "learning_rate": 9.168908970909276e-06, "loss": 0.0245, "step": 32670 }, { "epoch": 1.64, "learning_rate": 9.156216682743566e-06, "loss": 0.042, "step": 32680 }, { "epoch": 1.64, "learning_rate": 9.143524394577854e-06, "loss": 0.0571, "step": 32690 }, { "epoch": 1.64, "learning_rate": 9.130832106412144e-06, "loss": 0.0372, "step": 32700 }, { "epoch": 1.64, "learning_rate": 9.118139818246434e-06, "loss": 0.0416, "step": 32710 }, { "epoch": 1.64, "learning_rate": 9.105447530080724e-06, "loss": 0.0614, "step": 32720 }, { "epoch": 1.64, "learning_rate": 9.092755241915013e-06, "loss": 0.091, "step": 32730 }, { "epoch": 1.64, "learning_rate": 9.080062953749303e-06, "loss": 0.0409, "step": 32740 }, { "epoch": 1.64, "learning_rate": 9.067370665583593e-06, "loss": 0.0352, "step": 32750 }, { "epoch": 1.64, "learning_rate": 9.054678377417881e-06, "loss": 0.0909, "step": 32760 }, { "epoch": 1.64, "learning_rate": 9.041986089252171e-06, "loss": 0.0338, "step": 32770 }, { "epoch": 1.64, "learning_rate": 9.02929380108646e-06, "loss": 0.0311, "step": 32780 }, { "epoch": 1.64, "learning_rate": 9.016601512920749e-06, "loss": 0.0895, "step": 32790 }, { "epoch": 1.64, "learning_rate": 9.003909224755039e-06, "loss": 0.0297, "step": 32800 }, { "epoch": 1.64, "learning_rate": 8.991216936589329e-06, "loss": 0.0766, "step": 32810 }, { "epoch": 1.65, "learning_rate": 8.978524648423617e-06, "loss": 0.0723, "step": 32820 }, { "epoch": 1.65, "learning_rate": 8.965832360257907e-06, "loss": 0.0357, "step": 32830 }, { "epoch": 1.65, "learning_rate": 8.953140072092197e-06, "loss": 0.0367, "step": 32840 }, { "epoch": 1.65, "learning_rate": 8.940447783926487e-06, "loss": 0.0412, "step": 32850 }, { "epoch": 1.65, "learning_rate": 8.927755495760776e-06, "loss": 0.0293, "step": 32860 }, { "epoch": 1.65, "learning_rate": 8.915063207595066e-06, "loss": 0.0558, "step": 32870 }, { "epoch": 1.65, "learning_rate": 8.902370919429356e-06, "loss": 0.0171, "step": 32880 }, { "epoch": 1.65, "learning_rate": 8.889678631263644e-06, "loss": 0.031, "step": 32890 }, { "epoch": 1.65, "learning_rate": 8.876986343097934e-06, "loss": 0.0498, "step": 32900 }, { "epoch": 1.65, "learning_rate": 8.864294054932224e-06, "loss": 0.0447, "step": 32910 }, { "epoch": 1.65, "learning_rate": 8.851601766766512e-06, "loss": 0.0411, "step": 32920 }, { "epoch": 1.65, "learning_rate": 8.838909478600802e-06, "loss": 0.0842, "step": 32930 }, { "epoch": 1.65, "learning_rate": 8.826217190435092e-06, "loss": 0.0398, "step": 32940 }, { "epoch": 1.65, "learning_rate": 8.813524902269382e-06, "loss": 0.0477, "step": 32950 }, { "epoch": 1.65, "learning_rate": 8.80083261410367e-06, "loss": 0.0556, "step": 32960 }, { "epoch": 1.65, "learning_rate": 8.78814032593796e-06, "loss": 0.0441, "step": 32970 }, { "epoch": 1.65, "learning_rate": 8.77544803777225e-06, "loss": 0.0669, "step": 32980 }, { "epoch": 1.65, "learning_rate": 8.762755749606539e-06, "loss": 0.0822, "step": 32990 }, { "epoch": 1.65, "learning_rate": 8.750063461440829e-06, "loss": 0.033, "step": 33000 }, { "epoch": 1.65, "learning_rate": 8.737371173275119e-06, "loss": 0.0357, "step": 33010 }, { "epoch": 1.66, "learning_rate": 8.724678885109409e-06, "loss": 0.0598, "step": 33020 }, { "epoch": 1.66, "learning_rate": 8.711986596943697e-06, "loss": 0.0636, "step": 33030 }, { "epoch": 1.66, "learning_rate": 8.699294308777987e-06, "loss": 0.0532, "step": 33040 }, { "epoch": 1.66, "learning_rate": 8.686602020612277e-06, "loss": 0.0915, "step": 33050 }, { "epoch": 1.66, "learning_rate": 8.673909732446565e-06, "loss": 0.0988, "step": 33060 }, { "epoch": 1.66, "learning_rate": 8.661217444280855e-06, "loss": 0.0501, "step": 33070 }, { "epoch": 1.66, "learning_rate": 8.648525156115145e-06, "loss": 0.0151, "step": 33080 }, { "epoch": 1.66, "learning_rate": 8.635832867949434e-06, "loss": 0.0412, "step": 33090 }, { "epoch": 1.66, "learning_rate": 8.623140579783724e-06, "loss": 0.0254, "step": 33100 }, { "epoch": 1.66, "learning_rate": 8.610448291618014e-06, "loss": 0.0513, "step": 33110 }, { "epoch": 1.66, "learning_rate": 8.597756003452302e-06, "loss": 0.0506, "step": 33120 }, { "epoch": 1.66, "learning_rate": 8.585063715286592e-06, "loss": 0.0397, "step": 33130 }, { "epoch": 1.66, "learning_rate": 8.572371427120882e-06, "loss": 0.0752, "step": 33140 }, { "epoch": 1.66, "learning_rate": 8.55967913895517e-06, "loss": 0.0503, "step": 33150 }, { "epoch": 1.66, "learning_rate": 8.546986850789462e-06, "loss": 0.027, "step": 33160 }, { "epoch": 1.66, "learning_rate": 8.53429456262375e-06, "loss": 0.064, "step": 33170 }, { "epoch": 1.66, "learning_rate": 8.52160227445804e-06, "loss": 0.0687, "step": 33180 }, { "epoch": 1.66, "learning_rate": 8.50890998629233e-06, "loss": 0.0779, "step": 33190 }, { "epoch": 1.66, "learning_rate": 8.496217698126618e-06, "loss": 0.057, "step": 33200 }, { "epoch": 1.66, "learning_rate": 8.483525409960908e-06, "loss": 0.0185, "step": 33210 }, { "epoch": 1.67, "learning_rate": 8.470833121795198e-06, "loss": 0.0285, "step": 33220 }, { "epoch": 1.67, "learning_rate": 8.458140833629487e-06, "loss": 0.0364, "step": 33230 }, { "epoch": 1.67, "learning_rate": 8.445448545463777e-06, "loss": 0.0323, "step": 33240 }, { "epoch": 1.67, "learning_rate": 8.432756257298067e-06, "loss": 0.0693, "step": 33250 }, { "epoch": 1.67, "learning_rate": 8.420063969132355e-06, "loss": 0.0661, "step": 33260 }, { "epoch": 1.67, "learning_rate": 8.407371680966645e-06, "loss": 0.0689, "step": 33270 }, { "epoch": 1.67, "learning_rate": 8.394679392800935e-06, "loss": 0.0747, "step": 33280 }, { "epoch": 1.67, "learning_rate": 8.381987104635223e-06, "loss": 0.0498, "step": 33290 }, { "epoch": 1.67, "learning_rate": 8.369294816469513e-06, "loss": 0.0627, "step": 33300 }, { "epoch": 1.67, "learning_rate": 8.356602528303803e-06, "loss": 0.0392, "step": 33310 }, { "epoch": 1.67, "learning_rate": 8.343910240138092e-06, "loss": 0.035, "step": 33320 }, { "epoch": 1.67, "learning_rate": 8.331217951972383e-06, "loss": 0.0391, "step": 33330 }, { "epoch": 1.67, "learning_rate": 8.318525663806671e-06, "loss": 0.0518, "step": 33340 }, { "epoch": 1.67, "learning_rate": 8.30583337564096e-06, "loss": 0.0451, "step": 33350 }, { "epoch": 1.67, "learning_rate": 8.293141087475251e-06, "loss": 0.0702, "step": 33360 }, { "epoch": 1.67, "learning_rate": 8.28044879930954e-06, "loss": 0.0721, "step": 33370 }, { "epoch": 1.67, "learning_rate": 8.267756511143828e-06, "loss": 0.0405, "step": 33380 }, { "epoch": 1.67, "learning_rate": 8.25506422297812e-06, "loss": 0.0422, "step": 33390 }, { "epoch": 1.67, "learning_rate": 8.242371934812408e-06, "loss": 0.032, "step": 33400 }, { "epoch": 1.67, "learning_rate": 8.229679646646698e-06, "loss": 0.0463, "step": 33410 }, { "epoch": 1.68, "learning_rate": 8.216987358480988e-06, "loss": 0.0477, "step": 33420 }, { "epoch": 1.68, "learning_rate": 8.204295070315276e-06, "loss": 0.0589, "step": 33430 }, { "epoch": 1.68, "learning_rate": 8.191602782149566e-06, "loss": 0.0479, "step": 33440 }, { "epoch": 1.68, "learning_rate": 8.178910493983856e-06, "loss": 0.0724, "step": 33450 }, { "epoch": 1.68, "learning_rate": 8.166218205818145e-06, "loss": 0.0947, "step": 33460 }, { "epoch": 1.68, "learning_rate": 8.153525917652435e-06, "loss": 0.0542, "step": 33470 }, { "epoch": 1.68, "learning_rate": 8.140833629486725e-06, "loss": 0.0709, "step": 33480 }, { "epoch": 1.68, "learning_rate": 8.128141341321013e-06, "loss": 0.0429, "step": 33490 }, { "epoch": 1.68, "learning_rate": 8.115449053155305e-06, "loss": 0.0391, "step": 33500 }, { "epoch": 1.68, "learning_rate": 8.102756764989593e-06, "loss": 0.0669, "step": 33510 }, { "epoch": 1.68, "learning_rate": 8.090064476823881e-06, "loss": 0.0271, "step": 33520 }, { "epoch": 1.68, "learning_rate": 8.077372188658173e-06, "loss": 0.0848, "step": 33530 }, { "epoch": 1.68, "learning_rate": 8.064679900492461e-06, "loss": 0.0349, "step": 33540 }, { "epoch": 1.68, "learning_rate": 8.05198761232675e-06, "loss": 0.1014, "step": 33550 }, { "epoch": 1.68, "learning_rate": 8.039295324161041e-06, "loss": 0.0773, "step": 33560 }, { "epoch": 1.68, "learning_rate": 8.02660303599533e-06, "loss": 0.0667, "step": 33570 }, { "epoch": 1.68, "learning_rate": 8.013910747829618e-06, "loss": 0.1177, "step": 33580 }, { "epoch": 1.68, "learning_rate": 8.00121845966391e-06, "loss": 0.0419, "step": 33590 }, { "epoch": 1.68, "learning_rate": 7.988526171498198e-06, "loss": 0.0763, "step": 33600 }, { "epoch": 1.68, "learning_rate": 7.975833883332488e-06, "loss": 0.076, "step": 33610 }, { "epoch": 1.69, "learning_rate": 7.963141595166778e-06, "loss": 0.0276, "step": 33620 }, { "epoch": 1.69, "learning_rate": 7.950449307001066e-06, "loss": 0.065, "step": 33630 }, { "epoch": 1.69, "learning_rate": 7.937757018835356e-06, "loss": 0.0754, "step": 33640 }, { "epoch": 1.69, "learning_rate": 7.925064730669646e-06, "loss": 0.0596, "step": 33650 }, { "epoch": 1.69, "learning_rate": 7.912372442503934e-06, "loss": 0.0932, "step": 33660 }, { "epoch": 1.69, "learning_rate": 7.899680154338226e-06, "loss": 0.0807, "step": 33670 }, { "epoch": 1.69, "learning_rate": 7.886987866172514e-06, "loss": 0.039, "step": 33680 }, { "epoch": 1.69, "learning_rate": 7.874295578006803e-06, "loss": 0.0526, "step": 33690 }, { "epoch": 1.69, "learning_rate": 7.861603289841094e-06, "loss": 0.0846, "step": 33700 }, { "epoch": 1.69, "learning_rate": 7.848911001675382e-06, "loss": 0.0858, "step": 33710 }, { "epoch": 1.69, "learning_rate": 7.83621871350967e-06, "loss": 0.0951, "step": 33720 }, { "epoch": 1.69, "learning_rate": 7.823526425343962e-06, "loss": 0.0719, "step": 33730 }, { "epoch": 1.69, "learning_rate": 7.81083413717825e-06, "loss": 0.0694, "step": 33740 }, { "epoch": 1.69, "learning_rate": 7.79814184901254e-06, "loss": 0.0458, "step": 33750 }, { "epoch": 1.69, "learning_rate": 7.78544956084683e-06, "loss": 0.0564, "step": 33760 }, { "epoch": 1.69, "learning_rate": 7.772757272681119e-06, "loss": 0.0674, "step": 33770 }, { "epoch": 1.69, "learning_rate": 7.760064984515409e-06, "loss": 0.0474, "step": 33780 }, { "epoch": 1.69, "learning_rate": 7.747372696349699e-06, "loss": 0.0744, "step": 33790 }, { "epoch": 1.69, "learning_rate": 7.734680408183987e-06, "loss": 0.073, "step": 33800 }, { "epoch": 1.69, "learning_rate": 7.721988120018277e-06, "loss": 0.0289, "step": 33810 }, { "epoch": 1.7, "learning_rate": 7.709295831852567e-06, "loss": 0.052, "step": 33820 }, { "epoch": 1.7, "learning_rate": 7.696603543686856e-06, "loss": 0.0389, "step": 33830 }, { "epoch": 1.7, "learning_rate": 7.683911255521146e-06, "loss": 0.0692, "step": 33840 }, { "epoch": 1.7, "learning_rate": 7.671218967355436e-06, "loss": 0.0377, "step": 33850 }, { "epoch": 1.7, "learning_rate": 7.658526679189724e-06, "loss": 0.0597, "step": 33860 }, { "epoch": 1.7, "learning_rate": 7.645834391024014e-06, "loss": 0.0241, "step": 33870 }, { "epoch": 1.7, "learning_rate": 7.633142102858304e-06, "loss": 0.0403, "step": 33880 }, { "epoch": 1.7, "learning_rate": 7.620449814692593e-06, "loss": 0.038, "step": 33890 }, { "epoch": 1.7, "learning_rate": 7.607757526526883e-06, "loss": 0.0346, "step": 33900 }, { "epoch": 1.7, "learning_rate": 7.595065238361172e-06, "loss": 0.0735, "step": 33910 }, { "epoch": 1.7, "learning_rate": 7.582372950195461e-06, "loss": 0.0474, "step": 33920 }, { "epoch": 1.7, "learning_rate": 7.569680662029751e-06, "loss": 0.0764, "step": 33930 }, { "epoch": 1.7, "learning_rate": 7.55698837386404e-06, "loss": 0.038, "step": 33940 }, { "epoch": 1.7, "learning_rate": 7.5442960856983295e-06, "loss": 0.0568, "step": 33950 }, { "epoch": 1.7, "learning_rate": 7.53160379753262e-06, "loss": 0.055, "step": 33960 }, { "epoch": 1.7, "learning_rate": 7.518911509366909e-06, "loss": 0.0607, "step": 33970 }, { "epoch": 1.7, "learning_rate": 7.506219221201198e-06, "loss": 0.0337, "step": 33980 }, { "epoch": 1.7, "learning_rate": 7.493526933035489e-06, "loss": 0.0407, "step": 33990 }, { "epoch": 1.7, "learning_rate": 7.480834644869778e-06, "loss": 0.0516, "step": 34000 }, { "epoch": 1.71, "learning_rate": 7.468142356704066e-06, "loss": 0.0739, "step": 34010 }, { "epoch": 1.71, "learning_rate": 7.455450068538357e-06, "loss": 0.053, "step": 34020 }, { "epoch": 1.71, "learning_rate": 7.442757780372646e-06, "loss": 0.0277, "step": 34030 }, { "epoch": 1.71, "learning_rate": 7.430065492206935e-06, "loss": 0.059, "step": 34040 }, { "epoch": 1.71, "learning_rate": 7.417373204041225e-06, "loss": 0.0819, "step": 34050 }, { "epoch": 1.71, "learning_rate": 7.404680915875514e-06, "loss": 0.0222, "step": 34060 }, { "epoch": 1.71, "learning_rate": 7.3919886277098035e-06, "loss": 0.0281, "step": 34070 }, { "epoch": 1.71, "learning_rate": 7.3792963395440935e-06, "loss": 0.0527, "step": 34080 }, { "epoch": 1.71, "learning_rate": 7.366604051378383e-06, "loss": 0.1382, "step": 34090 }, { "epoch": 1.71, "learning_rate": 7.353911763212672e-06, "loss": 0.0759, "step": 34100 }, { "epoch": 1.71, "learning_rate": 7.341219475046962e-06, "loss": 0.0877, "step": 34110 }, { "epoch": 1.71, "learning_rate": 7.328527186881251e-06, "loss": 0.0601, "step": 34120 }, { "epoch": 1.71, "learning_rate": 7.315834898715542e-06, "loss": 0.0489, "step": 34130 }, { "epoch": 1.71, "learning_rate": 7.30314261054983e-06, "loss": 0.0462, "step": 34140 }, { "epoch": 1.71, "learning_rate": 7.290450322384119e-06, "loss": 0.0789, "step": 34150 }, { "epoch": 1.71, "learning_rate": 7.27775803421841e-06, "loss": 0.0392, "step": 34160 }, { "epoch": 1.71, "learning_rate": 7.265065746052699e-06, "loss": 0.0542, "step": 34170 }, { "epoch": 1.71, "learning_rate": 7.2523734578869875e-06, "loss": 0.0491, "step": 34180 }, { "epoch": 1.71, "learning_rate": 7.239681169721278e-06, "loss": 0.0838, "step": 34190 }, { "epoch": 1.71, "learning_rate": 7.2269888815555674e-06, "loss": 0.0407, "step": 34200 }, { "epoch": 1.72, "learning_rate": 7.214296593389857e-06, "loss": 0.0272, "step": 34210 }, { "epoch": 1.72, "learning_rate": 7.2016043052241466e-06, "loss": 0.0587, "step": 34220 }, { "epoch": 1.72, "learning_rate": 7.188912017058436e-06, "loss": 0.0254, "step": 34230 }, { "epoch": 1.72, "learning_rate": 7.176219728892725e-06, "loss": 0.0515, "step": 34240 }, { "epoch": 1.72, "learning_rate": 7.163527440727015e-06, "loss": 0.0544, "step": 34250 }, { "epoch": 1.72, "learning_rate": 7.150835152561304e-06, "loss": 0.0501, "step": 34260 }, { "epoch": 1.72, "learning_rate": 7.138142864395593e-06, "loss": 0.0551, "step": 34270 }, { "epoch": 1.72, "learning_rate": 7.125450576229883e-06, "loss": 0.052, "step": 34280 }, { "epoch": 1.72, "learning_rate": 7.112758288064172e-06, "loss": 0.0973, "step": 34290 }, { "epoch": 1.72, "learning_rate": 7.100065999898461e-06, "loss": 0.0655, "step": 34300 }, { "epoch": 1.72, "learning_rate": 7.087373711732752e-06, "loss": 0.0737, "step": 34310 }, { "epoch": 1.72, "learning_rate": 7.0746814235670405e-06, "loss": 0.0482, "step": 34320 }, { "epoch": 1.72, "learning_rate": 7.06198913540133e-06, "loss": 0.0638, "step": 34330 }, { "epoch": 1.72, "learning_rate": 7.0492968472356205e-06, "loss": 0.0778, "step": 34340 }, { "epoch": 1.72, "learning_rate": 7.036604559069909e-06, "loss": 0.0935, "step": 34350 }, { "epoch": 1.72, "learning_rate": 7.0239122709042e-06, "loss": 0.0608, "step": 34360 }, { "epoch": 1.72, "learning_rate": 7.011219982738489e-06, "loss": 0.0604, "step": 34370 }, { "epoch": 1.72, "learning_rate": 6.998527694572778e-06, "loss": 0.0578, "step": 34380 }, { "epoch": 1.72, "learning_rate": 6.985835406407068e-06, "loss": 0.0731, "step": 34390 }, { "epoch": 1.72, "learning_rate": 6.973143118241357e-06, "loss": 0.0574, "step": 34400 }, { "epoch": 1.73, "learning_rate": 6.960450830075646e-06, "loss": 0.0419, "step": 34410 }, { "epoch": 1.73, "learning_rate": 6.947758541909936e-06, "loss": 0.0871, "step": 34420 }, { "epoch": 1.73, "learning_rate": 6.935066253744225e-06, "loss": 0.0781, "step": 34430 }, { "epoch": 1.73, "learning_rate": 6.9223739655785145e-06, "loss": 0.0436, "step": 34440 }, { "epoch": 1.73, "learning_rate": 6.9096816774128045e-06, "loss": 0.0347, "step": 34450 }, { "epoch": 1.73, "learning_rate": 6.896989389247094e-06, "loss": 0.1066, "step": 34460 }, { "epoch": 1.73, "learning_rate": 6.884297101081383e-06, "loss": 0.0933, "step": 34470 }, { "epoch": 1.73, "learning_rate": 6.871604812915674e-06, "loss": 0.0297, "step": 34480 }, { "epoch": 1.73, "learning_rate": 6.858912524749962e-06, "loss": 0.0484, "step": 34490 }, { "epoch": 1.73, "learning_rate": 6.846220236584251e-06, "loss": 0.0578, "step": 34500 }, { "epoch": 1.73, "learning_rate": 6.833527948418542e-06, "loss": 0.0469, "step": 34510 }, { "epoch": 1.73, "learning_rate": 6.82083566025283e-06, "loss": 0.0356, "step": 34520 }, { "epoch": 1.73, "learning_rate": 6.808143372087119e-06, "loss": 0.0445, "step": 34530 }, { "epoch": 1.73, "learning_rate": 6.79545108392141e-06, "loss": 0.0566, "step": 34540 }, { "epoch": 1.73, "learning_rate": 6.782758795755699e-06, "loss": 0.0643, "step": 34550 }, { "epoch": 1.73, "learning_rate": 6.770066507589988e-06, "loss": 0.09, "step": 34560 }, { "epoch": 1.73, "learning_rate": 6.7573742194242784e-06, "loss": 0.0803, "step": 34570 }, { "epoch": 1.73, "learning_rate": 6.744681931258568e-06, "loss": 0.098, "step": 34580 }, { "epoch": 1.73, "learning_rate": 6.7319896430928576e-06, "loss": 0.0423, "step": 34590 }, { "epoch": 1.73, "learning_rate": 6.719297354927147e-06, "loss": 0.0474, "step": 34600 }, { "epoch": 1.74, "learning_rate": 6.706605066761436e-06, "loss": 0.0605, "step": 34610 }, { "epoch": 1.74, "learning_rate": 6.693912778595726e-06, "loss": 0.0408, "step": 34620 }, { "epoch": 1.74, "learning_rate": 6.681220490430015e-06, "loss": 0.0473, "step": 34630 }, { "epoch": 1.74, "learning_rate": 6.668528202264304e-06, "loss": 0.0704, "step": 34640 }, { "epoch": 1.74, "learning_rate": 6.655835914098595e-06, "loss": 0.0468, "step": 34650 }, { "epoch": 1.74, "learning_rate": 6.643143625932883e-06, "loss": 0.0495, "step": 34660 }, { "epoch": 1.74, "learning_rate": 6.630451337767172e-06, "loss": 0.051, "step": 34670 }, { "epoch": 1.74, "learning_rate": 6.617759049601463e-06, "loss": 0.0421, "step": 34680 }, { "epoch": 1.74, "learning_rate": 6.605066761435752e-06, "loss": 0.0624, "step": 34690 }, { "epoch": 1.74, "learning_rate": 6.592374473270041e-06, "loss": 0.0406, "step": 34700 }, { "epoch": 1.74, "learning_rate": 6.5796821851043315e-06, "loss": 0.0524, "step": 34710 }, { "epoch": 1.74, "learning_rate": 6.566989896938621e-06, "loss": 0.0707, "step": 34720 }, { "epoch": 1.74, "learning_rate": 6.554297608772909e-06, "loss": 0.0513, "step": 34730 }, { "epoch": 1.74, "learning_rate": 6.5416053206072e-06, "loss": 0.0499, "step": 34740 }, { "epoch": 1.74, "learning_rate": 6.528913032441489e-06, "loss": 0.0677, "step": 34750 }, { "epoch": 1.74, "learning_rate": 6.516220744275778e-06, "loss": 0.0574, "step": 34760 }, { "epoch": 1.74, "learning_rate": 6.503528456110068e-06, "loss": 0.0587, "step": 34770 }, { "epoch": 1.74, "learning_rate": 6.490836167944357e-06, "loss": 0.0633, "step": 34780 }, { "epoch": 1.74, "learning_rate": 6.478143879778646e-06, "loss": 0.0356, "step": 34790 }, { "epoch": 1.74, "learning_rate": 6.465451591612936e-06, "loss": 0.0718, "step": 34800 }, { "epoch": 1.75, "learning_rate": 6.4527593034472255e-06, "loss": 0.0662, "step": 34810 }, { "epoch": 1.75, "learning_rate": 6.440067015281515e-06, "loss": 0.0346, "step": 34820 }, { "epoch": 1.75, "learning_rate": 6.427374727115805e-06, "loss": 0.045, "step": 34830 }, { "epoch": 1.75, "learning_rate": 6.414682438950094e-06, "loss": 0.0389, "step": 34840 }, { "epoch": 1.75, "learning_rate": 6.401990150784385e-06, "loss": 0.0407, "step": 34850 }, { "epoch": 1.75, "learning_rate": 6.389297862618674e-06, "loss": 0.0494, "step": 34860 }, { "epoch": 1.75, "learning_rate": 6.376605574452962e-06, "loss": 0.0921, "step": 34870 }, { "epoch": 1.75, "learning_rate": 6.363913286287253e-06, "loss": 0.0879, "step": 34880 }, { "epoch": 1.75, "learning_rate": 6.351220998121542e-06, "loss": 0.0787, "step": 34890 }, { "epoch": 1.75, "learning_rate": 6.338528709955831e-06, "loss": 0.0571, "step": 34900 }, { "epoch": 1.75, "learning_rate": 6.325836421790121e-06, "loss": 0.0522, "step": 34910 }, { "epoch": 1.75, "learning_rate": 6.31314413362441e-06, "loss": 0.0676, "step": 34920 }, { "epoch": 1.75, "learning_rate": 6.3004518454586995e-06, "loss": 0.0483, "step": 34930 }, { "epoch": 1.75, "learning_rate": 6.2877595572929894e-06, "loss": 0.0385, "step": 34940 }, { "epoch": 1.75, "learning_rate": 6.275067269127279e-06, "loss": 0.0443, "step": 34950 }, { "epoch": 1.75, "learning_rate": 6.262374980961568e-06, "loss": 0.0404, "step": 34960 }, { "epoch": 1.75, "learning_rate": 6.249682692795857e-06, "loss": 0.0516, "step": 34970 }, { "epoch": 1.75, "learning_rate": 6.236990404630147e-06, "loss": 0.0652, "step": 34980 }, { "epoch": 1.75, "learning_rate": 6.224298116464437e-06, "loss": 0.092, "step": 34990 }, { "epoch": 1.75, "learning_rate": 6.211605828298726e-06, "loss": 0.0446, "step": 35000 }, { "epoch": 1.76, "learning_rate": 6.198913540133015e-06, "loss": 0.0583, "step": 35010 }, { "epoch": 1.76, "learning_rate": 6.186221251967305e-06, "loss": 0.0345, "step": 35020 }, { "epoch": 1.76, "learning_rate": 6.173528963801595e-06, "loss": 0.0258, "step": 35030 }, { "epoch": 1.76, "learning_rate": 6.160836675635883e-06, "loss": 0.0496, "step": 35040 }, { "epoch": 1.76, "learning_rate": 6.148144387470173e-06, "loss": 0.0834, "step": 35050 }, { "epoch": 1.76, "learning_rate": 6.135452099304463e-06, "loss": 0.0497, "step": 35060 }, { "epoch": 1.76, "learning_rate": 6.1227598111387525e-06, "loss": 0.0613, "step": 35070 }, { "epoch": 1.76, "learning_rate": 6.110067522973042e-06, "loss": 0.0479, "step": 35080 }, { "epoch": 1.76, "learning_rate": 6.097375234807332e-06, "loss": 0.0787, "step": 35090 }, { "epoch": 1.76, "learning_rate": 6.084682946641621e-06, "loss": 0.0753, "step": 35100 }, { "epoch": 1.76, "learning_rate": 6.07199065847591e-06, "loss": 0.0537, "step": 35110 }, { "epoch": 1.76, "learning_rate": 6.0592983703102e-06, "loss": 0.0846, "step": 35120 }, { "epoch": 1.76, "learning_rate": 6.04660608214449e-06, "loss": 0.0377, "step": 35130 }, { "epoch": 1.76, "learning_rate": 6.033913793978778e-06, "loss": 0.0416, "step": 35140 }, { "epoch": 1.76, "learning_rate": 6.021221505813068e-06, "loss": 0.0554, "step": 35150 }, { "epoch": 1.76, "learning_rate": 6.008529217647358e-06, "loss": 0.0327, "step": 35160 }, { "epoch": 1.76, "learning_rate": 5.995836929481647e-06, "loss": 0.0986, "step": 35170 }, { "epoch": 1.76, "learning_rate": 5.9831446413159365e-06, "loss": 0.0571, "step": 35180 }, { "epoch": 1.76, "learning_rate": 5.9704523531502265e-06, "loss": 0.0828, "step": 35190 }, { "epoch": 1.76, "learning_rate": 5.957760064984516e-06, "loss": 0.0705, "step": 35200 }, { "epoch": 1.77, "learning_rate": 5.945067776818805e-06, "loss": 0.0873, "step": 35210 }, { "epoch": 1.77, "learning_rate": 5.932375488653095e-06, "loss": 0.0465, "step": 35220 }, { "epoch": 1.77, "learning_rate": 5.919683200487384e-06, "loss": 0.0582, "step": 35230 }, { "epoch": 1.77, "learning_rate": 5.906990912321674e-06, "loss": 0.0506, "step": 35240 }, { "epoch": 1.77, "learning_rate": 5.894298624155963e-06, "loss": 0.0372, "step": 35250 }, { "epoch": 1.77, "learning_rate": 5.881606335990253e-06, "loss": 0.0389, "step": 35260 }, { "epoch": 1.77, "learning_rate": 5.868914047824542e-06, "loss": 0.0614, "step": 35270 }, { "epoch": 1.77, "learning_rate": 5.856221759658831e-06, "loss": 0.018, "step": 35280 }, { "epoch": 1.77, "learning_rate": 5.843529471493121e-06, "loss": 0.0712, "step": 35290 }, { "epoch": 1.77, "learning_rate": 5.8308371833274105e-06, "loss": 0.0251, "step": 35300 }, { "epoch": 1.77, "learning_rate": 5.8181448951617e-06, "loss": 0.0852, "step": 35310 }, { "epoch": 1.77, "learning_rate": 5.80545260699599e-06, "loss": 0.0476, "step": 35320 }, { "epoch": 1.77, "learning_rate": 5.792760318830279e-06, "loss": 0.0394, "step": 35330 }, { "epoch": 1.77, "learning_rate": 5.780068030664569e-06, "loss": 0.0146, "step": 35340 }, { "epoch": 1.77, "learning_rate": 5.767375742498858e-06, "loss": 0.0385, "step": 35350 }, { "epoch": 1.77, "learning_rate": 5.754683454333148e-06, "loss": 0.0263, "step": 35360 }, { "epoch": 1.77, "learning_rate": 5.741991166167437e-06, "loss": 0.0384, "step": 35370 }, { "epoch": 1.77, "learning_rate": 5.729298878001726e-06, "loss": 0.0548, "step": 35380 }, { "epoch": 1.77, "learning_rate": 5.716606589836016e-06, "loss": 0.0259, "step": 35390 }, { "epoch": 1.77, "learning_rate": 5.703914301670305e-06, "loss": 0.0548, "step": 35400 }, { "epoch": 1.78, "learning_rate": 5.691222013504595e-06, "loss": 0.0608, "step": 35410 }, { "epoch": 1.78, "learning_rate": 5.678529725338884e-06, "loss": 0.0538, "step": 35420 }, { "epoch": 1.78, "learning_rate": 5.6658374371731735e-06, "loss": 0.0387, "step": 35430 }, { "epoch": 1.78, "learning_rate": 5.6531451490074635e-06, "loss": 0.0149, "step": 35440 }, { "epoch": 1.78, "learning_rate": 5.640452860841753e-06, "loss": 0.0543, "step": 35450 }, { "epoch": 1.78, "learning_rate": 5.627760572676042e-06, "loss": 0.0493, "step": 35460 }, { "epoch": 1.78, "learning_rate": 5.615068284510332e-06, "loss": 0.0479, "step": 35470 }, { "epoch": 1.78, "learning_rate": 5.602375996344621e-06, "loss": 0.0357, "step": 35480 }, { "epoch": 1.78, "learning_rate": 5.589683708178911e-06, "loss": 0.0461, "step": 35490 }, { "epoch": 1.78, "learning_rate": 5.5769914200132e-06, "loss": 0.0504, "step": 35500 }, { "epoch": 1.78, "learning_rate": 5.56429913184749e-06, "loss": 0.0351, "step": 35510 }, { "epoch": 1.78, "learning_rate": 5.551606843681779e-06, "loss": 0.065, "step": 35520 }, { "epoch": 1.78, "learning_rate": 5.538914555516068e-06, "loss": 0.0435, "step": 35530 }, { "epoch": 1.78, "learning_rate": 5.526222267350358e-06, "loss": 0.0614, "step": 35540 }, { "epoch": 1.78, "learning_rate": 5.5135299791846475e-06, "loss": 0.0569, "step": 35550 }, { "epoch": 1.78, "learning_rate": 5.500837691018937e-06, "loss": 0.0648, "step": 35560 }, { "epoch": 1.78, "learning_rate": 5.488145402853227e-06, "loss": 0.0521, "step": 35570 }, { "epoch": 1.78, "learning_rate": 5.475453114687517e-06, "loss": 0.0443, "step": 35580 }, { "epoch": 1.78, "learning_rate": 5.462760826521806e-06, "loss": 0.0271, "step": 35590 }, { "epoch": 1.78, "learning_rate": 5.450068538356095e-06, "loss": 0.0407, "step": 35600 }, { "epoch": 1.79, "learning_rate": 5.437376250190385e-06, "loss": 0.0494, "step": 35610 }, { "epoch": 1.79, "learning_rate": 5.424683962024674e-06, "loss": 0.0981, "step": 35620 }, { "epoch": 1.79, "learning_rate": 5.411991673858963e-06, "loss": 0.122, "step": 35630 }, { "epoch": 1.79, "learning_rate": 5.399299385693253e-06, "loss": 0.0329, "step": 35640 }, { "epoch": 1.79, "learning_rate": 5.386607097527543e-06, "loss": 0.0571, "step": 35650 }, { "epoch": 1.79, "learning_rate": 5.3739148093618315e-06, "loss": 0.0314, "step": 35660 }, { "epoch": 1.79, "learning_rate": 5.3612225211961215e-06, "loss": 0.0416, "step": 35670 }, { "epoch": 1.79, "learning_rate": 5.3485302330304114e-06, "loss": 0.0622, "step": 35680 }, { "epoch": 1.79, "learning_rate": 5.3358379448647e-06, "loss": 0.0619, "step": 35690 }, { "epoch": 1.79, "learning_rate": 5.32314565669899e-06, "loss": 0.062, "step": 35700 }, { "epoch": 1.79, "learning_rate": 5.31045336853328e-06, "loss": 0.0686, "step": 35710 }, { "epoch": 1.79, "learning_rate": 5.297761080367569e-06, "loss": 0.0543, "step": 35720 }, { "epoch": 1.79, "learning_rate": 5.285068792201858e-06, "loss": 0.0815, "step": 35730 }, { "epoch": 1.79, "learning_rate": 5.272376504036148e-06, "loss": 0.0575, "step": 35740 }, { "epoch": 1.79, "learning_rate": 5.259684215870438e-06, "loss": 0.1075, "step": 35750 }, { "epoch": 1.79, "learning_rate": 5.246991927704726e-06, "loss": 0.104, "step": 35760 }, { "epoch": 1.79, "learning_rate": 5.234299639539016e-06, "loss": 0.0667, "step": 35770 }, { "epoch": 1.79, "learning_rate": 5.221607351373306e-06, "loss": 0.0676, "step": 35780 }, { "epoch": 1.79, "learning_rate": 5.208915063207595e-06, "loss": 0.034, "step": 35790 }, { "epoch": 1.79, "learning_rate": 5.1962227750418845e-06, "loss": 0.0518, "step": 35800 }, { "epoch": 1.8, "learning_rate": 5.1835304868761745e-06, "loss": 0.0849, "step": 35810 }, { "epoch": 1.8, "learning_rate": 5.170838198710464e-06, "loss": 0.0351, "step": 35820 }, { "epoch": 1.8, "learning_rate": 5.158145910544753e-06, "loss": 0.0561, "step": 35830 }, { "epoch": 1.8, "learning_rate": 5.145453622379043e-06, "loss": 0.0669, "step": 35840 }, { "epoch": 1.8, "learning_rate": 5.132761334213333e-06, "loss": 0.0392, "step": 35850 }, { "epoch": 1.8, "learning_rate": 5.120069046047622e-06, "loss": 0.0703, "step": 35860 }, { "epoch": 1.8, "learning_rate": 5.107376757881911e-06, "loss": 0.0628, "step": 35870 }, { "epoch": 1.8, "learning_rate": 5.094684469716201e-06, "loss": 0.0758, "step": 35880 }, { "epoch": 1.8, "learning_rate": 5.08199218155049e-06, "loss": 0.1277, "step": 35890 }, { "epoch": 1.8, "learning_rate": 5.069299893384779e-06, "loss": 0.0485, "step": 35900 }, { "epoch": 1.8, "learning_rate": 5.056607605219069e-06, "loss": 0.0948, "step": 35910 }, { "epoch": 1.8, "learning_rate": 5.0439153170533585e-06, "loss": 0.0931, "step": 35920 }, { "epoch": 1.8, "learning_rate": 5.031223028887648e-06, "loss": 0.0406, "step": 35930 }, { "epoch": 1.8, "learning_rate": 5.018530740721938e-06, "loss": 0.0429, "step": 35940 }, { "epoch": 1.8, "learning_rate": 5.005838452556228e-06, "loss": 0.0158, "step": 35950 }, { "epoch": 1.8, "learning_rate": 4.993146164390517e-06, "loss": 0.0686, "step": 35960 }, { "epoch": 1.8, "learning_rate": 4.980453876224806e-06, "loss": 0.0515, "step": 35970 }, { "epoch": 1.8, "learning_rate": 4.967761588059096e-06, "loss": 0.0301, "step": 35980 }, { "epoch": 1.8, "learning_rate": 4.955069299893385e-06, "loss": 0.0546, "step": 35990 }, { "epoch": 1.8, "learning_rate": 4.942377011727674e-06, "loss": 0.0541, "step": 36000 }, { "epoch": 1.81, "learning_rate": 4.929684723561964e-06, "loss": 0.0326, "step": 36010 }, { "epoch": 1.81, "learning_rate": 4.916992435396253e-06, "loss": 0.0583, "step": 36020 }, { "epoch": 1.81, "learning_rate": 4.904300147230543e-06, "loss": 0.109, "step": 36030 }, { "epoch": 1.81, "learning_rate": 4.8916078590648325e-06, "loss": 0.0399, "step": 36040 }, { "epoch": 1.81, "learning_rate": 4.878915570899122e-06, "loss": 0.0593, "step": 36050 }, { "epoch": 1.81, "learning_rate": 4.866223282733412e-06, "loss": 0.0408, "step": 36060 }, { "epoch": 1.81, "learning_rate": 4.853530994567701e-06, "loss": 0.0592, "step": 36070 }, { "epoch": 1.81, "learning_rate": 4.840838706401991e-06, "loss": 0.0488, "step": 36080 }, { "epoch": 1.81, "learning_rate": 4.82814641823628e-06, "loss": 0.051, "step": 36090 }, { "epoch": 1.81, "learning_rate": 4.815454130070569e-06, "loss": 0.0547, "step": 36100 }, { "epoch": 1.81, "learning_rate": 4.802761841904859e-06, "loss": 0.0344, "step": 36110 }, { "epoch": 1.81, "learning_rate": 4.790069553739148e-06, "loss": 0.0636, "step": 36120 }, { "epoch": 1.81, "learning_rate": 4.777377265573438e-06, "loss": 0.04, "step": 36130 }, { "epoch": 1.81, "learning_rate": 4.764684977407727e-06, "loss": 0.0195, "step": 36140 }, { "epoch": 1.81, "learning_rate": 4.751992689242016e-06, "loss": 0.031, "step": 36150 }, { "epoch": 1.81, "learning_rate": 4.739300401076306e-06, "loss": 0.0469, "step": 36160 }, { "epoch": 1.81, "learning_rate": 4.7266081129105955e-06, "loss": 0.0507, "step": 36170 }, { "epoch": 1.81, "learning_rate": 4.7139158247448855e-06, "loss": 0.0633, "step": 36180 }, { "epoch": 1.81, "learning_rate": 4.701223536579175e-06, "loss": 0.0719, "step": 36190 }, { "epoch": 1.81, "learning_rate": 4.688531248413465e-06, "loss": 0.0483, "step": 36200 }, { "epoch": 1.82, "learning_rate": 4.675838960247754e-06, "loss": 0.0531, "step": 36210 }, { "epoch": 1.82, "learning_rate": 4.663146672082043e-06, "loss": 0.0371, "step": 36220 }, { "epoch": 1.82, "learning_rate": 4.650454383916333e-06, "loss": 0.0387, "step": 36230 }, { "epoch": 1.82, "learning_rate": 4.637762095750622e-06, "loss": 0.0238, "step": 36240 }, { "epoch": 1.82, "learning_rate": 4.625069807584911e-06, "loss": 0.0524, "step": 36250 }, { "epoch": 1.82, "learning_rate": 4.612377519419201e-06, "loss": 0.0625, "step": 36260 }, { "epoch": 1.82, "learning_rate": 4.59968523125349e-06, "loss": 0.0445, "step": 36270 }, { "epoch": 1.82, "learning_rate": 4.5869929430877795e-06, "loss": 0.0686, "step": 36280 }, { "epoch": 1.82, "learning_rate": 4.5743006549220695e-06, "loss": 0.0271, "step": 36290 }, { "epoch": 1.82, "learning_rate": 4.5616083667563595e-06, "loss": 0.0382, "step": 36300 }, { "epoch": 1.82, "learning_rate": 4.548916078590649e-06, "loss": 0.0645, "step": 36310 }, { "epoch": 1.82, "learning_rate": 4.536223790424938e-06, "loss": 0.0692, "step": 36320 }, { "epoch": 1.82, "learning_rate": 4.523531502259228e-06, "loss": 0.0264, "step": 36330 }, { "epoch": 1.82, "learning_rate": 4.510839214093517e-06, "loss": 0.0802, "step": 36340 }, { "epoch": 1.82, "learning_rate": 4.498146925927806e-06, "loss": 0.0309, "step": 36350 }, { "epoch": 1.82, "learning_rate": 4.485454637762096e-06, "loss": 0.0761, "step": 36360 }, { "epoch": 1.82, "learning_rate": 4.472762349596386e-06, "loss": 0.0286, "step": 36370 }, { "epoch": 1.82, "learning_rate": 4.460070061430674e-06, "loss": 0.0598, "step": 36380 }, { "epoch": 1.82, "learning_rate": 4.447377773264964e-06, "loss": 0.0699, "step": 36390 }, { "epoch": 1.82, "learning_rate": 4.434685485099254e-06, "loss": 0.0504, "step": 36400 }, { "epoch": 1.83, "learning_rate": 4.4219931969335435e-06, "loss": 0.1059, "step": 36410 }, { "epoch": 1.83, "learning_rate": 4.409300908767833e-06, "loss": 0.0642, "step": 36420 }, { "epoch": 1.83, "learning_rate": 4.396608620602123e-06, "loss": 0.0539, "step": 36430 }, { "epoch": 1.83, "learning_rate": 4.383916332436412e-06, "loss": 0.1003, "step": 36440 }, { "epoch": 1.83, "learning_rate": 4.371224044270701e-06, "loss": 0.0606, "step": 36450 }, { "epoch": 1.83, "learning_rate": 4.358531756104991e-06, "loss": 0.074, "step": 36460 }, { "epoch": 1.83, "learning_rate": 4.345839467939281e-06, "loss": 0.0756, "step": 36470 }, { "epoch": 1.83, "learning_rate": 4.333147179773569e-06, "loss": 0.0673, "step": 36480 }, { "epoch": 1.83, "learning_rate": 4.320454891607859e-06, "loss": 0.0385, "step": 36490 }, { "epoch": 1.83, "learning_rate": 4.307762603442149e-06, "loss": 0.0734, "step": 36500 }, { "epoch": 1.83, "learning_rate": 4.295070315276438e-06, "loss": 0.0641, "step": 36510 }, { "epoch": 1.83, "learning_rate": 4.282378027110727e-06, "loss": 0.0821, "step": 36520 }, { "epoch": 1.83, "learning_rate": 4.269685738945017e-06, "loss": 0.0648, "step": 36530 }, { "epoch": 1.83, "learning_rate": 4.256993450779307e-06, "loss": 0.0555, "step": 36540 }, { "epoch": 1.83, "learning_rate": 4.244301162613596e-06, "loss": 0.058, "step": 36550 }, { "epoch": 1.83, "learning_rate": 4.231608874447886e-06, "loss": 0.0462, "step": 36560 }, { "epoch": 1.83, "learning_rate": 4.218916586282176e-06, "loss": 0.0577, "step": 36570 }, { "epoch": 1.83, "learning_rate": 4.206224298116465e-06, "loss": 0.0521, "step": 36580 }, { "epoch": 1.83, "learning_rate": 4.193532009950754e-06, "loss": 0.0423, "step": 36590 }, { "epoch": 1.83, "learning_rate": 4.180839721785044e-06, "loss": 0.0301, "step": 36600 }, { "epoch": 1.84, "learning_rate": 4.168147433619333e-06, "loss": 0.0332, "step": 36610 }, { "epoch": 1.84, "learning_rate": 4.155455145453622e-06, "loss": 0.0419, "step": 36620 }, { "epoch": 1.84, "learning_rate": 4.142762857287912e-06, "loss": 0.039, "step": 36630 }, { "epoch": 1.84, "learning_rate": 4.130070569122201e-06, "loss": 0.0137, "step": 36640 }, { "epoch": 1.84, "learning_rate": 4.1173782809564905e-06, "loss": 0.0391, "step": 36650 }, { "epoch": 1.84, "learning_rate": 4.1046859927907805e-06, "loss": 0.0615, "step": 36660 }, { "epoch": 1.84, "learning_rate": 4.0919937046250705e-06, "loss": 0.0398, "step": 36670 }, { "epoch": 1.84, "learning_rate": 4.07930141645936e-06, "loss": 0.0773, "step": 36680 }, { "epoch": 1.84, "learning_rate": 4.066609128293649e-06, "loss": 0.0465, "step": 36690 }, { "epoch": 1.84, "learning_rate": 4.053916840127939e-06, "loss": 0.0575, "step": 36700 }, { "epoch": 1.84, "learning_rate": 4.041224551962228e-06, "loss": 0.0625, "step": 36710 }, { "epoch": 1.84, "learning_rate": 4.028532263796517e-06, "loss": 0.1074, "step": 36720 }, { "epoch": 1.84, "learning_rate": 4.015839975630807e-06, "loss": 0.0878, "step": 36730 }, { "epoch": 1.84, "learning_rate": 4.003147687465096e-06, "loss": 0.0241, "step": 36740 }, { "epoch": 1.84, "learning_rate": 3.990455399299386e-06, "loss": 0.061, "step": 36750 }, { "epoch": 1.84, "learning_rate": 3.977763111133675e-06, "loss": 0.028, "step": 36760 }, { "epoch": 1.84, "learning_rate": 3.965070822967965e-06, "loss": 0.0759, "step": 36770 }, { "epoch": 1.84, "learning_rate": 3.9523785348022545e-06, "loss": 0.0345, "step": 36780 }, { "epoch": 1.84, "learning_rate": 3.939686246636544e-06, "loss": 0.0548, "step": 36790 }, { "epoch": 1.84, "learning_rate": 3.926993958470834e-06, "loss": 0.0344, "step": 36800 }, { "epoch": 1.85, "learning_rate": 3.914301670305123e-06, "loss": 0.0684, "step": 36810 }, { "epoch": 1.85, "learning_rate": 3.901609382139412e-06, "loss": 0.0622, "step": 36820 }, { "epoch": 1.85, "learning_rate": 3.888917093973702e-06, "loss": 0.074, "step": 36830 }, { "epoch": 1.85, "learning_rate": 3.876224805807991e-06, "loss": 0.0497, "step": 36840 }, { "epoch": 1.85, "learning_rate": 3.863532517642281e-06, "loss": 0.0786, "step": 36850 }, { "epoch": 1.85, "learning_rate": 3.85084022947657e-06, "loss": 0.1081, "step": 36860 }, { "epoch": 1.85, "learning_rate": 3.838147941310859e-06, "loss": 0.0449, "step": 36870 }, { "epoch": 1.85, "learning_rate": 3.825455653145149e-06, "loss": 0.0497, "step": 36880 }, { "epoch": 1.85, "learning_rate": 3.812763364979439e-06, "loss": 0.0543, "step": 36890 }, { "epoch": 1.85, "learning_rate": 3.8000710768137284e-06, "loss": 0.0522, "step": 36900 }, { "epoch": 1.85, "learning_rate": 3.7873787886480175e-06, "loss": 0.0697, "step": 36910 }, { "epoch": 1.85, "learning_rate": 3.774686500482307e-06, "loss": 0.059, "step": 36920 }, { "epoch": 1.85, "learning_rate": 3.7619942123165967e-06, "loss": 0.0321, "step": 36930 }, { "epoch": 1.85, "learning_rate": 3.749301924150886e-06, "loss": 0.0408, "step": 36940 }, { "epoch": 1.85, "learning_rate": 3.7366096359851754e-06, "loss": 0.1177, "step": 36950 }, { "epoch": 1.85, "learning_rate": 3.7239173478194654e-06, "loss": 0.0186, "step": 36960 }, { "epoch": 1.85, "learning_rate": 3.711225059653754e-06, "loss": 0.0345, "step": 36970 }, { "epoch": 1.85, "learning_rate": 3.698532771488044e-06, "loss": 0.0747, "step": 36980 }, { "epoch": 1.85, "learning_rate": 3.6858404833223337e-06, "loss": 0.0431, "step": 36990 }, { "epoch": 1.85, "learning_rate": 3.673148195156623e-06, "loss": 0.0324, "step": 37000 }, { "epoch": 1.86, "learning_rate": 3.6604559069909124e-06, "loss": 0.0856, "step": 37010 }, { "epoch": 1.86, "learning_rate": 3.647763618825202e-06, "loss": 0.0854, "step": 37020 }, { "epoch": 1.86, "learning_rate": 3.635071330659492e-06, "loss": 0.0409, "step": 37030 }, { "epoch": 1.86, "learning_rate": 3.6223790424937806e-06, "loss": 0.0262, "step": 37040 }, { "epoch": 1.86, "learning_rate": 3.6096867543280706e-06, "loss": 0.0223, "step": 37050 }, { "epoch": 1.86, "learning_rate": 3.59699446616236e-06, "loss": 0.08, "step": 37060 }, { "epoch": 1.86, "learning_rate": 3.5843021779966493e-06, "loss": 0.0626, "step": 37070 }, { "epoch": 1.86, "learning_rate": 3.571609889830939e-06, "loss": 0.0189, "step": 37080 }, { "epoch": 1.86, "learning_rate": 3.5589176016652285e-06, "loss": 0.0527, "step": 37090 }, { "epoch": 1.86, "learning_rate": 3.5462253134995176e-06, "loss": 0.0302, "step": 37100 }, { "epoch": 1.86, "learning_rate": 3.533533025333807e-06, "loss": 0.0797, "step": 37110 }, { "epoch": 1.86, "learning_rate": 3.5208407371680968e-06, "loss": 0.0864, "step": 37120 }, { "epoch": 1.86, "learning_rate": 3.5081484490023867e-06, "loss": 0.0415, "step": 37130 }, { "epoch": 1.86, "learning_rate": 3.4954561608366755e-06, "loss": 0.102, "step": 37140 }, { "epoch": 1.86, "learning_rate": 3.4827638726709655e-06, "loss": 0.0603, "step": 37150 }, { "epoch": 1.86, "learning_rate": 3.470071584505255e-06, "loss": 0.0339, "step": 37160 }, { "epoch": 1.86, "learning_rate": 3.457379296339544e-06, "loss": 0.0525, "step": 37170 }, { "epoch": 1.86, "learning_rate": 3.4446870081738337e-06, "loss": 0.0562, "step": 37180 }, { "epoch": 1.86, "learning_rate": 3.4319947200081233e-06, "loss": 0.0444, "step": 37190 }, { "epoch": 1.86, "learning_rate": 3.4193024318424124e-06, "loss": 0.075, "step": 37200 }, { "epoch": 1.87, "learning_rate": 3.406610143676702e-06, "loss": 0.0274, "step": 37210 }, { "epoch": 1.87, "learning_rate": 3.393917855510992e-06, "loss": 0.0385, "step": 37220 }, { "epoch": 1.87, "learning_rate": 3.3812255673452807e-06, "loss": 0.0698, "step": 37230 }, { "epoch": 1.87, "learning_rate": 3.3685332791795707e-06, "loss": 0.0967, "step": 37240 }, { "epoch": 1.87, "learning_rate": 3.3558409910138603e-06, "loss": 0.0958, "step": 37250 }, { "epoch": 1.87, "learning_rate": 3.34314870284815e-06, "loss": 0.045, "step": 37260 }, { "epoch": 1.87, "learning_rate": 3.330456414682439e-06, "loss": 0.0507, "step": 37270 }, { "epoch": 1.87, "learning_rate": 3.3177641265167285e-06, "loss": 0.0715, "step": 37280 }, { "epoch": 1.87, "learning_rate": 3.3050718383510185e-06, "loss": 0.0332, "step": 37290 }, { "epoch": 1.87, "learning_rate": 3.2923795501853073e-06, "loss": 0.0424, "step": 37300 }, { "epoch": 1.87, "learning_rate": 3.279687262019597e-06, "loss": 0.054, "step": 37310 }, { "epoch": 1.87, "learning_rate": 3.266994973853887e-06, "loss": 0.051, "step": 37320 }, { "epoch": 1.87, "learning_rate": 3.2543026856881755e-06, "loss": 0.0687, "step": 37330 }, { "epoch": 1.87, "learning_rate": 3.2416103975224655e-06, "loss": 0.0756, "step": 37340 }, { "epoch": 1.87, "learning_rate": 3.228918109356755e-06, "loss": 0.0792, "step": 37350 }, { "epoch": 1.87, "learning_rate": 3.2162258211910447e-06, "loss": 0.0153, "step": 37360 }, { "epoch": 1.87, "learning_rate": 3.203533533025334e-06, "loss": 0.0321, "step": 37370 }, { "epoch": 1.87, "learning_rate": 3.1908412448596234e-06, "loss": 0.0376, "step": 37380 }, { "epoch": 1.87, "learning_rate": 3.1781489566939134e-06, "loss": 0.0422, "step": 37390 }, { "epoch": 1.87, "learning_rate": 3.165456668528202e-06, "loss": 0.0543, "step": 37400 }, { "epoch": 1.88, "learning_rate": 3.152764380362492e-06, "loss": 0.0759, "step": 37410 }, { "epoch": 1.88, "learning_rate": 3.1400720921967816e-06, "loss": 0.0806, "step": 37420 }, { "epoch": 1.88, "learning_rate": 3.1273798040310708e-06, "loss": 0.0759, "step": 37430 }, { "epoch": 1.88, "learning_rate": 3.1146875158653603e-06, "loss": 0.0893, "step": 37440 }, { "epoch": 1.88, "learning_rate": 3.10199522769965e-06, "loss": 0.0399, "step": 37450 }, { "epoch": 1.88, "learning_rate": 3.0893029395339395e-06, "loss": 0.0334, "step": 37460 }, { "epoch": 1.88, "learning_rate": 3.0766106513682286e-06, "loss": 0.0479, "step": 37470 }, { "epoch": 1.88, "learning_rate": 3.0639183632025186e-06, "loss": 0.0351, "step": 37480 }, { "epoch": 1.88, "learning_rate": 3.0512260750368078e-06, "loss": 0.0856, "step": 37490 }, { "epoch": 1.88, "learning_rate": 3.038533786871097e-06, "loss": 0.0274, "step": 37500 }, { "epoch": 1.88, "learning_rate": 3.025841498705387e-06, "loss": 0.0661, "step": 37510 }, { "epoch": 1.88, "learning_rate": 3.013149210539676e-06, "loss": 0.1191, "step": 37520 }, { "epoch": 1.88, "learning_rate": 3.000456922373966e-06, "loss": 0.0681, "step": 37530 }, { "epoch": 1.88, "learning_rate": 2.987764634208255e-06, "loss": 0.0598, "step": 37540 }, { "epoch": 1.88, "learning_rate": 2.9750723460425447e-06, "loss": 0.0807, "step": 37550 }, { "epoch": 1.88, "learning_rate": 2.9623800578768343e-06, "loss": 0.054, "step": 37560 }, { "epoch": 1.88, "learning_rate": 2.9496877697111234e-06, "loss": 0.0483, "step": 37570 }, { "epoch": 1.88, "learning_rate": 2.9369954815454134e-06, "loss": 0.0529, "step": 37580 }, { "epoch": 1.88, "learning_rate": 2.9243031933797026e-06, "loss": 0.0988, "step": 37590 }, { "epoch": 1.88, "learning_rate": 2.911610905213992e-06, "loss": 0.0755, "step": 37600 }, { "epoch": 1.89, "learning_rate": 2.8989186170482817e-06, "loss": 0.0726, "step": 37610 }, { "epoch": 1.89, "learning_rate": 2.886226328882571e-06, "loss": 0.0445, "step": 37620 }, { "epoch": 1.89, "learning_rate": 2.873534040716861e-06, "loss": 0.078, "step": 37630 }, { "epoch": 1.89, "learning_rate": 2.86084175255115e-06, "loss": 0.0711, "step": 37640 }, { "epoch": 1.89, "learning_rate": 2.8481494643854395e-06, "loss": 0.0577, "step": 37650 }, { "epoch": 1.89, "learning_rate": 2.835457176219729e-06, "loss": 0.064, "step": 37660 }, { "epoch": 1.89, "learning_rate": 2.8227648880540187e-06, "loss": 0.0455, "step": 37670 }, { "epoch": 1.89, "learning_rate": 2.810072599888308e-06, "loss": 0.0327, "step": 37680 }, { "epoch": 1.89, "learning_rate": 2.7973803117225974e-06, "loss": 0.0633, "step": 37690 }, { "epoch": 1.89, "learning_rate": 2.784688023556887e-06, "loss": 0.0891, "step": 37700 }, { "epoch": 1.89, "learning_rate": 2.7719957353911765e-06, "loss": 0.0757, "step": 37710 }, { "epoch": 1.89, "learning_rate": 2.759303447225466e-06, "loss": 0.0318, "step": 37720 }, { "epoch": 1.89, "learning_rate": 2.7466111590597552e-06, "loss": 0.0718, "step": 37730 }, { "epoch": 1.89, "learning_rate": 2.733918870894045e-06, "loss": 0.0687, "step": 37740 }, { "epoch": 1.89, "learning_rate": 2.7212265827283344e-06, "loss": 0.0438, "step": 37750 }, { "epoch": 1.89, "learning_rate": 2.708534294562624e-06, "loss": 0.0241, "step": 37760 }, { "epoch": 1.89, "learning_rate": 2.6958420063969135e-06, "loss": 0.08, "step": 37770 }, { "epoch": 1.89, "learning_rate": 2.6831497182312026e-06, "loss": 0.0448, "step": 37780 }, { "epoch": 1.89, "learning_rate": 2.670457430065492e-06, "loss": 0.0542, "step": 37790 }, { "epoch": 1.9, "learning_rate": 2.6577651418997818e-06, "loss": 0.1252, "step": 37800 }, { "epoch": 1.9, "learning_rate": 2.6450728537340713e-06, "loss": 0.0552, "step": 37810 }, { "epoch": 1.9, "learning_rate": 2.632380565568361e-06, "loss": 0.0293, "step": 37820 }, { "epoch": 1.9, "learning_rate": 2.61968827740265e-06, "loss": 0.0495, "step": 37830 }, { "epoch": 1.9, "learning_rate": 2.60699598923694e-06, "loss": 0.0854, "step": 37840 }, { "epoch": 1.9, "learning_rate": 2.594303701071229e-06, "loss": 0.0324, "step": 37850 }, { "epoch": 1.9, "learning_rate": 2.5816114129055188e-06, "loss": 0.0531, "step": 37860 }, { "epoch": 1.9, "learning_rate": 2.5689191247398083e-06, "loss": 0.0246, "step": 37870 }, { "epoch": 1.9, "learning_rate": 2.5562268365740975e-06, "loss": 0.0412, "step": 37880 }, { "epoch": 1.9, "learning_rate": 2.5435345484083875e-06, "loss": 0.1031, "step": 37890 }, { "epoch": 1.9, "learning_rate": 2.5308422602426766e-06, "loss": 0.0492, "step": 37900 }, { "epoch": 1.9, "learning_rate": 2.518149972076966e-06, "loss": 0.0504, "step": 37910 }, { "epoch": 1.9, "learning_rate": 2.5054576839112557e-06, "loss": 0.0924, "step": 37920 }, { "epoch": 1.9, "learning_rate": 2.492765395745545e-06, "loss": 0.0463, "step": 37930 }, { "epoch": 1.9, "learning_rate": 2.480073107579835e-06, "loss": 0.0303, "step": 37940 }, { "epoch": 1.9, "learning_rate": 2.467380819414124e-06, "loss": 0.0894, "step": 37950 }, { "epoch": 1.9, "learning_rate": 2.4546885312484136e-06, "loss": 0.0381, "step": 37960 }, { "epoch": 1.9, "learning_rate": 2.441996243082703e-06, "loss": 0.0334, "step": 37970 }, { "epoch": 1.9, "learning_rate": 2.4293039549169923e-06, "loss": 0.0651, "step": 37980 }, { "epoch": 1.9, "learning_rate": 2.4166116667512823e-06, "loss": 0.0434, "step": 37990 }, { "epoch": 1.91, "learning_rate": 2.4039193785855714e-06, "loss": 0.0408, "step": 38000 }, { "epoch": 1.91, "learning_rate": 2.391227090419861e-06, "loss": 0.0494, "step": 38010 }, { "epoch": 1.91, "learning_rate": 2.3785348022541505e-06, "loss": 0.065, "step": 38020 }, { "epoch": 1.91, "learning_rate": 2.36584251408844e-06, "loss": 0.0659, "step": 38030 }, { "epoch": 1.91, "learning_rate": 2.3531502259227297e-06, "loss": 0.0687, "step": 38040 }, { "epoch": 1.91, "learning_rate": 2.340457937757019e-06, "loss": 0.0317, "step": 38050 }, { "epoch": 1.91, "learning_rate": 2.3277656495913084e-06, "loss": 0.0561, "step": 38060 }, { "epoch": 1.91, "learning_rate": 2.315073361425598e-06, "loss": 0.0387, "step": 38070 }, { "epoch": 1.91, "learning_rate": 2.3023810732598875e-06, "loss": 0.0503, "step": 38080 }, { "epoch": 1.91, "learning_rate": 2.2896887850941767e-06, "loss": 0.0475, "step": 38090 }, { "epoch": 1.91, "learning_rate": 2.2769964969284662e-06, "loss": 0.0272, "step": 38100 }, { "epoch": 1.91, "learning_rate": 2.264304208762756e-06, "loss": 0.0477, "step": 38110 }, { "epoch": 1.91, "learning_rate": 2.2516119205970454e-06, "loss": 0.0679, "step": 38120 }, { "epoch": 1.91, "learning_rate": 2.238919632431335e-06, "loss": 0.0397, "step": 38130 }, { "epoch": 1.91, "learning_rate": 2.226227344265624e-06, "loss": 0.0975, "step": 38140 }, { "epoch": 1.91, "learning_rate": 2.213535056099914e-06, "loss": 0.1126, "step": 38150 }, { "epoch": 1.91, "learning_rate": 2.200842767934203e-06, "loss": 0.0595, "step": 38160 }, { "epoch": 1.91, "learning_rate": 2.1881504797684928e-06, "loss": 0.0754, "step": 38170 }, { "epoch": 1.91, "learning_rate": 2.1754581916027823e-06, "loss": 0.0425, "step": 38180 }, { "epoch": 1.91, "learning_rate": 2.1627659034370715e-06, "loss": 0.0526, "step": 38190 }, { "epoch": 1.92, "learning_rate": 2.1500736152713615e-06, "loss": 0.0462, "step": 38200 }, { "epoch": 1.92, "learning_rate": 2.1373813271056506e-06, "loss": 0.0361, "step": 38210 }, { "epoch": 1.92, "learning_rate": 2.12468903893994e-06, "loss": 0.0948, "step": 38220 }, { "epoch": 1.92, "learning_rate": 2.1119967507742298e-06, "loss": 0.0866, "step": 38230 }, { "epoch": 1.92, "learning_rate": 2.099304462608519e-06, "loss": 0.0419, "step": 38240 }, { "epoch": 1.92, "learning_rate": 2.086612174442809e-06, "loss": 0.0588, "step": 38250 }, { "epoch": 1.92, "learning_rate": 2.073919886277098e-06, "loss": 0.0596, "step": 38260 }, { "epoch": 1.92, "learning_rate": 2.0612275981113876e-06, "loss": 0.0249, "step": 38270 }, { "epoch": 1.92, "learning_rate": 2.048535309945677e-06, "loss": 0.0625, "step": 38280 }, { "epoch": 1.92, "learning_rate": 2.0358430217799663e-06, "loss": 0.0383, "step": 38290 }, { "epoch": 1.92, "learning_rate": 2.0231507336142563e-06, "loss": 0.0786, "step": 38300 }, { "epoch": 1.92, "learning_rate": 2.0104584454485454e-06, "loss": 0.0247, "step": 38310 }, { "epoch": 1.92, "learning_rate": 1.997766157282835e-06, "loss": 0.0454, "step": 38320 }, { "epoch": 1.92, "learning_rate": 1.9850738691171246e-06, "loss": 0.0249, "step": 38330 }, { "epoch": 1.92, "learning_rate": 1.972381580951414e-06, "loss": 0.0526, "step": 38340 }, { "epoch": 1.92, "learning_rate": 1.9596892927857037e-06, "loss": 0.0534, "step": 38350 }, { "epoch": 1.92, "learning_rate": 1.946997004619993e-06, "loss": 0.0317, "step": 38360 }, { "epoch": 1.92, "learning_rate": 1.9343047164542824e-06, "loss": 0.0556, "step": 38370 }, { "epoch": 1.92, "learning_rate": 1.921612428288572e-06, "loss": 0.0838, "step": 38380 }, { "epoch": 1.92, "learning_rate": 1.9089201401228615e-06, "loss": 0.0272, "step": 38390 }, { "epoch": 1.93, "learning_rate": 1.8962278519571511e-06, "loss": 0.0786, "step": 38400 }, { "epoch": 1.93, "learning_rate": 1.8835355637914405e-06, "loss": 0.0433, "step": 38410 }, { "epoch": 1.93, "learning_rate": 1.8708432756257298e-06, "loss": 0.0557, "step": 38420 }, { "epoch": 1.93, "learning_rate": 1.8581509874600194e-06, "loss": 0.0591, "step": 38430 }, { "epoch": 1.93, "learning_rate": 1.8454586992943087e-06, "loss": 0.0475, "step": 38440 }, { "epoch": 1.93, "learning_rate": 1.8327664111285985e-06, "loss": 0.0496, "step": 38450 }, { "epoch": 1.93, "learning_rate": 1.8200741229628879e-06, "loss": 0.0648, "step": 38460 }, { "epoch": 1.93, "learning_rate": 1.8073818347971772e-06, "loss": 0.0666, "step": 38470 }, { "epoch": 1.93, "learning_rate": 1.794689546631467e-06, "loss": 0.078, "step": 38480 }, { "epoch": 1.93, "learning_rate": 1.7819972584657564e-06, "loss": 0.0469, "step": 38490 }, { "epoch": 1.93, "learning_rate": 1.7693049703000457e-06, "loss": 0.0432, "step": 38500 }, { "epoch": 1.93, "learning_rate": 1.7566126821343353e-06, "loss": 0.0573, "step": 38510 }, { "epoch": 1.93, "learning_rate": 1.7439203939686246e-06, "loss": 0.0869, "step": 38520 }, { "epoch": 1.93, "learning_rate": 1.7312281058029144e-06, "loss": 0.0261, "step": 38530 }, { "epoch": 1.93, "learning_rate": 1.7185358176372038e-06, "loss": 0.0894, "step": 38540 }, { "epoch": 1.93, "learning_rate": 1.7058435294714931e-06, "loss": 0.0336, "step": 38550 }, { "epoch": 1.93, "learning_rate": 1.6931512413057827e-06, "loss": 0.0543, "step": 38560 }, { "epoch": 1.93, "learning_rate": 1.680458953140072e-06, "loss": 0.0817, "step": 38570 }, { "epoch": 1.93, "learning_rate": 1.6677666649743618e-06, "loss": 0.066, "step": 38580 }, { "epoch": 1.93, "learning_rate": 1.6550743768086512e-06, "loss": 0.0771, "step": 38590 }, { "epoch": 1.94, "learning_rate": 1.6423820886429405e-06, "loss": 0.0561, "step": 38600 }, { "epoch": 1.94, "learning_rate": 1.6296898004772301e-06, "loss": 0.0485, "step": 38610 }, { "epoch": 1.94, "learning_rate": 1.6169975123115195e-06, "loss": 0.0654, "step": 38620 }, { "epoch": 1.94, "learning_rate": 1.6043052241458092e-06, "loss": 0.0382, "step": 38630 }, { "epoch": 1.94, "learning_rate": 1.5916129359800986e-06, "loss": 0.014, "step": 38640 }, { "epoch": 1.94, "learning_rate": 1.578920647814388e-06, "loss": 0.0758, "step": 38650 }, { "epoch": 1.94, "learning_rate": 1.5662283596486777e-06, "loss": 0.0559, "step": 38660 }, { "epoch": 1.94, "learning_rate": 1.553536071482967e-06, "loss": 0.0268, "step": 38670 }, { "epoch": 1.94, "learning_rate": 1.5408437833172564e-06, "loss": 0.0635, "step": 38680 }, { "epoch": 1.94, "learning_rate": 1.528151495151546e-06, "loss": 0.0484, "step": 38690 }, { "epoch": 1.94, "learning_rate": 1.5154592069858356e-06, "loss": 0.0292, "step": 38700 }, { "epoch": 1.94, "learning_rate": 1.502766918820125e-06, "loss": 0.0273, "step": 38710 }, { "epoch": 1.94, "learning_rate": 1.4900746306544145e-06, "loss": 0.0348, "step": 38720 }, { "epoch": 1.94, "learning_rate": 1.477382342488704e-06, "loss": 0.0511, "step": 38730 }, { "epoch": 1.94, "learning_rate": 1.4646900543229934e-06, "loss": 0.028, "step": 38740 }, { "epoch": 1.94, "learning_rate": 1.4519977661572828e-06, "loss": 0.0366, "step": 38750 }, { "epoch": 1.94, "learning_rate": 1.4393054779915723e-06, "loss": 0.0383, "step": 38760 }, { "epoch": 1.94, "learning_rate": 1.426613189825862e-06, "loss": 0.0707, "step": 38770 }, { "epoch": 1.94, "learning_rate": 1.4139209016601515e-06, "loss": 0.065, "step": 38780 }, { "epoch": 1.94, "learning_rate": 1.4012286134944408e-06, "loss": 0.0963, "step": 38790 }, { "epoch": 1.95, "learning_rate": 1.3885363253287302e-06, "loss": 0.0248, "step": 38800 }, { "epoch": 1.95, "learning_rate": 1.3758440371630197e-06, "loss": 0.0444, "step": 38810 }, { "epoch": 1.95, "learning_rate": 1.3631517489973093e-06, "loss": 0.0533, "step": 38820 }, { "epoch": 1.95, "learning_rate": 1.3504594608315989e-06, "loss": 0.0487, "step": 38830 }, { "epoch": 1.95, "learning_rate": 1.3377671726658882e-06, "loss": 0.0303, "step": 38840 }, { "epoch": 1.95, "learning_rate": 1.3250748845001778e-06, "loss": 0.0408, "step": 38850 }, { "epoch": 1.95, "learning_rate": 1.3123825963344672e-06, "loss": 0.076, "step": 38860 }, { "epoch": 1.95, "learning_rate": 1.2996903081687567e-06, "loss": 0.0431, "step": 38870 }, { "epoch": 1.95, "learning_rate": 1.2869980200030463e-06, "loss": 0.0771, "step": 38880 }, { "epoch": 1.95, "learning_rate": 1.2743057318373356e-06, "loss": 0.0604, "step": 38890 }, { "epoch": 1.95, "learning_rate": 1.2616134436716252e-06, "loss": 0.0816, "step": 38900 }, { "epoch": 1.95, "learning_rate": 1.2489211555059148e-06, "loss": 0.0218, "step": 38910 }, { "epoch": 1.95, "learning_rate": 1.2362288673402041e-06, "loss": 0.0359, "step": 38920 }, { "epoch": 1.95, "learning_rate": 1.2235365791744935e-06, "loss": 0.0396, "step": 38930 }, { "epoch": 1.95, "learning_rate": 1.210844291008783e-06, "loss": 0.035, "step": 38940 }, { "epoch": 1.95, "learning_rate": 1.1981520028430726e-06, "loss": 0.0473, "step": 38950 }, { "epoch": 1.95, "learning_rate": 1.1854597146773622e-06, "loss": 0.0524, "step": 38960 }, { "epoch": 1.95, "learning_rate": 1.1727674265116518e-06, "loss": 0.0255, "step": 38970 }, { "epoch": 1.95, "learning_rate": 1.1600751383459411e-06, "loss": 0.0344, "step": 38980 }, { "epoch": 1.95, "learning_rate": 1.1473828501802305e-06, "loss": 0.0497, "step": 38990 }, { "epoch": 1.96, "learning_rate": 1.13469056201452e-06, "loss": 0.0472, "step": 39000 }, { "epoch": 1.96, "learning_rate": 1.1219982738488096e-06, "loss": 0.0909, "step": 39010 }, { "epoch": 1.96, "learning_rate": 1.109305985683099e-06, "loss": 0.0617, "step": 39020 }, { "epoch": 1.96, "learning_rate": 1.0966136975173885e-06, "loss": 0.0242, "step": 39030 }, { "epoch": 1.96, "learning_rate": 1.0839214093516779e-06, "loss": 0.0705, "step": 39040 }, { "epoch": 1.96, "learning_rate": 1.0712291211859674e-06, "loss": 0.0537, "step": 39050 }, { "epoch": 1.96, "learning_rate": 1.058536833020257e-06, "loss": 0.024, "step": 39060 }, { "epoch": 1.96, "learning_rate": 1.0458445448545464e-06, "loss": 0.0839, "step": 39070 }, { "epoch": 1.96, "learning_rate": 1.033152256688836e-06, "loss": 0.0465, "step": 39080 }, { "epoch": 1.96, "learning_rate": 1.0204599685231255e-06, "loss": 0.0301, "step": 39090 }, { "epoch": 1.96, "learning_rate": 1.0077676803574148e-06, "loss": 0.0336, "step": 39100 }, { "epoch": 1.96, "learning_rate": 9.950753921917042e-07, "loss": 0.0347, "step": 39110 }, { "epoch": 1.96, "learning_rate": 9.823831040259938e-07, "loss": 0.0838, "step": 39120 }, { "epoch": 1.96, "learning_rate": 9.696908158602833e-07, "loss": 0.1165, "step": 39130 }, { "epoch": 1.96, "learning_rate": 9.56998527694573e-07, "loss": 0.0296, "step": 39140 }, { "epoch": 1.96, "learning_rate": 9.443062395288624e-07, "loss": 0.0981, "step": 39150 }, { "epoch": 1.96, "learning_rate": 9.316139513631517e-07, "loss": 0.0519, "step": 39160 }, { "epoch": 1.96, "learning_rate": 9.189216631974413e-07, "loss": 0.0405, "step": 39170 }, { "epoch": 1.96, "learning_rate": 9.062293750317307e-07, "loss": 0.0705, "step": 39180 }, { "epoch": 1.96, "learning_rate": 8.935370868660203e-07, "loss": 0.0419, "step": 39190 }, { "epoch": 1.97, "learning_rate": 8.808447987003097e-07, "loss": 0.072, "step": 39200 }, { "epoch": 1.97, "learning_rate": 8.681525105345991e-07, "loss": 0.0836, "step": 39210 }, { "epoch": 1.97, "learning_rate": 8.554602223688887e-07, "loss": 0.0411, "step": 39220 }, { "epoch": 1.97, "learning_rate": 8.427679342031783e-07, "loss": 0.054, "step": 39230 }, { "epoch": 1.97, "learning_rate": 8.300756460374677e-07, "loss": 0.0641, "step": 39240 }, { "epoch": 1.97, "learning_rate": 8.173833578717571e-07, "loss": 0.0321, "step": 39250 }, { "epoch": 1.97, "learning_rate": 8.046910697060466e-07, "loss": 0.1047, "step": 39260 }, { "epoch": 1.97, "learning_rate": 7.919987815403361e-07, "loss": 0.0383, "step": 39270 }, { "epoch": 1.97, "learning_rate": 7.793064933746256e-07, "loss": 0.0766, "step": 39280 }, { "epoch": 1.97, "learning_rate": 7.666142052089151e-07, "loss": 0.0367, "step": 39290 }, { "epoch": 1.97, "learning_rate": 7.539219170432046e-07, "loss": 0.0259, "step": 39300 }, { "epoch": 1.97, "learning_rate": 7.412296288774941e-07, "loss": 0.076, "step": 39310 }, { "epoch": 1.97, "learning_rate": 7.285373407117836e-07, "loss": 0.0606, "step": 39320 }, { "epoch": 1.97, "learning_rate": 7.15845052546073e-07, "loss": 0.0538, "step": 39330 }, { "epoch": 1.97, "learning_rate": 7.031527643803625e-07, "loss": 0.0757, "step": 39340 }, { "epoch": 1.97, "learning_rate": 6.90460476214652e-07, "loss": 0.0348, "step": 39350 }, { "epoch": 1.97, "learning_rate": 6.777681880489415e-07, "loss": 0.0161, "step": 39360 }, { "epoch": 1.97, "learning_rate": 6.650758998832309e-07, "loss": 0.0511, "step": 39370 }, { "epoch": 1.97, "learning_rate": 6.523836117175205e-07, "loss": 0.0696, "step": 39380 }, { "epoch": 1.97, "learning_rate": 6.3969132355181e-07, "loss": 0.0437, "step": 39390 }, { "epoch": 1.98, "learning_rate": 6.269990353860994e-07, "loss": 0.0936, "step": 39400 }, { "epoch": 1.98, "learning_rate": 6.14306747220389e-07, "loss": 0.0437, "step": 39410 }, { "epoch": 1.98, "learning_rate": 6.016144590546783e-07, "loss": 0.0552, "step": 39420 }, { "epoch": 1.98, "learning_rate": 5.889221708889679e-07, "loss": 0.0433, "step": 39430 }, { "epoch": 1.98, "learning_rate": 5.762298827232574e-07, "loss": 0.0689, "step": 39440 }, { "epoch": 1.98, "learning_rate": 5.635375945575468e-07, "loss": 0.0329, "step": 39450 }, { "epoch": 1.98, "learning_rate": 5.508453063918364e-07, "loss": 0.0841, "step": 39460 }, { "epoch": 1.98, "learning_rate": 5.381530182261258e-07, "loss": 0.025, "step": 39470 }, { "epoch": 1.98, "learning_rate": 5.254607300604153e-07, "loss": 0.0414, "step": 39480 }, { "epoch": 1.98, "learning_rate": 5.127684418947048e-07, "loss": 0.0621, "step": 39490 }, { "epoch": 1.98, "learning_rate": 5.000761537289943e-07, "loss": 0.0562, "step": 39500 } ], "max_steps": 39894, "num_train_epochs": 2, "total_flos": 8.314476392438784e+16, "trial_name": null, "trial_params": null }