{ "best_metric": 0.2080189436674118, "best_model_checkpoint": "/content/drive/MyDrive/Seizure_EEG_Research/ViT_Seizure_Detection/checkpoint-10000", "epoch": 4.0, "eval_steps": 10000, "global_step": 50912, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.996071653048398e-05, "loss": 0.1727, "step": 20 }, { "epoch": 0.0, "learning_rate": 9.992143306096795e-05, "loss": 0.2207, "step": 40 }, { "epoch": 0.0, "learning_rate": 9.988214959145193e-05, "loss": 0.1533, "step": 60 }, { "epoch": 0.01, "learning_rate": 9.98428661219359e-05, "loss": 0.1644, "step": 80 }, { "epoch": 0.01, "learning_rate": 9.980358265241988e-05, "loss": 0.1586, "step": 100 }, { "epoch": 0.01, "learning_rate": 9.976429918290384e-05, "loss": 0.1566, "step": 120 }, { "epoch": 0.01, "learning_rate": 9.972501571338781e-05, "loss": 0.1715, "step": 140 }, { "epoch": 0.01, "learning_rate": 9.968573224387178e-05, "loss": 0.178, "step": 160 }, { "epoch": 0.01, "learning_rate": 9.964644877435576e-05, "loss": 0.1879, "step": 180 }, { "epoch": 0.02, "learning_rate": 9.960716530483973e-05, "loss": 0.167, "step": 200 }, { "epoch": 0.02, "learning_rate": 9.956788183532369e-05, "loss": 0.1486, "step": 220 }, { "epoch": 0.02, "learning_rate": 9.952859836580767e-05, "loss": 0.1393, "step": 240 }, { "epoch": 0.02, "learning_rate": 9.948931489629164e-05, "loss": 0.1424, "step": 260 }, { "epoch": 0.02, "learning_rate": 9.945003142677562e-05, "loss": 0.1646, "step": 280 }, { "epoch": 0.02, "learning_rate": 9.941074795725959e-05, "loss": 0.1754, "step": 300 }, { "epoch": 0.03, "learning_rate": 9.937146448774356e-05, "loss": 0.1553, "step": 320 }, { "epoch": 0.03, "learning_rate": 9.933218101822754e-05, "loss": 0.152, "step": 340 }, { "epoch": 0.03, "learning_rate": 9.929289754871151e-05, "loss": 0.1504, "step": 360 }, { "epoch": 0.03, "learning_rate": 9.925361407919547e-05, "loss": 0.1272, "step": 380 }, { "epoch": 0.03, "learning_rate": 9.921433060967945e-05, "loss": 0.1522, "step": 400 }, { "epoch": 0.03, "learning_rate": 9.917504714016342e-05, "loss": 0.1662, "step": 420 }, { "epoch": 0.03, "learning_rate": 9.91357636706474e-05, "loss": 0.1423, "step": 440 }, { "epoch": 0.04, "learning_rate": 9.909648020113137e-05, "loss": 0.1338, "step": 460 }, { "epoch": 0.04, "learning_rate": 9.905719673161534e-05, "loss": 0.1314, "step": 480 }, { "epoch": 0.04, "learning_rate": 9.901791326209932e-05, "loss": 0.1388, "step": 500 }, { "epoch": 0.04, "learning_rate": 9.897862979258329e-05, "loss": 0.1467, "step": 520 }, { "epoch": 0.04, "learning_rate": 9.893934632306727e-05, "loss": 0.1396, "step": 540 }, { "epoch": 0.04, "learning_rate": 9.890006285355124e-05, "loss": 0.1615, "step": 560 }, { "epoch": 0.05, "learning_rate": 9.88607793840352e-05, "loss": 0.13, "step": 580 }, { "epoch": 0.05, "learning_rate": 9.882149591451917e-05, "loss": 0.1424, "step": 600 }, { "epoch": 0.05, "learning_rate": 9.878221244500315e-05, "loss": 0.1412, "step": 620 }, { "epoch": 0.05, "learning_rate": 9.874292897548711e-05, "loss": 0.1311, "step": 640 }, { "epoch": 0.05, "learning_rate": 9.870364550597108e-05, "loss": 0.1738, "step": 660 }, { "epoch": 0.05, "learning_rate": 9.866436203645506e-05, "loss": 0.1671, "step": 680 }, { "epoch": 0.05, "learning_rate": 9.862507856693903e-05, "loss": 0.1185, "step": 700 }, { "epoch": 0.06, "learning_rate": 9.8585795097423e-05, "loss": 0.1456, "step": 720 }, { "epoch": 0.06, "learning_rate": 9.854651162790698e-05, "loss": 0.1252, "step": 740 }, { "epoch": 0.06, "learning_rate": 9.850722815839095e-05, "loss": 0.1533, "step": 760 }, { "epoch": 0.06, "learning_rate": 9.846794468887493e-05, "loss": 0.1678, "step": 780 }, { "epoch": 0.06, "learning_rate": 9.84286612193589e-05, "loss": 0.14, "step": 800 }, { "epoch": 0.06, "learning_rate": 9.838937774984288e-05, "loss": 0.1288, "step": 820 }, { "epoch": 0.07, "learning_rate": 9.835009428032684e-05, "loss": 0.1076, "step": 840 }, { "epoch": 0.07, "learning_rate": 9.831081081081081e-05, "loss": 0.1279, "step": 860 }, { "epoch": 0.07, "learning_rate": 9.827152734129479e-05, "loss": 0.1244, "step": 880 }, { "epoch": 0.07, "learning_rate": 9.823224387177876e-05, "loss": 0.1374, "step": 900 }, { "epoch": 0.07, "learning_rate": 9.819296040226273e-05, "loss": 0.1507, "step": 920 }, { "epoch": 0.07, "learning_rate": 9.815367693274671e-05, "loss": 0.1342, "step": 940 }, { "epoch": 0.08, "learning_rate": 9.811439346323068e-05, "loss": 0.1605, "step": 960 }, { "epoch": 0.08, "learning_rate": 9.807510999371466e-05, "loss": 0.1238, "step": 980 }, { "epoch": 0.08, "learning_rate": 9.803582652419863e-05, "loss": 0.117, "step": 1000 }, { "epoch": 0.08, "learning_rate": 9.79965430546826e-05, "loss": 0.1404, "step": 1020 }, { "epoch": 0.08, "learning_rate": 9.795725958516658e-05, "loss": 0.118, "step": 1040 }, { "epoch": 0.08, "learning_rate": 9.791797611565054e-05, "loss": 0.1335, "step": 1060 }, { "epoch": 0.08, "learning_rate": 9.787869264613451e-05, "loss": 0.1274, "step": 1080 }, { "epoch": 0.09, "learning_rate": 9.783940917661847e-05, "loss": 0.1276, "step": 1100 }, { "epoch": 0.09, "learning_rate": 9.780012570710245e-05, "loss": 0.143, "step": 1120 }, { "epoch": 0.09, "learning_rate": 9.776084223758642e-05, "loss": 0.1241, "step": 1140 }, { "epoch": 0.09, "learning_rate": 9.77215587680704e-05, "loss": 0.1388, "step": 1160 }, { "epoch": 0.09, "learning_rate": 9.768227529855437e-05, "loss": 0.1255, "step": 1180 }, { "epoch": 0.09, "learning_rate": 9.764299182903834e-05, "loss": 0.1027, "step": 1200 }, { "epoch": 0.1, "learning_rate": 9.760370835952232e-05, "loss": 0.1305, "step": 1220 }, { "epoch": 0.1, "learning_rate": 9.756442489000629e-05, "loss": 0.1445, "step": 1240 }, { "epoch": 0.1, "learning_rate": 9.752514142049027e-05, "loss": 0.1064, "step": 1260 }, { "epoch": 0.1, "learning_rate": 9.748585795097424e-05, "loss": 0.1231, "step": 1280 }, { "epoch": 0.1, "learning_rate": 9.744657448145821e-05, "loss": 0.1435, "step": 1300 }, { "epoch": 0.1, "learning_rate": 9.740729101194218e-05, "loss": 0.1568, "step": 1320 }, { "epoch": 0.11, "learning_rate": 9.736800754242615e-05, "loss": 0.1192, "step": 1340 }, { "epoch": 0.11, "learning_rate": 9.732872407291012e-05, "loss": 0.1215, "step": 1360 }, { "epoch": 0.11, "learning_rate": 9.72894406033941e-05, "loss": 0.1221, "step": 1380 }, { "epoch": 0.11, "learning_rate": 9.725015713387807e-05, "loss": 0.112, "step": 1400 }, { "epoch": 0.11, "learning_rate": 9.721087366436205e-05, "loss": 0.1279, "step": 1420 }, { "epoch": 0.11, "learning_rate": 9.717159019484602e-05, "loss": 0.1122, "step": 1440 }, { "epoch": 0.11, "learning_rate": 9.713230672533e-05, "loss": 0.0998, "step": 1460 }, { "epoch": 0.12, "learning_rate": 9.709302325581396e-05, "loss": 0.0875, "step": 1480 }, { "epoch": 0.12, "learning_rate": 9.705373978629793e-05, "loss": 0.1377, "step": 1500 }, { "epoch": 0.12, "learning_rate": 9.70144563167819e-05, "loss": 0.1485, "step": 1520 }, { "epoch": 0.12, "learning_rate": 9.697517284726588e-05, "loss": 0.1118, "step": 1540 }, { "epoch": 0.12, "learning_rate": 9.693588937774984e-05, "loss": 0.1359, "step": 1560 }, { "epoch": 0.12, "learning_rate": 9.689660590823381e-05, "loss": 0.1061, "step": 1580 }, { "epoch": 0.13, "learning_rate": 9.685732243871779e-05, "loss": 0.1062, "step": 1600 }, { "epoch": 0.13, "learning_rate": 9.681803896920176e-05, "loss": 0.1202, "step": 1620 }, { "epoch": 0.13, "learning_rate": 9.677875549968573e-05, "loss": 0.1238, "step": 1640 }, { "epoch": 0.13, "learning_rate": 9.673947203016971e-05, "loss": 0.1204, "step": 1660 }, { "epoch": 0.13, "learning_rate": 9.670018856065368e-05, "loss": 0.1232, "step": 1680 }, { "epoch": 0.13, "learning_rate": 9.666090509113766e-05, "loss": 0.1502, "step": 1700 }, { "epoch": 0.14, "learning_rate": 9.662162162162163e-05, "loss": 0.1293, "step": 1720 }, { "epoch": 0.14, "learning_rate": 9.65823381521056e-05, "loss": 0.1192, "step": 1740 }, { "epoch": 0.14, "learning_rate": 9.654305468258958e-05, "loss": 0.1189, "step": 1760 }, { "epoch": 0.14, "learning_rate": 9.650377121307354e-05, "loss": 0.1162, "step": 1780 }, { "epoch": 0.14, "learning_rate": 9.646448774355751e-05, "loss": 0.1388, "step": 1800 }, { "epoch": 0.14, "learning_rate": 9.642520427404149e-05, "loss": 0.1027, "step": 1820 }, { "epoch": 0.14, "learning_rate": 9.638592080452546e-05, "loss": 0.143, "step": 1840 }, { "epoch": 0.15, "learning_rate": 9.634663733500944e-05, "loss": 0.1412, "step": 1860 }, { "epoch": 0.15, "learning_rate": 9.630735386549341e-05, "loss": 0.1048, "step": 1880 }, { "epoch": 0.15, "learning_rate": 9.626807039597737e-05, "loss": 0.1303, "step": 1900 }, { "epoch": 0.15, "learning_rate": 9.622878692646135e-05, "loss": 0.1198, "step": 1920 }, { "epoch": 0.15, "learning_rate": 9.618950345694532e-05, "loss": 0.1138, "step": 1940 }, { "epoch": 0.15, "learning_rate": 9.61502199874293e-05, "loss": 0.0882, "step": 1960 }, { "epoch": 0.16, "learning_rate": 9.611093651791327e-05, "loss": 0.1115, "step": 1980 }, { "epoch": 0.16, "learning_rate": 9.607165304839724e-05, "loss": 0.1143, "step": 2000 }, { "epoch": 0.16, "learning_rate": 9.603236957888122e-05, "loss": 0.1369, "step": 2020 }, { "epoch": 0.16, "learning_rate": 9.599308610936518e-05, "loss": 0.0814, "step": 2040 }, { "epoch": 0.16, "learning_rate": 9.595380263984915e-05, "loss": 0.1277, "step": 2060 }, { "epoch": 0.16, "learning_rate": 9.591451917033312e-05, "loss": 0.1121, "step": 2080 }, { "epoch": 0.16, "learning_rate": 9.58752357008171e-05, "loss": 0.0948, "step": 2100 }, { "epoch": 0.17, "learning_rate": 9.583595223130107e-05, "loss": 0.1256, "step": 2120 }, { "epoch": 0.17, "learning_rate": 9.579666876178505e-05, "loss": 0.1158, "step": 2140 }, { "epoch": 0.17, "learning_rate": 9.575738529226902e-05, "loss": 0.1154, "step": 2160 }, { "epoch": 0.17, "learning_rate": 9.5718101822753e-05, "loss": 0.0966, "step": 2180 }, { "epoch": 0.17, "learning_rate": 9.567881835323697e-05, "loss": 0.1007, "step": 2200 }, { "epoch": 0.17, "learning_rate": 9.563953488372094e-05, "loss": 0.0969, "step": 2220 }, { "epoch": 0.18, "learning_rate": 9.56002514142049e-05, "loss": 0.1247, "step": 2240 }, { "epoch": 0.18, "learning_rate": 9.556096794468888e-05, "loss": 0.1221, "step": 2260 }, { "epoch": 0.18, "learning_rate": 9.552168447517285e-05, "loss": 0.1049, "step": 2280 }, { "epoch": 0.18, "learning_rate": 9.548240100565683e-05, "loss": 0.1249, "step": 2300 }, { "epoch": 0.18, "learning_rate": 9.544311753614079e-05, "loss": 0.1035, "step": 2320 }, { "epoch": 0.18, "learning_rate": 9.540383406662476e-05, "loss": 0.1324, "step": 2340 }, { "epoch": 0.19, "learning_rate": 9.536455059710874e-05, "loss": 0.1085, "step": 2360 }, { "epoch": 0.19, "learning_rate": 9.532526712759271e-05, "loss": 0.1152, "step": 2380 }, { "epoch": 0.19, "learning_rate": 9.528598365807668e-05, "loss": 0.1057, "step": 2400 }, { "epoch": 0.19, "learning_rate": 9.524670018856066e-05, "loss": 0.0825, "step": 2420 }, { "epoch": 0.19, "learning_rate": 9.520741671904463e-05, "loss": 0.1323, "step": 2440 }, { "epoch": 0.19, "learning_rate": 9.51681332495286e-05, "loss": 0.1031, "step": 2460 }, { "epoch": 0.19, "learning_rate": 9.512884978001258e-05, "loss": 0.1148, "step": 2480 }, { "epoch": 0.2, "learning_rate": 9.508956631049654e-05, "loss": 0.1012, "step": 2500 }, { "epoch": 0.2, "learning_rate": 9.505028284098052e-05, "loss": 0.1097, "step": 2520 }, { "epoch": 0.2, "learning_rate": 9.501099937146449e-05, "loss": 0.1238, "step": 2540 }, { "epoch": 0.2, "learning_rate": 9.497171590194846e-05, "loss": 0.0987, "step": 2560 }, { "epoch": 0.2, "learning_rate": 9.493243243243244e-05, "loss": 0.1276, "step": 2580 }, { "epoch": 0.2, "learning_rate": 9.489314896291641e-05, "loss": 0.1218, "step": 2600 }, { "epoch": 0.21, "learning_rate": 9.485386549340039e-05, "loss": 0.1228, "step": 2620 }, { "epoch": 0.21, "learning_rate": 9.481458202388436e-05, "loss": 0.1048, "step": 2640 }, { "epoch": 0.21, "learning_rate": 9.477529855436833e-05, "loss": 0.1005, "step": 2660 }, { "epoch": 0.21, "learning_rate": 9.473601508485231e-05, "loss": 0.1042, "step": 2680 }, { "epoch": 0.21, "learning_rate": 9.469673161533628e-05, "loss": 0.1196, "step": 2700 }, { "epoch": 0.21, "learning_rate": 9.465744814582024e-05, "loss": 0.119, "step": 2720 }, { "epoch": 0.22, "learning_rate": 9.461816467630422e-05, "loss": 0.0872, "step": 2740 }, { "epoch": 0.22, "learning_rate": 9.457888120678818e-05, "loss": 0.0948, "step": 2760 }, { "epoch": 0.22, "learning_rate": 9.453959773727215e-05, "loss": 0.1219, "step": 2780 }, { "epoch": 0.22, "learning_rate": 9.450031426775613e-05, "loss": 0.1102, "step": 2800 }, { "epoch": 0.22, "learning_rate": 9.44610307982401e-05, "loss": 0.1375, "step": 2820 }, { "epoch": 0.22, "learning_rate": 9.442174732872407e-05, "loss": 0.1123, "step": 2840 }, { "epoch": 0.22, "learning_rate": 9.438246385920805e-05, "loss": 0.1016, "step": 2860 }, { "epoch": 0.23, "learning_rate": 9.434318038969202e-05, "loss": 0.0862, "step": 2880 }, { "epoch": 0.23, "learning_rate": 9.4303896920176e-05, "loss": 0.0973, "step": 2900 }, { "epoch": 0.23, "learning_rate": 9.426461345065997e-05, "loss": 0.1071, "step": 2920 }, { "epoch": 0.23, "learning_rate": 9.422532998114394e-05, "loss": 0.115, "step": 2940 }, { "epoch": 0.23, "learning_rate": 9.418604651162792e-05, "loss": 0.1203, "step": 2960 }, { "epoch": 0.23, "learning_rate": 9.414676304211188e-05, "loss": 0.1102, "step": 2980 }, { "epoch": 0.24, "learning_rate": 9.410747957259585e-05, "loss": 0.0987, "step": 3000 }, { "epoch": 0.24, "learning_rate": 9.406819610307983e-05, "loss": 0.1094, "step": 3020 }, { "epoch": 0.24, "learning_rate": 9.40289126335638e-05, "loss": 0.1045, "step": 3040 }, { "epoch": 0.24, "learning_rate": 9.398962916404778e-05, "loss": 0.1025, "step": 3060 }, { "epoch": 0.24, "learning_rate": 9.395034569453175e-05, "loss": 0.0905, "step": 3080 }, { "epoch": 0.24, "learning_rate": 9.391106222501572e-05, "loss": 0.1277, "step": 3100 }, { "epoch": 0.25, "learning_rate": 9.38717787554997e-05, "loss": 0.1298, "step": 3120 }, { "epoch": 0.25, "learning_rate": 9.383249528598367e-05, "loss": 0.0932, "step": 3140 }, { "epoch": 0.25, "learning_rate": 9.379321181646763e-05, "loss": 0.0991, "step": 3160 }, { "epoch": 0.25, "learning_rate": 9.375392834695161e-05, "loss": 0.0978, "step": 3180 }, { "epoch": 0.25, "learning_rate": 9.371464487743558e-05, "loss": 0.0838, "step": 3200 }, { "epoch": 0.25, "learning_rate": 9.367536140791954e-05, "loss": 0.0973, "step": 3220 }, { "epoch": 0.25, "learning_rate": 9.363607793840352e-05, "loss": 0.1173, "step": 3240 }, { "epoch": 0.26, "learning_rate": 9.359679446888749e-05, "loss": 0.1119, "step": 3260 }, { "epoch": 0.26, "learning_rate": 9.355751099937146e-05, "loss": 0.1174, "step": 3280 }, { "epoch": 0.26, "learning_rate": 9.351822752985544e-05, "loss": 0.1155, "step": 3300 }, { "epoch": 0.26, "learning_rate": 9.347894406033941e-05, "loss": 0.0874, "step": 3320 }, { "epoch": 0.26, "learning_rate": 9.343966059082339e-05, "loss": 0.104, "step": 3340 }, { "epoch": 0.26, "learning_rate": 9.340037712130736e-05, "loss": 0.1015, "step": 3360 }, { "epoch": 0.27, "learning_rate": 9.336109365179134e-05, "loss": 0.0874, "step": 3380 }, { "epoch": 0.27, "learning_rate": 9.332181018227531e-05, "loss": 0.1019, "step": 3400 }, { "epoch": 0.27, "learning_rate": 9.328252671275928e-05, "loss": 0.1189, "step": 3420 }, { "epoch": 0.27, "learning_rate": 9.324324324324324e-05, "loss": 0.1072, "step": 3440 }, { "epoch": 0.27, "learning_rate": 9.320395977372722e-05, "loss": 0.112, "step": 3460 }, { "epoch": 0.27, "learning_rate": 9.316467630421119e-05, "loss": 0.1231, "step": 3480 }, { "epoch": 0.27, "learning_rate": 9.312539283469517e-05, "loss": 0.1057, "step": 3500 }, { "epoch": 0.28, "learning_rate": 9.308610936517914e-05, "loss": 0.094, "step": 3520 }, { "epoch": 0.28, "learning_rate": 9.304682589566311e-05, "loss": 0.1074, "step": 3540 }, { "epoch": 0.28, "learning_rate": 9.300754242614708e-05, "loss": 0.1093, "step": 3560 }, { "epoch": 0.28, "learning_rate": 9.296825895663105e-05, "loss": 0.1049, "step": 3580 }, { "epoch": 0.28, "learning_rate": 9.292897548711502e-05, "loss": 0.0894, "step": 3600 }, { "epoch": 0.28, "learning_rate": 9.2889692017599e-05, "loss": 0.103, "step": 3620 }, { "epoch": 0.29, "learning_rate": 9.285040854808297e-05, "loss": 0.0873, "step": 3640 }, { "epoch": 0.29, "learning_rate": 9.281112507856695e-05, "loss": 0.1035, "step": 3660 }, { "epoch": 0.29, "learning_rate": 9.277184160905092e-05, "loss": 0.0912, "step": 3680 }, { "epoch": 0.29, "learning_rate": 9.273255813953488e-05, "loss": 0.104, "step": 3700 }, { "epoch": 0.29, "learning_rate": 9.269327467001885e-05, "loss": 0.0887, "step": 3720 }, { "epoch": 0.29, "learning_rate": 9.265399120050283e-05, "loss": 0.0797, "step": 3740 }, { "epoch": 0.3, "learning_rate": 9.26147077309868e-05, "loss": 0.0865, "step": 3760 }, { "epoch": 0.3, "learning_rate": 9.257542426147078e-05, "loss": 0.1241, "step": 3780 }, { "epoch": 0.3, "learning_rate": 9.253614079195475e-05, "loss": 0.1002, "step": 3800 }, { "epoch": 0.3, "learning_rate": 9.249685732243873e-05, "loss": 0.101, "step": 3820 }, { "epoch": 0.3, "learning_rate": 9.24575738529227e-05, "loss": 0.1081, "step": 3840 }, { "epoch": 0.3, "learning_rate": 9.241829038340667e-05, "loss": 0.0979, "step": 3860 }, { "epoch": 0.3, "learning_rate": 9.237900691389065e-05, "loss": 0.0904, "step": 3880 }, { "epoch": 0.31, "learning_rate": 9.233972344437462e-05, "loss": 0.1014, "step": 3900 }, { "epoch": 0.31, "learning_rate": 9.230043997485858e-05, "loss": 0.0895, "step": 3920 }, { "epoch": 0.31, "learning_rate": 9.226115650534256e-05, "loss": 0.0808, "step": 3940 }, { "epoch": 0.31, "learning_rate": 9.222187303582653e-05, "loss": 0.1062, "step": 3960 }, { "epoch": 0.31, "learning_rate": 9.218258956631049e-05, "loss": 0.0883, "step": 3980 }, { "epoch": 0.31, "learning_rate": 9.214330609679447e-05, "loss": 0.0768, "step": 4000 }, { "epoch": 0.32, "learning_rate": 9.210402262727844e-05, "loss": 0.0883, "step": 4020 }, { "epoch": 0.32, "learning_rate": 9.206473915776241e-05, "loss": 0.0954, "step": 4040 }, { "epoch": 0.32, "learning_rate": 9.202545568824639e-05, "loss": 0.0932, "step": 4060 }, { "epoch": 0.32, "learning_rate": 9.198617221873036e-05, "loss": 0.0937, "step": 4080 }, { "epoch": 0.32, "learning_rate": 9.194688874921434e-05, "loss": 0.1178, "step": 4100 }, { "epoch": 0.32, "learning_rate": 9.190760527969831e-05, "loss": 0.1053, "step": 4120 }, { "epoch": 0.33, "learning_rate": 9.186832181018228e-05, "loss": 0.102, "step": 4140 }, { "epoch": 0.33, "learning_rate": 9.182903834066624e-05, "loss": 0.1034, "step": 4160 }, { "epoch": 0.33, "learning_rate": 9.178975487115022e-05, "loss": 0.0796, "step": 4180 }, { "epoch": 0.33, "learning_rate": 9.175047140163419e-05, "loss": 0.1041, "step": 4200 }, { "epoch": 0.33, "learning_rate": 9.171118793211817e-05, "loss": 0.1027, "step": 4220 }, { "epoch": 0.33, "learning_rate": 9.167190446260214e-05, "loss": 0.0936, "step": 4240 }, { "epoch": 0.33, "learning_rate": 9.163262099308612e-05, "loss": 0.1029, "step": 4260 }, { "epoch": 0.34, "learning_rate": 9.159333752357009e-05, "loss": 0.1045, "step": 4280 }, { "epoch": 0.34, "learning_rate": 9.155405405405406e-05, "loss": 0.1349, "step": 4300 }, { "epoch": 0.34, "learning_rate": 9.151477058453804e-05, "loss": 0.0961, "step": 4320 }, { "epoch": 0.34, "learning_rate": 9.147548711502201e-05, "loss": 0.0823, "step": 4340 }, { "epoch": 0.34, "learning_rate": 9.143620364550599e-05, "loss": 0.1002, "step": 4360 }, { "epoch": 0.34, "learning_rate": 9.139692017598995e-05, "loss": 0.1014, "step": 4380 }, { "epoch": 0.35, "learning_rate": 9.135763670647392e-05, "loss": 0.0984, "step": 4400 }, { "epoch": 0.35, "learning_rate": 9.131835323695788e-05, "loss": 0.0982, "step": 4420 }, { "epoch": 0.35, "learning_rate": 9.127906976744186e-05, "loss": 0.1004, "step": 4440 }, { "epoch": 0.35, "learning_rate": 9.123978629792583e-05, "loss": 0.0821, "step": 4460 }, { "epoch": 0.35, "learning_rate": 9.12005028284098e-05, "loss": 0.0959, "step": 4480 }, { "epoch": 0.35, "learning_rate": 9.116121935889378e-05, "loss": 0.0748, "step": 4500 }, { "epoch": 0.36, "learning_rate": 9.112193588937775e-05, "loss": 0.0996, "step": 4520 }, { "epoch": 0.36, "learning_rate": 9.108265241986173e-05, "loss": 0.083, "step": 4540 }, { "epoch": 0.36, "learning_rate": 9.10433689503457e-05, "loss": 0.0965, "step": 4560 }, { "epoch": 0.36, "learning_rate": 9.100408548082967e-05, "loss": 0.1058, "step": 4580 }, { "epoch": 0.36, "learning_rate": 9.096480201131365e-05, "loss": 0.0815, "step": 4600 }, { "epoch": 0.36, "learning_rate": 9.092551854179762e-05, "loss": 0.1019, "step": 4620 }, { "epoch": 0.36, "learning_rate": 9.088623507228158e-05, "loss": 0.0812, "step": 4640 }, { "epoch": 0.37, "learning_rate": 9.084695160276556e-05, "loss": 0.1065, "step": 4660 }, { "epoch": 0.37, "learning_rate": 9.080766813324953e-05, "loss": 0.0752, "step": 4680 }, { "epoch": 0.37, "learning_rate": 9.07683846637335e-05, "loss": 0.109, "step": 4700 }, { "epoch": 0.37, "learning_rate": 9.072910119421748e-05, "loss": 0.1129, "step": 4720 }, { "epoch": 0.37, "learning_rate": 9.068981772470145e-05, "loss": 0.1153, "step": 4740 }, { "epoch": 0.37, "learning_rate": 9.065053425518543e-05, "loss": 0.1106, "step": 4760 }, { "epoch": 0.38, "learning_rate": 9.06112507856694e-05, "loss": 0.0921, "step": 4780 }, { "epoch": 0.38, "learning_rate": 9.057196731615338e-05, "loss": 0.1, "step": 4800 }, { "epoch": 0.38, "learning_rate": 9.053268384663734e-05, "loss": 0.0919, "step": 4820 }, { "epoch": 0.38, "learning_rate": 9.049340037712131e-05, "loss": 0.1059, "step": 4840 }, { "epoch": 0.38, "learning_rate": 9.045411690760529e-05, "loss": 0.0865, "step": 4860 }, { "epoch": 0.38, "learning_rate": 9.041483343808925e-05, "loss": 0.1179, "step": 4880 }, { "epoch": 0.38, "learning_rate": 9.037554996857322e-05, "loss": 0.0943, "step": 4900 }, { "epoch": 0.39, "learning_rate": 9.03362664990572e-05, "loss": 0.0964, "step": 4920 }, { "epoch": 0.39, "learning_rate": 9.029698302954117e-05, "loss": 0.1136, "step": 4940 }, { "epoch": 0.39, "learning_rate": 9.025769956002514e-05, "loss": 0.1099, "step": 4960 }, { "epoch": 0.39, "learning_rate": 9.021841609050912e-05, "loss": 0.0941, "step": 4980 }, { "epoch": 0.39, "learning_rate": 9.017913262099309e-05, "loss": 0.0883, "step": 5000 }, { "epoch": 0.39, "learning_rate": 9.013984915147706e-05, "loss": 0.0791, "step": 5020 }, { "epoch": 0.4, "learning_rate": 9.010056568196104e-05, "loss": 0.0832, "step": 5040 }, { "epoch": 0.4, "learning_rate": 9.006128221244501e-05, "loss": 0.0737, "step": 5060 }, { "epoch": 0.4, "learning_rate": 9.002199874292899e-05, "loss": 0.0929, "step": 5080 }, { "epoch": 0.4, "learning_rate": 8.998271527341295e-05, "loss": 0.0918, "step": 5100 }, { "epoch": 0.4, "learning_rate": 8.994343180389692e-05, "loss": 0.1042, "step": 5120 }, { "epoch": 0.4, "learning_rate": 8.99041483343809e-05, "loss": 0.0678, "step": 5140 }, { "epoch": 0.41, "learning_rate": 8.986486486486487e-05, "loss": 0.0998, "step": 5160 }, { "epoch": 0.41, "learning_rate": 8.982558139534884e-05, "loss": 0.0787, "step": 5180 }, { "epoch": 0.41, "learning_rate": 8.978629792583282e-05, "loss": 0.1275, "step": 5200 }, { "epoch": 0.41, "learning_rate": 8.974701445631679e-05, "loss": 0.0983, "step": 5220 }, { "epoch": 0.41, "learning_rate": 8.970773098680075e-05, "loss": 0.0952, "step": 5240 }, { "epoch": 0.41, "learning_rate": 8.966844751728473e-05, "loss": 0.0882, "step": 5260 }, { "epoch": 0.41, "learning_rate": 8.96291640477687e-05, "loss": 0.1071, "step": 5280 }, { "epoch": 0.42, "learning_rate": 8.958988057825268e-05, "loss": 0.0954, "step": 5300 }, { "epoch": 0.42, "learning_rate": 8.955059710873665e-05, "loss": 0.0818, "step": 5320 }, { "epoch": 0.42, "learning_rate": 8.951131363922062e-05, "loss": 0.1024, "step": 5340 }, { "epoch": 0.42, "learning_rate": 8.947203016970458e-05, "loss": 0.0833, "step": 5360 }, { "epoch": 0.42, "learning_rate": 8.943274670018856e-05, "loss": 0.0895, "step": 5380 }, { "epoch": 0.42, "learning_rate": 8.939346323067253e-05, "loss": 0.0974, "step": 5400 }, { "epoch": 0.43, "learning_rate": 8.935417976115651e-05, "loss": 0.1, "step": 5420 }, { "epoch": 0.43, "learning_rate": 8.931489629164048e-05, "loss": 0.1069, "step": 5440 }, { "epoch": 0.43, "learning_rate": 8.927561282212446e-05, "loss": 0.0948, "step": 5460 }, { "epoch": 0.43, "learning_rate": 8.923632935260843e-05, "loss": 0.0882, "step": 5480 }, { "epoch": 0.43, "learning_rate": 8.91970458830924e-05, "loss": 0.0932, "step": 5500 }, { "epoch": 0.43, "learning_rate": 8.915776241357638e-05, "loss": 0.1029, "step": 5520 }, { "epoch": 0.44, "learning_rate": 8.911847894406035e-05, "loss": 0.1131, "step": 5540 }, { "epoch": 0.44, "learning_rate": 8.907919547454433e-05, "loss": 0.0788, "step": 5560 }, { "epoch": 0.44, "learning_rate": 8.903991200502829e-05, "loss": 0.0807, "step": 5580 }, { "epoch": 0.44, "learning_rate": 8.900062853551226e-05, "loss": 0.0956, "step": 5600 }, { "epoch": 0.44, "learning_rate": 8.896134506599623e-05, "loss": 0.0755, "step": 5620 }, { "epoch": 0.44, "learning_rate": 8.892206159648021e-05, "loss": 0.092, "step": 5640 }, { "epoch": 0.44, "learning_rate": 8.888277812696417e-05, "loss": 0.0831, "step": 5660 }, { "epoch": 0.45, "learning_rate": 8.884349465744814e-05, "loss": 0.0952, "step": 5680 }, { "epoch": 0.45, "learning_rate": 8.880421118793212e-05, "loss": 0.098, "step": 5700 }, { "epoch": 0.45, "learning_rate": 8.876492771841609e-05, "loss": 0.0932, "step": 5720 }, { "epoch": 0.45, "learning_rate": 8.872564424890007e-05, "loss": 0.0821, "step": 5740 }, { "epoch": 0.45, "learning_rate": 8.868636077938404e-05, "loss": 0.0692, "step": 5760 }, { "epoch": 0.45, "learning_rate": 8.864707730986801e-05, "loss": 0.0937, "step": 5780 }, { "epoch": 0.46, "learning_rate": 8.860779384035199e-05, "loss": 0.1013, "step": 5800 }, { "epoch": 0.46, "learning_rate": 8.856851037083595e-05, "loss": 0.089, "step": 5820 }, { "epoch": 0.46, "learning_rate": 8.852922690131992e-05, "loss": 0.07, "step": 5840 }, { "epoch": 0.46, "learning_rate": 8.84899434318039e-05, "loss": 0.0944, "step": 5860 }, { "epoch": 0.46, "learning_rate": 8.845065996228787e-05, "loss": 0.0814, "step": 5880 }, { "epoch": 0.46, "learning_rate": 8.841137649277185e-05, "loss": 0.0926, "step": 5900 }, { "epoch": 0.47, "learning_rate": 8.837209302325582e-05, "loss": 0.1135, "step": 5920 }, { "epoch": 0.47, "learning_rate": 8.83328095537398e-05, "loss": 0.0838, "step": 5940 }, { "epoch": 0.47, "learning_rate": 8.829352608422377e-05, "loss": 0.0658, "step": 5960 }, { "epoch": 0.47, "learning_rate": 8.825424261470774e-05, "loss": 0.0821, "step": 5980 }, { "epoch": 0.47, "learning_rate": 8.821495914519172e-05, "loss": 0.0919, "step": 6000 }, { "epoch": 0.47, "learning_rate": 8.817567567567569e-05, "loss": 0.0831, "step": 6020 }, { "epoch": 0.47, "learning_rate": 8.813639220615965e-05, "loss": 0.1129, "step": 6040 }, { "epoch": 0.48, "learning_rate": 8.809710873664362e-05, "loss": 0.0991, "step": 6060 }, { "epoch": 0.48, "learning_rate": 8.805782526712759e-05, "loss": 0.0946, "step": 6080 }, { "epoch": 0.48, "learning_rate": 8.801854179761156e-05, "loss": 0.0835, "step": 6100 }, { "epoch": 0.48, "learning_rate": 8.797925832809553e-05, "loss": 0.0737, "step": 6120 }, { "epoch": 0.48, "learning_rate": 8.793997485857951e-05, "loss": 0.0744, "step": 6140 }, { "epoch": 0.48, "learning_rate": 8.790069138906348e-05, "loss": 0.0848, "step": 6160 }, { "epoch": 0.49, "learning_rate": 8.786140791954746e-05, "loss": 0.0822, "step": 6180 }, { "epoch": 0.49, "learning_rate": 8.782212445003143e-05, "loss": 0.0874, "step": 6200 }, { "epoch": 0.49, "learning_rate": 8.77828409805154e-05, "loss": 0.1025, "step": 6220 }, { "epoch": 0.49, "learning_rate": 8.774355751099938e-05, "loss": 0.089, "step": 6240 }, { "epoch": 0.49, "learning_rate": 8.770427404148335e-05, "loss": 0.0972, "step": 6260 }, { "epoch": 0.49, "learning_rate": 8.766499057196733e-05, "loss": 0.0777, "step": 6280 }, { "epoch": 0.49, "learning_rate": 8.762570710245129e-05, "loss": 0.0661, "step": 6300 }, { "epoch": 0.5, "learning_rate": 8.758642363293526e-05, "loss": 0.0942, "step": 6320 }, { "epoch": 0.5, "learning_rate": 8.754714016341924e-05, "loss": 0.0773, "step": 6340 }, { "epoch": 0.5, "learning_rate": 8.750785669390321e-05, "loss": 0.0866, "step": 6360 }, { "epoch": 0.5, "learning_rate": 8.746857322438718e-05, "loss": 0.0869, "step": 6380 }, { "epoch": 0.5, "learning_rate": 8.742928975487116e-05, "loss": 0.1056, "step": 6400 }, { "epoch": 0.5, "learning_rate": 8.739000628535513e-05, "loss": 0.0861, "step": 6420 }, { "epoch": 0.51, "learning_rate": 8.73507228158391e-05, "loss": 0.0998, "step": 6440 }, { "epoch": 0.51, "learning_rate": 8.731143934632308e-05, "loss": 0.0785, "step": 6460 }, { "epoch": 0.51, "learning_rate": 8.727215587680705e-05, "loss": 0.1004, "step": 6480 }, { "epoch": 0.51, "learning_rate": 8.723287240729102e-05, "loss": 0.1156, "step": 6500 }, { "epoch": 0.51, "learning_rate": 8.719358893777499e-05, "loss": 0.0893, "step": 6520 }, { "epoch": 0.51, "learning_rate": 8.715430546825896e-05, "loss": 0.0849, "step": 6540 }, { "epoch": 0.52, "learning_rate": 8.711502199874292e-05, "loss": 0.0696, "step": 6560 }, { "epoch": 0.52, "learning_rate": 8.70757385292269e-05, "loss": 0.0971, "step": 6580 }, { "epoch": 0.52, "learning_rate": 8.703645505971087e-05, "loss": 0.0888, "step": 6600 }, { "epoch": 0.52, "learning_rate": 8.699717159019485e-05, "loss": 0.0887, "step": 6620 }, { "epoch": 0.52, "learning_rate": 8.695788812067882e-05, "loss": 0.1054, "step": 6640 }, { "epoch": 0.52, "learning_rate": 8.69186046511628e-05, "loss": 0.0999, "step": 6660 }, { "epoch": 0.52, "learning_rate": 8.687932118164677e-05, "loss": 0.1055, "step": 6680 }, { "epoch": 0.53, "learning_rate": 8.684003771213074e-05, "loss": 0.0687, "step": 6700 }, { "epoch": 0.53, "learning_rate": 8.680075424261472e-05, "loss": 0.0723, "step": 6720 }, { "epoch": 0.53, "learning_rate": 8.676147077309869e-05, "loss": 0.0872, "step": 6740 }, { "epoch": 0.53, "learning_rate": 8.672218730358265e-05, "loss": 0.0835, "step": 6760 }, { "epoch": 0.53, "learning_rate": 8.668290383406663e-05, "loss": 0.0841, "step": 6780 }, { "epoch": 0.53, "learning_rate": 8.66436203645506e-05, "loss": 0.0883, "step": 6800 }, { "epoch": 0.54, "learning_rate": 8.660433689503457e-05, "loss": 0.0706, "step": 6820 }, { "epoch": 0.54, "learning_rate": 8.656505342551855e-05, "loss": 0.0769, "step": 6840 }, { "epoch": 0.54, "learning_rate": 8.652576995600252e-05, "loss": 0.0928, "step": 6860 }, { "epoch": 0.54, "learning_rate": 8.64864864864865e-05, "loss": 0.0769, "step": 6880 }, { "epoch": 0.54, "learning_rate": 8.644720301697047e-05, "loss": 0.0863, "step": 6900 }, { "epoch": 0.54, "learning_rate": 8.640791954745443e-05, "loss": 0.1131, "step": 6920 }, { "epoch": 0.55, "learning_rate": 8.63686360779384e-05, "loss": 0.0644, "step": 6940 }, { "epoch": 0.55, "learning_rate": 8.632935260842238e-05, "loss": 0.0918, "step": 6960 }, { "epoch": 0.55, "learning_rate": 8.629006913890635e-05, "loss": 0.0804, "step": 6980 }, { "epoch": 0.55, "learning_rate": 8.625078566939033e-05, "loss": 0.0798, "step": 7000 }, { "epoch": 0.55, "learning_rate": 8.621150219987429e-05, "loss": 0.0782, "step": 7020 }, { "epoch": 0.55, "learning_rate": 8.617221873035826e-05, "loss": 0.0838, "step": 7040 }, { "epoch": 0.55, "learning_rate": 8.613293526084224e-05, "loss": 0.0639, "step": 7060 }, { "epoch": 0.56, "learning_rate": 8.609365179132621e-05, "loss": 0.0643, "step": 7080 }, { "epoch": 0.56, "learning_rate": 8.605436832181018e-05, "loss": 0.088, "step": 7100 }, { "epoch": 0.56, "learning_rate": 8.601508485229416e-05, "loss": 0.0921, "step": 7120 }, { "epoch": 0.56, "learning_rate": 8.597580138277813e-05, "loss": 0.0697, "step": 7140 }, { "epoch": 0.56, "learning_rate": 8.593651791326211e-05, "loss": 0.098, "step": 7160 }, { "epoch": 0.56, "learning_rate": 8.589723444374608e-05, "loss": 0.0868, "step": 7180 }, { "epoch": 0.57, "learning_rate": 8.585795097423006e-05, "loss": 0.0776, "step": 7200 }, { "epoch": 0.57, "learning_rate": 8.581866750471403e-05, "loss": 0.0836, "step": 7220 }, { "epoch": 0.57, "learning_rate": 8.577938403519799e-05, "loss": 0.0822, "step": 7240 }, { "epoch": 0.57, "learning_rate": 8.574010056568196e-05, "loss": 0.0816, "step": 7260 }, { "epoch": 0.57, "learning_rate": 8.570081709616594e-05, "loss": 0.0797, "step": 7280 }, { "epoch": 0.57, "learning_rate": 8.566153362664991e-05, "loss": 0.076, "step": 7300 }, { "epoch": 0.58, "learning_rate": 8.562225015713389e-05, "loss": 0.0772, "step": 7320 }, { "epoch": 0.58, "learning_rate": 8.558296668761785e-05, "loss": 0.0835, "step": 7340 }, { "epoch": 0.58, "learning_rate": 8.554368321810182e-05, "loss": 0.0752, "step": 7360 }, { "epoch": 0.58, "learning_rate": 8.55043997485858e-05, "loss": 0.0846, "step": 7380 }, { "epoch": 0.58, "learning_rate": 8.546511627906977e-05, "loss": 0.0665, "step": 7400 }, { "epoch": 0.58, "learning_rate": 8.542583280955374e-05, "loss": 0.0829, "step": 7420 }, { "epoch": 0.58, "learning_rate": 8.538654934003772e-05, "loss": 0.0692, "step": 7440 }, { "epoch": 0.59, "learning_rate": 8.534726587052169e-05, "loss": 0.0896, "step": 7460 }, { "epoch": 0.59, "learning_rate": 8.530798240100565e-05, "loss": 0.086, "step": 7480 }, { "epoch": 0.59, "learning_rate": 8.526869893148963e-05, "loss": 0.1036, "step": 7500 }, { "epoch": 0.59, "learning_rate": 8.52294154619736e-05, "loss": 0.0971, "step": 7520 }, { "epoch": 0.59, "learning_rate": 8.519013199245758e-05, "loss": 0.0898, "step": 7540 }, { "epoch": 0.59, "learning_rate": 8.515084852294155e-05, "loss": 0.0932, "step": 7560 }, { "epoch": 0.6, "learning_rate": 8.511156505342552e-05, "loss": 0.0816, "step": 7580 }, { "epoch": 0.6, "learning_rate": 8.50722815839095e-05, "loss": 0.0797, "step": 7600 }, { "epoch": 0.6, "learning_rate": 8.503299811439347e-05, "loss": 0.0835, "step": 7620 }, { "epoch": 0.6, "learning_rate": 8.499371464487745e-05, "loss": 0.0697, "step": 7640 }, { "epoch": 0.6, "learning_rate": 8.495443117536142e-05, "loss": 0.0754, "step": 7660 }, { "epoch": 0.6, "learning_rate": 8.49151477058454e-05, "loss": 0.0882, "step": 7680 }, { "epoch": 0.6, "learning_rate": 8.487586423632935e-05, "loss": 0.0739, "step": 7700 }, { "epoch": 0.61, "learning_rate": 8.483658076681333e-05, "loss": 0.0743, "step": 7720 }, { "epoch": 0.61, "learning_rate": 8.47972972972973e-05, "loss": 0.087, "step": 7740 }, { "epoch": 0.61, "learning_rate": 8.475801382778126e-05, "loss": 0.0872, "step": 7760 }, { "epoch": 0.61, "learning_rate": 8.471873035826524e-05, "loss": 0.0877, "step": 7780 }, { "epoch": 0.61, "learning_rate": 8.467944688874921e-05, "loss": 0.0723, "step": 7800 }, { "epoch": 0.61, "learning_rate": 8.464016341923319e-05, "loss": 0.0709, "step": 7820 }, { "epoch": 0.62, "learning_rate": 8.460087994971716e-05, "loss": 0.0939, "step": 7840 }, { "epoch": 0.62, "learning_rate": 8.456159648020113e-05, "loss": 0.1033, "step": 7860 }, { "epoch": 0.62, "learning_rate": 8.452231301068511e-05, "loss": 0.0844, "step": 7880 }, { "epoch": 0.62, "learning_rate": 8.448302954116908e-05, "loss": 0.0897, "step": 7900 }, { "epoch": 0.62, "learning_rate": 8.444374607165306e-05, "loss": 0.0895, "step": 7920 }, { "epoch": 0.62, "learning_rate": 8.440446260213703e-05, "loss": 0.0695, "step": 7940 }, { "epoch": 0.63, "learning_rate": 8.436517913262099e-05, "loss": 0.0838, "step": 7960 }, { "epoch": 0.63, "learning_rate": 8.432589566310497e-05, "loss": 0.0752, "step": 7980 }, { "epoch": 0.63, "learning_rate": 8.428661219358894e-05, "loss": 0.0764, "step": 8000 }, { "epoch": 0.63, "learning_rate": 8.424732872407291e-05, "loss": 0.0968, "step": 8020 }, { "epoch": 0.63, "learning_rate": 8.420804525455689e-05, "loss": 0.08, "step": 8040 }, { "epoch": 0.63, "learning_rate": 8.416876178504086e-05, "loss": 0.0822, "step": 8060 }, { "epoch": 0.63, "learning_rate": 8.412947831552484e-05, "loss": 0.058, "step": 8080 }, { "epoch": 0.64, "learning_rate": 8.409019484600881e-05, "loss": 0.0975, "step": 8100 }, { "epoch": 0.64, "learning_rate": 8.405091137649278e-05, "loss": 0.0914, "step": 8120 }, { "epoch": 0.64, "learning_rate": 8.401162790697676e-05, "loss": 0.0845, "step": 8140 }, { "epoch": 0.64, "learning_rate": 8.397234443746073e-05, "loss": 0.0624, "step": 8160 }, { "epoch": 0.64, "learning_rate": 8.393306096794469e-05, "loss": 0.0781, "step": 8180 }, { "epoch": 0.64, "learning_rate": 8.389377749842867e-05, "loss": 0.0852, "step": 8200 }, { "epoch": 0.65, "learning_rate": 8.385449402891263e-05, "loss": 0.0633, "step": 8220 }, { "epoch": 0.65, "learning_rate": 8.38152105593966e-05, "loss": 0.0828, "step": 8240 }, { "epoch": 0.65, "learning_rate": 8.377592708988058e-05, "loss": 0.0764, "step": 8260 }, { "epoch": 0.65, "learning_rate": 8.373664362036455e-05, "loss": 0.0703, "step": 8280 }, { "epoch": 0.65, "learning_rate": 8.369736015084852e-05, "loss": 0.0728, "step": 8300 }, { "epoch": 0.65, "learning_rate": 8.36580766813325e-05, "loss": 0.0861, "step": 8320 }, { "epoch": 0.66, "learning_rate": 8.361879321181647e-05, "loss": 0.0784, "step": 8340 }, { "epoch": 0.66, "learning_rate": 8.357950974230045e-05, "loss": 0.0813, "step": 8360 }, { "epoch": 0.66, "learning_rate": 8.354022627278442e-05, "loss": 0.0733, "step": 8380 }, { "epoch": 0.66, "learning_rate": 8.35009428032684e-05, "loss": 0.0911, "step": 8400 }, { "epoch": 0.66, "learning_rate": 8.346165933375236e-05, "loss": 0.0739, "step": 8420 }, { "epoch": 0.66, "learning_rate": 8.342237586423633e-05, "loss": 0.0742, "step": 8440 }, { "epoch": 0.66, "learning_rate": 8.33830923947203e-05, "loss": 0.0809, "step": 8460 }, { "epoch": 0.67, "learning_rate": 8.334380892520428e-05, "loss": 0.0995, "step": 8480 }, { "epoch": 0.67, "learning_rate": 8.330452545568825e-05, "loss": 0.0832, "step": 8500 }, { "epoch": 0.67, "learning_rate": 8.326524198617223e-05, "loss": 0.0727, "step": 8520 }, { "epoch": 0.67, "learning_rate": 8.32259585166562e-05, "loss": 0.0841, "step": 8540 }, { "epoch": 0.67, "learning_rate": 8.318667504714017e-05, "loss": 0.0704, "step": 8560 }, { "epoch": 0.67, "learning_rate": 8.314739157762415e-05, "loss": 0.0606, "step": 8580 }, { "epoch": 0.68, "learning_rate": 8.310810810810811e-05, "loss": 0.0698, "step": 8600 }, { "epoch": 0.68, "learning_rate": 8.306882463859208e-05, "loss": 0.094, "step": 8620 }, { "epoch": 0.68, "learning_rate": 8.302954116907606e-05, "loss": 0.0711, "step": 8640 }, { "epoch": 0.68, "learning_rate": 8.299025769956003e-05, "loss": 0.0972, "step": 8660 }, { "epoch": 0.68, "learning_rate": 8.295097423004399e-05, "loss": 0.0931, "step": 8680 }, { "epoch": 0.68, "learning_rate": 8.291169076052797e-05, "loss": 0.0871, "step": 8700 }, { "epoch": 0.69, "learning_rate": 8.287240729101194e-05, "loss": 0.0671, "step": 8720 }, { "epoch": 0.69, "learning_rate": 8.283312382149591e-05, "loss": 0.0864, "step": 8740 }, { "epoch": 0.69, "learning_rate": 8.279384035197989e-05, "loss": 0.0761, "step": 8760 }, { "epoch": 0.69, "learning_rate": 8.275455688246386e-05, "loss": 0.0746, "step": 8780 }, { "epoch": 0.69, "learning_rate": 8.271527341294784e-05, "loss": 0.0754, "step": 8800 }, { "epoch": 0.69, "learning_rate": 8.267598994343181e-05, "loss": 0.0972, "step": 8820 }, { "epoch": 0.69, "learning_rate": 8.263670647391579e-05, "loss": 0.0803, "step": 8840 }, { "epoch": 0.7, "learning_rate": 8.259742300439976e-05, "loss": 0.0869, "step": 8860 }, { "epoch": 0.7, "learning_rate": 8.255813953488373e-05, "loss": 0.0739, "step": 8880 }, { "epoch": 0.7, "learning_rate": 8.25188560653677e-05, "loss": 0.0863, "step": 8900 }, { "epoch": 0.7, "learning_rate": 8.247957259585167e-05, "loss": 0.0638, "step": 8920 }, { "epoch": 0.7, "learning_rate": 8.244028912633564e-05, "loss": 0.0986, "step": 8940 }, { "epoch": 0.7, "learning_rate": 8.240100565681962e-05, "loss": 0.0831, "step": 8960 }, { "epoch": 0.71, "learning_rate": 8.236172218730359e-05, "loss": 0.0705, "step": 8980 }, { "epoch": 0.71, "learning_rate": 8.232243871778756e-05, "loss": 0.0794, "step": 9000 }, { "epoch": 0.71, "learning_rate": 8.228315524827153e-05, "loss": 0.0862, "step": 9020 }, { "epoch": 0.71, "learning_rate": 8.22438717787555e-05, "loss": 0.0701, "step": 9040 }, { "epoch": 0.71, "learning_rate": 8.220458830923947e-05, "loss": 0.059, "step": 9060 }, { "epoch": 0.71, "learning_rate": 8.216530483972345e-05, "loss": 0.0699, "step": 9080 }, { "epoch": 0.71, "learning_rate": 8.212602137020742e-05, "loss": 0.0679, "step": 9100 }, { "epoch": 0.72, "learning_rate": 8.20867379006914e-05, "loss": 0.1059, "step": 9120 }, { "epoch": 0.72, "learning_rate": 8.204745443117537e-05, "loss": 0.0662, "step": 9140 }, { "epoch": 0.72, "learning_rate": 8.200817096165933e-05, "loss": 0.0947, "step": 9160 }, { "epoch": 0.72, "learning_rate": 8.19688874921433e-05, "loss": 0.0787, "step": 9180 }, { "epoch": 0.72, "learning_rate": 8.192960402262728e-05, "loss": 0.0988, "step": 9200 }, { "epoch": 0.72, "learning_rate": 8.189032055311125e-05, "loss": 0.0792, "step": 9220 }, { "epoch": 0.73, "learning_rate": 8.185103708359523e-05, "loss": 0.0905, "step": 9240 }, { "epoch": 0.73, "learning_rate": 8.18117536140792e-05, "loss": 0.0714, "step": 9260 }, { "epoch": 0.73, "learning_rate": 8.177247014456318e-05, "loss": 0.0936, "step": 9280 }, { "epoch": 0.73, "learning_rate": 8.173318667504715e-05, "loss": 0.0785, "step": 9300 }, { "epoch": 0.73, "learning_rate": 8.169390320553112e-05, "loss": 0.0713, "step": 9320 }, { "epoch": 0.73, "learning_rate": 8.16546197360151e-05, "loss": 0.0726, "step": 9340 }, { "epoch": 0.74, "learning_rate": 8.161533626649906e-05, "loss": 0.0842, "step": 9360 }, { "epoch": 0.74, "learning_rate": 8.157605279698303e-05, "loss": 0.1054, "step": 9380 }, { "epoch": 0.74, "learning_rate": 8.153676932746701e-05, "loss": 0.0803, "step": 9400 }, { "epoch": 0.74, "learning_rate": 8.149748585795098e-05, "loss": 0.0759, "step": 9420 }, { "epoch": 0.74, "learning_rate": 8.145820238843494e-05, "loss": 0.0675, "step": 9440 }, { "epoch": 0.74, "learning_rate": 8.141891891891892e-05, "loss": 0.1079, "step": 9460 }, { "epoch": 0.74, "learning_rate": 8.137963544940289e-05, "loss": 0.0798, "step": 9480 }, { "epoch": 0.75, "learning_rate": 8.134035197988686e-05, "loss": 0.0632, "step": 9500 }, { "epoch": 0.75, "learning_rate": 8.130106851037084e-05, "loss": 0.0606, "step": 9520 }, { "epoch": 0.75, "learning_rate": 8.126178504085481e-05, "loss": 0.0851, "step": 9540 }, { "epoch": 0.75, "learning_rate": 8.122250157133879e-05, "loss": 0.0658, "step": 9560 }, { "epoch": 0.75, "learning_rate": 8.118321810182276e-05, "loss": 0.0851, "step": 9580 }, { "epoch": 0.75, "learning_rate": 8.114393463230673e-05, "loss": 0.0647, "step": 9600 }, { "epoch": 0.76, "learning_rate": 8.11046511627907e-05, "loss": 0.0949, "step": 9620 }, { "epoch": 0.76, "learning_rate": 8.106536769327467e-05, "loss": 0.0851, "step": 9640 }, { "epoch": 0.76, "learning_rate": 8.102608422375864e-05, "loss": 0.0875, "step": 9660 }, { "epoch": 0.76, "learning_rate": 8.098680075424262e-05, "loss": 0.0741, "step": 9680 }, { "epoch": 0.76, "learning_rate": 8.094751728472659e-05, "loss": 0.0663, "step": 9700 }, { "epoch": 0.76, "learning_rate": 8.090823381521057e-05, "loss": 0.0734, "step": 9720 }, { "epoch": 0.77, "learning_rate": 8.086895034569454e-05, "loss": 0.0957, "step": 9740 }, { "epoch": 0.77, "learning_rate": 8.082966687617851e-05, "loss": 0.0698, "step": 9760 }, { "epoch": 0.77, "learning_rate": 8.079038340666249e-05, "loss": 0.0716, "step": 9780 }, { "epoch": 0.77, "learning_rate": 8.075109993714646e-05, "loss": 0.0743, "step": 9800 }, { "epoch": 0.77, "learning_rate": 8.071181646763044e-05, "loss": 0.0505, "step": 9820 }, { "epoch": 0.77, "learning_rate": 8.06725329981144e-05, "loss": 0.0828, "step": 9840 }, { "epoch": 0.77, "learning_rate": 8.063324952859837e-05, "loss": 0.0715, "step": 9860 }, { "epoch": 0.78, "learning_rate": 8.059396605908233e-05, "loss": 0.0764, "step": 9880 }, { "epoch": 0.78, "learning_rate": 8.05546825895663e-05, "loss": 0.09, "step": 9900 }, { "epoch": 0.78, "learning_rate": 8.051539912005028e-05, "loss": 0.0776, "step": 9920 }, { "epoch": 0.78, "learning_rate": 8.047611565053425e-05, "loss": 0.0777, "step": 9940 }, { "epoch": 0.78, "learning_rate": 8.043683218101823e-05, "loss": 0.0638, "step": 9960 }, { "epoch": 0.78, "learning_rate": 8.03975487115022e-05, "loss": 0.0743, "step": 9980 }, { "epoch": 0.79, "learning_rate": 8.035826524198618e-05, "loss": 0.0742, "step": 10000 }, { "epoch": 0.79, "eval_loss": 0.2080189436674118, "eval_matthews_correlation": 0.4431279927127172, "eval_runtime": 1012.8603, "eval_samples_per_second": 112.587, "eval_steps_per_second": 14.074, "step": 10000 }, { "epoch": 0.79, "learning_rate": 8.031898177247015e-05, "loss": 0.0947, "step": 10020 }, { "epoch": 0.79, "learning_rate": 8.027969830295412e-05, "loss": 0.0826, "step": 10040 }, { "epoch": 0.79, "learning_rate": 8.02404148334381e-05, "loss": 0.0663, "step": 10060 }, { "epoch": 0.79, "learning_rate": 8.020113136392206e-05, "loss": 0.0744, "step": 10080 }, { "epoch": 0.79, "learning_rate": 8.016184789440603e-05, "loss": 0.0758, "step": 10100 }, { "epoch": 0.8, "learning_rate": 8.012256442489001e-05, "loss": 0.0766, "step": 10120 }, { "epoch": 0.8, "learning_rate": 8.008328095537398e-05, "loss": 0.0798, "step": 10140 }, { "epoch": 0.8, "learning_rate": 8.004399748585796e-05, "loss": 0.0881, "step": 10160 }, { "epoch": 0.8, "learning_rate": 8.000471401634193e-05, "loss": 0.0685, "step": 10180 }, { "epoch": 0.8, "learning_rate": 7.99654305468259e-05, "loss": 0.0781, "step": 10200 }, { "epoch": 0.8, "learning_rate": 7.992614707730988e-05, "loss": 0.0835, "step": 10220 }, { "epoch": 0.8, "learning_rate": 7.988686360779385e-05, "loss": 0.0536, "step": 10240 }, { "epoch": 0.81, "learning_rate": 7.984758013827781e-05, "loss": 0.0754, "step": 10260 }, { "epoch": 0.81, "learning_rate": 7.980829666876179e-05, "loss": 0.0763, "step": 10280 }, { "epoch": 0.81, "learning_rate": 7.976901319924576e-05, "loss": 0.0764, "step": 10300 }, { "epoch": 0.81, "learning_rate": 7.972972972972974e-05, "loss": 0.0773, "step": 10320 }, { "epoch": 0.81, "learning_rate": 7.96904462602137e-05, "loss": 0.0879, "step": 10340 }, { "epoch": 0.81, "learning_rate": 7.965116279069767e-05, "loss": 0.0909, "step": 10360 }, { "epoch": 0.82, "learning_rate": 7.961187932118164e-05, "loss": 0.0639, "step": 10380 }, { "epoch": 0.82, "learning_rate": 7.957259585166562e-05, "loss": 0.0665, "step": 10400 }, { "epoch": 0.82, "learning_rate": 7.953331238214959e-05, "loss": 0.0744, "step": 10420 }, { "epoch": 0.82, "learning_rate": 7.949402891263357e-05, "loss": 0.0437, "step": 10440 }, { "epoch": 0.82, "learning_rate": 7.945474544311754e-05, "loss": 0.0861, "step": 10460 }, { "epoch": 0.82, "learning_rate": 7.941546197360152e-05, "loss": 0.0705, "step": 10480 }, { "epoch": 0.82, "learning_rate": 7.937617850408549e-05, "loss": 0.0764, "step": 10500 }, { "epoch": 0.83, "learning_rate": 7.933689503456946e-05, "loss": 0.0796, "step": 10520 }, { "epoch": 0.83, "learning_rate": 7.929761156505344e-05, "loss": 0.0684, "step": 10540 }, { "epoch": 0.83, "learning_rate": 7.92583280955374e-05, "loss": 0.0735, "step": 10560 }, { "epoch": 0.83, "learning_rate": 7.921904462602137e-05, "loss": 0.0897, "step": 10580 }, { "epoch": 0.83, "learning_rate": 7.917976115650535e-05, "loss": 0.0783, "step": 10600 }, { "epoch": 0.83, "learning_rate": 7.914047768698932e-05, "loss": 0.0926, "step": 10620 }, { "epoch": 0.84, "learning_rate": 7.91011942174733e-05, "loss": 0.0751, "step": 10640 }, { "epoch": 0.84, "learning_rate": 7.906191074795727e-05, "loss": 0.0577, "step": 10660 }, { "epoch": 0.84, "learning_rate": 7.902262727844123e-05, "loss": 0.0513, "step": 10680 }, { "epoch": 0.84, "learning_rate": 7.89833438089252e-05, "loss": 0.0767, "step": 10700 }, { "epoch": 0.84, "learning_rate": 7.894406033940918e-05, "loss": 0.0588, "step": 10720 }, { "epoch": 0.84, "learning_rate": 7.890477686989315e-05, "loss": 0.1069, "step": 10740 }, { "epoch": 0.85, "learning_rate": 7.886549340037713e-05, "loss": 0.0657, "step": 10760 }, { "epoch": 0.85, "learning_rate": 7.88262099308611e-05, "loss": 0.083, "step": 10780 }, { "epoch": 0.85, "learning_rate": 7.878692646134507e-05, "loss": 0.0616, "step": 10800 }, { "epoch": 0.85, "learning_rate": 7.874764299182903e-05, "loss": 0.0579, "step": 10820 }, { "epoch": 0.85, "learning_rate": 7.870835952231301e-05, "loss": 0.0699, "step": 10840 }, { "epoch": 0.85, "learning_rate": 7.866907605279698e-05, "loss": 0.0534, "step": 10860 }, { "epoch": 0.85, "learning_rate": 7.862979258328096e-05, "loss": 0.0619, "step": 10880 }, { "epoch": 0.86, "learning_rate": 7.859050911376493e-05, "loss": 0.0707, "step": 10900 }, { "epoch": 0.86, "learning_rate": 7.85512256442489e-05, "loss": 0.0634, "step": 10920 }, { "epoch": 0.86, "learning_rate": 7.851194217473288e-05, "loss": 0.0812, "step": 10940 }, { "epoch": 0.86, "learning_rate": 7.847265870521685e-05, "loss": 0.069, "step": 10960 }, { "epoch": 0.86, "learning_rate": 7.843337523570083e-05, "loss": 0.0694, "step": 10980 }, { "epoch": 0.86, "learning_rate": 7.83940917661848e-05, "loss": 0.0712, "step": 11000 }, { "epoch": 0.87, "learning_rate": 7.835480829666876e-05, "loss": 0.073, "step": 11020 }, { "epoch": 0.87, "learning_rate": 7.831552482715274e-05, "loss": 0.067, "step": 11040 }, { "epoch": 0.87, "learning_rate": 7.827624135763671e-05, "loss": 0.0653, "step": 11060 }, { "epoch": 0.87, "learning_rate": 7.823695788812068e-05, "loss": 0.0884, "step": 11080 }, { "epoch": 0.87, "learning_rate": 7.819767441860465e-05, "loss": 0.0748, "step": 11100 }, { "epoch": 0.87, "learning_rate": 7.815839094908862e-05, "loss": 0.0621, "step": 11120 }, { "epoch": 0.88, "learning_rate": 7.81191074795726e-05, "loss": 0.0952, "step": 11140 }, { "epoch": 0.88, "learning_rate": 7.807982401005657e-05, "loss": 0.0874, "step": 11160 }, { "epoch": 0.88, "learning_rate": 7.804054054054054e-05, "loss": 0.0563, "step": 11180 }, { "epoch": 0.88, "learning_rate": 7.800125707102452e-05, "loss": 0.1007, "step": 11200 }, { "epoch": 0.88, "learning_rate": 7.796197360150849e-05, "loss": 0.0533, "step": 11220 }, { "epoch": 0.88, "learning_rate": 7.792269013199246e-05, "loss": 0.0645, "step": 11240 }, { "epoch": 0.88, "learning_rate": 7.788340666247644e-05, "loss": 0.0708, "step": 11260 }, { "epoch": 0.89, "learning_rate": 7.78441231929604e-05, "loss": 0.0837, "step": 11280 }, { "epoch": 0.89, "learning_rate": 7.780483972344437e-05, "loss": 0.0719, "step": 11300 }, { "epoch": 0.89, "learning_rate": 7.776555625392835e-05, "loss": 0.0616, "step": 11320 }, { "epoch": 0.89, "learning_rate": 7.772627278441232e-05, "loss": 0.066, "step": 11340 }, { "epoch": 0.89, "learning_rate": 7.76869893148963e-05, "loss": 0.0704, "step": 11360 }, { "epoch": 0.89, "learning_rate": 7.764770584538027e-05, "loss": 0.0689, "step": 11380 }, { "epoch": 0.9, "learning_rate": 7.760842237586424e-05, "loss": 0.0424, "step": 11400 }, { "epoch": 0.9, "learning_rate": 7.756913890634822e-05, "loss": 0.0751, "step": 11420 }, { "epoch": 0.9, "learning_rate": 7.752985543683219e-05, "loss": 0.0586, "step": 11440 }, { "epoch": 0.9, "learning_rate": 7.749057196731617e-05, "loss": 0.0747, "step": 11460 }, { "epoch": 0.9, "learning_rate": 7.745128849780014e-05, "loss": 0.0726, "step": 11480 }, { "epoch": 0.9, "learning_rate": 7.74120050282841e-05, "loss": 0.0586, "step": 11500 }, { "epoch": 0.91, "learning_rate": 7.737272155876808e-05, "loss": 0.0715, "step": 11520 }, { "epoch": 0.91, "learning_rate": 7.733343808925204e-05, "loss": 0.0826, "step": 11540 }, { "epoch": 0.91, "learning_rate": 7.729415461973601e-05, "loss": 0.0904, "step": 11560 }, { "epoch": 0.91, "learning_rate": 7.725487115021998e-05, "loss": 0.075, "step": 11580 }, { "epoch": 0.91, "learning_rate": 7.721558768070396e-05, "loss": 0.067, "step": 11600 }, { "epoch": 0.91, "learning_rate": 7.717630421118793e-05, "loss": 0.0746, "step": 11620 }, { "epoch": 0.91, "learning_rate": 7.71370207416719e-05, "loss": 0.0662, "step": 11640 }, { "epoch": 0.92, "learning_rate": 7.709773727215588e-05, "loss": 0.0698, "step": 11660 }, { "epoch": 0.92, "learning_rate": 7.705845380263985e-05, "loss": 0.0948, "step": 11680 }, { "epoch": 0.92, "learning_rate": 7.701917033312383e-05, "loss": 0.0691, "step": 11700 }, { "epoch": 0.92, "learning_rate": 7.69798868636078e-05, "loss": 0.0769, "step": 11720 }, { "epoch": 0.92, "learning_rate": 7.694060339409178e-05, "loss": 0.0791, "step": 11740 }, { "epoch": 0.92, "learning_rate": 7.690131992457574e-05, "loss": 0.0781, "step": 11760 }, { "epoch": 0.93, "learning_rate": 7.686203645505971e-05, "loss": 0.0619, "step": 11780 }, { "epoch": 0.93, "learning_rate": 7.682275298554369e-05, "loss": 0.0796, "step": 11800 }, { "epoch": 0.93, "learning_rate": 7.678346951602766e-05, "loss": 0.0665, "step": 11820 }, { "epoch": 0.93, "learning_rate": 7.674418604651163e-05, "loss": 0.0754, "step": 11840 }, { "epoch": 0.93, "learning_rate": 7.670490257699561e-05, "loss": 0.0687, "step": 11860 }, { "epoch": 0.93, "learning_rate": 7.666561910747958e-05, "loss": 0.0681, "step": 11880 }, { "epoch": 0.93, "learning_rate": 7.662633563796356e-05, "loss": 0.0644, "step": 11900 }, { "epoch": 0.94, "learning_rate": 7.658705216844753e-05, "loss": 0.0562, "step": 11920 }, { "epoch": 0.94, "learning_rate": 7.654776869893149e-05, "loss": 0.0798, "step": 11940 }, { "epoch": 0.94, "learning_rate": 7.650848522941547e-05, "loss": 0.0822, "step": 11960 }, { "epoch": 0.94, "learning_rate": 7.646920175989944e-05, "loss": 0.0589, "step": 11980 }, { "epoch": 0.94, "learning_rate": 7.64299182903834e-05, "loss": 0.0898, "step": 12000 }, { "epoch": 0.94, "learning_rate": 7.639063482086737e-05, "loss": 0.0639, "step": 12020 }, { "epoch": 0.95, "learning_rate": 7.635135135135135e-05, "loss": 0.0683, "step": 12040 }, { "epoch": 0.95, "learning_rate": 7.631206788183532e-05, "loss": 0.0699, "step": 12060 }, { "epoch": 0.95, "learning_rate": 7.62727844123193e-05, "loss": 0.0837, "step": 12080 }, { "epoch": 0.95, "learning_rate": 7.623350094280327e-05, "loss": 0.071, "step": 12100 }, { "epoch": 0.95, "learning_rate": 7.619421747328724e-05, "loss": 0.0753, "step": 12120 }, { "epoch": 0.95, "learning_rate": 7.615493400377122e-05, "loss": 0.0767, "step": 12140 }, { "epoch": 0.96, "learning_rate": 7.611565053425519e-05, "loss": 0.063, "step": 12160 }, { "epoch": 0.96, "learning_rate": 7.607636706473917e-05, "loss": 0.0517, "step": 12180 }, { "epoch": 0.96, "learning_rate": 7.603708359522314e-05, "loss": 0.0788, "step": 12200 }, { "epoch": 0.96, "learning_rate": 7.59978001257071e-05, "loss": 0.0771, "step": 12220 }, { "epoch": 0.96, "learning_rate": 7.595851665619108e-05, "loss": 0.0739, "step": 12240 }, { "epoch": 0.96, "learning_rate": 7.591923318667505e-05, "loss": 0.0723, "step": 12260 }, { "epoch": 0.96, "learning_rate": 7.587994971715902e-05, "loss": 0.0836, "step": 12280 }, { "epoch": 0.97, "learning_rate": 7.5840666247643e-05, "loss": 0.0692, "step": 12300 }, { "epoch": 0.97, "learning_rate": 7.580138277812697e-05, "loss": 0.092, "step": 12320 }, { "epoch": 0.97, "learning_rate": 7.576209930861095e-05, "loss": 0.0529, "step": 12340 }, { "epoch": 0.97, "learning_rate": 7.572281583909491e-05, "loss": 0.0779, "step": 12360 }, { "epoch": 0.97, "learning_rate": 7.568353236957888e-05, "loss": 0.0698, "step": 12380 }, { "epoch": 0.97, "learning_rate": 7.564424890006286e-05, "loss": 0.0589, "step": 12400 }, { "epoch": 0.98, "learning_rate": 7.560496543054683e-05, "loss": 0.0627, "step": 12420 }, { "epoch": 0.98, "learning_rate": 7.55656819610308e-05, "loss": 0.0716, "step": 12440 }, { "epoch": 0.98, "learning_rate": 7.552639849151478e-05, "loss": 0.104, "step": 12460 }, { "epoch": 0.98, "learning_rate": 7.548711502199874e-05, "loss": 0.0549, "step": 12480 }, { "epoch": 0.98, "learning_rate": 7.544783155248271e-05, "loss": 0.0776, "step": 12500 }, { "epoch": 0.98, "learning_rate": 7.540854808296669e-05, "loss": 0.0648, "step": 12520 }, { "epoch": 0.99, "learning_rate": 7.536926461345066e-05, "loss": 0.0787, "step": 12540 }, { "epoch": 0.99, "learning_rate": 7.532998114393464e-05, "loss": 0.0843, "step": 12560 }, { "epoch": 0.99, "learning_rate": 7.529069767441861e-05, "loss": 0.096, "step": 12580 }, { "epoch": 0.99, "learning_rate": 7.525141420490258e-05, "loss": 0.0638, "step": 12600 }, { "epoch": 0.99, "learning_rate": 7.521213073538656e-05, "loss": 0.0684, "step": 12620 }, { "epoch": 0.99, "learning_rate": 7.517284726587053e-05, "loss": 0.0721, "step": 12640 }, { "epoch": 0.99, "learning_rate": 7.51335637963545e-05, "loss": 0.0799, "step": 12660 }, { "epoch": 1.0, "learning_rate": 7.509428032683848e-05, "loss": 0.0623, "step": 12680 }, { "epoch": 1.0, "learning_rate": 7.505499685732244e-05, "loss": 0.0698, "step": 12700 }, { "epoch": 1.0, "learning_rate": 7.501571338780641e-05, "loss": 0.0696, "step": 12720 }, { "epoch": 1.0, "learning_rate": 7.497642991829039e-05, "loss": 0.0566, "step": 12740 }, { "epoch": 1.0, "learning_rate": 7.493714644877436e-05, "loss": 0.0518, "step": 12760 }, { "epoch": 1.0, "learning_rate": 7.489786297925832e-05, "loss": 0.0627, "step": 12780 }, { "epoch": 1.01, "learning_rate": 7.48585795097423e-05, "loss": 0.0601, "step": 12800 }, { "epoch": 1.01, "learning_rate": 7.481929604022627e-05, "loss": 0.0592, "step": 12820 }, { "epoch": 1.01, "learning_rate": 7.478001257071025e-05, "loss": 0.0696, "step": 12840 }, { "epoch": 1.01, "learning_rate": 7.474072910119422e-05, "loss": 0.0443, "step": 12860 }, { "epoch": 1.01, "learning_rate": 7.47014456316782e-05, "loss": 0.0607, "step": 12880 }, { "epoch": 1.01, "learning_rate": 7.466216216216217e-05, "loss": 0.072, "step": 12900 }, { "epoch": 1.02, "learning_rate": 7.462287869264614e-05, "loss": 0.0662, "step": 12920 }, { "epoch": 1.02, "learning_rate": 7.45835952231301e-05, "loss": 0.0537, "step": 12940 }, { "epoch": 1.02, "learning_rate": 7.454431175361408e-05, "loss": 0.0514, "step": 12960 }, { "epoch": 1.02, "learning_rate": 7.450502828409805e-05, "loss": 0.0545, "step": 12980 }, { "epoch": 1.02, "learning_rate": 7.446574481458203e-05, "loss": 0.0736, "step": 13000 }, { "epoch": 1.02, "learning_rate": 7.4426461345066e-05, "loss": 0.0545, "step": 13020 }, { "epoch": 1.02, "learning_rate": 7.438717787554997e-05, "loss": 0.0607, "step": 13040 }, { "epoch": 1.03, "learning_rate": 7.434789440603395e-05, "loss": 0.0631, "step": 13060 }, { "epoch": 1.03, "learning_rate": 7.430861093651792e-05, "loss": 0.0538, "step": 13080 }, { "epoch": 1.03, "learning_rate": 7.42693274670019e-05, "loss": 0.0624, "step": 13100 }, { "epoch": 1.03, "learning_rate": 7.423004399748587e-05, "loss": 0.0466, "step": 13120 }, { "epoch": 1.03, "learning_rate": 7.419076052796984e-05, "loss": 0.045, "step": 13140 }, { "epoch": 1.03, "learning_rate": 7.41514770584538e-05, "loss": 0.0612, "step": 13160 }, { "epoch": 1.04, "learning_rate": 7.411219358893778e-05, "loss": 0.0662, "step": 13180 }, { "epoch": 1.04, "learning_rate": 7.407291011942174e-05, "loss": 0.0596, "step": 13200 }, { "epoch": 1.04, "learning_rate": 7.403362664990571e-05, "loss": 0.0645, "step": 13220 }, { "epoch": 1.04, "learning_rate": 7.399434318038969e-05, "loss": 0.0729, "step": 13240 }, { "epoch": 1.04, "learning_rate": 7.395505971087366e-05, "loss": 0.0659, "step": 13260 }, { "epoch": 1.04, "learning_rate": 7.391577624135764e-05, "loss": 0.0538, "step": 13280 }, { "epoch": 1.04, "learning_rate": 7.387649277184161e-05, "loss": 0.0647, "step": 13300 }, { "epoch": 1.05, "learning_rate": 7.383720930232558e-05, "loss": 0.0588, "step": 13320 }, { "epoch": 1.05, "learning_rate": 7.379792583280956e-05, "loss": 0.0585, "step": 13340 }, { "epoch": 1.05, "learning_rate": 7.375864236329353e-05, "loss": 0.0385, "step": 13360 }, { "epoch": 1.05, "learning_rate": 7.371935889377751e-05, "loss": 0.0498, "step": 13380 }, { "epoch": 1.05, "learning_rate": 7.368007542426148e-05, "loss": 0.0633, "step": 13400 }, { "epoch": 1.05, "learning_rate": 7.364079195474544e-05, "loss": 0.049, "step": 13420 }, { "epoch": 1.06, "learning_rate": 7.360150848522942e-05, "loss": 0.065, "step": 13440 }, { "epoch": 1.06, "learning_rate": 7.356222501571339e-05, "loss": 0.0637, "step": 13460 }, { "epoch": 1.06, "learning_rate": 7.352294154619736e-05, "loss": 0.0655, "step": 13480 }, { "epoch": 1.06, "learning_rate": 7.348365807668134e-05, "loss": 0.0654, "step": 13500 }, { "epoch": 1.06, "learning_rate": 7.344437460716531e-05, "loss": 0.0708, "step": 13520 }, { "epoch": 1.06, "learning_rate": 7.340509113764929e-05, "loss": 0.0582, "step": 13540 }, { "epoch": 1.07, "learning_rate": 7.336580766813326e-05, "loss": 0.0685, "step": 13560 }, { "epoch": 1.07, "learning_rate": 7.332652419861723e-05, "loss": 0.0643, "step": 13580 }, { "epoch": 1.07, "learning_rate": 7.328724072910121e-05, "loss": 0.0395, "step": 13600 }, { "epoch": 1.07, "learning_rate": 7.324795725958517e-05, "loss": 0.082, "step": 13620 }, { "epoch": 1.07, "learning_rate": 7.320867379006914e-05, "loss": 0.0559, "step": 13640 }, { "epoch": 1.07, "learning_rate": 7.31693903205531e-05, "loss": 0.0613, "step": 13660 }, { "epoch": 1.07, "learning_rate": 7.313010685103708e-05, "loss": 0.0639, "step": 13680 }, { "epoch": 1.08, "learning_rate": 7.309082338152105e-05, "loss": 0.0436, "step": 13700 }, { "epoch": 1.08, "learning_rate": 7.305153991200503e-05, "loss": 0.0696, "step": 13720 }, { "epoch": 1.08, "learning_rate": 7.3012256442489e-05, "loss": 0.0711, "step": 13740 }, { "epoch": 1.08, "learning_rate": 7.297297297297297e-05, "loss": 0.0705, "step": 13760 }, { "epoch": 1.08, "learning_rate": 7.293368950345695e-05, "loss": 0.0482, "step": 13780 }, { "epoch": 1.08, "learning_rate": 7.289440603394092e-05, "loss": 0.0578, "step": 13800 }, { "epoch": 1.09, "learning_rate": 7.28551225644249e-05, "loss": 0.0628, "step": 13820 }, { "epoch": 1.09, "learning_rate": 7.281583909490887e-05, "loss": 0.0645, "step": 13840 }, { "epoch": 1.09, "learning_rate": 7.277655562539285e-05, "loss": 0.0581, "step": 13860 }, { "epoch": 1.09, "learning_rate": 7.27372721558768e-05, "loss": 0.0771, "step": 13880 }, { "epoch": 1.09, "learning_rate": 7.269798868636078e-05, "loss": 0.0557, "step": 13900 }, { "epoch": 1.09, "learning_rate": 7.265870521684475e-05, "loss": 0.0615, "step": 13920 }, { "epoch": 1.1, "learning_rate": 7.261942174732873e-05, "loss": 0.0509, "step": 13940 }, { "epoch": 1.1, "learning_rate": 7.25801382778127e-05, "loss": 0.0628, "step": 13960 }, { "epoch": 1.1, "learning_rate": 7.254085480829668e-05, "loss": 0.0559, "step": 13980 }, { "epoch": 1.1, "learning_rate": 7.250157133878065e-05, "loss": 0.079, "step": 14000 }, { "epoch": 1.1, "learning_rate": 7.246228786926462e-05, "loss": 0.0626, "step": 14020 }, { "epoch": 1.1, "learning_rate": 7.242300439974859e-05, "loss": 0.0687, "step": 14040 }, { "epoch": 1.1, "learning_rate": 7.238372093023256e-05, "loss": 0.094, "step": 14060 }, { "epoch": 1.11, "learning_rate": 7.234443746071653e-05, "loss": 0.0507, "step": 14080 }, { "epoch": 1.11, "learning_rate": 7.230515399120051e-05, "loss": 0.0539, "step": 14100 }, { "epoch": 1.11, "learning_rate": 7.226587052168448e-05, "loss": 0.0644, "step": 14120 }, { "epoch": 1.11, "learning_rate": 7.222658705216844e-05, "loss": 0.0667, "step": 14140 }, { "epoch": 1.11, "learning_rate": 7.218730358265242e-05, "loss": 0.0618, "step": 14160 }, { "epoch": 1.11, "learning_rate": 7.214802011313639e-05, "loss": 0.0633, "step": 14180 }, { "epoch": 1.12, "learning_rate": 7.210873664362036e-05, "loss": 0.056, "step": 14200 }, { "epoch": 1.12, "learning_rate": 7.206945317410434e-05, "loss": 0.0577, "step": 14220 }, { "epoch": 1.12, "learning_rate": 7.203016970458831e-05, "loss": 0.046, "step": 14240 }, { "epoch": 1.12, "learning_rate": 7.199088623507229e-05, "loss": 0.0399, "step": 14260 }, { "epoch": 1.12, "learning_rate": 7.195160276555626e-05, "loss": 0.0589, "step": 14280 }, { "epoch": 1.12, "learning_rate": 7.191231929604024e-05, "loss": 0.0551, "step": 14300 }, { "epoch": 1.13, "learning_rate": 7.187303582652421e-05, "loss": 0.0619, "step": 14320 }, { "epoch": 1.13, "learning_rate": 7.183375235700818e-05, "loss": 0.0752, "step": 14340 }, { "epoch": 1.13, "learning_rate": 7.179446888749214e-05, "loss": 0.0676, "step": 14360 }, { "epoch": 1.13, "learning_rate": 7.175518541797612e-05, "loss": 0.0563, "step": 14380 }, { "epoch": 1.13, "learning_rate": 7.171590194846009e-05, "loss": 0.0614, "step": 14400 }, { "epoch": 1.13, "learning_rate": 7.167661847894407e-05, "loss": 0.055, "step": 14420 }, { "epoch": 1.13, "learning_rate": 7.163733500942804e-05, "loss": 0.0586, "step": 14440 }, { "epoch": 1.14, "learning_rate": 7.1598051539912e-05, "loss": 0.0744, "step": 14460 }, { "epoch": 1.14, "learning_rate": 7.155876807039598e-05, "loss": 0.0554, "step": 14480 }, { "epoch": 1.14, "learning_rate": 7.151948460087995e-05, "loss": 0.0541, "step": 14500 }, { "epoch": 1.14, "learning_rate": 7.148020113136392e-05, "loss": 0.0574, "step": 14520 }, { "epoch": 1.14, "learning_rate": 7.14409176618479e-05, "loss": 0.0564, "step": 14540 }, { "epoch": 1.14, "learning_rate": 7.140163419233187e-05, "loss": 0.0585, "step": 14560 }, { "epoch": 1.15, "learning_rate": 7.136235072281585e-05, "loss": 0.0488, "step": 14580 }, { "epoch": 1.15, "learning_rate": 7.132306725329981e-05, "loss": 0.0597, "step": 14600 }, { "epoch": 1.15, "learning_rate": 7.128378378378378e-05, "loss": 0.0542, "step": 14620 }, { "epoch": 1.15, "learning_rate": 7.124450031426776e-05, "loss": 0.0581, "step": 14640 }, { "epoch": 1.15, "learning_rate": 7.120521684475173e-05, "loss": 0.0495, "step": 14660 }, { "epoch": 1.15, "learning_rate": 7.11659333752357e-05, "loss": 0.0529, "step": 14680 }, { "epoch": 1.15, "learning_rate": 7.112664990571968e-05, "loss": 0.065, "step": 14700 }, { "epoch": 1.16, "learning_rate": 7.108736643620365e-05, "loss": 0.0705, "step": 14720 }, { "epoch": 1.16, "learning_rate": 7.104808296668763e-05, "loss": 0.0447, "step": 14740 }, { "epoch": 1.16, "learning_rate": 7.10087994971716e-05, "loss": 0.0557, "step": 14760 }, { "epoch": 1.16, "learning_rate": 7.096951602765557e-05, "loss": 0.0584, "step": 14780 }, { "epoch": 1.16, "learning_rate": 7.093023255813955e-05, "loss": 0.0741, "step": 14800 }, { "epoch": 1.16, "learning_rate": 7.089094908862351e-05, "loss": 0.0618, "step": 14820 }, { "epoch": 1.17, "learning_rate": 7.085166561910748e-05, "loss": 0.0776, "step": 14840 }, { "epoch": 1.17, "learning_rate": 7.081238214959146e-05, "loss": 0.0669, "step": 14860 }, { "epoch": 1.17, "learning_rate": 7.077309868007542e-05, "loss": 0.0652, "step": 14880 }, { "epoch": 1.17, "learning_rate": 7.073381521055939e-05, "loss": 0.0544, "step": 14900 }, { "epoch": 1.17, "learning_rate": 7.069453174104337e-05, "loss": 0.0497, "step": 14920 }, { "epoch": 1.17, "learning_rate": 7.065524827152734e-05, "loss": 0.0582, "step": 14940 }, { "epoch": 1.18, "learning_rate": 7.061596480201131e-05, "loss": 0.0872, "step": 14960 }, { "epoch": 1.18, "learning_rate": 7.057668133249529e-05, "loss": 0.0624, "step": 14980 }, { "epoch": 1.18, "learning_rate": 7.053739786297926e-05, "loss": 0.0431, "step": 15000 }, { "epoch": 1.18, "learning_rate": 7.049811439346324e-05, "loss": 0.0479, "step": 15020 }, { "epoch": 1.18, "learning_rate": 7.045883092394721e-05, "loss": 0.0523, "step": 15040 }, { "epoch": 1.18, "learning_rate": 7.041954745443118e-05, "loss": 0.0564, "step": 15060 }, { "epoch": 1.18, "learning_rate": 7.038026398491515e-05, "loss": 0.0404, "step": 15080 }, { "epoch": 1.19, "learning_rate": 7.034098051539912e-05, "loss": 0.0698, "step": 15100 }, { "epoch": 1.19, "learning_rate": 7.03016970458831e-05, "loss": 0.0424, "step": 15120 }, { "epoch": 1.19, "learning_rate": 7.026241357636707e-05, "loss": 0.0537, "step": 15140 }, { "epoch": 1.19, "learning_rate": 7.022313010685104e-05, "loss": 0.0684, "step": 15160 }, { "epoch": 1.19, "learning_rate": 7.018384663733502e-05, "loss": 0.066, "step": 15180 }, { "epoch": 1.19, "learning_rate": 7.014456316781899e-05, "loss": 0.0657, "step": 15200 }, { "epoch": 1.2, "learning_rate": 7.010527969830296e-05, "loss": 0.0505, "step": 15220 }, { "epoch": 1.2, "learning_rate": 7.006599622878694e-05, "loss": 0.0641, "step": 15240 }, { "epoch": 1.2, "learning_rate": 7.002671275927091e-05, "loss": 0.0615, "step": 15260 }, { "epoch": 1.2, "learning_rate": 6.998742928975489e-05, "loss": 0.0517, "step": 15280 }, { "epoch": 1.2, "learning_rate": 6.994814582023885e-05, "loss": 0.0779, "step": 15300 }, { "epoch": 1.2, "learning_rate": 6.990886235072281e-05, "loss": 0.0544, "step": 15320 }, { "epoch": 1.21, "learning_rate": 6.986957888120678e-05, "loss": 0.0665, "step": 15340 }, { "epoch": 1.21, "learning_rate": 6.983029541169076e-05, "loss": 0.0627, "step": 15360 }, { "epoch": 1.21, "learning_rate": 6.979101194217473e-05, "loss": 0.0615, "step": 15380 }, { "epoch": 1.21, "learning_rate": 6.97517284726587e-05, "loss": 0.0391, "step": 15400 }, { "epoch": 1.21, "learning_rate": 6.971244500314268e-05, "loss": 0.0582, "step": 15420 }, { "epoch": 1.21, "learning_rate": 6.967316153362665e-05, "loss": 0.0672, "step": 15440 }, { "epoch": 1.21, "learning_rate": 6.963387806411063e-05, "loss": 0.0498, "step": 15460 }, { "epoch": 1.22, "learning_rate": 6.95945945945946e-05, "loss": 0.0558, "step": 15480 }, { "epoch": 1.22, "learning_rate": 6.955531112507858e-05, "loss": 0.0436, "step": 15500 }, { "epoch": 1.22, "learning_rate": 6.951602765556255e-05, "loss": 0.0482, "step": 15520 }, { "epoch": 1.22, "learning_rate": 6.947674418604651e-05, "loss": 0.0507, "step": 15540 }, { "epoch": 1.22, "learning_rate": 6.943746071653048e-05, "loss": 0.0696, "step": 15560 }, { "epoch": 1.22, "learning_rate": 6.939817724701446e-05, "loss": 0.0516, "step": 15580 }, { "epoch": 1.23, "learning_rate": 6.935889377749843e-05, "loss": 0.0532, "step": 15600 }, { "epoch": 1.23, "learning_rate": 6.93196103079824e-05, "loss": 0.0521, "step": 15620 }, { "epoch": 1.23, "learning_rate": 6.928032683846638e-05, "loss": 0.0565, "step": 15640 }, { "epoch": 1.23, "learning_rate": 6.924104336895035e-05, "loss": 0.0619, "step": 15660 }, { "epoch": 1.23, "learning_rate": 6.920175989943433e-05, "loss": 0.0699, "step": 15680 }, { "epoch": 1.23, "learning_rate": 6.91624764299183e-05, "loss": 0.0776, "step": 15700 }, { "epoch": 1.24, "learning_rate": 6.912319296040226e-05, "loss": 0.0447, "step": 15720 }, { "epoch": 1.24, "learning_rate": 6.908390949088624e-05, "loss": 0.0571, "step": 15740 }, { "epoch": 1.24, "learning_rate": 6.904462602137021e-05, "loss": 0.0617, "step": 15760 }, { "epoch": 1.24, "learning_rate": 6.900534255185419e-05, "loss": 0.0736, "step": 15780 }, { "epoch": 1.24, "learning_rate": 6.896605908233815e-05, "loss": 0.0625, "step": 15800 }, { "epoch": 1.24, "learning_rate": 6.892677561282212e-05, "loss": 0.0563, "step": 15820 }, { "epoch": 1.24, "learning_rate": 6.88874921433061e-05, "loss": 0.0553, "step": 15840 }, { "epoch": 1.25, "learning_rate": 6.884820867379007e-05, "loss": 0.0748, "step": 15860 }, { "epoch": 1.25, "learning_rate": 6.880892520427404e-05, "loss": 0.0689, "step": 15880 }, { "epoch": 1.25, "learning_rate": 6.876964173475802e-05, "loss": 0.0533, "step": 15900 }, { "epoch": 1.25, "learning_rate": 6.873035826524199e-05, "loss": 0.037, "step": 15920 }, { "epoch": 1.25, "learning_rate": 6.869107479572597e-05, "loss": 0.0535, "step": 15940 }, { "epoch": 1.25, "learning_rate": 6.865179132620994e-05, "loss": 0.066, "step": 15960 }, { "epoch": 1.26, "learning_rate": 6.861250785669391e-05, "loss": 0.057, "step": 15980 }, { "epoch": 1.26, "learning_rate": 6.857322438717789e-05, "loss": 0.0509, "step": 16000 }, { "epoch": 1.26, "learning_rate": 6.853394091766185e-05, "loss": 0.0382, "step": 16020 }, { "epoch": 1.26, "learning_rate": 6.849465744814582e-05, "loss": 0.0648, "step": 16040 }, { "epoch": 1.26, "learning_rate": 6.84553739786298e-05, "loss": 0.0745, "step": 16060 }, { "epoch": 1.26, "learning_rate": 6.841609050911377e-05, "loss": 0.0571, "step": 16080 }, { "epoch": 1.26, "learning_rate": 6.837680703959774e-05, "loss": 0.0625, "step": 16100 }, { "epoch": 1.27, "learning_rate": 6.83375235700817e-05, "loss": 0.0508, "step": 16120 }, { "epoch": 1.27, "learning_rate": 6.829824010056568e-05, "loss": 0.0583, "step": 16140 }, { "epoch": 1.27, "learning_rate": 6.825895663104965e-05, "loss": 0.0501, "step": 16160 }, { "epoch": 1.27, "learning_rate": 6.821967316153363e-05, "loss": 0.0573, "step": 16180 }, { "epoch": 1.27, "learning_rate": 6.81803896920176e-05, "loss": 0.0576, "step": 16200 }, { "epoch": 1.27, "learning_rate": 6.814110622250158e-05, "loss": 0.0658, "step": 16220 }, { "epoch": 1.28, "learning_rate": 6.810182275298555e-05, "loss": 0.0724, "step": 16240 }, { "epoch": 1.28, "learning_rate": 6.806253928346951e-05, "loss": 0.0524, "step": 16260 }, { "epoch": 1.28, "learning_rate": 6.802325581395348e-05, "loss": 0.0554, "step": 16280 }, { "epoch": 1.28, "learning_rate": 6.798397234443746e-05, "loss": 0.0498, "step": 16300 }, { "epoch": 1.28, "learning_rate": 6.794468887492143e-05, "loss": 0.067, "step": 16320 }, { "epoch": 1.28, "learning_rate": 6.790540540540541e-05, "loss": 0.0833, "step": 16340 }, { "epoch": 1.29, "learning_rate": 6.786612193588938e-05, "loss": 0.053, "step": 16360 }, { "epoch": 1.29, "learning_rate": 6.782683846637336e-05, "loss": 0.0613, "step": 16380 }, { "epoch": 1.29, "learning_rate": 6.778755499685733e-05, "loss": 0.0493, "step": 16400 }, { "epoch": 1.29, "learning_rate": 6.77482715273413e-05, "loss": 0.0515, "step": 16420 }, { "epoch": 1.29, "learning_rate": 6.770898805782528e-05, "loss": 0.0507, "step": 16440 }, { "epoch": 1.29, "learning_rate": 6.766970458830925e-05, "loss": 0.0466, "step": 16460 }, { "epoch": 1.29, "learning_rate": 6.763042111879321e-05, "loss": 0.0512, "step": 16480 }, { "epoch": 1.3, "learning_rate": 6.759113764927719e-05, "loss": 0.0464, "step": 16500 }, { "epoch": 1.3, "learning_rate": 6.755185417976116e-05, "loss": 0.0555, "step": 16520 }, { "epoch": 1.3, "learning_rate": 6.751257071024512e-05, "loss": 0.0468, "step": 16540 }, { "epoch": 1.3, "learning_rate": 6.74732872407291e-05, "loss": 0.0512, "step": 16560 }, { "epoch": 1.3, "learning_rate": 6.743400377121307e-05, "loss": 0.0506, "step": 16580 }, { "epoch": 1.3, "learning_rate": 6.739472030169704e-05, "loss": 0.0583, "step": 16600 }, { "epoch": 1.31, "learning_rate": 6.735543683218102e-05, "loss": 0.0324, "step": 16620 }, { "epoch": 1.31, "learning_rate": 6.731615336266499e-05, "loss": 0.0818, "step": 16640 }, { "epoch": 1.31, "learning_rate": 6.727686989314897e-05, "loss": 0.0672, "step": 16660 }, { "epoch": 1.31, "learning_rate": 6.723758642363294e-05, "loss": 0.0457, "step": 16680 }, { "epoch": 1.31, "learning_rate": 6.719830295411691e-05, "loss": 0.0606, "step": 16700 }, { "epoch": 1.31, "learning_rate": 6.715901948460089e-05, "loss": 0.0487, "step": 16720 }, { "epoch": 1.32, "learning_rate": 6.711973601508485e-05, "loss": 0.0684, "step": 16740 }, { "epoch": 1.32, "learning_rate": 6.708045254556882e-05, "loss": 0.0341, "step": 16760 }, { "epoch": 1.32, "learning_rate": 6.70411690760528e-05, "loss": 0.0771, "step": 16780 }, { "epoch": 1.32, "learning_rate": 6.700188560653677e-05, "loss": 0.0573, "step": 16800 }, { "epoch": 1.32, "learning_rate": 6.696260213702075e-05, "loss": 0.0626, "step": 16820 }, { "epoch": 1.32, "learning_rate": 6.692331866750472e-05, "loss": 0.0549, "step": 16840 }, { "epoch": 1.32, "learning_rate": 6.68840351979887e-05, "loss": 0.0514, "step": 16860 }, { "epoch": 1.33, "learning_rate": 6.684475172847267e-05, "loss": 0.0522, "step": 16880 }, { "epoch": 1.33, "learning_rate": 6.680546825895664e-05, "loss": 0.0686, "step": 16900 }, { "epoch": 1.33, "learning_rate": 6.676618478944062e-05, "loss": 0.0778, "step": 16920 }, { "epoch": 1.33, "learning_rate": 6.672690131992459e-05, "loss": 0.0582, "step": 16940 }, { "epoch": 1.33, "learning_rate": 6.668761785040855e-05, "loss": 0.0578, "step": 16960 }, { "epoch": 1.33, "learning_rate": 6.664833438089253e-05, "loss": 0.055, "step": 16980 }, { "epoch": 1.34, "learning_rate": 6.660905091137649e-05, "loss": 0.0623, "step": 17000 }, { "epoch": 1.34, "learning_rate": 6.656976744186046e-05, "loss": 0.0589, "step": 17020 }, { "epoch": 1.34, "learning_rate": 6.653048397234443e-05, "loss": 0.0728, "step": 17040 }, { "epoch": 1.34, "learning_rate": 6.649120050282841e-05, "loss": 0.0522, "step": 17060 }, { "epoch": 1.34, "learning_rate": 6.645191703331238e-05, "loss": 0.0482, "step": 17080 }, { "epoch": 1.34, "learning_rate": 6.641263356379636e-05, "loss": 0.0629, "step": 17100 }, { "epoch": 1.35, "learning_rate": 6.637335009428033e-05, "loss": 0.0395, "step": 17120 }, { "epoch": 1.35, "learning_rate": 6.63340666247643e-05, "loss": 0.0435, "step": 17140 }, { "epoch": 1.35, "learning_rate": 6.629478315524828e-05, "loss": 0.0593, "step": 17160 }, { "epoch": 1.35, "learning_rate": 6.625549968573225e-05, "loss": 0.0463, "step": 17180 }, { "epoch": 1.35, "learning_rate": 6.621621621621621e-05, "loss": 0.0622, "step": 17200 }, { "epoch": 1.35, "learning_rate": 6.617693274670019e-05, "loss": 0.0585, "step": 17220 }, { "epoch": 1.35, "learning_rate": 6.613764927718416e-05, "loss": 0.0796, "step": 17240 }, { "epoch": 1.36, "learning_rate": 6.609836580766814e-05, "loss": 0.0409, "step": 17260 }, { "epoch": 1.36, "learning_rate": 6.605908233815211e-05, "loss": 0.0545, "step": 17280 }, { "epoch": 1.36, "learning_rate": 6.601979886863608e-05, "loss": 0.0562, "step": 17300 }, { "epoch": 1.36, "learning_rate": 6.598051539912006e-05, "loss": 0.0489, "step": 17320 }, { "epoch": 1.36, "learning_rate": 6.594123192960403e-05, "loss": 0.0609, "step": 17340 }, { "epoch": 1.36, "learning_rate": 6.5901948460088e-05, "loss": 0.0507, "step": 17360 }, { "epoch": 1.37, "learning_rate": 6.586266499057197e-05, "loss": 0.0419, "step": 17380 }, { "epoch": 1.37, "learning_rate": 6.582338152105594e-05, "loss": 0.0823, "step": 17400 }, { "epoch": 1.37, "learning_rate": 6.578409805153992e-05, "loss": 0.0512, "step": 17420 }, { "epoch": 1.37, "learning_rate": 6.574481458202389e-05, "loss": 0.058, "step": 17440 }, { "epoch": 1.37, "learning_rate": 6.570553111250785e-05, "loss": 0.0545, "step": 17460 }, { "epoch": 1.37, "learning_rate": 6.566624764299182e-05, "loss": 0.0509, "step": 17480 }, { "epoch": 1.37, "learning_rate": 6.56269641734758e-05, "loss": 0.0694, "step": 17500 }, { "epoch": 1.38, "learning_rate": 6.558768070395977e-05, "loss": 0.0678, "step": 17520 }, { "epoch": 1.38, "learning_rate": 6.554839723444375e-05, "loss": 0.0639, "step": 17540 }, { "epoch": 1.38, "learning_rate": 6.550911376492772e-05, "loss": 0.0505, "step": 17560 }, { "epoch": 1.38, "learning_rate": 6.54698302954117e-05, "loss": 0.0558, "step": 17580 }, { "epoch": 1.38, "learning_rate": 6.543054682589567e-05, "loss": 0.0495, "step": 17600 }, { "epoch": 1.38, "learning_rate": 6.539126335637964e-05, "loss": 0.0555, "step": 17620 }, { "epoch": 1.39, "learning_rate": 6.535197988686362e-05, "loss": 0.0496, "step": 17640 }, { "epoch": 1.39, "learning_rate": 6.531269641734759e-05, "loss": 0.0434, "step": 17660 }, { "epoch": 1.39, "learning_rate": 6.527341294783155e-05, "loss": 0.0493, "step": 17680 }, { "epoch": 1.39, "learning_rate": 6.523412947831553e-05, "loss": 0.068, "step": 17700 }, { "epoch": 1.39, "learning_rate": 6.51948460087995e-05, "loss": 0.0613, "step": 17720 }, { "epoch": 1.39, "learning_rate": 6.515556253928347e-05, "loss": 0.0522, "step": 17740 }, { "epoch": 1.4, "learning_rate": 6.511627906976745e-05, "loss": 0.0472, "step": 17760 }, { "epoch": 1.4, "learning_rate": 6.507699560025142e-05, "loss": 0.049, "step": 17780 }, { "epoch": 1.4, "learning_rate": 6.503771213073538e-05, "loss": 0.0507, "step": 17800 }, { "epoch": 1.4, "learning_rate": 6.499842866121936e-05, "loss": 0.0482, "step": 17820 }, { "epoch": 1.4, "learning_rate": 6.495914519170333e-05, "loss": 0.0467, "step": 17840 }, { "epoch": 1.4, "learning_rate": 6.49198617221873e-05, "loss": 0.0564, "step": 17860 }, { "epoch": 1.4, "learning_rate": 6.488057825267128e-05, "loss": 0.0659, "step": 17880 }, { "epoch": 1.41, "learning_rate": 6.484129478315525e-05, "loss": 0.0472, "step": 17900 }, { "epoch": 1.41, "learning_rate": 6.480201131363923e-05, "loss": 0.0357, "step": 17920 }, { "epoch": 1.41, "learning_rate": 6.476272784412319e-05, "loss": 0.0471, "step": 17940 }, { "epoch": 1.41, "learning_rate": 6.472344437460716e-05, "loss": 0.0393, "step": 17960 }, { "epoch": 1.41, "learning_rate": 6.468416090509114e-05, "loss": 0.0425, "step": 17980 }, { "epoch": 1.41, "learning_rate": 6.464487743557511e-05, "loss": 0.0509, "step": 18000 }, { "epoch": 1.42, "learning_rate": 6.460559396605909e-05, "loss": 0.0578, "step": 18020 }, { "epoch": 1.42, "learning_rate": 6.456631049654306e-05, "loss": 0.0696, "step": 18040 }, { "epoch": 1.42, "learning_rate": 6.452702702702703e-05, "loss": 0.0547, "step": 18060 }, { "epoch": 1.42, "learning_rate": 6.448774355751101e-05, "loss": 0.0521, "step": 18080 }, { "epoch": 1.42, "learning_rate": 6.444846008799498e-05, "loss": 0.0633, "step": 18100 }, { "epoch": 1.42, "learning_rate": 6.440917661847896e-05, "loss": 0.0594, "step": 18120 }, { "epoch": 1.43, "learning_rate": 6.436989314896292e-05, "loss": 0.0652, "step": 18140 }, { "epoch": 1.43, "learning_rate": 6.433060967944689e-05, "loss": 0.0699, "step": 18160 }, { "epoch": 1.43, "learning_rate": 6.429132620993086e-05, "loss": 0.0598, "step": 18180 }, { "epoch": 1.43, "learning_rate": 6.425204274041484e-05, "loss": 0.0369, "step": 18200 }, { "epoch": 1.43, "learning_rate": 6.42127592708988e-05, "loss": 0.0637, "step": 18220 }, { "epoch": 1.43, "learning_rate": 6.417347580138277e-05, "loss": 0.0775, "step": 18240 }, { "epoch": 1.43, "learning_rate": 6.413419233186675e-05, "loss": 0.0358, "step": 18260 }, { "epoch": 1.44, "learning_rate": 6.409490886235072e-05, "loss": 0.0477, "step": 18280 }, { "epoch": 1.44, "learning_rate": 6.40556253928347e-05, "loss": 0.0574, "step": 18300 }, { "epoch": 1.44, "learning_rate": 6.401634192331867e-05, "loss": 0.0471, "step": 18320 }, { "epoch": 1.44, "learning_rate": 6.397705845380264e-05, "loss": 0.0511, "step": 18340 }, { "epoch": 1.44, "learning_rate": 6.393777498428662e-05, "loss": 0.0708, "step": 18360 }, { "epoch": 1.44, "learning_rate": 6.389849151477059e-05, "loss": 0.0655, "step": 18380 }, { "epoch": 1.45, "learning_rate": 6.385920804525455e-05, "loss": 0.0408, "step": 18400 }, { "epoch": 1.45, "learning_rate": 6.381992457573853e-05, "loss": 0.049, "step": 18420 }, { "epoch": 1.45, "learning_rate": 6.37806411062225e-05, "loss": 0.0441, "step": 18440 }, { "epoch": 1.45, "learning_rate": 6.374135763670648e-05, "loss": 0.062, "step": 18460 }, { "epoch": 1.45, "learning_rate": 6.370207416719045e-05, "loss": 0.0481, "step": 18480 }, { "epoch": 1.45, "learning_rate": 6.366279069767442e-05, "loss": 0.0542, "step": 18500 }, { "epoch": 1.46, "learning_rate": 6.36235072281584e-05, "loss": 0.0537, "step": 18520 }, { "epoch": 1.46, "learning_rate": 6.358422375864237e-05, "loss": 0.0614, "step": 18540 }, { "epoch": 1.46, "learning_rate": 6.354494028912635e-05, "loss": 0.0552, "step": 18560 }, { "epoch": 1.46, "learning_rate": 6.350565681961032e-05, "loss": 0.0594, "step": 18580 }, { "epoch": 1.46, "learning_rate": 6.34663733500943e-05, "loss": 0.0657, "step": 18600 }, { "epoch": 1.46, "learning_rate": 6.342708988057826e-05, "loss": 0.0477, "step": 18620 }, { "epoch": 1.46, "learning_rate": 6.338780641106223e-05, "loss": 0.0523, "step": 18640 }, { "epoch": 1.47, "learning_rate": 6.334852294154619e-05, "loss": 0.0536, "step": 18660 }, { "epoch": 1.47, "learning_rate": 6.330923947203016e-05, "loss": 0.0719, "step": 18680 }, { "epoch": 1.47, "learning_rate": 6.326995600251414e-05, "loss": 0.0564, "step": 18700 }, { "epoch": 1.47, "learning_rate": 6.323067253299811e-05, "loss": 0.0578, "step": 18720 }, { "epoch": 1.47, "learning_rate": 6.319138906348209e-05, "loss": 0.0585, "step": 18740 }, { "epoch": 1.47, "learning_rate": 6.315210559396606e-05, "loss": 0.0761, "step": 18760 }, { "epoch": 1.48, "learning_rate": 6.311282212445003e-05, "loss": 0.0574, "step": 18780 }, { "epoch": 1.48, "learning_rate": 6.307353865493401e-05, "loss": 0.0622, "step": 18800 }, { "epoch": 1.48, "learning_rate": 6.303425518541798e-05, "loss": 0.0585, "step": 18820 }, { "epoch": 1.48, "learning_rate": 6.299497171590196e-05, "loss": 0.082, "step": 18840 }, { "epoch": 1.48, "learning_rate": 6.295568824638592e-05, "loss": 0.0526, "step": 18860 }, { "epoch": 1.48, "learning_rate": 6.291640477686989e-05, "loss": 0.0606, "step": 18880 }, { "epoch": 1.48, "learning_rate": 6.287712130735387e-05, "loss": 0.058, "step": 18900 }, { "epoch": 1.49, "learning_rate": 6.283783783783784e-05, "loss": 0.0531, "step": 18920 }, { "epoch": 1.49, "learning_rate": 6.279855436832181e-05, "loss": 0.0501, "step": 18940 }, { "epoch": 1.49, "learning_rate": 6.275927089880579e-05, "loss": 0.0585, "step": 18960 }, { "epoch": 1.49, "learning_rate": 6.271998742928976e-05, "loss": 0.0579, "step": 18980 }, { "epoch": 1.49, "learning_rate": 6.268070395977374e-05, "loss": 0.0537, "step": 19000 }, { "epoch": 1.49, "learning_rate": 6.264142049025771e-05, "loss": 0.0485, "step": 19020 }, { "epoch": 1.5, "learning_rate": 6.260213702074168e-05, "loss": 0.0439, "step": 19040 }, { "epoch": 1.5, "learning_rate": 6.256285355122565e-05, "loss": 0.0368, "step": 19060 }, { "epoch": 1.5, "learning_rate": 6.252357008170962e-05, "loss": 0.039, "step": 19080 }, { "epoch": 1.5, "learning_rate": 6.24842866121936e-05, "loss": 0.051, "step": 19100 }, { "epoch": 1.5, "learning_rate": 6.244500314267755e-05, "loss": 0.0623, "step": 19120 }, { "epoch": 1.5, "learning_rate": 6.240571967316153e-05, "loss": 0.0562, "step": 19140 }, { "epoch": 1.51, "learning_rate": 6.23664362036455e-05, "loss": 0.0489, "step": 19160 }, { "epoch": 1.51, "learning_rate": 6.232715273412948e-05, "loss": 0.0655, "step": 19180 }, { "epoch": 1.51, "learning_rate": 6.228786926461345e-05, "loss": 0.045, "step": 19200 }, { "epoch": 1.51, "learning_rate": 6.224858579509742e-05, "loss": 0.0453, "step": 19220 }, { "epoch": 1.51, "learning_rate": 6.22093023255814e-05, "loss": 0.0645, "step": 19240 }, { "epoch": 1.51, "learning_rate": 6.217001885606537e-05, "loss": 0.0534, "step": 19260 }, { "epoch": 1.51, "learning_rate": 6.213073538654935e-05, "loss": 0.0415, "step": 19280 }, { "epoch": 1.52, "learning_rate": 6.209145191703332e-05, "loss": 0.0383, "step": 19300 }, { "epoch": 1.52, "learning_rate": 6.20521684475173e-05, "loss": 0.0578, "step": 19320 }, { "epoch": 1.52, "learning_rate": 6.201288497800126e-05, "loss": 0.0536, "step": 19340 }, { "epoch": 1.52, "learning_rate": 6.197360150848523e-05, "loss": 0.0474, "step": 19360 }, { "epoch": 1.52, "learning_rate": 6.19343180389692e-05, "loss": 0.0319, "step": 19380 }, { "epoch": 1.52, "learning_rate": 6.189503456945318e-05, "loss": 0.0751, "step": 19400 }, { "epoch": 1.53, "learning_rate": 6.185575109993715e-05, "loss": 0.0745, "step": 19420 }, { "epoch": 1.53, "learning_rate": 6.181646763042113e-05, "loss": 0.0593, "step": 19440 }, { "epoch": 1.53, "learning_rate": 6.17771841609051e-05, "loss": 0.0623, "step": 19460 }, { "epoch": 1.53, "learning_rate": 6.173790069138906e-05, "loss": 0.076, "step": 19480 }, { "epoch": 1.53, "learning_rate": 6.169861722187304e-05, "loss": 0.0486, "step": 19500 }, { "epoch": 1.53, "learning_rate": 6.165933375235701e-05, "loss": 0.068, "step": 19520 }, { "epoch": 1.54, "learning_rate": 6.162005028284098e-05, "loss": 0.0512, "step": 19540 }, { "epoch": 1.54, "learning_rate": 6.158076681332496e-05, "loss": 0.0441, "step": 19560 }, { "epoch": 1.54, "learning_rate": 6.154148334380893e-05, "loss": 0.0448, "step": 19580 }, { "epoch": 1.54, "learning_rate": 6.150219987429289e-05, "loss": 0.0509, "step": 19600 }, { "epoch": 1.54, "learning_rate": 6.146291640477687e-05, "loss": 0.0597, "step": 19620 }, { "epoch": 1.54, "learning_rate": 6.142363293526084e-05, "loss": 0.0588, "step": 19640 }, { "epoch": 1.54, "learning_rate": 6.138434946574482e-05, "loss": 0.0432, "step": 19660 }, { "epoch": 1.55, "learning_rate": 6.134506599622879e-05, "loss": 0.0622, "step": 19680 }, { "epoch": 1.55, "learning_rate": 6.130578252671276e-05, "loss": 0.0485, "step": 19700 }, { "epoch": 1.55, "learning_rate": 6.126649905719674e-05, "loss": 0.0762, "step": 19720 }, { "epoch": 1.55, "learning_rate": 6.122721558768071e-05, "loss": 0.0505, "step": 19740 }, { "epoch": 1.55, "learning_rate": 6.118793211816469e-05, "loss": 0.0647, "step": 19760 }, { "epoch": 1.55, "learning_rate": 6.114864864864866e-05, "loss": 0.0576, "step": 19780 }, { "epoch": 1.56, "learning_rate": 6.110936517913262e-05, "loss": 0.0589, "step": 19800 }, { "epoch": 1.56, "learning_rate": 6.10700817096166e-05, "loss": 0.0422, "step": 19820 }, { "epoch": 1.56, "learning_rate": 6.1030798240100576e-05, "loss": 0.0478, "step": 19840 }, { "epoch": 1.56, "learning_rate": 6.099151477058454e-05, "loss": 0.0425, "step": 19860 }, { "epoch": 1.56, "learning_rate": 6.095223130106852e-05, "loss": 0.0464, "step": 19880 }, { "epoch": 1.56, "learning_rate": 6.091294783155248e-05, "loss": 0.0561, "step": 19900 }, { "epoch": 1.57, "learning_rate": 6.087366436203645e-05, "loss": 0.054, "step": 19920 }, { "epoch": 1.57, "learning_rate": 6.0834380892520426e-05, "loss": 0.059, "step": 19940 }, { "epoch": 1.57, "learning_rate": 6.07950974230044e-05, "loss": 0.0603, "step": 19960 }, { "epoch": 1.57, "learning_rate": 6.0755813953488374e-05, "loss": 0.0702, "step": 19980 }, { "epoch": 1.57, "learning_rate": 6.071653048397234e-05, "loss": 0.0409, "step": 20000 }, { "epoch": 1.57, "eval_loss": 0.21751189231872559, "eval_matthews_correlation": 0.4470165224542278, "eval_runtime": 1024.81, "eval_samples_per_second": 111.274, "eval_steps_per_second": 13.91, "step": 20000 }, { "epoch": 1.57, "learning_rate": 6.0677247014456316e-05, "loss": 0.0608, "step": 20020 }, { "epoch": 1.57, "learning_rate": 6.063796354494029e-05, "loss": 0.062, "step": 20040 }, { "epoch": 1.58, "learning_rate": 6.0598680075424264e-05, "loss": 0.0404, "step": 20060 }, { "epoch": 1.58, "learning_rate": 6.055939660590824e-05, "loss": 0.037, "step": 20080 }, { "epoch": 1.58, "learning_rate": 6.052011313639221e-05, "loss": 0.0621, "step": 20100 }, { "epoch": 1.58, "learning_rate": 6.048082966687618e-05, "loss": 0.0426, "step": 20120 }, { "epoch": 1.58, "learning_rate": 6.0441546197360154e-05, "loss": 0.0415, "step": 20140 }, { "epoch": 1.58, "learning_rate": 6.040226272784413e-05, "loss": 0.0487, "step": 20160 }, { "epoch": 1.59, "learning_rate": 6.03629792583281e-05, "loss": 0.0519, "step": 20180 }, { "epoch": 1.59, "learning_rate": 6.0323695788812076e-05, "loss": 0.0687, "step": 20200 }, { "epoch": 1.59, "learning_rate": 6.0284412319296043e-05, "loss": 0.0433, "step": 20220 }, { "epoch": 1.59, "learning_rate": 6.024512884978002e-05, "loss": 0.0399, "step": 20240 }, { "epoch": 1.59, "learning_rate": 6.020584538026399e-05, "loss": 0.0515, "step": 20260 }, { "epoch": 1.59, "learning_rate": 6.0166561910747966e-05, "loss": 0.0584, "step": 20280 }, { "epoch": 1.59, "learning_rate": 6.012727844123194e-05, "loss": 0.0543, "step": 20300 }, { "epoch": 1.6, "learning_rate": 6.00879949717159e-05, "loss": 0.0471, "step": 20320 }, { "epoch": 1.6, "learning_rate": 6.0048711502199875e-05, "loss": 0.0617, "step": 20340 }, { "epoch": 1.6, "learning_rate": 6.000942803268384e-05, "loss": 0.0351, "step": 20360 }, { "epoch": 1.6, "learning_rate": 5.9970144563167816e-05, "loss": 0.0598, "step": 20380 }, { "epoch": 1.6, "learning_rate": 5.993086109365179e-05, "loss": 0.055, "step": 20400 }, { "epoch": 1.6, "learning_rate": 5.9891577624135764e-05, "loss": 0.0765, "step": 20420 }, { "epoch": 1.61, "learning_rate": 5.985229415461974e-05, "loss": 0.0603, "step": 20440 }, { "epoch": 1.61, "learning_rate": 5.981301068510371e-05, "loss": 0.0661, "step": 20460 }, { "epoch": 1.61, "learning_rate": 5.977372721558768e-05, "loss": 0.0569, "step": 20480 }, { "epoch": 1.61, "learning_rate": 5.9734443746071654e-05, "loss": 0.0513, "step": 20500 }, { "epoch": 1.61, "learning_rate": 5.969516027655563e-05, "loss": 0.0492, "step": 20520 }, { "epoch": 1.61, "learning_rate": 5.96558768070396e-05, "loss": 0.0528, "step": 20540 }, { "epoch": 1.62, "learning_rate": 5.9616593337523577e-05, "loss": 0.0492, "step": 20560 }, { "epoch": 1.62, "learning_rate": 5.9577309868007544e-05, "loss": 0.0681, "step": 20580 }, { "epoch": 1.62, "learning_rate": 5.953802639849152e-05, "loss": 0.0461, "step": 20600 }, { "epoch": 1.62, "learning_rate": 5.949874292897549e-05, "loss": 0.0526, "step": 20620 }, { "epoch": 1.62, "learning_rate": 5.9459459459459466e-05, "loss": 0.0491, "step": 20640 }, { "epoch": 1.62, "learning_rate": 5.942017598994344e-05, "loss": 0.0563, "step": 20660 }, { "epoch": 1.62, "learning_rate": 5.9380892520427415e-05, "loss": 0.0537, "step": 20680 }, { "epoch": 1.63, "learning_rate": 5.934160905091138e-05, "loss": 0.0374, "step": 20700 }, { "epoch": 1.63, "learning_rate": 5.9302325581395356e-05, "loss": 0.0421, "step": 20720 }, { "epoch": 1.63, "learning_rate": 5.926304211187932e-05, "loss": 0.0547, "step": 20740 }, { "epoch": 1.63, "learning_rate": 5.922375864236329e-05, "loss": 0.0465, "step": 20760 }, { "epoch": 1.63, "learning_rate": 5.9184475172847265e-05, "loss": 0.0417, "step": 20780 }, { "epoch": 1.63, "learning_rate": 5.914519170333124e-05, "loss": 0.0567, "step": 20800 }, { "epoch": 1.64, "learning_rate": 5.910590823381521e-05, "loss": 0.0643, "step": 20820 }, { "epoch": 1.64, "learning_rate": 5.906662476429918e-05, "loss": 0.0451, "step": 20840 }, { "epoch": 1.64, "learning_rate": 5.9027341294783155e-05, "loss": 0.0429, "step": 20860 }, { "epoch": 1.64, "learning_rate": 5.898805782526713e-05, "loss": 0.062, "step": 20880 }, { "epoch": 1.64, "learning_rate": 5.89487743557511e-05, "loss": 0.0558, "step": 20900 }, { "epoch": 1.64, "learning_rate": 5.890949088623508e-05, "loss": 0.0654, "step": 20920 }, { "epoch": 1.65, "learning_rate": 5.8870207416719044e-05, "loss": 0.0378, "step": 20940 }, { "epoch": 1.65, "learning_rate": 5.883092394720302e-05, "loss": 0.0582, "step": 20960 }, { "epoch": 1.65, "learning_rate": 5.879164047768699e-05, "loss": 0.0485, "step": 20980 }, { "epoch": 1.65, "learning_rate": 5.875235700817097e-05, "loss": 0.0333, "step": 21000 }, { "epoch": 1.65, "learning_rate": 5.871307353865494e-05, "loss": 0.0491, "step": 21020 }, { "epoch": 1.65, "learning_rate": 5.8673790069138915e-05, "loss": 0.0447, "step": 21040 }, { "epoch": 1.65, "learning_rate": 5.863450659962288e-05, "loss": 0.0352, "step": 21060 }, { "epoch": 1.66, "learning_rate": 5.8595223130106857e-05, "loss": 0.0579, "step": 21080 }, { "epoch": 1.66, "learning_rate": 5.855593966059083e-05, "loss": 0.0589, "step": 21100 }, { "epoch": 1.66, "learning_rate": 5.8516656191074805e-05, "loss": 0.064, "step": 21120 }, { "epoch": 1.66, "learning_rate": 5.847737272155878e-05, "loss": 0.0438, "step": 21140 }, { "epoch": 1.66, "learning_rate": 5.843808925204274e-05, "loss": 0.0563, "step": 21160 }, { "epoch": 1.66, "learning_rate": 5.8398805782526714e-05, "loss": 0.0406, "step": 21180 }, { "epoch": 1.67, "learning_rate": 5.835952231301068e-05, "loss": 0.0349, "step": 21200 }, { "epoch": 1.67, "learning_rate": 5.8320238843494655e-05, "loss": 0.0394, "step": 21220 }, { "epoch": 1.67, "learning_rate": 5.828095537397863e-05, "loss": 0.0526, "step": 21240 }, { "epoch": 1.67, "learning_rate": 5.8241671904462603e-05, "loss": 0.0448, "step": 21260 }, { "epoch": 1.67, "learning_rate": 5.820238843494658e-05, "loss": 0.0459, "step": 21280 }, { "epoch": 1.67, "learning_rate": 5.8163104965430545e-05, "loss": 0.0544, "step": 21300 }, { "epoch": 1.68, "learning_rate": 5.812382149591452e-05, "loss": 0.0429, "step": 21320 }, { "epoch": 1.68, "learning_rate": 5.808453802639849e-05, "loss": 0.0443, "step": 21340 }, { "epoch": 1.68, "learning_rate": 5.804525455688247e-05, "loss": 0.0577, "step": 21360 }, { "epoch": 1.68, "learning_rate": 5.800597108736644e-05, "loss": 0.041, "step": 21380 }, { "epoch": 1.68, "learning_rate": 5.7966687617850416e-05, "loss": 0.0413, "step": 21400 }, { "epoch": 1.68, "learning_rate": 5.792740414833438e-05, "loss": 0.0431, "step": 21420 }, { "epoch": 1.68, "learning_rate": 5.788812067881836e-05, "loss": 0.0402, "step": 21440 }, { "epoch": 1.69, "learning_rate": 5.784883720930233e-05, "loss": 0.0675, "step": 21460 }, { "epoch": 1.69, "learning_rate": 5.7809553739786305e-05, "loss": 0.0552, "step": 21480 }, { "epoch": 1.69, "learning_rate": 5.777027027027028e-05, "loss": 0.0469, "step": 21500 }, { "epoch": 1.69, "learning_rate": 5.773098680075425e-05, "loss": 0.0745, "step": 21520 }, { "epoch": 1.69, "learning_rate": 5.769170333123822e-05, "loss": 0.0481, "step": 21540 }, { "epoch": 1.69, "learning_rate": 5.7652419861722195e-05, "loss": 0.0489, "step": 21560 }, { "epoch": 1.7, "learning_rate": 5.7613136392206156e-05, "loss": 0.0336, "step": 21580 }, { "epoch": 1.7, "learning_rate": 5.757385292269013e-05, "loss": 0.0422, "step": 21600 }, { "epoch": 1.7, "learning_rate": 5.7534569453174104e-05, "loss": 0.0604, "step": 21620 }, { "epoch": 1.7, "learning_rate": 5.749528598365808e-05, "loss": 0.0513, "step": 21640 }, { "epoch": 1.7, "learning_rate": 5.7456002514142045e-05, "loss": 0.0488, "step": 21660 }, { "epoch": 1.7, "learning_rate": 5.741671904462602e-05, "loss": 0.0434, "step": 21680 }, { "epoch": 1.7, "learning_rate": 5.7377435575109994e-05, "loss": 0.0411, "step": 21700 }, { "epoch": 1.71, "learning_rate": 5.733815210559397e-05, "loss": 0.0478, "step": 21720 }, { "epoch": 1.71, "learning_rate": 5.729886863607794e-05, "loss": 0.0401, "step": 21740 }, { "epoch": 1.71, "learning_rate": 5.7259585166561916e-05, "loss": 0.0665, "step": 21760 }, { "epoch": 1.71, "learning_rate": 5.7220301697045883e-05, "loss": 0.0522, "step": 21780 }, { "epoch": 1.71, "learning_rate": 5.718101822752986e-05, "loss": 0.0416, "step": 21800 }, { "epoch": 1.71, "learning_rate": 5.714173475801383e-05, "loss": 0.0585, "step": 21820 }, { "epoch": 1.72, "learning_rate": 5.7102451288497806e-05, "loss": 0.0315, "step": 21840 }, { "epoch": 1.72, "learning_rate": 5.706316781898178e-05, "loss": 0.0528, "step": 21860 }, { "epoch": 1.72, "learning_rate": 5.702388434946575e-05, "loss": 0.0655, "step": 21880 }, { "epoch": 1.72, "learning_rate": 5.698460087994972e-05, "loss": 0.0523, "step": 21900 }, { "epoch": 1.72, "learning_rate": 5.6945317410433696e-05, "loss": 0.0526, "step": 21920 }, { "epoch": 1.72, "learning_rate": 5.690603394091767e-05, "loss": 0.0704, "step": 21940 }, { "epoch": 1.73, "learning_rate": 5.6866750471401644e-05, "loss": 0.0574, "step": 21960 }, { "epoch": 1.73, "learning_rate": 5.682746700188562e-05, "loss": 0.0581, "step": 21980 }, { "epoch": 1.73, "learning_rate": 5.678818353236958e-05, "loss": 0.0338, "step": 22000 }, { "epoch": 1.73, "learning_rate": 5.674890006285355e-05, "loss": 0.0556, "step": 22020 }, { "epoch": 1.73, "learning_rate": 5.670961659333752e-05, "loss": 0.0494, "step": 22040 }, { "epoch": 1.73, "learning_rate": 5.6670333123821494e-05, "loss": 0.0385, "step": 22060 }, { "epoch": 1.73, "learning_rate": 5.663104965430547e-05, "loss": 0.0468, "step": 22080 }, { "epoch": 1.74, "learning_rate": 5.659176618478944e-05, "loss": 0.0622, "step": 22100 }, { "epoch": 1.74, "learning_rate": 5.655248271527342e-05, "loss": 0.0382, "step": 22120 }, { "epoch": 1.74, "learning_rate": 5.6513199245757384e-05, "loss": 0.0478, "step": 22140 }, { "epoch": 1.74, "learning_rate": 5.647391577624136e-05, "loss": 0.0375, "step": 22160 }, { "epoch": 1.74, "learning_rate": 5.643463230672533e-05, "loss": 0.048, "step": 22180 }, { "epoch": 1.74, "learning_rate": 5.6395348837209306e-05, "loss": 0.0495, "step": 22200 }, { "epoch": 1.75, "learning_rate": 5.635606536769328e-05, "loss": 0.0631, "step": 22220 }, { "epoch": 1.75, "learning_rate": 5.631678189817725e-05, "loss": 0.0599, "step": 22240 }, { "epoch": 1.75, "learning_rate": 5.627749842866122e-05, "loss": 0.0532, "step": 22260 }, { "epoch": 1.75, "learning_rate": 5.6238214959145196e-05, "loss": 0.0441, "step": 22280 }, { "epoch": 1.75, "learning_rate": 5.619893148962917e-05, "loss": 0.0536, "step": 22300 }, { "epoch": 1.75, "learning_rate": 5.6159648020113144e-05, "loss": 0.0567, "step": 22320 }, { "epoch": 1.76, "learning_rate": 5.612036455059712e-05, "loss": 0.0477, "step": 22340 }, { "epoch": 1.76, "learning_rate": 5.6081081081081086e-05, "loss": 0.0697, "step": 22360 }, { "epoch": 1.76, "learning_rate": 5.604179761156506e-05, "loss": 0.048, "step": 22380 }, { "epoch": 1.76, "learning_rate": 5.600251414204902e-05, "loss": 0.0493, "step": 22400 }, { "epoch": 1.76, "learning_rate": 5.5963230672532995e-05, "loss": 0.0604, "step": 22420 }, { "epoch": 1.76, "learning_rate": 5.592394720301697e-05, "loss": 0.0605, "step": 22440 }, { "epoch": 1.76, "learning_rate": 5.588466373350094e-05, "loss": 0.0429, "step": 22460 }, { "epoch": 1.77, "learning_rate": 5.584538026398492e-05, "loss": 0.0614, "step": 22480 }, { "epoch": 1.77, "learning_rate": 5.5806096794468884e-05, "loss": 0.0439, "step": 22500 }, { "epoch": 1.77, "learning_rate": 5.576681332495286e-05, "loss": 0.0501, "step": 22520 }, { "epoch": 1.77, "learning_rate": 5.572752985543683e-05, "loss": 0.0403, "step": 22540 }, { "epoch": 1.77, "learning_rate": 5.568824638592081e-05, "loss": 0.0473, "step": 22560 }, { "epoch": 1.77, "learning_rate": 5.564896291640478e-05, "loss": 0.0499, "step": 22580 }, { "epoch": 1.78, "learning_rate": 5.560967944688875e-05, "loss": 0.0629, "step": 22600 }, { "epoch": 1.78, "learning_rate": 5.557039597737272e-05, "loss": 0.0427, "step": 22620 }, { "epoch": 1.78, "learning_rate": 5.55311125078567e-05, "loss": 0.0455, "step": 22640 }, { "epoch": 1.78, "learning_rate": 5.549182903834067e-05, "loss": 0.0607, "step": 22660 }, { "epoch": 1.78, "learning_rate": 5.5452545568824645e-05, "loss": 0.0637, "step": 22680 }, { "epoch": 1.78, "learning_rate": 5.541326209930862e-05, "loss": 0.0599, "step": 22700 }, { "epoch": 1.79, "learning_rate": 5.5373978629792586e-05, "loss": 0.0383, "step": 22720 }, { "epoch": 1.79, "learning_rate": 5.533469516027656e-05, "loss": 0.0382, "step": 22740 }, { "epoch": 1.79, "learning_rate": 5.5295411690760535e-05, "loss": 0.0527, "step": 22760 }, { "epoch": 1.79, "learning_rate": 5.525612822124451e-05, "loss": 0.0468, "step": 22780 }, { "epoch": 1.79, "learning_rate": 5.521684475172848e-05, "loss": 0.0483, "step": 22800 }, { "epoch": 1.79, "learning_rate": 5.5177561282212443e-05, "loss": 0.0481, "step": 22820 }, { "epoch": 1.79, "learning_rate": 5.513827781269642e-05, "loss": 0.0511, "step": 22840 }, { "epoch": 1.8, "learning_rate": 5.5098994343180385e-05, "loss": 0.0541, "step": 22860 }, { "epoch": 1.8, "learning_rate": 5.505971087366436e-05, "loss": 0.0487, "step": 22880 }, { "epoch": 1.8, "learning_rate": 5.502042740414833e-05, "loss": 0.0452, "step": 22900 }, { "epoch": 1.8, "learning_rate": 5.498114393463231e-05, "loss": 0.0618, "step": 22920 }, { "epoch": 1.8, "learning_rate": 5.494186046511628e-05, "loss": 0.0462, "step": 22940 }, { "epoch": 1.8, "learning_rate": 5.4902576995600256e-05, "loss": 0.0453, "step": 22960 }, { "epoch": 1.81, "learning_rate": 5.486329352608422e-05, "loss": 0.0567, "step": 22980 }, { "epoch": 1.81, "learning_rate": 5.48240100565682e-05, "loss": 0.0683, "step": 23000 }, { "epoch": 1.81, "learning_rate": 5.478472658705217e-05, "loss": 0.049, "step": 23020 }, { "epoch": 1.81, "learning_rate": 5.4745443117536145e-05, "loss": 0.0533, "step": 23040 }, { "epoch": 1.81, "learning_rate": 5.470615964802012e-05, "loss": 0.0498, "step": 23060 }, { "epoch": 1.81, "learning_rate": 5.466687617850409e-05, "loss": 0.0619, "step": 23080 }, { "epoch": 1.81, "learning_rate": 5.462759270898806e-05, "loss": 0.0624, "step": 23100 }, { "epoch": 1.82, "learning_rate": 5.4588309239472035e-05, "loss": 0.0503, "step": 23120 }, { "epoch": 1.82, "learning_rate": 5.454902576995601e-05, "loss": 0.0387, "step": 23140 }, { "epoch": 1.82, "learning_rate": 5.4509742300439983e-05, "loss": 0.0413, "step": 23160 }, { "epoch": 1.82, "learning_rate": 5.447045883092395e-05, "loss": 0.0673, "step": 23180 }, { "epoch": 1.82, "learning_rate": 5.4431175361407925e-05, "loss": 0.0294, "step": 23200 }, { "epoch": 1.82, "learning_rate": 5.43918918918919e-05, "loss": 0.0437, "step": 23220 }, { "epoch": 1.83, "learning_rate": 5.435260842237586e-05, "loss": 0.049, "step": 23240 }, { "epoch": 1.83, "learning_rate": 5.4313324952859834e-05, "loss": 0.0364, "step": 23260 }, { "epoch": 1.83, "learning_rate": 5.427404148334381e-05, "loss": 0.0552, "step": 23280 }, { "epoch": 1.83, "learning_rate": 5.423475801382778e-05, "loss": 0.0588, "step": 23300 }, { "epoch": 1.83, "learning_rate": 5.4195474544311756e-05, "loss": 0.0389, "step": 23320 }, { "epoch": 1.83, "learning_rate": 5.4156191074795723e-05, "loss": 0.0507, "step": 23340 }, { "epoch": 1.84, "learning_rate": 5.41169076052797e-05, "loss": 0.0446, "step": 23360 }, { "epoch": 1.84, "learning_rate": 5.407762413576367e-05, "loss": 0.0291, "step": 23380 }, { "epoch": 1.84, "learning_rate": 5.4038340666247646e-05, "loss": 0.0492, "step": 23400 }, { "epoch": 1.84, "learning_rate": 5.399905719673162e-05, "loss": 0.046, "step": 23420 }, { "epoch": 1.84, "learning_rate": 5.395977372721559e-05, "loss": 0.0568, "step": 23440 }, { "epoch": 1.84, "learning_rate": 5.392049025769956e-05, "loss": 0.0429, "step": 23460 }, { "epoch": 1.84, "learning_rate": 5.3881206788183536e-05, "loss": 0.048, "step": 23480 }, { "epoch": 1.85, "learning_rate": 5.384192331866751e-05, "loss": 0.054, "step": 23500 }, { "epoch": 1.85, "learning_rate": 5.3802639849151484e-05, "loss": 0.0469, "step": 23520 }, { "epoch": 1.85, "learning_rate": 5.376335637963545e-05, "loss": 0.0349, "step": 23540 }, { "epoch": 1.85, "learning_rate": 5.3724072910119425e-05, "loss": 0.0635, "step": 23560 }, { "epoch": 1.85, "learning_rate": 5.36847894406034e-05, "loss": 0.0556, "step": 23580 }, { "epoch": 1.85, "learning_rate": 5.3645505971087374e-05, "loss": 0.0461, "step": 23600 }, { "epoch": 1.86, "learning_rate": 5.360622250157135e-05, "loss": 0.036, "step": 23620 }, { "epoch": 1.86, "learning_rate": 5.356693903205532e-05, "loss": 0.0482, "step": 23640 }, { "epoch": 1.86, "learning_rate": 5.352765556253928e-05, "loss": 0.0587, "step": 23660 }, { "epoch": 1.86, "learning_rate": 5.348837209302326e-05, "loss": 0.0585, "step": 23680 }, { "epoch": 1.86, "learning_rate": 5.3449088623507224e-05, "loss": 0.0443, "step": 23700 }, { "epoch": 1.86, "learning_rate": 5.34098051539912e-05, "loss": 0.0391, "step": 23720 }, { "epoch": 1.87, "learning_rate": 5.337052168447517e-05, "loss": 0.0406, "step": 23740 }, { "epoch": 1.87, "learning_rate": 5.3331238214959146e-05, "loss": 0.0514, "step": 23760 }, { "epoch": 1.87, "learning_rate": 5.329195474544312e-05, "loss": 0.0313, "step": 23780 }, { "epoch": 1.87, "learning_rate": 5.325267127592709e-05, "loss": 0.0493, "step": 23800 }, { "epoch": 1.87, "learning_rate": 5.321338780641106e-05, "loss": 0.0345, "step": 23820 }, { "epoch": 1.87, "learning_rate": 5.3174104336895036e-05, "loss": 0.0528, "step": 23840 }, { "epoch": 1.87, "learning_rate": 5.313482086737901e-05, "loss": 0.0502, "step": 23860 }, { "epoch": 1.88, "learning_rate": 5.3095537397862984e-05, "loss": 0.0605, "step": 23880 }, { "epoch": 1.88, "learning_rate": 5.305625392834695e-05, "loss": 0.0496, "step": 23900 }, { "epoch": 1.88, "learning_rate": 5.3016970458830926e-05, "loss": 0.0523, "step": 23920 }, { "epoch": 1.88, "learning_rate": 5.29776869893149e-05, "loss": 0.074, "step": 23940 }, { "epoch": 1.88, "learning_rate": 5.2938403519798874e-05, "loss": 0.0684, "step": 23960 }, { "epoch": 1.88, "learning_rate": 5.289912005028285e-05, "loss": 0.0417, "step": 23980 }, { "epoch": 1.89, "learning_rate": 5.285983658076682e-05, "loss": 0.0707, "step": 24000 }, { "epoch": 1.89, "learning_rate": 5.282055311125079e-05, "loss": 0.0487, "step": 24020 }, { "epoch": 1.89, "learning_rate": 5.2781269641734764e-05, "loss": 0.0384, "step": 24040 }, { "epoch": 1.89, "learning_rate": 5.274198617221874e-05, "loss": 0.0567, "step": 24060 }, { "epoch": 1.89, "learning_rate": 5.27027027027027e-05, "loss": 0.0426, "step": 24080 }, { "epoch": 1.89, "learning_rate": 5.266341923318667e-05, "loss": 0.0385, "step": 24100 }, { "epoch": 1.9, "learning_rate": 5.262413576367065e-05, "loss": 0.0423, "step": 24120 }, { "epoch": 1.9, "learning_rate": 5.258485229415462e-05, "loss": 0.0438, "step": 24140 }, { "epoch": 1.9, "learning_rate": 5.254556882463859e-05, "loss": 0.0418, "step": 24160 }, { "epoch": 1.9, "learning_rate": 5.250628535512256e-05, "loss": 0.0433, "step": 24180 }, { "epoch": 1.9, "learning_rate": 5.246700188560654e-05, "loss": 0.0527, "step": 24200 }, { "epoch": 1.9, "learning_rate": 5.242771841609051e-05, "loss": 0.0543, "step": 24220 }, { "epoch": 1.9, "learning_rate": 5.2388434946574485e-05, "loss": 0.0503, "step": 24240 }, { "epoch": 1.91, "learning_rate": 5.234915147705846e-05, "loss": 0.0537, "step": 24260 }, { "epoch": 1.91, "learning_rate": 5.2309868007542426e-05, "loss": 0.0442, "step": 24280 }, { "epoch": 1.91, "learning_rate": 5.22705845380264e-05, "loss": 0.054, "step": 24300 }, { "epoch": 1.91, "learning_rate": 5.2231301068510375e-05, "loss": 0.0538, "step": 24320 }, { "epoch": 1.91, "learning_rate": 5.219201759899435e-05, "loss": 0.0372, "step": 24340 }, { "epoch": 1.91, "learning_rate": 5.215273412947832e-05, "loss": 0.0386, "step": 24360 }, { "epoch": 1.92, "learning_rate": 5.211345065996229e-05, "loss": 0.0478, "step": 24380 }, { "epoch": 1.92, "learning_rate": 5.2074167190446264e-05, "loss": 0.0429, "step": 24400 }, { "epoch": 1.92, "learning_rate": 5.203488372093024e-05, "loss": 0.0346, "step": 24420 }, { "epoch": 1.92, "learning_rate": 5.199560025141421e-05, "loss": 0.0517, "step": 24440 }, { "epoch": 1.92, "learning_rate": 5.195631678189819e-05, "loss": 0.0589, "step": 24460 }, { "epoch": 1.92, "learning_rate": 5.1917033312382154e-05, "loss": 0.0538, "step": 24480 }, { "epoch": 1.92, "learning_rate": 5.187774984286612e-05, "loss": 0.0375, "step": 24500 }, { "epoch": 1.93, "learning_rate": 5.183846637335009e-05, "loss": 0.0375, "step": 24520 }, { "epoch": 1.93, "learning_rate": 5.179918290383406e-05, "loss": 0.0592, "step": 24540 }, { "epoch": 1.93, "learning_rate": 5.175989943431804e-05, "loss": 0.0453, "step": 24560 }, { "epoch": 1.93, "learning_rate": 5.172061596480201e-05, "loss": 0.0442, "step": 24580 }, { "epoch": 1.93, "learning_rate": 5.1681332495285985e-05, "loss": 0.0603, "step": 24600 }, { "epoch": 1.93, "learning_rate": 5.164204902576996e-05, "loss": 0.0556, "step": 24620 }, { "epoch": 1.94, "learning_rate": 5.160276555625393e-05, "loss": 0.0564, "step": 24640 }, { "epoch": 1.94, "learning_rate": 5.15634820867379e-05, "loss": 0.038, "step": 24660 }, { "epoch": 1.94, "learning_rate": 5.1524198617221875e-05, "loss": 0.0551, "step": 24680 }, { "epoch": 1.94, "learning_rate": 5.148491514770585e-05, "loss": 0.0406, "step": 24700 }, { "epoch": 1.94, "learning_rate": 5.1445631678189823e-05, "loss": 0.0382, "step": 24720 }, { "epoch": 1.94, "learning_rate": 5.140634820867379e-05, "loss": 0.0532, "step": 24740 }, { "epoch": 1.95, "learning_rate": 5.1367064739157765e-05, "loss": 0.0598, "step": 24760 }, { "epoch": 1.95, "learning_rate": 5.132778126964174e-05, "loss": 0.0456, "step": 24780 }, { "epoch": 1.95, "learning_rate": 5.128849780012571e-05, "loss": 0.0582, "step": 24800 }, { "epoch": 1.95, "learning_rate": 5.124921433060969e-05, "loss": 0.0539, "step": 24820 }, { "epoch": 1.95, "learning_rate": 5.1209930861093655e-05, "loss": 0.0605, "step": 24840 }, { "epoch": 1.95, "learning_rate": 5.117064739157763e-05, "loss": 0.0514, "step": 24860 }, { "epoch": 1.95, "learning_rate": 5.11313639220616e-05, "loss": 0.0472, "step": 24880 }, { "epoch": 1.96, "learning_rate": 5.109208045254558e-05, "loss": 0.0403, "step": 24900 }, { "epoch": 1.96, "learning_rate": 5.105279698302954e-05, "loss": 0.047, "step": 24920 }, { "epoch": 1.96, "learning_rate": 5.101351351351351e-05, "loss": 0.0416, "step": 24940 }, { "epoch": 1.96, "learning_rate": 5.0974230043997486e-05, "loss": 0.0415, "step": 24960 }, { "epoch": 1.96, "learning_rate": 5.093494657448146e-05, "loss": 0.05, "step": 24980 }, { "epoch": 1.96, "learning_rate": 5.089566310496543e-05, "loss": 0.0529, "step": 25000 }, { "epoch": 1.97, "learning_rate": 5.08563796354494e-05, "loss": 0.0664, "step": 25020 }, { "epoch": 1.97, "learning_rate": 5.0817096165933376e-05, "loss": 0.0536, "step": 25040 }, { "epoch": 1.97, "learning_rate": 5.077781269641735e-05, "loss": 0.0598, "step": 25060 }, { "epoch": 1.97, "learning_rate": 5.0738529226901324e-05, "loss": 0.0565, "step": 25080 }, { "epoch": 1.97, "learning_rate": 5.069924575738529e-05, "loss": 0.0459, "step": 25100 }, { "epoch": 1.97, "learning_rate": 5.0659962287869265e-05, "loss": 0.0398, "step": 25120 }, { "epoch": 1.98, "learning_rate": 5.062067881835324e-05, "loss": 0.0471, "step": 25140 }, { "epoch": 1.98, "learning_rate": 5.0581395348837214e-05, "loss": 0.0525, "step": 25160 }, { "epoch": 1.98, "learning_rate": 5.054211187932119e-05, "loss": 0.069, "step": 25180 }, { "epoch": 1.98, "learning_rate": 5.0502828409805155e-05, "loss": 0.075, "step": 25200 }, { "epoch": 1.98, "learning_rate": 5.046354494028913e-05, "loss": 0.0392, "step": 25220 }, { "epoch": 1.98, "learning_rate": 5.0424261470773103e-05, "loss": 0.0452, "step": 25240 }, { "epoch": 1.98, "learning_rate": 5.038497800125708e-05, "loss": 0.0536, "step": 25260 }, { "epoch": 1.99, "learning_rate": 5.034569453174105e-05, "loss": 0.0532, "step": 25280 }, { "epoch": 1.99, "learning_rate": 5.0306411062225026e-05, "loss": 0.0405, "step": 25300 }, { "epoch": 1.99, "learning_rate": 5.026712759270899e-05, "loss": 0.0515, "step": 25320 }, { "epoch": 1.99, "learning_rate": 5.022784412319296e-05, "loss": 0.0316, "step": 25340 }, { "epoch": 1.99, "learning_rate": 5.018856065367693e-05, "loss": 0.0592, "step": 25360 }, { "epoch": 1.99, "learning_rate": 5.01492771841609e-05, "loss": 0.0454, "step": 25380 }, { "epoch": 2.0, "learning_rate": 5.0109993714644876e-05, "loss": 0.0546, "step": 25400 }, { "epoch": 2.0, "learning_rate": 5.007071024512885e-05, "loss": 0.0512, "step": 25420 }, { "epoch": 2.0, "learning_rate": 5.0031426775612824e-05, "loss": 0.0556, "step": 25440 }, { "epoch": 2.0, "learning_rate": 4.999214330609679e-05, "loss": 0.0506, "step": 25460 }, { "epoch": 2.0, "learning_rate": 4.9952859836580766e-05, "loss": 0.0366, "step": 25480 }, { "epoch": 2.0, "learning_rate": 4.991357636706474e-05, "loss": 0.0346, "step": 25500 }, { "epoch": 2.01, "learning_rate": 4.9874292897548714e-05, "loss": 0.0285, "step": 25520 }, { "epoch": 2.01, "learning_rate": 4.983500942803269e-05, "loss": 0.0421, "step": 25540 }, { "epoch": 2.01, "learning_rate": 4.979572595851666e-05, "loss": 0.0378, "step": 25560 }, { "epoch": 2.01, "learning_rate": 4.975644248900063e-05, "loss": 0.0415, "step": 25580 }, { "epoch": 2.01, "learning_rate": 4.9717159019484604e-05, "loss": 0.0438, "step": 25600 }, { "epoch": 2.01, "learning_rate": 4.967787554996858e-05, "loss": 0.0369, "step": 25620 }, { "epoch": 2.01, "learning_rate": 4.963859208045255e-05, "loss": 0.0438, "step": 25640 }, { "epoch": 2.02, "learning_rate": 4.959930861093652e-05, "loss": 0.0352, "step": 25660 }, { "epoch": 2.02, "learning_rate": 4.9560025141420494e-05, "loss": 0.0282, "step": 25680 }, { "epoch": 2.02, "learning_rate": 4.952074167190446e-05, "loss": 0.0456, "step": 25700 }, { "epoch": 2.02, "learning_rate": 4.9481458202388435e-05, "loss": 0.0329, "step": 25720 }, { "epoch": 2.02, "learning_rate": 4.944217473287241e-05, "loss": 0.0308, "step": 25740 }, { "epoch": 2.02, "learning_rate": 4.9402891263356383e-05, "loss": 0.037, "step": 25760 }, { "epoch": 2.03, "learning_rate": 4.936360779384036e-05, "loss": 0.0343, "step": 25780 }, { "epoch": 2.03, "learning_rate": 4.9324324324324325e-05, "loss": 0.0359, "step": 25800 }, { "epoch": 2.03, "learning_rate": 4.92850408548083e-05, "loss": 0.029, "step": 25820 }, { "epoch": 2.03, "learning_rate": 4.924575738529227e-05, "loss": 0.0301, "step": 25840 }, { "epoch": 2.03, "learning_rate": 4.920647391577624e-05, "loss": 0.0434, "step": 25860 }, { "epoch": 2.03, "learning_rate": 4.9167190446260215e-05, "loss": 0.0328, "step": 25880 }, { "epoch": 2.03, "learning_rate": 4.912790697674419e-05, "loss": 0.0329, "step": 25900 }, { "epoch": 2.04, "learning_rate": 4.908862350722816e-05, "loss": 0.0475, "step": 25920 }, { "epoch": 2.04, "learning_rate": 4.904934003771213e-05, "loss": 0.0401, "step": 25940 }, { "epoch": 2.04, "learning_rate": 4.9010056568196104e-05, "loss": 0.0441, "step": 25960 }, { "epoch": 2.04, "learning_rate": 4.897077309868008e-05, "loss": 0.0337, "step": 25980 }, { "epoch": 2.04, "learning_rate": 4.893148962916405e-05, "loss": 0.0419, "step": 26000 }, { "epoch": 2.04, "learning_rate": 4.889220615964803e-05, "loss": 0.0437, "step": 26020 }, { "epoch": 2.05, "learning_rate": 4.8852922690131994e-05, "loss": 0.0377, "step": 26040 }, { "epoch": 2.05, "learning_rate": 4.881363922061597e-05, "loss": 0.039, "step": 26060 }, { "epoch": 2.05, "learning_rate": 4.8774355751099936e-05, "loss": 0.0262, "step": 26080 }, { "epoch": 2.05, "learning_rate": 4.873507228158391e-05, "loss": 0.0381, "step": 26100 }, { "epoch": 2.05, "learning_rate": 4.8695788812067884e-05, "loss": 0.0435, "step": 26120 }, { "epoch": 2.05, "learning_rate": 4.865650534255186e-05, "loss": 0.0353, "step": 26140 }, { "epoch": 2.06, "learning_rate": 4.8617221873035825e-05, "loss": 0.0279, "step": 26160 }, { "epoch": 2.06, "learning_rate": 4.85779384035198e-05, "loss": 0.0282, "step": 26180 }, { "epoch": 2.06, "learning_rate": 4.8538654934003774e-05, "loss": 0.0206, "step": 26200 }, { "epoch": 2.06, "learning_rate": 4.849937146448775e-05, "loss": 0.0521, "step": 26220 }, { "epoch": 2.06, "learning_rate": 4.846008799497172e-05, "loss": 0.0354, "step": 26240 }, { "epoch": 2.06, "learning_rate": 4.8420804525455696e-05, "loss": 0.0263, "step": 26260 }, { "epoch": 2.06, "learning_rate": 4.8381521055939663e-05, "loss": 0.0289, "step": 26280 }, { "epoch": 2.07, "learning_rate": 4.834223758642363e-05, "loss": 0.0385, "step": 26300 }, { "epoch": 2.07, "learning_rate": 4.8302954116907605e-05, "loss": 0.0299, "step": 26320 }, { "epoch": 2.07, "learning_rate": 4.826367064739158e-05, "loss": 0.044, "step": 26340 }, { "epoch": 2.07, "learning_rate": 4.822438717787555e-05, "loss": 0.0314, "step": 26360 }, { "epoch": 2.07, "learning_rate": 4.818510370835953e-05, "loss": 0.0302, "step": 26380 }, { "epoch": 2.07, "learning_rate": 4.8145820238843495e-05, "loss": 0.0608, "step": 26400 }, { "epoch": 2.08, "learning_rate": 4.810653676932747e-05, "loss": 0.0356, "step": 26420 }, { "epoch": 2.08, "learning_rate": 4.806725329981144e-05, "loss": 0.0387, "step": 26440 }, { "epoch": 2.08, "learning_rate": 4.802796983029542e-05, "loss": 0.0376, "step": 26460 }, { "epoch": 2.08, "learning_rate": 4.798868636077939e-05, "loss": 0.0237, "step": 26480 }, { "epoch": 2.08, "learning_rate": 4.794940289126336e-05, "loss": 0.0336, "step": 26500 }, { "epoch": 2.08, "learning_rate": 4.7910119421747326e-05, "loss": 0.0378, "step": 26520 }, { "epoch": 2.09, "learning_rate": 4.78708359522313e-05, "loss": 0.0209, "step": 26540 }, { "epoch": 2.09, "learning_rate": 4.7831552482715274e-05, "loss": 0.0511, "step": 26560 }, { "epoch": 2.09, "learning_rate": 4.779226901319925e-05, "loss": 0.0358, "step": 26580 }, { "epoch": 2.09, "learning_rate": 4.775298554368322e-05, "loss": 0.0306, "step": 26600 }, { "epoch": 2.09, "learning_rate": 4.77137020741672e-05, "loss": 0.0368, "step": 26620 }, { "epoch": 2.09, "learning_rate": 4.7674418604651164e-05, "loss": 0.0335, "step": 26640 }, { "epoch": 2.09, "learning_rate": 4.763513513513514e-05, "loss": 0.0355, "step": 26660 }, { "epoch": 2.1, "learning_rate": 4.759585166561911e-05, "loss": 0.0411, "step": 26680 }, { "epoch": 2.1, "learning_rate": 4.755656819610308e-05, "loss": 0.0362, "step": 26700 }, { "epoch": 2.1, "learning_rate": 4.7517284726587054e-05, "loss": 0.0301, "step": 26720 }, { "epoch": 2.1, "learning_rate": 4.747800125707103e-05, "loss": 0.0239, "step": 26740 }, { "epoch": 2.1, "learning_rate": 4.7438717787554995e-05, "loss": 0.0296, "step": 26760 }, { "epoch": 2.1, "learning_rate": 4.739943431803897e-05, "loss": 0.0372, "step": 26780 }, { "epoch": 2.11, "learning_rate": 4.7360150848522943e-05, "loss": 0.036, "step": 26800 }, { "epoch": 2.11, "learning_rate": 4.732086737900692e-05, "loss": 0.0277, "step": 26820 }, { "epoch": 2.11, "learning_rate": 4.728158390949089e-05, "loss": 0.0296, "step": 26840 }, { "epoch": 2.11, "learning_rate": 4.7242300439974866e-05, "loss": 0.0343, "step": 26860 }, { "epoch": 2.11, "learning_rate": 4.720301697045883e-05, "loss": 0.0393, "step": 26880 }, { "epoch": 2.11, "learning_rate": 4.716373350094281e-05, "loss": 0.0311, "step": 26900 }, { "epoch": 2.12, "learning_rate": 4.7124450031426775e-05, "loss": 0.0324, "step": 26920 }, { "epoch": 2.12, "learning_rate": 4.708516656191075e-05, "loss": 0.0355, "step": 26940 }, { "epoch": 2.12, "learning_rate": 4.704588309239472e-05, "loss": 0.035, "step": 26960 }, { "epoch": 2.12, "learning_rate": 4.70065996228787e-05, "loss": 0.0427, "step": 26980 }, { "epoch": 2.12, "learning_rate": 4.6967316153362664e-05, "loss": 0.0334, "step": 27000 }, { "epoch": 2.12, "learning_rate": 4.692803268384664e-05, "loss": 0.053, "step": 27020 }, { "epoch": 2.12, "learning_rate": 4.688874921433061e-05, "loss": 0.0367, "step": 27040 }, { "epoch": 2.13, "learning_rate": 4.684946574481459e-05, "loss": 0.033, "step": 27060 }, { "epoch": 2.13, "learning_rate": 4.681018227529856e-05, "loss": 0.0412, "step": 27080 }, { "epoch": 2.13, "learning_rate": 4.677089880578253e-05, "loss": 0.0506, "step": 27100 }, { "epoch": 2.13, "learning_rate": 4.6731615336266496e-05, "loss": 0.0385, "step": 27120 }, { "epoch": 2.13, "learning_rate": 4.669233186675047e-05, "loss": 0.0295, "step": 27140 }, { "epoch": 2.13, "learning_rate": 4.6653048397234444e-05, "loss": 0.031, "step": 27160 }, { "epoch": 2.14, "learning_rate": 4.661376492771842e-05, "loss": 0.0383, "step": 27180 }, { "epoch": 2.14, "learning_rate": 4.657448145820239e-05, "loss": 0.0331, "step": 27200 }, { "epoch": 2.14, "learning_rate": 4.6535197988686366e-05, "loss": 0.0409, "step": 27220 }, { "epoch": 2.14, "learning_rate": 4.6495914519170334e-05, "loss": 0.0293, "step": 27240 }, { "epoch": 2.14, "learning_rate": 4.645663104965431e-05, "loss": 0.038, "step": 27260 }, { "epoch": 2.14, "learning_rate": 4.641734758013828e-05, "loss": 0.0511, "step": 27280 }, { "epoch": 2.14, "learning_rate": 4.6378064110622256e-05, "loss": 0.0241, "step": 27300 }, { "epoch": 2.15, "learning_rate": 4.6338780641106223e-05, "loss": 0.0326, "step": 27320 }, { "epoch": 2.15, "learning_rate": 4.62994971715902e-05, "loss": 0.0288, "step": 27340 }, { "epoch": 2.15, "learning_rate": 4.6260213702074165e-05, "loss": 0.024, "step": 27360 }, { "epoch": 2.15, "learning_rate": 4.622093023255814e-05, "loss": 0.0273, "step": 27380 }, { "epoch": 2.15, "learning_rate": 4.618164676304211e-05, "loss": 0.0295, "step": 27400 }, { "epoch": 2.15, "learning_rate": 4.614236329352609e-05, "loss": 0.0354, "step": 27420 }, { "epoch": 2.16, "learning_rate": 4.610307982401006e-05, "loss": 0.0292, "step": 27440 }, { "epoch": 2.16, "learning_rate": 4.606379635449403e-05, "loss": 0.0362, "step": 27460 }, { "epoch": 2.16, "learning_rate": 4.6024512884978e-05, "loss": 0.0347, "step": 27480 }, { "epoch": 2.16, "learning_rate": 4.598522941546198e-05, "loss": 0.0206, "step": 27500 }, { "epoch": 2.16, "learning_rate": 4.594594594594595e-05, "loss": 0.0234, "step": 27520 }, { "epoch": 2.16, "learning_rate": 4.590666247642992e-05, "loss": 0.0435, "step": 27540 }, { "epoch": 2.17, "learning_rate": 4.586737900691389e-05, "loss": 0.0311, "step": 27560 }, { "epoch": 2.17, "learning_rate": 4.582809553739787e-05, "loss": 0.0267, "step": 27580 }, { "epoch": 2.17, "learning_rate": 4.5788812067881834e-05, "loss": 0.0264, "step": 27600 }, { "epoch": 2.17, "learning_rate": 4.574952859836581e-05, "loss": 0.0439, "step": 27620 }, { "epoch": 2.17, "learning_rate": 4.571024512884978e-05, "loss": 0.0366, "step": 27640 }, { "epoch": 2.17, "learning_rate": 4.567096165933376e-05, "loss": 0.018, "step": 27660 }, { "epoch": 2.17, "learning_rate": 4.563167818981773e-05, "loss": 0.0469, "step": 27680 }, { "epoch": 2.18, "learning_rate": 4.55923947203017e-05, "loss": 0.0429, "step": 27700 }, { "epoch": 2.18, "learning_rate": 4.555311125078567e-05, "loss": 0.0303, "step": 27720 }, { "epoch": 2.18, "learning_rate": 4.551382778126964e-05, "loss": 0.0336, "step": 27740 }, { "epoch": 2.18, "learning_rate": 4.5474544311753614e-05, "loss": 0.0369, "step": 27760 }, { "epoch": 2.18, "learning_rate": 4.543526084223759e-05, "loss": 0.0437, "step": 27780 }, { "epoch": 2.18, "learning_rate": 4.539597737272156e-05, "loss": 0.026, "step": 27800 }, { "epoch": 2.19, "learning_rate": 4.535669390320553e-05, "loss": 0.048, "step": 27820 }, { "epoch": 2.19, "learning_rate": 4.5317410433689503e-05, "loss": 0.0405, "step": 27840 }, { "epoch": 2.19, "learning_rate": 4.527812696417348e-05, "loss": 0.0401, "step": 27860 }, { "epoch": 2.19, "learning_rate": 4.523884349465745e-05, "loss": 0.0256, "step": 27880 }, { "epoch": 2.19, "learning_rate": 4.5199560025141426e-05, "loss": 0.0523, "step": 27900 }, { "epoch": 2.19, "learning_rate": 4.51602765556254e-05, "loss": 0.0251, "step": 27920 }, { "epoch": 2.2, "learning_rate": 4.512099308610937e-05, "loss": 0.0237, "step": 27940 }, { "epoch": 2.2, "learning_rate": 4.5081709616593335e-05, "loss": 0.036, "step": 27960 }, { "epoch": 2.2, "learning_rate": 4.504242614707731e-05, "loss": 0.0279, "step": 27980 }, { "epoch": 2.2, "learning_rate": 4.500314267756128e-05, "loss": 0.026, "step": 28000 }, { "epoch": 2.2, "learning_rate": 4.496385920804526e-05, "loss": 0.0346, "step": 28020 }, { "epoch": 2.2, "learning_rate": 4.492457573852923e-05, "loss": 0.0231, "step": 28040 }, { "epoch": 2.2, "learning_rate": 4.48852922690132e-05, "loss": 0.0345, "step": 28060 }, { "epoch": 2.21, "learning_rate": 4.484600879949717e-05, "loss": 0.0295, "step": 28080 }, { "epoch": 2.21, "learning_rate": 4.480672532998115e-05, "loss": 0.033, "step": 28100 }, { "epoch": 2.21, "learning_rate": 4.476744186046512e-05, "loss": 0.0378, "step": 28120 }, { "epoch": 2.21, "learning_rate": 4.4728158390949095e-05, "loss": 0.0242, "step": 28140 }, { "epoch": 2.21, "learning_rate": 4.468887492143306e-05, "loss": 0.0338, "step": 28160 }, { "epoch": 2.21, "learning_rate": 4.464959145191704e-05, "loss": 0.0492, "step": 28180 }, { "epoch": 2.22, "learning_rate": 4.4610307982401004e-05, "loss": 0.0387, "step": 28200 }, { "epoch": 2.22, "learning_rate": 4.457102451288498e-05, "loss": 0.033, "step": 28220 }, { "epoch": 2.22, "learning_rate": 4.453174104336895e-05, "loss": 0.0354, "step": 28240 }, { "epoch": 2.22, "learning_rate": 4.4492457573852926e-05, "loss": 0.0257, "step": 28260 }, { "epoch": 2.22, "learning_rate": 4.44531741043369e-05, "loss": 0.0369, "step": 28280 }, { "epoch": 2.22, "learning_rate": 4.441389063482087e-05, "loss": 0.0252, "step": 28300 }, { "epoch": 2.23, "learning_rate": 4.437460716530484e-05, "loss": 0.0357, "step": 28320 }, { "epoch": 2.23, "learning_rate": 4.4335323695788816e-05, "loss": 0.0436, "step": 28340 }, { "epoch": 2.23, "learning_rate": 4.429604022627279e-05, "loss": 0.0299, "step": 28360 }, { "epoch": 2.23, "learning_rate": 4.425675675675676e-05, "loss": 0.0404, "step": 28380 }, { "epoch": 2.23, "learning_rate": 4.421747328724073e-05, "loss": 0.0363, "step": 28400 }, { "epoch": 2.23, "learning_rate": 4.41781898177247e-05, "loss": 0.0322, "step": 28420 }, { "epoch": 2.23, "learning_rate": 4.413890634820867e-05, "loss": 0.0328, "step": 28440 }, { "epoch": 2.24, "learning_rate": 4.409962287869265e-05, "loss": 0.0257, "step": 28460 }, { "epoch": 2.24, "learning_rate": 4.406033940917662e-05, "loss": 0.0429, "step": 28480 }, { "epoch": 2.24, "learning_rate": 4.4021055939660596e-05, "loss": 0.0295, "step": 28500 }, { "epoch": 2.24, "learning_rate": 4.398177247014457e-05, "loss": 0.0298, "step": 28520 }, { "epoch": 2.24, "learning_rate": 4.394248900062854e-05, "loss": 0.0389, "step": 28540 }, { "epoch": 2.24, "learning_rate": 4.390320553111251e-05, "loss": 0.0459, "step": 28560 }, { "epoch": 2.25, "learning_rate": 4.386392206159648e-05, "loss": 0.0369, "step": 28580 }, { "epoch": 2.25, "learning_rate": 4.382463859208045e-05, "loss": 0.0388, "step": 28600 }, { "epoch": 2.25, "learning_rate": 4.378535512256443e-05, "loss": 0.0313, "step": 28620 }, { "epoch": 2.25, "learning_rate": 4.37460716530484e-05, "loss": 0.0367, "step": 28640 }, { "epoch": 2.25, "learning_rate": 4.370678818353237e-05, "loss": 0.0418, "step": 28660 }, { "epoch": 2.25, "learning_rate": 4.366750471401634e-05, "loss": 0.0272, "step": 28680 }, { "epoch": 2.25, "learning_rate": 4.362822124450032e-05, "loss": 0.0405, "step": 28700 }, { "epoch": 2.26, "learning_rate": 4.358893777498429e-05, "loss": 0.0259, "step": 28720 }, { "epoch": 2.26, "learning_rate": 4.3549654305468265e-05, "loss": 0.0311, "step": 28740 }, { "epoch": 2.26, "learning_rate": 4.351037083595223e-05, "loss": 0.0364, "step": 28760 }, { "epoch": 2.26, "learning_rate": 4.3471087366436206e-05, "loss": 0.0358, "step": 28780 }, { "epoch": 2.26, "learning_rate": 4.3431803896920174e-05, "loss": 0.0492, "step": 28800 }, { "epoch": 2.26, "learning_rate": 4.339252042740415e-05, "loss": 0.029, "step": 28820 }, { "epoch": 2.27, "learning_rate": 4.335323695788812e-05, "loss": 0.0301, "step": 28840 }, { "epoch": 2.27, "learning_rate": 4.3313953488372096e-05, "loss": 0.0446, "step": 28860 }, { "epoch": 2.27, "learning_rate": 4.327467001885607e-05, "loss": 0.0262, "step": 28880 }, { "epoch": 2.27, "learning_rate": 4.323538654934004e-05, "loss": 0.0356, "step": 28900 }, { "epoch": 2.27, "learning_rate": 4.319610307982401e-05, "loss": 0.0216, "step": 28920 }, { "epoch": 2.27, "learning_rate": 4.3156819610307986e-05, "loss": 0.0323, "step": 28940 }, { "epoch": 2.28, "learning_rate": 4.311753614079196e-05, "loss": 0.0391, "step": 28960 }, { "epoch": 2.28, "learning_rate": 4.3078252671275934e-05, "loss": 0.0404, "step": 28980 }, { "epoch": 2.28, "learning_rate": 4.30389692017599e-05, "loss": 0.04, "step": 29000 }, { "epoch": 2.28, "learning_rate": 4.299968573224387e-05, "loss": 0.0216, "step": 29020 }, { "epoch": 2.28, "learning_rate": 4.296040226272784e-05, "loss": 0.0276, "step": 29040 }, { "epoch": 2.28, "learning_rate": 4.292111879321182e-05, "loss": 0.0413, "step": 29060 }, { "epoch": 2.28, "learning_rate": 4.288183532369579e-05, "loss": 0.025, "step": 29080 }, { "epoch": 2.29, "learning_rate": 4.2842551854179765e-05, "loss": 0.0256, "step": 29100 }, { "epoch": 2.29, "learning_rate": 4.280326838466373e-05, "loss": 0.0305, "step": 29120 }, { "epoch": 2.29, "learning_rate": 4.276398491514771e-05, "loss": 0.0248, "step": 29140 }, { "epoch": 2.29, "learning_rate": 4.272470144563168e-05, "loss": 0.0307, "step": 29160 }, { "epoch": 2.29, "learning_rate": 4.2685417976115655e-05, "loss": 0.0298, "step": 29180 }, { "epoch": 2.29, "learning_rate": 4.264613450659963e-05, "loss": 0.0223, "step": 29200 }, { "epoch": 2.3, "learning_rate": 4.26068510370836e-05, "loss": 0.0252, "step": 29220 }, { "epoch": 2.3, "learning_rate": 4.256756756756757e-05, "loss": 0.0288, "step": 29240 }, { "epoch": 2.3, "learning_rate": 4.252828409805154e-05, "loss": 0.0298, "step": 29260 }, { "epoch": 2.3, "learning_rate": 4.248900062853551e-05, "loss": 0.0423, "step": 29280 }, { "epoch": 2.3, "learning_rate": 4.2449717159019486e-05, "loss": 0.0256, "step": 29300 }, { "epoch": 2.3, "learning_rate": 4.241043368950346e-05, "loss": 0.026, "step": 29320 }, { "epoch": 2.31, "learning_rate": 4.2371150219987435e-05, "loss": 0.0368, "step": 29340 }, { "epoch": 2.31, "learning_rate": 4.23318667504714e-05, "loss": 0.0312, "step": 29360 }, { "epoch": 2.31, "learning_rate": 4.2292583280955376e-05, "loss": 0.0311, "step": 29380 }, { "epoch": 2.31, "learning_rate": 4.225329981143935e-05, "loss": 0.0303, "step": 29400 }, { "epoch": 2.31, "learning_rate": 4.221401634192332e-05, "loss": 0.0382, "step": 29420 }, { "epoch": 2.31, "learning_rate": 4.217473287240729e-05, "loss": 0.0267, "step": 29440 }, { "epoch": 2.31, "learning_rate": 4.2135449402891266e-05, "loss": 0.0327, "step": 29460 }, { "epoch": 2.32, "learning_rate": 4.209616593337524e-05, "loss": 0.0352, "step": 29480 }, { "epoch": 2.32, "learning_rate": 4.205688246385921e-05, "loss": 0.0345, "step": 29500 }, { "epoch": 2.32, "learning_rate": 4.201759899434318e-05, "loss": 0.0287, "step": 29520 }, { "epoch": 2.32, "learning_rate": 4.1978315524827156e-05, "loss": 0.0358, "step": 29540 }, { "epoch": 2.32, "learning_rate": 4.193903205531113e-05, "loss": 0.0385, "step": 29560 }, { "epoch": 2.32, "learning_rate": 4.1899748585795104e-05, "loss": 0.0164, "step": 29580 }, { "epoch": 2.33, "learning_rate": 4.186046511627907e-05, "loss": 0.0389, "step": 29600 }, { "epoch": 2.33, "learning_rate": 4.1821181646763045e-05, "loss": 0.0386, "step": 29620 }, { "epoch": 2.33, "learning_rate": 4.178189817724701e-05, "loss": 0.0402, "step": 29640 }, { "epoch": 2.33, "learning_rate": 4.174261470773099e-05, "loss": 0.0331, "step": 29660 }, { "epoch": 2.33, "learning_rate": 4.170333123821496e-05, "loss": 0.0356, "step": 29680 }, { "epoch": 2.33, "learning_rate": 4.1664047768698935e-05, "loss": 0.0291, "step": 29700 }, { "epoch": 2.34, "learning_rate": 4.16247642991829e-05, "loss": 0.0178, "step": 29720 }, { "epoch": 2.34, "learning_rate": 4.158548082966688e-05, "loss": 0.0348, "step": 29740 }, { "epoch": 2.34, "learning_rate": 4.154619736015085e-05, "loss": 0.0391, "step": 29760 }, { "epoch": 2.34, "learning_rate": 4.1506913890634825e-05, "loss": 0.0348, "step": 29780 }, { "epoch": 2.34, "learning_rate": 4.14676304211188e-05, "loss": 0.0323, "step": 29800 }, { "epoch": 2.34, "learning_rate": 4.142834695160277e-05, "loss": 0.0366, "step": 29820 }, { "epoch": 2.34, "learning_rate": 4.138906348208674e-05, "loss": 0.0394, "step": 29840 }, { "epoch": 2.35, "learning_rate": 4.134978001257071e-05, "loss": 0.0326, "step": 29860 }, { "epoch": 2.35, "learning_rate": 4.131049654305468e-05, "loss": 0.0264, "step": 29880 }, { "epoch": 2.35, "learning_rate": 4.1271213073538656e-05, "loss": 0.0277, "step": 29900 }, { "epoch": 2.35, "learning_rate": 4.123192960402263e-05, "loss": 0.049, "step": 29920 }, { "epoch": 2.35, "learning_rate": 4.1192646134506604e-05, "loss": 0.0341, "step": 29940 }, { "epoch": 2.35, "learning_rate": 4.115336266499057e-05, "loss": 0.0331, "step": 29960 }, { "epoch": 2.36, "learning_rate": 4.1114079195474546e-05, "loss": 0.0187, "step": 29980 }, { "epoch": 2.36, "learning_rate": 4.107479572595852e-05, "loss": 0.0345, "step": 30000 }, { "epoch": 2.36, "eval_loss": 0.25138914585113525, "eval_matthews_correlation": 0.4717224984629874, "eval_runtime": 1011.3645, "eval_samples_per_second": 112.754, "eval_steps_per_second": 14.095, "step": 30000 }, { "epoch": 2.36, "learning_rate": 4.1035512256442494e-05, "loss": 0.0285, "step": 30020 }, { "epoch": 2.36, "learning_rate": 4.099622878692647e-05, "loss": 0.0278, "step": 30040 }, { "epoch": 2.36, "learning_rate": 4.0956945317410436e-05, "loss": 0.0284, "step": 30060 }, { "epoch": 2.36, "learning_rate": 4.09176618478944e-05, "loss": 0.0279, "step": 30080 }, { "epoch": 2.36, "learning_rate": 4.087837837837838e-05, "loss": 0.0374, "step": 30100 }, { "epoch": 2.37, "learning_rate": 4.083909490886235e-05, "loss": 0.0202, "step": 30120 }, { "epoch": 2.37, "learning_rate": 4.0799811439346325e-05, "loss": 0.0229, "step": 30140 }, { "epoch": 2.37, "learning_rate": 4.07605279698303e-05, "loss": 0.0308, "step": 30160 }, { "epoch": 2.37, "learning_rate": 4.0721244500314274e-05, "loss": 0.0352, "step": 30180 }, { "epoch": 2.37, "learning_rate": 4.068196103079824e-05, "loss": 0.0275, "step": 30200 }, { "epoch": 2.37, "learning_rate": 4.0642677561282215e-05, "loss": 0.0323, "step": 30220 }, { "epoch": 2.38, "learning_rate": 4.060339409176619e-05, "loss": 0.027, "step": 30240 }, { "epoch": 2.38, "learning_rate": 4.056411062225016e-05, "loss": 0.0357, "step": 30260 }, { "epoch": 2.38, "learning_rate": 4.052482715273413e-05, "loss": 0.0264, "step": 30280 }, { "epoch": 2.38, "learning_rate": 4.0485543683218105e-05, "loss": 0.0247, "step": 30300 }, { "epoch": 2.38, "learning_rate": 4.044626021370207e-05, "loss": 0.0175, "step": 30320 }, { "epoch": 2.38, "learning_rate": 4.0406976744186046e-05, "loss": 0.0287, "step": 30340 }, { "epoch": 2.39, "learning_rate": 4.036769327467002e-05, "loss": 0.0197, "step": 30360 }, { "epoch": 2.39, "learning_rate": 4.0328409805153995e-05, "loss": 0.029, "step": 30380 }, { "epoch": 2.39, "learning_rate": 4.028912633563797e-05, "loss": 0.0295, "step": 30400 }, { "epoch": 2.39, "learning_rate": 4.0249842866121936e-05, "loss": 0.0268, "step": 30420 }, { "epoch": 2.39, "learning_rate": 4.021055939660591e-05, "loss": 0.0176, "step": 30440 }, { "epoch": 2.39, "learning_rate": 4.017127592708988e-05, "loss": 0.0196, "step": 30460 }, { "epoch": 2.39, "learning_rate": 4.013199245757385e-05, "loss": 0.025, "step": 30480 }, { "epoch": 2.4, "learning_rate": 4.0092708988057826e-05, "loss": 0.0247, "step": 30500 }, { "epoch": 2.4, "learning_rate": 4.00534255185418e-05, "loss": 0.0255, "step": 30520 }, { "epoch": 2.4, "learning_rate": 4.0014142049025774e-05, "loss": 0.0283, "step": 30540 }, { "epoch": 2.4, "learning_rate": 3.997485857950974e-05, "loss": 0.0195, "step": 30560 }, { "epoch": 2.4, "learning_rate": 3.9935575109993716e-05, "loss": 0.0129, "step": 30580 }, { "epoch": 2.4, "learning_rate": 3.989629164047769e-05, "loss": 0.0204, "step": 30600 }, { "epoch": 2.41, "learning_rate": 3.9857008170961664e-05, "loss": 0.03, "step": 30620 }, { "epoch": 2.41, "learning_rate": 3.981772470144564e-05, "loss": 0.02, "step": 30640 }, { "epoch": 2.41, "learning_rate": 3.9778441231929605e-05, "loss": 0.026, "step": 30660 }, { "epoch": 2.41, "learning_rate": 3.973915776241357e-05, "loss": 0.026, "step": 30680 }, { "epoch": 2.41, "learning_rate": 3.969987429289755e-05, "loss": 0.0332, "step": 30700 }, { "epoch": 2.41, "learning_rate": 3.966059082338152e-05, "loss": 0.0261, "step": 30720 }, { "epoch": 2.42, "learning_rate": 3.9621307353865495e-05, "loss": 0.0227, "step": 30740 }, { "epoch": 2.42, "learning_rate": 3.958202388434947e-05, "loss": 0.0269, "step": 30760 }, { "epoch": 2.42, "learning_rate": 3.9542740414833443e-05, "loss": 0.0201, "step": 30780 }, { "epoch": 2.42, "learning_rate": 3.950345694531741e-05, "loss": 0.0301, "step": 30800 }, { "epoch": 2.42, "learning_rate": 3.9464173475801385e-05, "loss": 0.0299, "step": 30820 }, { "epoch": 2.42, "learning_rate": 3.942489000628536e-05, "loss": 0.0261, "step": 30840 }, { "epoch": 2.42, "learning_rate": 3.938560653676933e-05, "loss": 0.0275, "step": 30860 }, { "epoch": 2.43, "learning_rate": 3.93463230672533e-05, "loss": 0.0408, "step": 30880 }, { "epoch": 2.43, "learning_rate": 3.9307039597737275e-05, "loss": 0.0241, "step": 30900 }, { "epoch": 2.43, "learning_rate": 3.926775612822124e-05, "loss": 0.0204, "step": 30920 }, { "epoch": 2.43, "learning_rate": 3.9228472658705216e-05, "loss": 0.0235, "step": 30940 }, { "epoch": 2.43, "learning_rate": 3.918918918918919e-05, "loss": 0.0256, "step": 30960 }, { "epoch": 2.43, "learning_rate": 3.9149905719673164e-05, "loss": 0.0298, "step": 30980 }, { "epoch": 2.44, "learning_rate": 3.911062225015714e-05, "loss": 0.0195, "step": 31000 }, { "epoch": 2.44, "learning_rate": 3.9071338780641106e-05, "loss": 0.0322, "step": 31020 }, { "epoch": 2.44, "learning_rate": 3.903205531112508e-05, "loss": 0.0273, "step": 31040 }, { "epoch": 2.44, "learning_rate": 3.8992771841609054e-05, "loss": 0.0371, "step": 31060 }, { "epoch": 2.44, "learning_rate": 3.895348837209303e-05, "loss": 0.0277, "step": 31080 }, { "epoch": 2.44, "learning_rate": 3.8914204902576996e-05, "loss": 0.0303, "step": 31100 }, { "epoch": 2.45, "learning_rate": 3.887492143306097e-05, "loss": 0.0264, "step": 31120 }, { "epoch": 2.45, "learning_rate": 3.8835637963544944e-05, "loss": 0.0203, "step": 31140 }, { "epoch": 2.45, "learning_rate": 3.879635449402891e-05, "loss": 0.0261, "step": 31160 }, { "epoch": 2.45, "learning_rate": 3.8757071024512885e-05, "loss": 0.0264, "step": 31180 }, { "epoch": 2.45, "learning_rate": 3.871778755499686e-05, "loss": 0.0256, "step": 31200 }, { "epoch": 2.45, "learning_rate": 3.8678504085480834e-05, "loss": 0.0227, "step": 31220 }, { "epoch": 2.45, "learning_rate": 3.863922061596481e-05, "loss": 0.0311, "step": 31240 }, { "epoch": 2.46, "learning_rate": 3.8599937146448775e-05, "loss": 0.0349, "step": 31260 }, { "epoch": 2.46, "learning_rate": 3.856065367693275e-05, "loss": 0.0278, "step": 31280 }, { "epoch": 2.46, "learning_rate": 3.852137020741672e-05, "loss": 0.0307, "step": 31300 }, { "epoch": 2.46, "learning_rate": 3.848208673790069e-05, "loss": 0.0301, "step": 31320 }, { "epoch": 2.46, "learning_rate": 3.8442803268384665e-05, "loss": 0.035, "step": 31340 }, { "epoch": 2.46, "learning_rate": 3.840351979886864e-05, "loss": 0.0322, "step": 31360 }, { "epoch": 2.47, "learning_rate": 3.8364236329352606e-05, "loss": 0.0319, "step": 31380 }, { "epoch": 2.47, "learning_rate": 3.832495285983658e-05, "loss": 0.0225, "step": 31400 }, { "epoch": 2.47, "learning_rate": 3.8285669390320555e-05, "loss": 0.0374, "step": 31420 }, { "epoch": 2.47, "learning_rate": 3.824638592080453e-05, "loss": 0.0232, "step": 31440 }, { "epoch": 2.47, "learning_rate": 3.82071024512885e-05, "loss": 0.0275, "step": 31460 }, { "epoch": 2.47, "learning_rate": 3.816781898177248e-05, "loss": 0.0288, "step": 31480 }, { "epoch": 2.47, "learning_rate": 3.8128535512256444e-05, "loss": 0.0331, "step": 31500 }, { "epoch": 2.48, "learning_rate": 3.808925204274041e-05, "loss": 0.0263, "step": 31520 }, { "epoch": 2.48, "learning_rate": 3.8049968573224386e-05, "loss": 0.0236, "step": 31540 }, { "epoch": 2.48, "learning_rate": 3.801068510370836e-05, "loss": 0.0264, "step": 31560 }, { "epoch": 2.48, "learning_rate": 3.7971401634192334e-05, "loss": 0.026, "step": 31580 }, { "epoch": 2.48, "learning_rate": 3.793211816467631e-05, "loss": 0.0224, "step": 31600 }, { "epoch": 2.48, "learning_rate": 3.7892834695160276e-05, "loss": 0.0224, "step": 31620 }, { "epoch": 2.49, "learning_rate": 3.785355122564425e-05, "loss": 0.0163, "step": 31640 }, { "epoch": 2.49, "learning_rate": 3.7814267756128224e-05, "loss": 0.0199, "step": 31660 }, { "epoch": 2.49, "learning_rate": 3.77749842866122e-05, "loss": 0.0255, "step": 31680 }, { "epoch": 2.49, "learning_rate": 3.773570081709617e-05, "loss": 0.0287, "step": 31700 }, { "epoch": 2.49, "learning_rate": 3.769641734758014e-05, "loss": 0.039, "step": 31720 }, { "epoch": 2.49, "learning_rate": 3.765713387806411e-05, "loss": 0.0363, "step": 31740 }, { "epoch": 2.5, "learning_rate": 3.761785040854808e-05, "loss": 0.0224, "step": 31760 }, { "epoch": 2.5, "learning_rate": 3.7578566939032055e-05, "loss": 0.0289, "step": 31780 }, { "epoch": 2.5, "learning_rate": 3.753928346951603e-05, "loss": 0.0212, "step": 31800 }, { "epoch": 2.5, "learning_rate": 3.7500000000000003e-05, "loss": 0.0257, "step": 31820 }, { "epoch": 2.5, "learning_rate": 3.746071653048398e-05, "loss": 0.0212, "step": 31840 }, { "epoch": 2.5, "learning_rate": 3.7421433060967945e-05, "loss": 0.0124, "step": 31860 }, { "epoch": 2.5, "learning_rate": 3.738214959145192e-05, "loss": 0.0299, "step": 31880 }, { "epoch": 2.51, "learning_rate": 3.734286612193589e-05, "loss": 0.0337, "step": 31900 }, { "epoch": 2.51, "learning_rate": 3.730358265241987e-05, "loss": 0.0388, "step": 31920 }, { "epoch": 2.51, "learning_rate": 3.7264299182903835e-05, "loss": 0.0165, "step": 31940 }, { "epoch": 2.51, "learning_rate": 3.722501571338781e-05, "loss": 0.0286, "step": 31960 }, { "epoch": 2.51, "learning_rate": 3.7185732243871776e-05, "loss": 0.021, "step": 31980 }, { "epoch": 2.51, "learning_rate": 3.714644877435575e-05, "loss": 0.0419, "step": 32000 }, { "epoch": 2.52, "learning_rate": 3.7107165304839724e-05, "loss": 0.0231, "step": 32020 }, { "epoch": 2.52, "learning_rate": 3.70678818353237e-05, "loss": 0.0286, "step": 32040 }, { "epoch": 2.52, "learning_rate": 3.702859836580767e-05, "loss": 0.0298, "step": 32060 }, { "epoch": 2.52, "learning_rate": 3.698931489629165e-05, "loss": 0.0248, "step": 32080 }, { "epoch": 2.52, "learning_rate": 3.6950031426775614e-05, "loss": 0.0258, "step": 32100 }, { "epoch": 2.52, "learning_rate": 3.691074795725959e-05, "loss": 0.0417, "step": 32120 }, { "epoch": 2.53, "learning_rate": 3.6871464487743556e-05, "loss": 0.0231, "step": 32140 }, { "epoch": 2.53, "learning_rate": 3.683218101822753e-05, "loss": 0.0374, "step": 32160 }, { "epoch": 2.53, "learning_rate": 3.6792897548711504e-05, "loss": 0.0365, "step": 32180 }, { "epoch": 2.53, "learning_rate": 3.675361407919548e-05, "loss": 0.0345, "step": 32200 }, { "epoch": 2.53, "learning_rate": 3.6714330609679445e-05, "loss": 0.03, "step": 32220 }, { "epoch": 2.53, "learning_rate": 3.667504714016342e-05, "loss": 0.0403, "step": 32240 }, { "epoch": 2.53, "learning_rate": 3.6635763670647394e-05, "loss": 0.0242, "step": 32260 }, { "epoch": 2.54, "learning_rate": 3.659648020113137e-05, "loss": 0.0239, "step": 32280 }, { "epoch": 2.54, "learning_rate": 3.655719673161534e-05, "loss": 0.0287, "step": 32300 }, { "epoch": 2.54, "learning_rate": 3.651791326209931e-05, "loss": 0.0277, "step": 32320 }, { "epoch": 2.54, "learning_rate": 3.6478629792583284e-05, "loss": 0.0323, "step": 32340 }, { "epoch": 2.54, "learning_rate": 3.643934632306725e-05, "loss": 0.0233, "step": 32360 }, { "epoch": 2.54, "learning_rate": 3.6400062853551225e-05, "loss": 0.0138, "step": 32380 }, { "epoch": 2.55, "learning_rate": 3.63607793840352e-05, "loss": 0.0372, "step": 32400 }, { "epoch": 2.55, "learning_rate": 3.632149591451917e-05, "loss": 0.0245, "step": 32420 }, { "epoch": 2.55, "learning_rate": 3.628221244500315e-05, "loss": 0.0321, "step": 32440 }, { "epoch": 2.55, "learning_rate": 3.6242928975487115e-05, "loss": 0.0376, "step": 32460 }, { "epoch": 2.55, "learning_rate": 3.620364550597109e-05, "loss": 0.028, "step": 32480 }, { "epoch": 2.55, "learning_rate": 3.616436203645506e-05, "loss": 0.0522, "step": 32500 }, { "epoch": 2.55, "learning_rate": 3.612507856693904e-05, "loss": 0.0428, "step": 32520 }, { "epoch": 2.56, "learning_rate": 3.608579509742301e-05, "loss": 0.0256, "step": 32540 }, { "epoch": 2.56, "learning_rate": 3.604651162790698e-05, "loss": 0.0326, "step": 32560 }, { "epoch": 2.56, "learning_rate": 3.6007228158390946e-05, "loss": 0.0279, "step": 32580 }, { "epoch": 2.56, "learning_rate": 3.596794468887492e-05, "loss": 0.0265, "step": 32600 }, { "epoch": 2.56, "learning_rate": 3.5928661219358894e-05, "loss": 0.0345, "step": 32620 }, { "epoch": 2.56, "learning_rate": 3.588937774984287e-05, "loss": 0.0348, "step": 32640 }, { "epoch": 2.57, "learning_rate": 3.585009428032684e-05, "loss": 0.0347, "step": 32660 }, { "epoch": 2.57, "learning_rate": 3.581081081081081e-05, "loss": 0.036, "step": 32680 }, { "epoch": 2.57, "learning_rate": 3.5771527341294784e-05, "loss": 0.03, "step": 32700 }, { "epoch": 2.57, "learning_rate": 3.573224387177876e-05, "loss": 0.017, "step": 32720 }, { "epoch": 2.57, "learning_rate": 3.569296040226273e-05, "loss": 0.0228, "step": 32740 }, { "epoch": 2.57, "learning_rate": 3.5653676932746706e-05, "loss": 0.0367, "step": 32760 }, { "epoch": 2.58, "learning_rate": 3.5614393463230674e-05, "loss": 0.0265, "step": 32780 }, { "epoch": 2.58, "learning_rate": 3.557510999371465e-05, "loss": 0.0299, "step": 32800 }, { "epoch": 2.58, "learning_rate": 3.5535826524198615e-05, "loss": 0.0226, "step": 32820 }, { "epoch": 2.58, "learning_rate": 3.549654305468259e-05, "loss": 0.0255, "step": 32840 }, { "epoch": 2.58, "learning_rate": 3.5457259585166564e-05, "loss": 0.0235, "step": 32860 }, { "epoch": 2.58, "learning_rate": 3.541797611565054e-05, "loss": 0.0267, "step": 32880 }, { "epoch": 2.58, "learning_rate": 3.537869264613451e-05, "loss": 0.0289, "step": 32900 }, { "epoch": 2.59, "learning_rate": 3.533940917661848e-05, "loss": 0.0151, "step": 32920 }, { "epoch": 2.59, "learning_rate": 3.530012570710245e-05, "loss": 0.0246, "step": 32940 }, { "epoch": 2.59, "learning_rate": 3.526084223758643e-05, "loss": 0.0323, "step": 32960 }, { "epoch": 2.59, "learning_rate": 3.5221558768070395e-05, "loss": 0.0256, "step": 32980 }, { "epoch": 2.59, "learning_rate": 3.518227529855437e-05, "loss": 0.0229, "step": 33000 }, { "epoch": 2.59, "learning_rate": 3.514299182903834e-05, "loss": 0.0342, "step": 33020 }, { "epoch": 2.6, "learning_rate": 3.510370835952231e-05, "loss": 0.025, "step": 33040 }, { "epoch": 2.6, "learning_rate": 3.5064424890006285e-05, "loss": 0.0262, "step": 33060 }, { "epoch": 2.6, "learning_rate": 3.502514142049026e-05, "loss": 0.0233, "step": 33080 }, { "epoch": 2.6, "learning_rate": 3.498585795097423e-05, "loss": 0.0179, "step": 33100 }, { "epoch": 2.6, "learning_rate": 3.494657448145821e-05, "loss": 0.022, "step": 33120 }, { "epoch": 2.6, "learning_rate": 3.490729101194218e-05, "loss": 0.0168, "step": 33140 }, { "epoch": 2.61, "learning_rate": 3.486800754242615e-05, "loss": 0.0274, "step": 33160 }, { "epoch": 2.61, "learning_rate": 3.482872407291012e-05, "loss": 0.0259, "step": 33180 }, { "epoch": 2.61, "learning_rate": 3.478944060339409e-05, "loss": 0.0247, "step": 33200 }, { "epoch": 2.61, "learning_rate": 3.4750157133878064e-05, "loss": 0.0356, "step": 33220 }, { "epoch": 2.61, "learning_rate": 3.471087366436204e-05, "loss": 0.0344, "step": 33240 }, { "epoch": 2.61, "learning_rate": 3.467159019484601e-05, "loss": 0.0299, "step": 33260 }, { "epoch": 2.61, "learning_rate": 3.463230672532998e-05, "loss": 0.0146, "step": 33280 }, { "epoch": 2.62, "learning_rate": 3.4593023255813954e-05, "loss": 0.0257, "step": 33300 }, { "epoch": 2.62, "learning_rate": 3.455373978629793e-05, "loss": 0.0156, "step": 33320 }, { "epoch": 2.62, "learning_rate": 3.45144563167819e-05, "loss": 0.0356, "step": 33340 }, { "epoch": 2.62, "learning_rate": 3.4475172847265876e-05, "loss": 0.0275, "step": 33360 }, { "epoch": 2.62, "learning_rate": 3.443588937774985e-05, "loss": 0.0216, "step": 33380 }, { "epoch": 2.62, "learning_rate": 3.439660590823382e-05, "loss": 0.0283, "step": 33400 }, { "epoch": 2.63, "learning_rate": 3.4357322438717785e-05, "loss": 0.0166, "step": 33420 }, { "epoch": 2.63, "learning_rate": 3.431803896920176e-05, "loss": 0.0401, "step": 33440 }, { "epoch": 2.63, "learning_rate": 3.427875549968573e-05, "loss": 0.0199, "step": 33460 }, { "epoch": 2.63, "learning_rate": 3.423947203016971e-05, "loss": 0.0263, "step": 33480 }, { "epoch": 2.63, "learning_rate": 3.420018856065368e-05, "loss": 0.04, "step": 33500 }, { "epoch": 2.63, "learning_rate": 3.416090509113765e-05, "loss": 0.0304, "step": 33520 }, { "epoch": 2.64, "learning_rate": 3.412162162162162e-05, "loss": 0.0265, "step": 33540 }, { "epoch": 2.64, "learning_rate": 3.40823381521056e-05, "loss": 0.0266, "step": 33560 }, { "epoch": 2.64, "learning_rate": 3.404305468258957e-05, "loss": 0.0327, "step": 33580 }, { "epoch": 2.64, "learning_rate": 3.400377121307354e-05, "loss": 0.0305, "step": 33600 }, { "epoch": 2.64, "learning_rate": 3.396448774355751e-05, "loss": 0.0149, "step": 33620 }, { "epoch": 2.64, "learning_rate": 3.392520427404148e-05, "loss": 0.0307, "step": 33640 }, { "epoch": 2.64, "learning_rate": 3.3885920804525454e-05, "loss": 0.0231, "step": 33660 }, { "epoch": 2.65, "learning_rate": 3.384663733500943e-05, "loss": 0.0424, "step": 33680 }, { "epoch": 2.65, "learning_rate": 3.38073538654934e-05, "loss": 0.0267, "step": 33700 }, { "epoch": 2.65, "learning_rate": 3.376807039597738e-05, "loss": 0.0214, "step": 33720 }, { "epoch": 2.65, "learning_rate": 3.372878692646135e-05, "loss": 0.034, "step": 33740 }, { "epoch": 2.65, "learning_rate": 3.368950345694532e-05, "loss": 0.021, "step": 33760 }, { "epoch": 2.65, "learning_rate": 3.365021998742929e-05, "loss": 0.0164, "step": 33780 }, { "epoch": 2.66, "learning_rate": 3.3610936517913266e-05, "loss": 0.0305, "step": 33800 }, { "epoch": 2.66, "learning_rate": 3.3571653048397234e-05, "loss": 0.0367, "step": 33820 }, { "epoch": 2.66, "learning_rate": 3.353236957888121e-05, "loss": 0.0367, "step": 33840 }, { "epoch": 2.66, "learning_rate": 3.349308610936518e-05, "loss": 0.0265, "step": 33860 }, { "epoch": 2.66, "learning_rate": 3.345380263984915e-05, "loss": 0.032, "step": 33880 }, { "epoch": 2.66, "learning_rate": 3.3414519170333124e-05, "loss": 0.0162, "step": 33900 }, { "epoch": 2.66, "learning_rate": 3.33752357008171e-05, "loss": 0.0375, "step": 33920 }, { "epoch": 2.67, "learning_rate": 3.333595223130107e-05, "loss": 0.0363, "step": 33940 }, { "epoch": 2.67, "learning_rate": 3.3296668761785046e-05, "loss": 0.0274, "step": 33960 }, { "epoch": 2.67, "learning_rate": 3.325738529226901e-05, "loss": 0.041, "step": 33980 }, { "epoch": 2.67, "learning_rate": 3.321810182275299e-05, "loss": 0.0386, "step": 34000 }, { "epoch": 2.67, "learning_rate": 3.3178818353236955e-05, "loss": 0.0207, "step": 34020 }, { "epoch": 2.67, "learning_rate": 3.313953488372093e-05, "loss": 0.0161, "step": 34040 }, { "epoch": 2.68, "learning_rate": 3.31002514142049e-05, "loss": 0.0265, "step": 34060 }, { "epoch": 2.68, "learning_rate": 3.306096794468888e-05, "loss": 0.0248, "step": 34080 }, { "epoch": 2.68, "learning_rate": 3.302168447517285e-05, "loss": 0.0244, "step": 34100 }, { "epoch": 2.68, "learning_rate": 3.298240100565682e-05, "loss": 0.0254, "step": 34120 }, { "epoch": 2.68, "learning_rate": 3.294311753614079e-05, "loss": 0.0248, "step": 34140 }, { "epoch": 2.68, "learning_rate": 3.290383406662477e-05, "loss": 0.0172, "step": 34160 }, { "epoch": 2.69, "learning_rate": 3.286455059710874e-05, "loss": 0.0209, "step": 34180 }, { "epoch": 2.69, "learning_rate": 3.2825267127592715e-05, "loss": 0.0206, "step": 34200 }, { "epoch": 2.69, "learning_rate": 3.278598365807668e-05, "loss": 0.0406, "step": 34220 }, { "epoch": 2.69, "learning_rate": 3.274670018856065e-05, "loss": 0.021, "step": 34240 }, { "epoch": 2.69, "learning_rate": 3.2707416719044624e-05, "loss": 0.0241, "step": 34260 }, { "epoch": 2.69, "learning_rate": 3.26681332495286e-05, "loss": 0.0283, "step": 34280 }, { "epoch": 2.69, "learning_rate": 3.262884978001257e-05, "loss": 0.0216, "step": 34300 }, { "epoch": 2.7, "learning_rate": 3.2589566310496546e-05, "loss": 0.0252, "step": 34320 }, { "epoch": 2.7, "learning_rate": 3.2550282840980514e-05, "loss": 0.0183, "step": 34340 }, { "epoch": 2.7, "learning_rate": 3.251099937146449e-05, "loss": 0.0456, "step": 34360 }, { "epoch": 2.7, "learning_rate": 3.247171590194846e-05, "loss": 0.0332, "step": 34380 }, { "epoch": 2.7, "learning_rate": 3.2432432432432436e-05, "loss": 0.0266, "step": 34400 }, { "epoch": 2.7, "learning_rate": 3.239314896291641e-05, "loss": 0.0181, "step": 34420 }, { "epoch": 2.71, "learning_rate": 3.235386549340038e-05, "loss": 0.0291, "step": 34440 }, { "epoch": 2.71, "learning_rate": 3.231458202388435e-05, "loss": 0.0203, "step": 34460 }, { "epoch": 2.71, "learning_rate": 3.227529855436832e-05, "loss": 0.0431, "step": 34480 }, { "epoch": 2.71, "learning_rate": 3.223601508485229e-05, "loss": 0.0266, "step": 34500 }, { "epoch": 2.71, "learning_rate": 3.219673161533627e-05, "loss": 0.0262, "step": 34520 }, { "epoch": 2.71, "learning_rate": 3.215744814582024e-05, "loss": 0.0306, "step": 34540 }, { "epoch": 2.72, "learning_rate": 3.2118164676304216e-05, "loss": 0.0238, "step": 34560 }, { "epoch": 2.72, "learning_rate": 3.207888120678818e-05, "loss": 0.0384, "step": 34580 }, { "epoch": 2.72, "learning_rate": 3.203959773727216e-05, "loss": 0.026, "step": 34600 }, { "epoch": 2.72, "learning_rate": 3.200031426775613e-05, "loss": 0.0239, "step": 34620 }, { "epoch": 2.72, "learning_rate": 3.1961030798240105e-05, "loss": 0.034, "step": 34640 }, { "epoch": 2.72, "learning_rate": 3.192174732872407e-05, "loss": 0.0265, "step": 34660 }, { "epoch": 2.72, "learning_rate": 3.188246385920805e-05, "loss": 0.0281, "step": 34680 }, { "epoch": 2.73, "learning_rate": 3.184318038969202e-05, "loss": 0.0296, "step": 34700 }, { "epoch": 2.73, "learning_rate": 3.180389692017599e-05, "loss": 0.03, "step": 34720 }, { "epoch": 2.73, "learning_rate": 3.176461345065996e-05, "loss": 0.0513, "step": 34740 }, { "epoch": 2.73, "learning_rate": 3.172532998114394e-05, "loss": 0.0259, "step": 34760 }, { "epoch": 2.73, "learning_rate": 3.168604651162791e-05, "loss": 0.0335, "step": 34780 }, { "epoch": 2.73, "learning_rate": 3.1646763042111885e-05, "loss": 0.0208, "step": 34800 }, { "epoch": 2.74, "learning_rate": 3.160747957259585e-05, "loss": 0.0341, "step": 34820 }, { "epoch": 2.74, "learning_rate": 3.1568196103079826e-05, "loss": 0.0297, "step": 34840 }, { "epoch": 2.74, "learning_rate": 3.1528912633563794e-05, "loss": 0.028, "step": 34860 }, { "epoch": 2.74, "learning_rate": 3.148962916404777e-05, "loss": 0.0273, "step": 34880 }, { "epoch": 2.74, "learning_rate": 3.145034569453174e-05, "loss": 0.0259, "step": 34900 }, { "epoch": 2.74, "learning_rate": 3.1411062225015716e-05, "loss": 0.0215, "step": 34920 }, { "epoch": 2.75, "learning_rate": 3.1371778755499684e-05, "loss": 0.013, "step": 34940 }, { "epoch": 2.75, "learning_rate": 3.133249528598366e-05, "loss": 0.0179, "step": 34960 }, { "epoch": 2.75, "learning_rate": 3.129321181646763e-05, "loss": 0.0226, "step": 34980 }, { "epoch": 2.75, "learning_rate": 3.1253928346951606e-05, "loss": 0.025, "step": 35000 }, { "epoch": 2.75, "learning_rate": 3.121464487743558e-05, "loss": 0.0119, "step": 35020 }, { "epoch": 2.75, "learning_rate": 3.1175361407919554e-05, "loss": 0.0281, "step": 35040 }, { "epoch": 2.75, "learning_rate": 3.113607793840352e-05, "loss": 0.0247, "step": 35060 }, { "epoch": 2.76, "learning_rate": 3.109679446888749e-05, "loss": 0.0152, "step": 35080 }, { "epoch": 2.76, "learning_rate": 3.105751099937146e-05, "loss": 0.0308, "step": 35100 }, { "epoch": 2.76, "learning_rate": 3.101822752985544e-05, "loss": 0.016, "step": 35120 }, { "epoch": 2.76, "learning_rate": 3.097894406033941e-05, "loss": 0.0327, "step": 35140 }, { "epoch": 2.76, "learning_rate": 3.0939660590823385e-05, "loss": 0.0252, "step": 35160 }, { "epoch": 2.76, "learning_rate": 3.090037712130735e-05, "loss": 0.0291, "step": 35180 }, { "epoch": 2.77, "learning_rate": 3.086109365179133e-05, "loss": 0.0443, "step": 35200 }, { "epoch": 2.77, "learning_rate": 3.08218101822753e-05, "loss": 0.0281, "step": 35220 }, { "epoch": 2.77, "learning_rate": 3.0782526712759275e-05, "loss": 0.0362, "step": 35240 }, { "epoch": 2.77, "learning_rate": 3.074324324324325e-05, "loss": 0.0272, "step": 35260 }, { "epoch": 2.77, "learning_rate": 3.070395977372722e-05, "loss": 0.0304, "step": 35280 }, { "epoch": 2.77, "learning_rate": 3.0664676304211184e-05, "loss": 0.0407, "step": 35300 }, { "epoch": 2.77, "learning_rate": 3.062539283469516e-05, "loss": 0.0262, "step": 35320 }, { "epoch": 2.78, "learning_rate": 3.058610936517913e-05, "loss": 0.0307, "step": 35340 }, { "epoch": 2.78, "learning_rate": 3.0546825895663106e-05, "loss": 0.0339, "step": 35360 }, { "epoch": 2.78, "learning_rate": 3.0507542426147077e-05, "loss": 0.0239, "step": 35380 }, { "epoch": 2.78, "learning_rate": 3.046825895663105e-05, "loss": 0.0219, "step": 35400 }, { "epoch": 2.78, "learning_rate": 3.0428975487115025e-05, "loss": 0.0235, "step": 35420 }, { "epoch": 2.78, "learning_rate": 3.0389692017598996e-05, "loss": 0.0334, "step": 35440 }, { "epoch": 2.79, "learning_rate": 3.035040854808297e-05, "loss": 0.0287, "step": 35460 }, { "epoch": 2.79, "learning_rate": 3.0311125078566944e-05, "loss": 0.0195, "step": 35480 }, { "epoch": 2.79, "learning_rate": 3.0271841609050912e-05, "loss": 0.0196, "step": 35500 }, { "epoch": 2.79, "learning_rate": 3.0232558139534883e-05, "loss": 0.0155, "step": 35520 }, { "epoch": 2.79, "learning_rate": 3.0193274670018857e-05, "loss": 0.0189, "step": 35540 }, { "epoch": 2.79, "learning_rate": 3.0153991200502827e-05, "loss": 0.0209, "step": 35560 }, { "epoch": 2.8, "learning_rate": 3.01147077309868e-05, "loss": 0.0323, "step": 35580 }, { "epoch": 2.8, "learning_rate": 3.0075424261470776e-05, "loss": 0.013, "step": 35600 }, { "epoch": 2.8, "learning_rate": 3.0036140791954746e-05, "loss": 0.0306, "step": 35620 }, { "epoch": 2.8, "learning_rate": 2.999685732243872e-05, "loss": 0.045, "step": 35640 }, { "epoch": 2.8, "learning_rate": 2.9957573852922695e-05, "loss": 0.0215, "step": 35660 }, { "epoch": 2.8, "learning_rate": 2.9918290383406665e-05, "loss": 0.0304, "step": 35680 }, { "epoch": 2.8, "learning_rate": 2.9879006913890633e-05, "loss": 0.0271, "step": 35700 }, { "epoch": 2.81, "learning_rate": 2.9839723444374607e-05, "loss": 0.0197, "step": 35720 }, { "epoch": 2.81, "learning_rate": 2.980043997485858e-05, "loss": 0.0146, "step": 35740 }, { "epoch": 2.81, "learning_rate": 2.9761156505342552e-05, "loss": 0.0314, "step": 35760 }, { "epoch": 2.81, "learning_rate": 2.9721873035826526e-05, "loss": 0.0142, "step": 35780 }, { "epoch": 2.81, "learning_rate": 2.9682589566310497e-05, "loss": 0.0305, "step": 35800 }, { "epoch": 2.81, "learning_rate": 2.964330609679447e-05, "loss": 0.0285, "step": 35820 }, { "epoch": 2.82, "learning_rate": 2.9604022627278445e-05, "loss": 0.0167, "step": 35840 }, { "epoch": 2.82, "learning_rate": 2.9564739157762416e-05, "loss": 0.0333, "step": 35860 }, { "epoch": 2.82, "learning_rate": 2.952545568824639e-05, "loss": 0.0232, "step": 35880 }, { "epoch": 2.82, "learning_rate": 2.9486172218730364e-05, "loss": 0.0295, "step": 35900 }, { "epoch": 2.82, "learning_rate": 2.944688874921433e-05, "loss": 0.027, "step": 35920 }, { "epoch": 2.82, "learning_rate": 2.9407605279698302e-05, "loss": 0.0223, "step": 35940 }, { "epoch": 2.83, "learning_rate": 2.9368321810182276e-05, "loss": 0.0475, "step": 35960 }, { "epoch": 2.83, "learning_rate": 2.9329038340666247e-05, "loss": 0.0204, "step": 35980 }, { "epoch": 2.83, "learning_rate": 2.928975487115022e-05, "loss": 0.0254, "step": 36000 }, { "epoch": 2.83, "learning_rate": 2.9250471401634195e-05, "loss": 0.0357, "step": 36020 }, { "epoch": 2.83, "learning_rate": 2.9211187932118166e-05, "loss": 0.0237, "step": 36040 }, { "epoch": 2.83, "learning_rate": 2.917190446260214e-05, "loss": 0.026, "step": 36060 }, { "epoch": 2.83, "learning_rate": 2.9132620993086114e-05, "loss": 0.0267, "step": 36080 }, { "epoch": 2.84, "learning_rate": 2.9093337523570085e-05, "loss": 0.0262, "step": 36100 }, { "epoch": 2.84, "learning_rate": 2.9054054054054052e-05, "loss": 0.02, "step": 36120 }, { "epoch": 2.84, "learning_rate": 2.9014770584538026e-05, "loss": 0.0233, "step": 36140 }, { "epoch": 2.84, "learning_rate": 2.8975487115021997e-05, "loss": 0.0235, "step": 36160 }, { "epoch": 2.84, "learning_rate": 2.893620364550597e-05, "loss": 0.0173, "step": 36180 }, { "epoch": 2.84, "learning_rate": 2.8896920175989945e-05, "loss": 0.0366, "step": 36200 }, { "epoch": 2.85, "learning_rate": 2.8857636706473916e-05, "loss": 0.0249, "step": 36220 }, { "epoch": 2.85, "learning_rate": 2.881835323695789e-05, "loss": 0.0338, "step": 36240 }, { "epoch": 2.85, "learning_rate": 2.8779069767441864e-05, "loss": 0.0374, "step": 36260 }, { "epoch": 2.85, "learning_rate": 2.8739786297925835e-05, "loss": 0.0237, "step": 36280 }, { "epoch": 2.85, "learning_rate": 2.870050282840981e-05, "loss": 0.0296, "step": 36300 }, { "epoch": 2.85, "learning_rate": 2.866121935889378e-05, "loss": 0.0175, "step": 36320 }, { "epoch": 2.86, "learning_rate": 2.8621935889377747e-05, "loss": 0.0157, "step": 36340 }, { "epoch": 2.86, "learning_rate": 2.858265241986172e-05, "loss": 0.0234, "step": 36360 }, { "epoch": 2.86, "learning_rate": 2.8543368950345696e-05, "loss": 0.0296, "step": 36380 }, { "epoch": 2.86, "learning_rate": 2.8504085480829666e-05, "loss": 0.0266, "step": 36400 }, { "epoch": 2.86, "learning_rate": 2.846480201131364e-05, "loss": 0.0281, "step": 36420 }, { "epoch": 2.86, "learning_rate": 2.8425518541797615e-05, "loss": 0.0186, "step": 36440 }, { "epoch": 2.86, "learning_rate": 2.8386235072281585e-05, "loss": 0.0242, "step": 36460 }, { "epoch": 2.87, "learning_rate": 2.834695160276556e-05, "loss": 0.0298, "step": 36480 }, { "epoch": 2.87, "learning_rate": 2.830766813324953e-05, "loss": 0.0238, "step": 36500 }, { "epoch": 2.87, "learning_rate": 2.8268384663733504e-05, "loss": 0.0384, "step": 36520 }, { "epoch": 2.87, "learning_rate": 2.8229101194217472e-05, "loss": 0.0207, "step": 36540 }, { "epoch": 2.87, "learning_rate": 2.8189817724701446e-05, "loss": 0.0254, "step": 36560 }, { "epoch": 2.87, "learning_rate": 2.8150534255185417e-05, "loss": 0.034, "step": 36580 }, { "epoch": 2.88, "learning_rate": 2.811125078566939e-05, "loss": 0.0208, "step": 36600 }, { "epoch": 2.88, "learning_rate": 2.8071967316153365e-05, "loss": 0.0331, "step": 36620 }, { "epoch": 2.88, "learning_rate": 2.8032683846637336e-05, "loss": 0.0218, "step": 36640 }, { "epoch": 2.88, "learning_rate": 2.799340037712131e-05, "loss": 0.0191, "step": 36660 }, { "epoch": 2.88, "learning_rate": 2.795411690760528e-05, "loss": 0.0271, "step": 36680 }, { "epoch": 2.88, "learning_rate": 2.7914833438089255e-05, "loss": 0.0229, "step": 36700 }, { "epoch": 2.88, "learning_rate": 2.787554996857323e-05, "loss": 0.0325, "step": 36720 }, { "epoch": 2.89, "learning_rate": 2.7836266499057196e-05, "loss": 0.0265, "step": 36740 }, { "epoch": 2.89, "learning_rate": 2.7796983029541167e-05, "loss": 0.0268, "step": 36760 }, { "epoch": 2.89, "learning_rate": 2.775769956002514e-05, "loss": 0.0246, "step": 36780 }, { "epoch": 2.89, "learning_rate": 2.7718416090509115e-05, "loss": 0.017, "step": 36800 }, { "epoch": 2.89, "learning_rate": 2.7679132620993086e-05, "loss": 0.0242, "step": 36820 }, { "epoch": 2.89, "learning_rate": 2.763984915147706e-05, "loss": 0.0236, "step": 36840 }, { "epoch": 2.9, "learning_rate": 2.760056568196103e-05, "loss": 0.0208, "step": 36860 }, { "epoch": 2.9, "learning_rate": 2.7561282212445005e-05, "loss": 0.0412, "step": 36880 }, { "epoch": 2.9, "learning_rate": 2.752199874292898e-05, "loss": 0.0337, "step": 36900 }, { "epoch": 2.9, "learning_rate": 2.748271527341295e-05, "loss": 0.0343, "step": 36920 }, { "epoch": 2.9, "learning_rate": 2.7443431803896924e-05, "loss": 0.0319, "step": 36940 }, { "epoch": 2.9, "learning_rate": 2.740414833438089e-05, "loss": 0.0178, "step": 36960 }, { "epoch": 2.91, "learning_rate": 2.7364864864864865e-05, "loss": 0.0307, "step": 36980 }, { "epoch": 2.91, "learning_rate": 2.7325581395348836e-05, "loss": 0.0313, "step": 37000 }, { "epoch": 2.91, "learning_rate": 2.728629792583281e-05, "loss": 0.0307, "step": 37020 }, { "epoch": 2.91, "learning_rate": 2.7247014456316784e-05, "loss": 0.0258, "step": 37040 }, { "epoch": 2.91, "learning_rate": 2.7207730986800755e-05, "loss": 0.0257, "step": 37060 }, { "epoch": 2.91, "learning_rate": 2.716844751728473e-05, "loss": 0.0247, "step": 37080 }, { "epoch": 2.91, "learning_rate": 2.71291640477687e-05, "loss": 0.0317, "step": 37100 }, { "epoch": 2.92, "learning_rate": 2.7089880578252674e-05, "loss": 0.0234, "step": 37120 }, { "epoch": 2.92, "learning_rate": 2.705059710873665e-05, "loss": 0.0285, "step": 37140 }, { "epoch": 2.92, "learning_rate": 2.7011313639220616e-05, "loss": 0.0279, "step": 37160 }, { "epoch": 2.92, "learning_rate": 2.6972030169704586e-05, "loss": 0.0354, "step": 37180 }, { "epoch": 2.92, "learning_rate": 2.693274670018856e-05, "loss": 0.0239, "step": 37200 }, { "epoch": 2.92, "learning_rate": 2.6893463230672535e-05, "loss": 0.0279, "step": 37220 }, { "epoch": 2.93, "learning_rate": 2.6854179761156505e-05, "loss": 0.0311, "step": 37240 }, { "epoch": 2.93, "learning_rate": 2.681489629164048e-05, "loss": 0.0229, "step": 37260 }, { "epoch": 2.93, "learning_rate": 2.677561282212445e-05, "loss": 0.0247, "step": 37280 }, { "epoch": 2.93, "learning_rate": 2.6736329352608425e-05, "loss": 0.0222, "step": 37300 }, { "epoch": 2.93, "learning_rate": 2.66970458830924e-05, "loss": 0.0211, "step": 37320 }, { "epoch": 2.93, "learning_rate": 2.665776241357637e-05, "loss": 0.0188, "step": 37340 }, { "epoch": 2.94, "learning_rate": 2.6618478944060344e-05, "loss": 0.0177, "step": 37360 }, { "epoch": 2.94, "learning_rate": 2.657919547454431e-05, "loss": 0.0284, "step": 37380 }, { "epoch": 2.94, "learning_rate": 2.6539912005028285e-05, "loss": 0.0236, "step": 37400 }, { "epoch": 2.94, "learning_rate": 2.6500628535512256e-05, "loss": 0.0221, "step": 37420 }, { "epoch": 2.94, "learning_rate": 2.646134506599623e-05, "loss": 0.0201, "step": 37440 }, { "epoch": 2.94, "learning_rate": 2.64220615964802e-05, "loss": 0.017, "step": 37460 }, { "epoch": 2.94, "learning_rate": 2.6382778126964175e-05, "loss": 0.015, "step": 37480 }, { "epoch": 2.95, "learning_rate": 2.634349465744815e-05, "loss": 0.0295, "step": 37500 }, { "epoch": 2.95, "learning_rate": 2.630421118793212e-05, "loss": 0.0253, "step": 37520 }, { "epoch": 2.95, "learning_rate": 2.6264927718416094e-05, "loss": 0.0146, "step": 37540 }, { "epoch": 2.95, "learning_rate": 2.6225644248900068e-05, "loss": 0.0235, "step": 37560 }, { "epoch": 2.95, "learning_rate": 2.6186360779384035e-05, "loss": 0.0235, "step": 37580 }, { "epoch": 2.95, "learning_rate": 2.6147077309868006e-05, "loss": 0.0293, "step": 37600 }, { "epoch": 2.96, "learning_rate": 2.610779384035198e-05, "loss": 0.0264, "step": 37620 }, { "epoch": 2.96, "learning_rate": 2.606851037083595e-05, "loss": 0.0243, "step": 37640 }, { "epoch": 2.96, "learning_rate": 2.6029226901319925e-05, "loss": 0.0169, "step": 37660 }, { "epoch": 2.96, "learning_rate": 2.59899434318039e-05, "loss": 0.0233, "step": 37680 }, { "epoch": 2.96, "learning_rate": 2.595065996228787e-05, "loss": 0.0393, "step": 37700 }, { "epoch": 2.96, "learning_rate": 2.5911376492771844e-05, "loss": 0.0214, "step": 37720 }, { "epoch": 2.97, "learning_rate": 2.5872093023255818e-05, "loss": 0.0204, "step": 37740 }, { "epoch": 2.97, "learning_rate": 2.583280955373979e-05, "loss": 0.0261, "step": 37760 }, { "epoch": 2.97, "learning_rate": 2.5793526084223763e-05, "loss": 0.0184, "step": 37780 }, { "epoch": 2.97, "learning_rate": 2.575424261470773e-05, "loss": 0.0265, "step": 37800 }, { "epoch": 2.97, "learning_rate": 2.57149591451917e-05, "loss": 0.0339, "step": 37820 }, { "epoch": 2.97, "learning_rate": 2.5675675675675675e-05, "loss": 0.031, "step": 37840 }, { "epoch": 2.97, "learning_rate": 2.563639220615965e-05, "loss": 0.0077, "step": 37860 }, { "epoch": 2.98, "learning_rate": 2.559710873664362e-05, "loss": 0.0218, "step": 37880 }, { "epoch": 2.98, "learning_rate": 2.5557825267127594e-05, "loss": 0.0273, "step": 37900 }, { "epoch": 2.98, "learning_rate": 2.551854179761157e-05, "loss": 0.0261, "step": 37920 }, { "epoch": 2.98, "learning_rate": 2.547925832809554e-05, "loss": 0.0152, "step": 37940 }, { "epoch": 2.98, "learning_rate": 2.5439974858579513e-05, "loss": 0.0088, "step": 37960 }, { "epoch": 2.98, "learning_rate": 2.5400691389063484e-05, "loss": 0.032, "step": 37980 }, { "epoch": 2.99, "learning_rate": 2.536140791954745e-05, "loss": 0.0172, "step": 38000 }, { "epoch": 2.99, "learning_rate": 2.5322124450031426e-05, "loss": 0.0276, "step": 38020 }, { "epoch": 2.99, "learning_rate": 2.52828409805154e-05, "loss": 0.03, "step": 38040 }, { "epoch": 2.99, "learning_rate": 2.524355751099937e-05, "loss": 0.0309, "step": 38060 }, { "epoch": 2.99, "learning_rate": 2.5204274041483345e-05, "loss": 0.0125, "step": 38080 }, { "epoch": 2.99, "learning_rate": 2.516499057196732e-05, "loss": 0.0226, "step": 38100 }, { "epoch": 2.99, "learning_rate": 2.512570710245129e-05, "loss": 0.034, "step": 38120 }, { "epoch": 3.0, "learning_rate": 2.5086423632935264e-05, "loss": 0.0278, "step": 38140 }, { "epoch": 3.0, "learning_rate": 2.5047140163419238e-05, "loss": 0.0291, "step": 38160 }, { "epoch": 3.0, "learning_rate": 2.500785669390321e-05, "loss": 0.0254, "step": 38180 }, { "epoch": 3.0, "learning_rate": 2.496857322438718e-05, "loss": 0.0155, "step": 38200 }, { "epoch": 3.0, "learning_rate": 2.4929289754871153e-05, "loss": 0.0148, "step": 38220 }, { "epoch": 3.0, "learning_rate": 2.4890006285355124e-05, "loss": 0.0201, "step": 38240 }, { "epoch": 3.01, "learning_rate": 2.4850722815839095e-05, "loss": 0.0193, "step": 38260 }, { "epoch": 3.01, "learning_rate": 2.481143934632307e-05, "loss": 0.0199, "step": 38280 }, { "epoch": 3.01, "learning_rate": 2.477215587680704e-05, "loss": 0.0192, "step": 38300 }, { "epoch": 3.01, "learning_rate": 2.4732872407291014e-05, "loss": 0.007, "step": 38320 }, { "epoch": 3.01, "learning_rate": 2.4693588937774988e-05, "loss": 0.0191, "step": 38340 }, { "epoch": 3.01, "learning_rate": 2.465430546825896e-05, "loss": 0.025, "step": 38360 }, { "epoch": 3.02, "learning_rate": 2.461502199874293e-05, "loss": 0.0209, "step": 38380 }, { "epoch": 3.02, "learning_rate": 2.4575738529226904e-05, "loss": 0.0158, "step": 38400 }, { "epoch": 3.02, "learning_rate": 2.4536455059710874e-05, "loss": 0.0152, "step": 38420 }, { "epoch": 3.02, "learning_rate": 2.449717159019485e-05, "loss": 0.0194, "step": 38440 }, { "epoch": 3.02, "learning_rate": 2.445788812067882e-05, "loss": 0.0154, "step": 38460 }, { "epoch": 3.02, "learning_rate": 2.441860465116279e-05, "loss": 0.0201, "step": 38480 }, { "epoch": 3.02, "learning_rate": 2.4379321181646764e-05, "loss": 0.0361, "step": 38500 }, { "epoch": 3.03, "learning_rate": 2.4340037712130738e-05, "loss": 0.0186, "step": 38520 }, { "epoch": 3.03, "learning_rate": 2.430075424261471e-05, "loss": 0.0189, "step": 38540 }, { "epoch": 3.03, "learning_rate": 2.4261470773098683e-05, "loss": 0.0098, "step": 38560 }, { "epoch": 3.03, "learning_rate": 2.4222187303582654e-05, "loss": 0.0174, "step": 38580 }, { "epoch": 3.03, "learning_rate": 2.4182903834066625e-05, "loss": 0.0226, "step": 38600 }, { "epoch": 3.03, "learning_rate": 2.41436203645506e-05, "loss": 0.0195, "step": 38620 }, { "epoch": 3.04, "learning_rate": 2.410433689503457e-05, "loss": 0.0261, "step": 38640 }, { "epoch": 3.04, "learning_rate": 2.4065053425518544e-05, "loss": 0.0124, "step": 38660 }, { "epoch": 3.04, "learning_rate": 2.4025769956002514e-05, "loss": 0.0107, "step": 38680 }, { "epoch": 3.04, "learning_rate": 2.398648648648649e-05, "loss": 0.0093, "step": 38700 }, { "epoch": 3.04, "learning_rate": 2.394720301697046e-05, "loss": 0.0225, "step": 38720 }, { "epoch": 3.04, "learning_rate": 2.3907919547454433e-05, "loss": 0.012, "step": 38740 }, { "epoch": 3.05, "learning_rate": 2.3868636077938404e-05, "loss": 0.0156, "step": 38760 }, { "epoch": 3.05, "learning_rate": 2.3829352608422378e-05, "loss": 0.0287, "step": 38780 }, { "epoch": 3.05, "learning_rate": 2.379006913890635e-05, "loss": 0.0132, "step": 38800 }, { "epoch": 3.05, "learning_rate": 2.375078566939032e-05, "loss": 0.011, "step": 38820 }, { "epoch": 3.05, "learning_rate": 2.3711502199874294e-05, "loss": 0.0125, "step": 38840 }, { "epoch": 3.05, "learning_rate": 2.3672218730358268e-05, "loss": 0.0244, "step": 38860 }, { "epoch": 3.05, "learning_rate": 2.363293526084224e-05, "loss": 0.0228, "step": 38880 }, { "epoch": 3.06, "learning_rate": 2.359365179132621e-05, "loss": 0.0187, "step": 38900 }, { "epoch": 3.06, "learning_rate": 2.3554368321810184e-05, "loss": 0.0121, "step": 38920 }, { "epoch": 3.06, "learning_rate": 2.3515084852294154e-05, "loss": 0.0193, "step": 38940 }, { "epoch": 3.06, "learning_rate": 2.347580138277813e-05, "loss": 0.0253, "step": 38960 }, { "epoch": 3.06, "learning_rate": 2.3436517913262103e-05, "loss": 0.01, "step": 38980 }, { "epoch": 3.06, "learning_rate": 2.3397234443746073e-05, "loss": 0.0216, "step": 39000 }, { "epoch": 3.07, "learning_rate": 2.3357950974230044e-05, "loss": 0.029, "step": 39020 }, { "epoch": 3.07, "learning_rate": 2.3318667504714018e-05, "loss": 0.0232, "step": 39040 }, { "epoch": 3.07, "learning_rate": 2.327938403519799e-05, "loss": 0.0176, "step": 39060 }, { "epoch": 3.07, "learning_rate": 2.3240100565681963e-05, "loss": 0.0097, "step": 39080 }, { "epoch": 3.07, "learning_rate": 2.3200817096165934e-05, "loss": 0.0143, "step": 39100 }, { "epoch": 3.07, "learning_rate": 2.3161533626649905e-05, "loss": 0.008, "step": 39120 }, { "epoch": 3.08, "learning_rate": 2.312225015713388e-05, "loss": 0.0187, "step": 39140 }, { "epoch": 3.08, "learning_rate": 2.3082966687617853e-05, "loss": 0.0134, "step": 39160 }, { "epoch": 3.08, "learning_rate": 2.3043683218101824e-05, "loss": 0.0259, "step": 39180 }, { "epoch": 3.08, "learning_rate": 2.3004399748585794e-05, "loss": 0.0232, "step": 39200 }, { "epoch": 3.08, "learning_rate": 2.296511627906977e-05, "loss": 0.0232, "step": 39220 }, { "epoch": 3.08, "learning_rate": 2.292583280955374e-05, "loss": 0.0124, "step": 39240 }, { "epoch": 3.08, "learning_rate": 2.2886549340037713e-05, "loss": 0.011, "step": 39260 }, { "epoch": 3.09, "learning_rate": 2.2847265870521687e-05, "loss": 0.0186, "step": 39280 }, { "epoch": 3.09, "learning_rate": 2.2807982401005658e-05, "loss": 0.0232, "step": 39300 }, { "epoch": 3.09, "learning_rate": 2.276869893148963e-05, "loss": 0.0237, "step": 39320 }, { "epoch": 3.09, "learning_rate": 2.2729415461973603e-05, "loss": 0.0196, "step": 39340 }, { "epoch": 3.09, "learning_rate": 2.2690131992457574e-05, "loss": 0.0125, "step": 39360 }, { "epoch": 3.09, "learning_rate": 2.2650848522941548e-05, "loss": 0.0101, "step": 39380 }, { "epoch": 3.1, "learning_rate": 2.2611565053425522e-05, "loss": 0.0092, "step": 39400 }, { "epoch": 3.1, "learning_rate": 2.257228158390949e-05, "loss": 0.0096, "step": 39420 }, { "epoch": 3.1, "learning_rate": 2.2532998114393464e-05, "loss": 0.0201, "step": 39440 }, { "epoch": 3.1, "learning_rate": 2.2493714644877438e-05, "loss": 0.0153, "step": 39460 }, { "epoch": 3.1, "learning_rate": 2.245443117536141e-05, "loss": 0.0219, "step": 39480 }, { "epoch": 3.1, "learning_rate": 2.2415147705845383e-05, "loss": 0.0162, "step": 39500 }, { "epoch": 3.1, "learning_rate": 2.2375864236329353e-05, "loss": 0.0134, "step": 39520 }, { "epoch": 3.11, "learning_rate": 2.2336580766813324e-05, "loss": 0.0119, "step": 39540 }, { "epoch": 3.11, "learning_rate": 2.2297297297297298e-05, "loss": 0.0225, "step": 39560 }, { "epoch": 3.11, "learning_rate": 2.2258013827781272e-05, "loss": 0.0125, "step": 39580 }, { "epoch": 3.11, "learning_rate": 2.2218730358265243e-05, "loss": 0.0168, "step": 39600 }, { "epoch": 3.11, "learning_rate": 2.2179446888749214e-05, "loss": 0.0214, "step": 39620 }, { "epoch": 3.11, "learning_rate": 2.2140163419233188e-05, "loss": 0.0099, "step": 39640 }, { "epoch": 3.12, "learning_rate": 2.210087994971716e-05, "loss": 0.0167, "step": 39660 }, { "epoch": 3.12, "learning_rate": 2.2061596480201133e-05, "loss": 0.0248, "step": 39680 }, { "epoch": 3.12, "learning_rate": 2.2022313010685107e-05, "loss": 0.0171, "step": 39700 }, { "epoch": 3.12, "learning_rate": 2.1983029541169078e-05, "loss": 0.018, "step": 39720 }, { "epoch": 3.12, "learning_rate": 2.194374607165305e-05, "loss": 0.0104, "step": 39740 }, { "epoch": 3.12, "learning_rate": 2.1904462602137023e-05, "loss": 0.0103, "step": 39760 }, { "epoch": 3.13, "learning_rate": 2.1865179132620993e-05, "loss": 0.0186, "step": 39780 }, { "epoch": 3.13, "learning_rate": 2.1825895663104967e-05, "loss": 0.0112, "step": 39800 }, { "epoch": 3.13, "learning_rate": 2.178661219358894e-05, "loss": 0.022, "step": 39820 }, { "epoch": 3.13, "learning_rate": 2.174732872407291e-05, "loss": 0.0196, "step": 39840 }, { "epoch": 3.13, "learning_rate": 2.1708045254556883e-05, "loss": 0.0179, "step": 39860 }, { "epoch": 3.13, "learning_rate": 2.1668761785040857e-05, "loss": 0.0117, "step": 39880 }, { "epoch": 3.13, "learning_rate": 2.1629478315524828e-05, "loss": 0.0145, "step": 39900 }, { "epoch": 3.14, "learning_rate": 2.1590194846008802e-05, "loss": 0.0238, "step": 39920 }, { "epoch": 3.14, "learning_rate": 2.1550911376492773e-05, "loss": 0.0104, "step": 39940 }, { "epoch": 3.14, "learning_rate": 2.1511627906976744e-05, "loss": 0.0262, "step": 39960 }, { "epoch": 3.14, "learning_rate": 2.1472344437460718e-05, "loss": 0.0244, "step": 39980 }, { "epoch": 3.14, "learning_rate": 2.1433060967944692e-05, "loss": 0.0184, "step": 40000 }, { "epoch": 3.14, "eval_loss": 0.30395957827568054, "eval_matthews_correlation": 0.4261279284067468, "eval_runtime": 1091.8559, "eval_samples_per_second": 104.441, "eval_steps_per_second": 13.056, "step": 40000 }, { "epoch": 3.14, "learning_rate": 2.1393777498428663e-05, "loss": 0.0157, "step": 40020 }, { "epoch": 3.15, "learning_rate": 2.1354494028912633e-05, "loss": 0.015, "step": 40040 }, { "epoch": 3.15, "learning_rate": 2.1315210559396607e-05, "loss": 0.01, "step": 40060 }, { "epoch": 3.15, "learning_rate": 2.1275927089880578e-05, "loss": 0.0097, "step": 40080 }, { "epoch": 3.15, "learning_rate": 2.1236643620364552e-05, "loss": 0.0168, "step": 40100 }, { "epoch": 3.15, "learning_rate": 2.1197360150848526e-05, "loss": 0.0078, "step": 40120 }, { "epoch": 3.15, "learning_rate": 2.1158076681332497e-05, "loss": 0.0129, "step": 40140 }, { "epoch": 3.16, "learning_rate": 2.1118793211816468e-05, "loss": 0.0179, "step": 40160 }, { "epoch": 3.16, "learning_rate": 2.1079509742300442e-05, "loss": 0.0185, "step": 40180 }, { "epoch": 3.16, "learning_rate": 2.1040226272784413e-05, "loss": 0.0102, "step": 40200 }, { "epoch": 3.16, "learning_rate": 2.1000942803268387e-05, "loss": 0.0086, "step": 40220 }, { "epoch": 3.16, "learning_rate": 2.0961659333752358e-05, "loss": 0.0197, "step": 40240 }, { "epoch": 3.16, "learning_rate": 2.092237586423633e-05, "loss": 0.0221, "step": 40260 }, { "epoch": 3.16, "learning_rate": 2.0883092394720303e-05, "loss": 0.0338, "step": 40280 }, { "epoch": 3.17, "learning_rate": 2.0843808925204277e-05, "loss": 0.0255, "step": 40300 }, { "epoch": 3.17, "learning_rate": 2.0804525455688247e-05, "loss": 0.0179, "step": 40320 }, { "epoch": 3.17, "learning_rate": 2.076524198617222e-05, "loss": 0.0157, "step": 40340 }, { "epoch": 3.17, "learning_rate": 2.0725958516656192e-05, "loss": 0.0121, "step": 40360 }, { "epoch": 3.17, "learning_rate": 2.0686675047140163e-05, "loss": 0.0141, "step": 40380 }, { "epoch": 3.17, "learning_rate": 2.0647391577624137e-05, "loss": 0.0194, "step": 40400 }, { "epoch": 3.18, "learning_rate": 2.0608108108108108e-05, "loss": 0.0237, "step": 40420 }, { "epoch": 3.18, "learning_rate": 2.0568824638592082e-05, "loss": 0.0201, "step": 40440 }, { "epoch": 3.18, "learning_rate": 2.0529541169076053e-05, "loss": 0.0103, "step": 40460 }, { "epoch": 3.18, "learning_rate": 2.0490257699560027e-05, "loss": 0.0108, "step": 40480 }, { "epoch": 3.18, "learning_rate": 2.0450974230043998e-05, "loss": 0.0065, "step": 40500 }, { "epoch": 3.18, "learning_rate": 2.0411690760527972e-05, "loss": 0.015, "step": 40520 }, { "epoch": 3.19, "learning_rate": 2.0372407291011943e-05, "loss": 0.01, "step": 40540 }, { "epoch": 3.19, "learning_rate": 2.0333123821495917e-05, "loss": 0.0151, "step": 40560 }, { "epoch": 3.19, "learning_rate": 2.0293840351979887e-05, "loss": 0.0046, "step": 40580 }, { "epoch": 3.19, "learning_rate": 2.0254556882463858e-05, "loss": 0.012, "step": 40600 }, { "epoch": 3.19, "learning_rate": 2.0215273412947832e-05, "loss": 0.0103, "step": 40620 }, { "epoch": 3.19, "learning_rate": 2.0175989943431806e-05, "loss": 0.0228, "step": 40640 }, { "epoch": 3.19, "learning_rate": 2.0136706473915777e-05, "loss": 0.0071, "step": 40660 }, { "epoch": 3.2, "learning_rate": 2.0097423004399748e-05, "loss": 0.0173, "step": 40680 }, { "epoch": 3.2, "learning_rate": 2.0058139534883722e-05, "loss": 0.0151, "step": 40700 }, { "epoch": 3.2, "learning_rate": 2.0018856065367693e-05, "loss": 0.0134, "step": 40720 }, { "epoch": 3.2, "learning_rate": 1.9979572595851667e-05, "loss": 0.0218, "step": 40740 }, { "epoch": 3.2, "learning_rate": 1.994028912633564e-05, "loss": 0.0269, "step": 40760 }, { "epoch": 3.2, "learning_rate": 1.9901005656819612e-05, "loss": 0.0141, "step": 40780 }, { "epoch": 3.21, "learning_rate": 1.9861722187303583e-05, "loss": 0.0134, "step": 40800 }, { "epoch": 3.21, "learning_rate": 1.9822438717787557e-05, "loss": 0.0221, "step": 40820 }, { "epoch": 3.21, "learning_rate": 1.9783155248271527e-05, "loss": 0.0267, "step": 40840 }, { "epoch": 3.21, "learning_rate": 1.97438717787555e-05, "loss": 0.015, "step": 40860 }, { "epoch": 3.21, "learning_rate": 1.9704588309239472e-05, "loss": 0.029, "step": 40880 }, { "epoch": 3.21, "learning_rate": 1.9665304839723443e-05, "loss": 0.0087, "step": 40900 }, { "epoch": 3.21, "learning_rate": 1.9626021370207417e-05, "loss": 0.0071, "step": 40920 }, { "epoch": 3.22, "learning_rate": 1.958673790069139e-05, "loss": 0.0097, "step": 40940 }, { "epoch": 3.22, "learning_rate": 1.9547454431175362e-05, "loss": 0.0109, "step": 40960 }, { "epoch": 3.22, "learning_rate": 1.9508170961659333e-05, "loss": 0.014, "step": 40980 }, { "epoch": 3.22, "learning_rate": 1.9468887492143307e-05, "loss": 0.0161, "step": 41000 }, { "epoch": 3.22, "learning_rate": 1.9429604022627278e-05, "loss": 0.0138, "step": 41020 }, { "epoch": 3.22, "learning_rate": 1.9390320553111252e-05, "loss": 0.0094, "step": 41040 }, { "epoch": 3.23, "learning_rate": 1.9351037083595226e-05, "loss": 0.0177, "step": 41060 }, { "epoch": 3.23, "learning_rate": 1.9311753614079197e-05, "loss": 0.0139, "step": 41080 }, { "epoch": 3.23, "learning_rate": 1.9272470144563167e-05, "loss": 0.0146, "step": 41100 }, { "epoch": 3.23, "learning_rate": 1.923318667504714e-05, "loss": 0.0185, "step": 41120 }, { "epoch": 3.23, "learning_rate": 1.9193903205531112e-05, "loss": 0.0211, "step": 41140 }, { "epoch": 3.23, "learning_rate": 1.9154619736015086e-05, "loss": 0.0207, "step": 41160 }, { "epoch": 3.24, "learning_rate": 1.911533626649906e-05, "loss": 0.008, "step": 41180 }, { "epoch": 3.24, "learning_rate": 1.9076052796983028e-05, "loss": 0.0049, "step": 41200 }, { "epoch": 3.24, "learning_rate": 1.9036769327467002e-05, "loss": 0.0235, "step": 41220 }, { "epoch": 3.24, "learning_rate": 1.8997485857950976e-05, "loss": 0.0164, "step": 41240 }, { "epoch": 3.24, "learning_rate": 1.8958202388434947e-05, "loss": 0.0118, "step": 41260 }, { "epoch": 3.24, "learning_rate": 1.891891891891892e-05, "loss": 0.0238, "step": 41280 }, { "epoch": 3.24, "learning_rate": 1.8879635449402892e-05, "loss": 0.0182, "step": 41300 }, { "epoch": 3.25, "learning_rate": 1.8840351979886863e-05, "loss": 0.0323, "step": 41320 }, { "epoch": 3.25, "learning_rate": 1.8801068510370837e-05, "loss": 0.026, "step": 41340 }, { "epoch": 3.25, "learning_rate": 1.876178504085481e-05, "loss": 0.0207, "step": 41360 }, { "epoch": 3.25, "learning_rate": 1.872250157133878e-05, "loss": 0.0089, "step": 41380 }, { "epoch": 3.25, "learning_rate": 1.8683218101822752e-05, "loss": 0.009, "step": 41400 }, { "epoch": 3.25, "learning_rate": 1.8643934632306726e-05, "loss": 0.0118, "step": 41420 }, { "epoch": 3.26, "learning_rate": 1.8604651162790697e-05, "loss": 0.011, "step": 41440 }, { "epoch": 3.26, "learning_rate": 1.856536769327467e-05, "loss": 0.0132, "step": 41460 }, { "epoch": 3.26, "learning_rate": 1.8526084223758645e-05, "loss": 0.0075, "step": 41480 }, { "epoch": 3.26, "learning_rate": 1.8486800754242616e-05, "loss": 0.0235, "step": 41500 }, { "epoch": 3.26, "learning_rate": 1.8447517284726587e-05, "loss": 0.0345, "step": 41520 }, { "epoch": 3.26, "learning_rate": 1.840823381521056e-05, "loss": 0.0228, "step": 41540 }, { "epoch": 3.27, "learning_rate": 1.8368950345694532e-05, "loss": 0.022, "step": 41560 }, { "epoch": 3.27, "learning_rate": 1.8329666876178506e-05, "loss": 0.0116, "step": 41580 }, { "epoch": 3.27, "learning_rate": 1.829038340666248e-05, "loss": 0.0066, "step": 41600 }, { "epoch": 3.27, "learning_rate": 1.8251099937146447e-05, "loss": 0.0156, "step": 41620 }, { "epoch": 3.27, "learning_rate": 1.821181646763042e-05, "loss": 0.0188, "step": 41640 }, { "epoch": 3.27, "learning_rate": 1.8172532998114396e-05, "loss": 0.021, "step": 41660 }, { "epoch": 3.27, "learning_rate": 1.8133249528598366e-05, "loss": 0.0221, "step": 41680 }, { "epoch": 3.28, "learning_rate": 1.809396605908234e-05, "loss": 0.0194, "step": 41700 }, { "epoch": 3.28, "learning_rate": 1.805468258956631e-05, "loss": 0.0187, "step": 41720 }, { "epoch": 3.28, "learning_rate": 1.8015399120050282e-05, "loss": 0.0203, "step": 41740 }, { "epoch": 3.28, "learning_rate": 1.7976115650534256e-05, "loss": 0.0154, "step": 41760 }, { "epoch": 3.28, "learning_rate": 1.793683218101823e-05, "loss": 0.0213, "step": 41780 }, { "epoch": 3.28, "learning_rate": 1.78975487115022e-05, "loss": 0.0218, "step": 41800 }, { "epoch": 3.29, "learning_rate": 1.7858265241986172e-05, "loss": 0.0192, "step": 41820 }, { "epoch": 3.29, "learning_rate": 1.7818981772470146e-05, "loss": 0.017, "step": 41840 }, { "epoch": 3.29, "learning_rate": 1.7779698302954117e-05, "loss": 0.0161, "step": 41860 }, { "epoch": 3.29, "learning_rate": 1.774041483343809e-05, "loss": 0.0146, "step": 41880 }, { "epoch": 3.29, "learning_rate": 1.770113136392206e-05, "loss": 0.0105, "step": 41900 }, { "epoch": 3.29, "learning_rate": 1.7661847894406036e-05, "loss": 0.0077, "step": 41920 }, { "epoch": 3.3, "learning_rate": 1.7622564424890006e-05, "loss": 0.0123, "step": 41940 }, { "epoch": 3.3, "learning_rate": 1.758328095537398e-05, "loss": 0.0067, "step": 41960 }, { "epoch": 3.3, "learning_rate": 1.754399748585795e-05, "loss": 0.0178, "step": 41980 }, { "epoch": 3.3, "learning_rate": 1.7504714016341926e-05, "loss": 0.0253, "step": 42000 }, { "epoch": 3.3, "learning_rate": 1.7465430546825896e-05, "loss": 0.0286, "step": 42020 }, { "epoch": 3.3, "learning_rate": 1.7426147077309867e-05, "loss": 0.0138, "step": 42040 }, { "epoch": 3.3, "learning_rate": 1.738686360779384e-05, "loss": 0.0118, "step": 42060 }, { "epoch": 3.31, "learning_rate": 1.7347580138277815e-05, "loss": 0.0123, "step": 42080 }, { "epoch": 3.31, "learning_rate": 1.7308296668761786e-05, "loss": 0.0194, "step": 42100 }, { "epoch": 3.31, "learning_rate": 1.726901319924576e-05, "loss": 0.0139, "step": 42120 }, { "epoch": 3.31, "learning_rate": 1.722972972972973e-05, "loss": 0.0107, "step": 42140 }, { "epoch": 3.31, "learning_rate": 1.71904462602137e-05, "loss": 0.0119, "step": 42160 }, { "epoch": 3.31, "learning_rate": 1.7151162790697676e-05, "loss": 0.0149, "step": 42180 }, { "epoch": 3.32, "learning_rate": 1.7111879321181646e-05, "loss": 0.0301, "step": 42200 }, { "epoch": 3.32, "learning_rate": 1.707259585166562e-05, "loss": 0.0153, "step": 42220 }, { "epoch": 3.32, "learning_rate": 1.703331238214959e-05, "loss": 0.0127, "step": 42240 }, { "epoch": 3.32, "learning_rate": 1.6994028912633566e-05, "loss": 0.0112, "step": 42260 }, { "epoch": 3.32, "learning_rate": 1.6954745443117536e-05, "loss": 0.0167, "step": 42280 }, { "epoch": 3.32, "learning_rate": 1.691546197360151e-05, "loss": 0.0168, "step": 42300 }, { "epoch": 3.32, "learning_rate": 1.687617850408548e-05, "loss": 0.0103, "step": 42320 }, { "epoch": 3.33, "learning_rate": 1.6836895034569452e-05, "loss": 0.0149, "step": 42340 }, { "epoch": 3.33, "learning_rate": 1.6797611565053426e-05, "loss": 0.0125, "step": 42360 }, { "epoch": 3.33, "learning_rate": 1.6758328095537397e-05, "loss": 0.0296, "step": 42380 }, { "epoch": 3.33, "learning_rate": 1.671904462602137e-05, "loss": 0.0101, "step": 42400 }, { "epoch": 3.33, "learning_rate": 1.6679761156505345e-05, "loss": 0.0051, "step": 42420 }, { "epoch": 3.33, "learning_rate": 1.6640477686989316e-05, "loss": 0.0184, "step": 42440 }, { "epoch": 3.34, "learning_rate": 1.6601194217473287e-05, "loss": 0.0207, "step": 42460 }, { "epoch": 3.34, "learning_rate": 1.656191074795726e-05, "loss": 0.011, "step": 42480 }, { "epoch": 3.34, "learning_rate": 1.652262727844123e-05, "loss": 0.0143, "step": 42500 }, { "epoch": 3.34, "learning_rate": 1.6483343808925206e-05, "loss": 0.0114, "step": 42520 }, { "epoch": 3.34, "learning_rate": 1.644406033940918e-05, "loss": 0.0084, "step": 42540 }, { "epoch": 3.34, "learning_rate": 1.6404776869893147e-05, "loss": 0.0214, "step": 42560 }, { "epoch": 3.35, "learning_rate": 1.636549340037712e-05, "loss": 0.0068, "step": 42580 }, { "epoch": 3.35, "learning_rate": 1.6326209930861095e-05, "loss": 0.0116, "step": 42600 }, { "epoch": 3.35, "learning_rate": 1.6286926461345066e-05, "loss": 0.0312, "step": 42620 }, { "epoch": 3.35, "learning_rate": 1.624764299182904e-05, "loss": 0.0108, "step": 42640 }, { "epoch": 3.35, "learning_rate": 1.620835952231301e-05, "loss": 0.0136, "step": 42660 }, { "epoch": 3.35, "learning_rate": 1.616907605279698e-05, "loss": 0.0156, "step": 42680 }, { "epoch": 3.35, "learning_rate": 1.6129792583280956e-05, "loss": 0.0198, "step": 42700 }, { "epoch": 3.36, "learning_rate": 1.609050911376493e-05, "loss": 0.0086, "step": 42720 }, { "epoch": 3.36, "learning_rate": 1.60512256442489e-05, "loss": 0.0161, "step": 42740 }, { "epoch": 3.36, "learning_rate": 1.601194217473287e-05, "loss": 0.0164, "step": 42760 }, { "epoch": 3.36, "learning_rate": 1.5972658705216846e-05, "loss": 0.0091, "step": 42780 }, { "epoch": 3.36, "learning_rate": 1.5933375235700816e-05, "loss": 0.0161, "step": 42800 }, { "epoch": 3.36, "learning_rate": 1.589409176618479e-05, "loss": 0.0094, "step": 42820 }, { "epoch": 3.37, "learning_rate": 1.5854808296668765e-05, "loss": 0.0118, "step": 42840 }, { "epoch": 3.37, "learning_rate": 1.5815524827152735e-05, "loss": 0.027, "step": 42860 }, { "epoch": 3.37, "learning_rate": 1.5776241357636706e-05, "loss": 0.0057, "step": 42880 }, { "epoch": 3.37, "learning_rate": 1.573695788812068e-05, "loss": 0.0104, "step": 42900 }, { "epoch": 3.37, "learning_rate": 1.569767441860465e-05, "loss": 0.0117, "step": 42920 }, { "epoch": 3.37, "learning_rate": 1.5658390949088625e-05, "loss": 0.0086, "step": 42940 }, { "epoch": 3.38, "learning_rate": 1.56191074795726e-05, "loss": 0.0126, "step": 42960 }, { "epoch": 3.38, "learning_rate": 1.5579824010056567e-05, "loss": 0.0225, "step": 42980 }, { "epoch": 3.38, "learning_rate": 1.554054054054054e-05, "loss": 0.0107, "step": 43000 }, { "epoch": 3.38, "learning_rate": 1.5501257071024515e-05, "loss": 0.0104, "step": 43020 }, { "epoch": 3.38, "learning_rate": 1.5461973601508486e-05, "loss": 0.0182, "step": 43040 }, { "epoch": 3.38, "learning_rate": 1.542269013199246e-05, "loss": 0.0149, "step": 43060 }, { "epoch": 3.38, "learning_rate": 1.538340666247643e-05, "loss": 0.0153, "step": 43080 }, { "epoch": 3.39, "learning_rate": 1.53441231929604e-05, "loss": 0.0102, "step": 43100 }, { "epoch": 3.39, "learning_rate": 1.5304839723444375e-05, "loss": 0.0155, "step": 43120 }, { "epoch": 3.39, "learning_rate": 1.526555625392835e-05, "loss": 0.0135, "step": 43140 }, { "epoch": 3.39, "learning_rate": 1.522627278441232e-05, "loss": 0.0102, "step": 43160 }, { "epoch": 3.39, "learning_rate": 1.5186989314896291e-05, "loss": 0.0149, "step": 43180 }, { "epoch": 3.39, "learning_rate": 1.5147705845380263e-05, "loss": 0.0124, "step": 43200 }, { "epoch": 3.4, "learning_rate": 1.5108422375864237e-05, "loss": 0.0105, "step": 43220 }, { "epoch": 3.4, "learning_rate": 1.506913890634821e-05, "loss": 0.0106, "step": 43240 }, { "epoch": 3.4, "learning_rate": 1.5029855436832182e-05, "loss": 0.0184, "step": 43260 }, { "epoch": 3.4, "learning_rate": 1.4990571967316155e-05, "loss": 0.0171, "step": 43280 }, { "epoch": 3.4, "learning_rate": 1.4951288497800126e-05, "loss": 0.0102, "step": 43300 }, { "epoch": 3.4, "learning_rate": 1.4912005028284098e-05, "loss": 0.0133, "step": 43320 }, { "epoch": 3.41, "learning_rate": 1.4872721558768072e-05, "loss": 0.0101, "step": 43340 }, { "epoch": 3.41, "learning_rate": 1.4833438089252045e-05, "loss": 0.0139, "step": 43360 }, { "epoch": 3.41, "learning_rate": 1.4794154619736017e-05, "loss": 0.0124, "step": 43380 }, { "epoch": 3.41, "learning_rate": 1.4754871150219988e-05, "loss": 0.009, "step": 43400 }, { "epoch": 3.41, "learning_rate": 1.471558768070396e-05, "loss": 0.0074, "step": 43420 }, { "epoch": 3.41, "learning_rate": 1.4676304211187933e-05, "loss": 0.0115, "step": 43440 }, { "epoch": 3.41, "learning_rate": 1.4637020741671905e-05, "loss": 0.0151, "step": 43460 }, { "epoch": 3.42, "learning_rate": 1.459773727215588e-05, "loss": 0.0251, "step": 43480 }, { "epoch": 3.42, "learning_rate": 1.4558453802639848e-05, "loss": 0.0126, "step": 43500 }, { "epoch": 3.42, "learning_rate": 1.4519170333123822e-05, "loss": 0.008, "step": 43520 }, { "epoch": 3.42, "learning_rate": 1.4479886863607795e-05, "loss": 0.0131, "step": 43540 }, { "epoch": 3.42, "learning_rate": 1.4440603394091767e-05, "loss": 0.0169, "step": 43560 }, { "epoch": 3.42, "learning_rate": 1.440131992457574e-05, "loss": 0.0173, "step": 43580 }, { "epoch": 3.43, "learning_rate": 1.436203645505971e-05, "loss": 0.0293, "step": 43600 }, { "epoch": 3.43, "learning_rate": 1.4322752985543683e-05, "loss": 0.0377, "step": 43620 }, { "epoch": 3.43, "learning_rate": 1.4283469516027655e-05, "loss": 0.0159, "step": 43640 }, { "epoch": 3.43, "learning_rate": 1.424418604651163e-05, "loss": 0.0052, "step": 43660 }, { "epoch": 3.43, "learning_rate": 1.4204902576995602e-05, "loss": 0.0262, "step": 43680 }, { "epoch": 3.43, "learning_rate": 1.4165619107479574e-05, "loss": 0.0212, "step": 43700 }, { "epoch": 3.43, "learning_rate": 1.4126335637963545e-05, "loss": 0.016, "step": 43720 }, { "epoch": 3.44, "learning_rate": 1.4087052168447517e-05, "loss": 0.0167, "step": 43740 }, { "epoch": 3.44, "learning_rate": 1.404776869893149e-05, "loss": 0.0195, "step": 43760 }, { "epoch": 3.44, "learning_rate": 1.4008485229415464e-05, "loss": 0.0101, "step": 43780 }, { "epoch": 3.44, "learning_rate": 1.3969201759899436e-05, "loss": 0.0209, "step": 43800 }, { "epoch": 3.44, "learning_rate": 1.3929918290383406e-05, "loss": 0.0273, "step": 43820 }, { "epoch": 3.44, "learning_rate": 1.389063482086738e-05, "loss": 0.0137, "step": 43840 }, { "epoch": 3.45, "learning_rate": 1.3851351351351352e-05, "loss": 0.011, "step": 43860 }, { "epoch": 3.45, "learning_rate": 1.3812067881835325e-05, "loss": 0.0085, "step": 43880 }, { "epoch": 3.45, "learning_rate": 1.3772784412319299e-05, "loss": 0.0078, "step": 43900 }, { "epoch": 3.45, "learning_rate": 1.3733500942803268e-05, "loss": 0.0209, "step": 43920 }, { "epoch": 3.45, "learning_rate": 1.369421747328724e-05, "loss": 0.0037, "step": 43940 }, { "epoch": 3.45, "learning_rate": 1.3654934003771214e-05, "loss": 0.0133, "step": 43960 }, { "epoch": 3.46, "learning_rate": 1.3615650534255187e-05, "loss": 0.0128, "step": 43980 }, { "epoch": 3.46, "learning_rate": 1.357636706473916e-05, "loss": 0.0189, "step": 44000 }, { "epoch": 3.46, "learning_rate": 1.353708359522313e-05, "loss": 0.0099, "step": 44020 }, { "epoch": 3.46, "learning_rate": 1.3497800125707102e-05, "loss": 0.0164, "step": 44040 }, { "epoch": 3.46, "learning_rate": 1.3458516656191075e-05, "loss": 0.0125, "step": 44060 }, { "epoch": 3.46, "learning_rate": 1.3419233186675049e-05, "loss": 0.0139, "step": 44080 }, { "epoch": 3.46, "learning_rate": 1.3379949717159021e-05, "loss": 0.0117, "step": 44100 }, { "epoch": 3.47, "learning_rate": 1.334066624764299e-05, "loss": 0.0161, "step": 44120 }, { "epoch": 3.47, "learning_rate": 1.3301382778126965e-05, "loss": 0.021, "step": 44140 }, { "epoch": 3.47, "learning_rate": 1.3262099308610937e-05, "loss": 0.0146, "step": 44160 }, { "epoch": 3.47, "learning_rate": 1.322281583909491e-05, "loss": 0.0146, "step": 44180 }, { "epoch": 3.47, "learning_rate": 1.3183532369578882e-05, "loss": 0.0163, "step": 44200 }, { "epoch": 3.47, "learning_rate": 1.3144248900062856e-05, "loss": 0.0176, "step": 44220 }, { "epoch": 3.48, "learning_rate": 1.3104965430546825e-05, "loss": 0.0203, "step": 44240 }, { "epoch": 3.48, "learning_rate": 1.30656819610308e-05, "loss": 0.018, "step": 44260 }, { "epoch": 3.48, "learning_rate": 1.3026398491514772e-05, "loss": 0.0218, "step": 44280 }, { "epoch": 3.48, "learning_rate": 1.2987115021998744e-05, "loss": 0.0216, "step": 44300 }, { "epoch": 3.48, "learning_rate": 1.2947831552482716e-05, "loss": 0.008, "step": 44320 }, { "epoch": 3.48, "learning_rate": 1.2908548082966687e-05, "loss": 0.0106, "step": 44340 }, { "epoch": 3.49, "learning_rate": 1.286926461345066e-05, "loss": 0.0126, "step": 44360 }, { "epoch": 3.49, "learning_rate": 1.2829981143934632e-05, "loss": 0.0128, "step": 44380 }, { "epoch": 3.49, "learning_rate": 1.2790697674418606e-05, "loss": 0.0097, "step": 44400 }, { "epoch": 3.49, "learning_rate": 1.2751414204902579e-05, "loss": 0.0153, "step": 44420 }, { "epoch": 3.49, "learning_rate": 1.271213073538655e-05, "loss": 0.0129, "step": 44440 }, { "epoch": 3.49, "learning_rate": 1.2672847265870522e-05, "loss": 0.0191, "step": 44460 }, { "epoch": 3.49, "learning_rate": 1.2633563796354494e-05, "loss": 0.0123, "step": 44480 }, { "epoch": 3.5, "learning_rate": 1.2594280326838467e-05, "loss": 0.0088, "step": 44500 }, { "epoch": 3.5, "learning_rate": 1.2554996857322441e-05, "loss": 0.0088, "step": 44520 }, { "epoch": 3.5, "learning_rate": 1.251571338780641e-05, "loss": 0.0156, "step": 44540 }, { "epoch": 3.5, "learning_rate": 1.2476429918290384e-05, "loss": 0.0168, "step": 44560 }, { "epoch": 3.5, "learning_rate": 1.2437146448774357e-05, "loss": 0.0042, "step": 44580 }, { "epoch": 3.5, "learning_rate": 1.2397862979258329e-05, "loss": 0.0155, "step": 44600 }, { "epoch": 3.51, "learning_rate": 1.23585795097423e-05, "loss": 0.0094, "step": 44620 }, { "epoch": 3.51, "learning_rate": 1.2319296040226274e-05, "loss": 0.0136, "step": 44640 }, { "epoch": 3.51, "learning_rate": 1.2280012570710246e-05, "loss": 0.0121, "step": 44660 }, { "epoch": 3.51, "learning_rate": 1.2240729101194217e-05, "loss": 0.0092, "step": 44680 }, { "epoch": 3.51, "learning_rate": 1.2201445631678191e-05, "loss": 0.0178, "step": 44700 }, { "epoch": 3.51, "learning_rate": 1.2162162162162164e-05, "loss": 0.0181, "step": 44720 }, { "epoch": 3.52, "learning_rate": 1.2122878692646134e-05, "loss": 0.0116, "step": 44740 }, { "epoch": 3.52, "learning_rate": 1.2083595223130108e-05, "loss": 0.0071, "step": 44760 }, { "epoch": 3.52, "learning_rate": 1.204431175361408e-05, "loss": 0.0116, "step": 44780 }, { "epoch": 3.52, "learning_rate": 1.2005028284098052e-05, "loss": 0.0068, "step": 44800 }, { "epoch": 3.52, "learning_rate": 1.1965744814582026e-05, "loss": 0.0061, "step": 44820 }, { "epoch": 3.52, "learning_rate": 1.1926461345065997e-05, "loss": 0.0073, "step": 44840 }, { "epoch": 3.52, "learning_rate": 1.1887177875549969e-05, "loss": 0.0256, "step": 44860 }, { "epoch": 3.53, "learning_rate": 1.1847894406033941e-05, "loss": 0.0189, "step": 44880 }, { "epoch": 3.53, "learning_rate": 1.1808610936517914e-05, "loss": 0.0072, "step": 44900 }, { "epoch": 3.53, "learning_rate": 1.1769327467001886e-05, "loss": 0.0237, "step": 44920 }, { "epoch": 3.53, "learning_rate": 1.1730043997485859e-05, "loss": 0.029, "step": 44940 }, { "epoch": 3.53, "learning_rate": 1.1690760527969831e-05, "loss": 0.0173, "step": 44960 }, { "epoch": 3.53, "learning_rate": 1.1651477058453804e-05, "loss": 0.0227, "step": 44980 }, { "epoch": 3.54, "learning_rate": 1.1612193588937776e-05, "loss": 0.0088, "step": 45000 }, { "epoch": 3.54, "learning_rate": 1.1572910119421748e-05, "loss": 0.0158, "step": 45020 }, { "epoch": 3.54, "learning_rate": 1.153362664990572e-05, "loss": 0.0083, "step": 45040 }, { "epoch": 3.54, "learning_rate": 1.1494343180389693e-05, "loss": 0.0097, "step": 45060 }, { "epoch": 3.54, "learning_rate": 1.1455059710873666e-05, "loss": 0.0151, "step": 45080 }, { "epoch": 3.54, "learning_rate": 1.1415776241357637e-05, "loss": 0.0121, "step": 45100 }, { "epoch": 3.54, "learning_rate": 1.1376492771841609e-05, "loss": 0.0107, "step": 45120 }, { "epoch": 3.55, "learning_rate": 1.1337209302325581e-05, "loss": 0.0091, "step": 45140 }, { "epoch": 3.55, "learning_rate": 1.1297925832809554e-05, "loss": 0.0172, "step": 45160 }, { "epoch": 3.55, "learning_rate": 1.1258642363293526e-05, "loss": 0.0194, "step": 45180 }, { "epoch": 3.55, "learning_rate": 1.1219358893777499e-05, "loss": 0.0078, "step": 45200 }, { "epoch": 3.55, "learning_rate": 1.1180075424261471e-05, "loss": 0.0208, "step": 45220 }, { "epoch": 3.55, "learning_rate": 1.1140791954745444e-05, "loss": 0.0149, "step": 45240 }, { "epoch": 3.56, "learning_rate": 1.1101508485229416e-05, "loss": 0.016, "step": 45260 }, { "epoch": 3.56, "learning_rate": 1.1062225015713388e-05, "loss": 0.0098, "step": 45280 }, { "epoch": 3.56, "learning_rate": 1.1022941546197361e-05, "loss": 0.0111, "step": 45300 }, { "epoch": 3.56, "learning_rate": 1.0983658076681333e-05, "loss": 0.0162, "step": 45320 }, { "epoch": 3.56, "learning_rate": 1.0944374607165306e-05, "loss": 0.0119, "step": 45340 }, { "epoch": 3.56, "learning_rate": 1.0905091137649277e-05, "loss": 0.0081, "step": 45360 }, { "epoch": 3.57, "learning_rate": 1.086580766813325e-05, "loss": 0.0087, "step": 45380 }, { "epoch": 3.57, "learning_rate": 1.0826524198617223e-05, "loss": 0.0148, "step": 45400 }, { "epoch": 3.57, "learning_rate": 1.0787240729101194e-05, "loss": 0.0211, "step": 45420 }, { "epoch": 3.57, "learning_rate": 1.0747957259585168e-05, "loss": 0.0123, "step": 45440 }, { "epoch": 3.57, "learning_rate": 1.0708673790069139e-05, "loss": 0.0076, "step": 45460 }, { "epoch": 3.57, "learning_rate": 1.0669390320553111e-05, "loss": 0.0137, "step": 45480 }, { "epoch": 3.57, "learning_rate": 1.0630106851037085e-05, "loss": 0.0136, "step": 45500 }, { "epoch": 3.58, "learning_rate": 1.0590823381521056e-05, "loss": 0.0054, "step": 45520 }, { "epoch": 3.58, "learning_rate": 1.0551539912005028e-05, "loss": 0.0055, "step": 45540 }, { "epoch": 3.58, "learning_rate": 1.0512256442489001e-05, "loss": 0.0153, "step": 45560 }, { "epoch": 3.58, "learning_rate": 1.0472972972972973e-05, "loss": 0.0055, "step": 45580 }, { "epoch": 3.58, "learning_rate": 1.0433689503456946e-05, "loss": 0.0132, "step": 45600 }, { "epoch": 3.58, "learning_rate": 1.0394406033940918e-05, "loss": 0.0201, "step": 45620 }, { "epoch": 3.59, "learning_rate": 1.035512256442489e-05, "loss": 0.0148, "step": 45640 }, { "epoch": 3.59, "learning_rate": 1.0315839094908863e-05, "loss": 0.018, "step": 45660 }, { "epoch": 3.59, "learning_rate": 1.0276555625392836e-05, "loss": 0.0032, "step": 45680 }, { "epoch": 3.59, "learning_rate": 1.0237272155876808e-05, "loss": 0.0111, "step": 45700 }, { "epoch": 3.59, "learning_rate": 1.0197988686360779e-05, "loss": 0.0128, "step": 45720 }, { "epoch": 3.59, "learning_rate": 1.0158705216844753e-05, "loss": 0.0148, "step": 45740 }, { "epoch": 3.6, "learning_rate": 1.0119421747328725e-05, "loss": 0.0145, "step": 45760 }, { "epoch": 3.6, "learning_rate": 1.0080138277812696e-05, "loss": 0.0136, "step": 45780 }, { "epoch": 3.6, "learning_rate": 1.004085480829667e-05, "loss": 0.0145, "step": 45800 }, { "epoch": 3.6, "learning_rate": 1.0001571338780641e-05, "loss": 0.0057, "step": 45820 }, { "epoch": 3.6, "learning_rate": 9.962287869264613e-06, "loss": 0.0145, "step": 45840 }, { "epoch": 3.6, "learning_rate": 9.923004399748587e-06, "loss": 0.0266, "step": 45860 }, { "epoch": 3.6, "learning_rate": 9.883720930232558e-06, "loss": 0.0182, "step": 45880 }, { "epoch": 3.61, "learning_rate": 9.84443746071653e-06, "loss": 0.0143, "step": 45900 }, { "epoch": 3.61, "learning_rate": 9.805153991200503e-06, "loss": 0.0136, "step": 45920 }, { "epoch": 3.61, "learning_rate": 9.765870521684476e-06, "loss": 0.0067, "step": 45940 }, { "epoch": 3.61, "learning_rate": 9.726587052168448e-06, "loss": 0.0128, "step": 45960 }, { "epoch": 3.61, "learning_rate": 9.68730358265242e-06, "loss": 0.0086, "step": 45980 }, { "epoch": 3.61, "learning_rate": 9.648020113136393e-06, "loss": 0.0176, "step": 46000 }, { "epoch": 3.62, "learning_rate": 9.608736643620365e-06, "loss": 0.0112, "step": 46020 }, { "epoch": 3.62, "learning_rate": 9.569453174104338e-06, "loss": 0.0125, "step": 46040 }, { "epoch": 3.62, "learning_rate": 9.53016970458831e-06, "loss": 0.0136, "step": 46060 }, { "epoch": 3.62, "learning_rate": 9.490886235072283e-06, "loss": 0.0117, "step": 46080 }, { "epoch": 3.62, "learning_rate": 9.451602765556253e-06, "loss": 0.0165, "step": 46100 }, { "epoch": 3.62, "learning_rate": 9.412319296040227e-06, "loss": 0.0123, "step": 46120 }, { "epoch": 3.63, "learning_rate": 9.373035826524198e-06, "loss": 0.0085, "step": 46140 }, { "epoch": 3.63, "learning_rate": 9.33375235700817e-06, "loss": 0.0088, "step": 46160 }, { "epoch": 3.63, "learning_rate": 9.294468887492145e-06, "loss": 0.0137, "step": 46180 }, { "epoch": 3.63, "learning_rate": 9.255185417976116e-06, "loss": 0.0037, "step": 46200 }, { "epoch": 3.63, "learning_rate": 9.215901948460088e-06, "loss": 0.0067, "step": 46220 }, { "epoch": 3.63, "learning_rate": 9.17661847894406e-06, "loss": 0.008, "step": 46240 }, { "epoch": 3.63, "learning_rate": 9.137335009428033e-06, "loss": 0.0218, "step": 46260 }, { "epoch": 3.64, "learning_rate": 9.098051539912005e-06, "loss": 0.0084, "step": 46280 }, { "epoch": 3.64, "learning_rate": 9.058768070395978e-06, "loss": 0.0242, "step": 46300 }, { "epoch": 3.64, "learning_rate": 9.01948460087995e-06, "loss": 0.0096, "step": 46320 }, { "epoch": 3.64, "learning_rate": 8.980201131363923e-06, "loss": 0.0219, "step": 46340 }, { "epoch": 3.64, "learning_rate": 8.940917661847895e-06, "loss": 0.0102, "step": 46360 }, { "epoch": 3.64, "learning_rate": 8.901634192331867e-06, "loss": 0.0093, "step": 46380 }, { "epoch": 3.65, "learning_rate": 8.862350722815838e-06, "loss": 0.0049, "step": 46400 }, { "epoch": 3.65, "learning_rate": 8.823067253299812e-06, "loss": 0.0317, "step": 46420 }, { "epoch": 3.65, "learning_rate": 8.783783783783785e-06, "loss": 0.0063, "step": 46440 }, { "epoch": 3.65, "learning_rate": 8.744500314267756e-06, "loss": 0.0113, "step": 46460 }, { "epoch": 3.65, "learning_rate": 8.70521684475173e-06, "loss": 0.0072, "step": 46480 }, { "epoch": 3.65, "learning_rate": 8.665933375235702e-06, "loss": 0.0134, "step": 46500 }, { "epoch": 3.65, "learning_rate": 8.626649905719673e-06, "loss": 0.004, "step": 46520 }, { "epoch": 3.66, "learning_rate": 8.587366436203647e-06, "loss": 0.011, "step": 46540 }, { "epoch": 3.66, "learning_rate": 8.548082966687618e-06, "loss": 0.0106, "step": 46560 }, { "epoch": 3.66, "learning_rate": 8.50879949717159e-06, "loss": 0.0309, "step": 46580 }, { "epoch": 3.66, "learning_rate": 8.469516027655564e-06, "loss": 0.0087, "step": 46600 }, { "epoch": 3.66, "learning_rate": 8.430232558139535e-06, "loss": 0.0063, "step": 46620 }, { "epoch": 3.66, "learning_rate": 8.390949088623507e-06, "loss": 0.0084, "step": 46640 }, { "epoch": 3.67, "learning_rate": 8.35166561910748e-06, "loss": 0.0104, "step": 46660 }, { "epoch": 3.67, "learning_rate": 8.312382149591452e-06, "loss": 0.0119, "step": 46680 }, { "epoch": 3.67, "learning_rate": 8.273098680075425e-06, "loss": 0.0128, "step": 46700 }, { "epoch": 3.67, "learning_rate": 8.233815210559397e-06, "loss": 0.0024, "step": 46720 }, { "epoch": 3.67, "learning_rate": 8.19453174104337e-06, "loss": 0.0198, "step": 46740 }, { "epoch": 3.67, "learning_rate": 8.155248271527342e-06, "loss": 0.0114, "step": 46760 }, { "epoch": 3.68, "learning_rate": 8.115964802011315e-06, "loss": 0.0087, "step": 46780 }, { "epoch": 3.68, "learning_rate": 8.076681332495287e-06, "loss": 0.0155, "step": 46800 }, { "epoch": 3.68, "learning_rate": 8.037397862979258e-06, "loss": 0.0132, "step": 46820 }, { "epoch": 3.68, "learning_rate": 7.998114393463232e-06, "loss": 0.0174, "step": 46840 }, { "epoch": 3.68, "learning_rate": 7.958830923947204e-06, "loss": 0.0096, "step": 46860 }, { "epoch": 3.68, "learning_rate": 7.919547454431175e-06, "loss": 0.0126, "step": 46880 }, { "epoch": 3.68, "learning_rate": 7.880263984915147e-06, "loss": 0.0172, "step": 46900 }, { "epoch": 3.69, "learning_rate": 7.84098051539912e-06, "loss": 0.0143, "step": 46920 }, { "epoch": 3.69, "learning_rate": 7.801697045883092e-06, "loss": 0.0084, "step": 46940 }, { "epoch": 3.69, "learning_rate": 7.762413576367065e-06, "loss": 0.0172, "step": 46960 }, { "epoch": 3.69, "learning_rate": 7.723130106851037e-06, "loss": 0.0119, "step": 46980 }, { "epoch": 3.69, "learning_rate": 7.68384663733501e-06, "loss": 0.0207, "step": 47000 }, { "epoch": 3.69, "learning_rate": 7.644563167818982e-06, "loss": 0.0093, "step": 47020 }, { "epoch": 3.7, "learning_rate": 7.6052796983029546e-06, "loss": 0.0068, "step": 47040 }, { "epoch": 3.7, "learning_rate": 7.565996228786927e-06, "loss": 0.009, "step": 47060 }, { "epoch": 3.7, "learning_rate": 7.526712759270899e-06, "loss": 0.0114, "step": 47080 }, { "epoch": 3.7, "learning_rate": 7.487429289754872e-06, "loss": 0.0122, "step": 47100 }, { "epoch": 3.7, "learning_rate": 7.448145820238844e-06, "loss": 0.0091, "step": 47120 }, { "epoch": 3.7, "learning_rate": 7.408862350722816e-06, "loss": 0.0166, "step": 47140 }, { "epoch": 3.71, "learning_rate": 7.369578881206788e-06, "loss": 0.0097, "step": 47160 }, { "epoch": 3.71, "learning_rate": 7.330295411690762e-06, "loss": 0.0083, "step": 47180 }, { "epoch": 3.71, "learning_rate": 7.291011942174733e-06, "loss": 0.0124, "step": 47200 }, { "epoch": 3.71, "learning_rate": 7.251728472658706e-06, "loss": 0.0091, "step": 47220 }, { "epoch": 3.71, "learning_rate": 7.212445003142677e-06, "loss": 0.0099, "step": 47240 }, { "epoch": 3.71, "learning_rate": 7.1731615336266505e-06, "loss": 0.013, "step": 47260 }, { "epoch": 3.71, "learning_rate": 7.133878064110623e-06, "loss": 0.021, "step": 47280 }, { "epoch": 3.72, "learning_rate": 7.0945945945945946e-06, "loss": 0.0036, "step": 47300 }, { "epoch": 3.72, "learning_rate": 7.055311125078568e-06, "loss": 0.0133, "step": 47320 }, { "epoch": 3.72, "learning_rate": 7.016027655562539e-06, "loss": 0.0235, "step": 47340 }, { "epoch": 3.72, "learning_rate": 6.976744186046512e-06, "loss": 0.0187, "step": 47360 }, { "epoch": 3.72, "learning_rate": 6.937460716530484e-06, "loss": 0.0121, "step": 47380 }, { "epoch": 3.72, "learning_rate": 6.898177247014456e-06, "loss": 0.0305, "step": 47400 }, { "epoch": 3.73, "learning_rate": 6.858893777498429e-06, "loss": 0.01, "step": 47420 }, { "epoch": 3.73, "learning_rate": 6.819610307982402e-06, "loss": 0.0069, "step": 47440 }, { "epoch": 3.73, "learning_rate": 6.780326838466373e-06, "loss": 0.0129, "step": 47460 }, { "epoch": 3.73, "learning_rate": 6.7410433689503465e-06, "loss": 0.0129, "step": 47480 }, { "epoch": 3.73, "learning_rate": 6.701759899434318e-06, "loss": 0.0067, "step": 47500 }, { "epoch": 3.73, "learning_rate": 6.6624764299182905e-06, "loss": 0.0057, "step": 47520 }, { "epoch": 3.74, "learning_rate": 6.623192960402264e-06, "loss": 0.0087, "step": 47540 }, { "epoch": 3.74, "learning_rate": 6.583909490886235e-06, "loss": 0.0071, "step": 47560 }, { "epoch": 3.74, "learning_rate": 6.544626021370208e-06, "loss": 0.0117, "step": 47580 }, { "epoch": 3.74, "learning_rate": 6.5053425518541795e-06, "loss": 0.0139, "step": 47600 }, { "epoch": 3.74, "learning_rate": 6.466059082338152e-06, "loss": 0.0085, "step": 47620 }, { "epoch": 3.74, "learning_rate": 6.426775612822125e-06, "loss": 0.0055, "step": 47640 }, { "epoch": 3.74, "learning_rate": 6.387492143306097e-06, "loss": 0.0113, "step": 47660 }, { "epoch": 3.75, "learning_rate": 6.348208673790069e-06, "loss": 0.0193, "step": 47680 }, { "epoch": 3.75, "learning_rate": 6.3089252042740425e-06, "loss": 0.0059, "step": 47700 }, { "epoch": 3.75, "learning_rate": 6.269641734758014e-06, "loss": 0.0136, "step": 47720 }, { "epoch": 3.75, "learning_rate": 6.2303582652419865e-06, "loss": 0.0043, "step": 47740 }, { "epoch": 3.75, "learning_rate": 6.191074795725959e-06, "loss": 0.0068, "step": 47760 }, { "epoch": 3.75, "learning_rate": 6.151791326209931e-06, "loss": 0.0232, "step": 47780 }, { "epoch": 3.76, "learning_rate": 6.112507856693903e-06, "loss": 0.0127, "step": 47800 }, { "epoch": 3.76, "learning_rate": 6.073224387177876e-06, "loss": 0.0103, "step": 47820 }, { "epoch": 3.76, "learning_rate": 6.033940917661849e-06, "loss": 0.0091, "step": 47840 }, { "epoch": 3.76, "learning_rate": 5.99465744814582e-06, "loss": 0.0109, "step": 47860 }, { "epoch": 3.76, "learning_rate": 5.955373978629793e-06, "loss": 0.0126, "step": 47880 }, { "epoch": 3.76, "learning_rate": 5.916090509113765e-06, "loss": 0.0068, "step": 47900 }, { "epoch": 3.76, "learning_rate": 5.876807039597738e-06, "loss": 0.0061, "step": 47920 }, { "epoch": 3.77, "learning_rate": 5.83752357008171e-06, "loss": 0.0253, "step": 47940 }, { "epoch": 3.77, "learning_rate": 5.7982401005656825e-06, "loss": 0.0136, "step": 47960 }, { "epoch": 3.77, "learning_rate": 5.758956631049654e-06, "loss": 0.0023, "step": 47980 }, { "epoch": 3.77, "learning_rate": 5.719673161533627e-06, "loss": 0.0093, "step": 48000 }, { "epoch": 3.77, "learning_rate": 5.680389692017599e-06, "loss": 0.012, "step": 48020 }, { "epoch": 3.77, "learning_rate": 5.641106222501571e-06, "loss": 0.0082, "step": 48040 }, { "epoch": 3.78, "learning_rate": 5.601822752985544e-06, "loss": 0.018, "step": 48060 }, { "epoch": 3.78, "learning_rate": 5.562539283469516e-06, "loss": 0.0083, "step": 48080 }, { "epoch": 3.78, "learning_rate": 5.523255813953489e-06, "loss": 0.0097, "step": 48100 }, { "epoch": 3.78, "learning_rate": 5.483972344437461e-06, "loss": 0.0224, "step": 48120 }, { "epoch": 3.78, "learning_rate": 5.444688874921433e-06, "loss": 0.0128, "step": 48140 }, { "epoch": 3.78, "learning_rate": 5.405405405405406e-06, "loss": 0.0236, "step": 48160 }, { "epoch": 3.79, "learning_rate": 5.3661219358893785e-06, "loss": 0.0043, "step": 48180 }, { "epoch": 3.79, "learning_rate": 5.32683846637335e-06, "loss": 0.0082, "step": 48200 }, { "epoch": 3.79, "learning_rate": 5.2875549968573225e-06, "loss": 0.0107, "step": 48220 }, { "epoch": 3.79, "learning_rate": 5.248271527341296e-06, "loss": 0.0076, "step": 48240 }, { "epoch": 3.79, "learning_rate": 5.208988057825267e-06, "loss": 0.0094, "step": 48260 }, { "epoch": 3.79, "learning_rate": 5.16970458830924e-06, "loss": 0.0083, "step": 48280 }, { "epoch": 3.79, "learning_rate": 5.130421118793212e-06, "loss": 0.0092, "step": 48300 }, { "epoch": 3.8, "learning_rate": 5.091137649277184e-06, "loss": 0.0147, "step": 48320 }, { "epoch": 3.8, "learning_rate": 5.051854179761157e-06, "loss": 0.008, "step": 48340 }, { "epoch": 3.8, "learning_rate": 5.012570710245129e-06, "loss": 0.0188, "step": 48360 }, { "epoch": 3.8, "learning_rate": 4.973287240729101e-06, "loss": 0.0222, "step": 48380 }, { "epoch": 3.8, "learning_rate": 4.934003771213074e-06, "loss": 0.0103, "step": 48400 }, { "epoch": 3.8, "learning_rate": 4.894720301697046e-06, "loss": 0.0047, "step": 48420 }, { "epoch": 3.81, "learning_rate": 4.8554368321810185e-06, "loss": 0.0088, "step": 48440 }, { "epoch": 3.81, "learning_rate": 4.816153362664991e-06, "loss": 0.0108, "step": 48460 }, { "epoch": 3.81, "learning_rate": 4.7768698931489625e-06, "loss": 0.0167, "step": 48480 }, { "epoch": 3.81, "learning_rate": 4.737586423632936e-06, "loss": 0.0049, "step": 48500 }, { "epoch": 3.81, "learning_rate": 4.698302954116908e-06, "loss": 0.009, "step": 48520 }, { "epoch": 3.81, "learning_rate": 4.65901948460088e-06, "loss": 0.0119, "step": 48540 }, { "epoch": 3.82, "learning_rate": 4.619736015084852e-06, "loss": 0.0137, "step": 48560 }, { "epoch": 3.82, "learning_rate": 4.5804525455688255e-06, "loss": 0.0058, "step": 48580 }, { "epoch": 3.82, "learning_rate": 4.541169076052797e-06, "loss": 0.0164, "step": 48600 }, { "epoch": 3.82, "learning_rate": 4.5018856065367696e-06, "loss": 0.0154, "step": 48620 }, { "epoch": 3.82, "learning_rate": 4.462602137020742e-06, "loss": 0.0116, "step": 48640 }, { "epoch": 3.82, "learning_rate": 4.423318667504714e-06, "loss": 0.0087, "step": 48660 }, { "epoch": 3.82, "learning_rate": 4.384035197988687e-06, "loss": 0.0097, "step": 48680 }, { "epoch": 3.83, "learning_rate": 4.344751728472659e-06, "loss": 0.0072, "step": 48700 }, { "epoch": 3.83, "learning_rate": 4.305468258956631e-06, "loss": 0.0053, "step": 48720 }, { "epoch": 3.83, "learning_rate": 4.266184789440603e-06, "loss": 0.0173, "step": 48740 }, { "epoch": 3.83, "learning_rate": 4.226901319924576e-06, "loss": 0.0114, "step": 48760 }, { "epoch": 3.83, "learning_rate": 4.187617850408548e-06, "loss": 0.0166, "step": 48780 }, { "epoch": 3.83, "learning_rate": 4.148334380892521e-06, "loss": 0.0095, "step": 48800 }, { "epoch": 3.84, "learning_rate": 4.109050911376493e-06, "loss": 0.0119, "step": 48820 }, { "epoch": 3.84, "learning_rate": 4.0697674418604655e-06, "loss": 0.0035, "step": 48840 }, { "epoch": 3.84, "learning_rate": 4.030483972344438e-06, "loss": 0.0049, "step": 48860 }, { "epoch": 3.84, "learning_rate": 3.99120050282841e-06, "loss": 0.011, "step": 48880 }, { "epoch": 3.84, "learning_rate": 3.951917033312382e-06, "loss": 0.0064, "step": 48900 }, { "epoch": 3.84, "learning_rate": 3.912633563796355e-06, "loss": 0.0113, "step": 48920 }, { "epoch": 3.85, "learning_rate": 3.873350094280327e-06, "loss": 0.0118, "step": 48940 }, { "epoch": 3.85, "learning_rate": 3.834066624764299e-06, "loss": 0.008, "step": 48960 }, { "epoch": 3.85, "learning_rate": 3.7947831552482713e-06, "loss": 0.0033, "step": 48980 }, { "epoch": 3.85, "learning_rate": 3.7554996857322438e-06, "loss": 0.0162, "step": 49000 }, { "epoch": 3.85, "learning_rate": 3.7162162162162166e-06, "loss": 0.0121, "step": 49020 }, { "epoch": 3.85, "learning_rate": 3.6769327467001887e-06, "loss": 0.0149, "step": 49040 }, { "epoch": 3.85, "learning_rate": 3.637649277184161e-06, "loss": 0.0051, "step": 49060 }, { "epoch": 3.86, "learning_rate": 3.598365807668133e-06, "loss": 0.005, "step": 49080 }, { "epoch": 3.86, "learning_rate": 3.559082338152106e-06, "loss": 0.0135, "step": 49100 }, { "epoch": 3.86, "learning_rate": 3.519798868636078e-06, "loss": 0.017, "step": 49120 }, { "epoch": 3.86, "learning_rate": 3.4805153991200504e-06, "loss": 0.011, "step": 49140 }, { "epoch": 3.86, "learning_rate": 3.4412319296040224e-06, "loss": 0.0157, "step": 49160 }, { "epoch": 3.86, "learning_rate": 3.4019484600879953e-06, "loss": 0.0058, "step": 49180 }, { "epoch": 3.87, "learning_rate": 3.3626649905719677e-06, "loss": 0.0079, "step": 49200 }, { "epoch": 3.87, "learning_rate": 3.3233815210559398e-06, "loss": 0.0126, "step": 49220 }, { "epoch": 3.87, "learning_rate": 3.2840980515399118e-06, "loss": 0.0046, "step": 49240 }, { "epoch": 3.87, "learning_rate": 3.2448145820238846e-06, "loss": 0.0101, "step": 49260 }, { "epoch": 3.87, "learning_rate": 3.205531112507857e-06, "loss": 0.0103, "step": 49280 }, { "epoch": 3.87, "learning_rate": 3.166247642991829e-06, "loss": 0.0145, "step": 49300 }, { "epoch": 3.87, "learning_rate": 3.1269641734758015e-06, "loss": 0.0081, "step": 49320 }, { "epoch": 3.88, "learning_rate": 3.087680703959774e-06, "loss": 0.0037, "step": 49340 }, { "epoch": 3.88, "learning_rate": 3.048397234443746e-06, "loss": 0.0059, "step": 49360 }, { "epoch": 3.88, "learning_rate": 3.0091137649277184e-06, "loss": 0.0088, "step": 49380 }, { "epoch": 3.88, "learning_rate": 2.969830295411691e-06, "loss": 0.0235, "step": 49400 }, { "epoch": 3.88, "learning_rate": 2.9305468258956633e-06, "loss": 0.0325, "step": 49420 }, { "epoch": 3.88, "learning_rate": 2.8912633563796353e-06, "loss": 0.0139, "step": 49440 }, { "epoch": 3.89, "learning_rate": 2.851979886863608e-06, "loss": 0.0186, "step": 49460 }, { "epoch": 3.89, "learning_rate": 2.81269641734758e-06, "loss": 0.0066, "step": 49480 }, { "epoch": 3.89, "learning_rate": 2.7734129478315526e-06, "loss": 0.0045, "step": 49500 }, { "epoch": 3.89, "learning_rate": 2.734129478315525e-06, "loss": 0.0172, "step": 49520 }, { "epoch": 3.89, "learning_rate": 2.6948460087994975e-06, "loss": 0.0095, "step": 49540 }, { "epoch": 3.89, "learning_rate": 2.6555625392834695e-06, "loss": 0.0085, "step": 49560 }, { "epoch": 3.9, "learning_rate": 2.616279069767442e-06, "loss": 0.0167, "step": 49580 }, { "epoch": 3.9, "learning_rate": 2.5769956002514144e-06, "loss": 0.0079, "step": 49600 }, { "epoch": 3.9, "learning_rate": 2.537712130735387e-06, "loss": 0.0103, "step": 49620 }, { "epoch": 3.9, "learning_rate": 2.498428661219359e-06, "loss": 0.0129, "step": 49640 }, { "epoch": 3.9, "learning_rate": 2.4591451917033313e-06, "loss": 0.0098, "step": 49660 }, { "epoch": 3.9, "learning_rate": 2.4198617221873037e-06, "loss": 0.0104, "step": 49680 }, { "epoch": 3.9, "learning_rate": 2.3805782526712757e-06, "loss": 0.0061, "step": 49700 }, { "epoch": 3.91, "learning_rate": 2.3412947831552486e-06, "loss": 0.0122, "step": 49720 }, { "epoch": 3.91, "learning_rate": 2.3020113136392206e-06, "loss": 0.0034, "step": 49740 }, { "epoch": 3.91, "learning_rate": 2.262727844123193e-06, "loss": 0.0076, "step": 49760 }, { "epoch": 3.91, "learning_rate": 2.2234443746071655e-06, "loss": 0.0128, "step": 49780 }, { "epoch": 3.91, "learning_rate": 2.184160905091138e-06, "loss": 0.0136, "step": 49800 }, { "epoch": 3.91, "learning_rate": 2.14487743557511e-06, "loss": 0.0038, "step": 49820 }, { "epoch": 3.92, "learning_rate": 2.1055939660590824e-06, "loss": 0.0106, "step": 49840 }, { "epoch": 3.92, "learning_rate": 2.066310496543055e-06, "loss": 0.0055, "step": 49860 }, { "epoch": 3.92, "learning_rate": 2.0270270270270273e-06, "loss": 0.0109, "step": 49880 }, { "epoch": 3.92, "learning_rate": 1.9877435575109993e-06, "loss": 0.0132, "step": 49900 }, { "epoch": 3.92, "learning_rate": 1.948460087994972e-06, "loss": 0.0174, "step": 49920 }, { "epoch": 3.92, "learning_rate": 1.909176618478944e-06, "loss": 0.0124, "step": 49940 }, { "epoch": 3.93, "learning_rate": 1.8698931489629166e-06, "loss": 0.0046, "step": 49960 }, { "epoch": 3.93, "learning_rate": 1.8306096794468888e-06, "loss": 0.0045, "step": 49980 }, { "epoch": 3.93, "learning_rate": 1.7913262099308613e-06, "loss": 0.0092, "step": 50000 }, { "epoch": 3.93, "eval_loss": 0.349536269903183, "eval_matthews_correlation": 0.4389188148304588, "eval_runtime": 1176.0932, "eval_samples_per_second": 96.961, "eval_steps_per_second": 12.121, "step": 50000 }, { "epoch": 3.93, "learning_rate": 1.7520427404148335e-06, "loss": 0.0049, "step": 50020 }, { "epoch": 3.93, "learning_rate": 1.7127592708988057e-06, "loss": 0.0094, "step": 50040 }, { "epoch": 3.93, "learning_rate": 1.6734758013827781e-06, "loss": 0.0208, "step": 50060 }, { "epoch": 3.93, "learning_rate": 1.6341923318667504e-06, "loss": 0.0174, "step": 50080 }, { "epoch": 3.94, "learning_rate": 1.594908862350723e-06, "loss": 0.0163, "step": 50100 }, { "epoch": 3.94, "learning_rate": 1.5556253928346953e-06, "loss": 0.0065, "step": 50120 }, { "epoch": 3.94, "learning_rate": 1.5163419233186677e-06, "loss": 0.0089, "step": 50140 }, { "epoch": 3.94, "learning_rate": 1.47705845380264e-06, "loss": 0.0036, "step": 50160 }, { "epoch": 3.94, "learning_rate": 1.4377749842866121e-06, "loss": 0.0091, "step": 50180 }, { "epoch": 3.94, "learning_rate": 1.3984915147705846e-06, "loss": 0.0246, "step": 50200 }, { "epoch": 3.95, "learning_rate": 1.3592080452545568e-06, "loss": 0.0121, "step": 50220 }, { "epoch": 3.95, "learning_rate": 1.3199245757385292e-06, "loss": 0.0055, "step": 50240 }, { "epoch": 3.95, "learning_rate": 1.2806411062225017e-06, "loss": 0.011, "step": 50260 }, { "epoch": 3.95, "learning_rate": 1.241357636706474e-06, "loss": 0.0156, "step": 50280 }, { "epoch": 3.95, "learning_rate": 1.2020741671904464e-06, "loss": 0.0125, "step": 50300 }, { "epoch": 3.95, "learning_rate": 1.1627906976744186e-06, "loss": 0.0104, "step": 50320 }, { "epoch": 3.96, "learning_rate": 1.123507228158391e-06, "loss": 0.0223, "step": 50340 }, { "epoch": 3.96, "learning_rate": 1.0842237586423635e-06, "loss": 0.0088, "step": 50360 }, { "epoch": 3.96, "learning_rate": 1.0449402891263357e-06, "loss": 0.0096, "step": 50380 }, { "epoch": 3.96, "learning_rate": 1.0056568196103081e-06, "loss": 0.0047, "step": 50400 }, { "epoch": 3.96, "learning_rate": 9.663733500942803e-07, "loss": 0.01, "step": 50420 }, { "epoch": 3.96, "learning_rate": 9.270898805782528e-07, "loss": 0.0153, "step": 50440 }, { "epoch": 3.96, "learning_rate": 8.878064110622251e-07, "loss": 0.0157, "step": 50460 }, { "epoch": 3.97, "learning_rate": 8.485229415461974e-07, "loss": 0.0138, "step": 50480 }, { "epoch": 3.97, "learning_rate": 8.092394720301698e-07, "loss": 0.0132, "step": 50500 }, { "epoch": 3.97, "learning_rate": 7.699560025141421e-07, "loss": 0.0208, "step": 50520 }, { "epoch": 3.97, "learning_rate": 7.306725329981144e-07, "loss": 0.0126, "step": 50540 }, { "epoch": 3.97, "learning_rate": 6.913890634820868e-07, "loss": 0.0113, "step": 50560 }, { "epoch": 3.97, "learning_rate": 6.521055939660591e-07, "loss": 0.0098, "step": 50580 }, { "epoch": 3.98, "learning_rate": 6.128221244500314e-07, "loss": 0.006, "step": 50600 }, { "epoch": 3.98, "learning_rate": 5.735386549340038e-07, "loss": 0.0137, "step": 50620 }, { "epoch": 3.98, "learning_rate": 5.342551854179761e-07, "loss": 0.0195, "step": 50640 }, { "epoch": 3.98, "learning_rate": 4.949717159019484e-07, "loss": 0.0134, "step": 50660 }, { "epoch": 3.98, "learning_rate": 4.556882463859208e-07, "loss": 0.0074, "step": 50680 }, { "epoch": 3.98, "learning_rate": 4.1640477686989316e-07, "loss": 0.0123, "step": 50700 }, { "epoch": 3.98, "learning_rate": 3.7712130735386554e-07, "loss": 0.0171, "step": 50720 }, { "epoch": 3.99, "learning_rate": 3.378378378378379e-07, "loss": 0.013, "step": 50740 }, { "epoch": 3.99, "learning_rate": 2.9855436832181016e-07, "loss": 0.0079, "step": 50760 }, { "epoch": 3.99, "learning_rate": 2.5927089880578254e-07, "loss": 0.0149, "step": 50780 }, { "epoch": 3.99, "learning_rate": 2.1998742928975487e-07, "loss": 0.007, "step": 50800 }, { "epoch": 3.99, "learning_rate": 1.807039597737272e-07, "loss": 0.011, "step": 50820 }, { "epoch": 3.99, "learning_rate": 1.4142049025769957e-07, "loss": 0.0083, "step": 50840 }, { "epoch": 4.0, "learning_rate": 1.021370207416719e-07, "loss": 0.0076, "step": 50860 }, { "epoch": 4.0, "learning_rate": 6.285355122564426e-08, "loss": 0.0168, "step": 50880 }, { "epoch": 4.0, "learning_rate": 2.3570081709616597e-08, "loss": 0.0154, "step": 50900 }, { "epoch": 4.0, "step": 50912, "total_flos": 2.524899799855159e+20, "train_loss": 0.0028621005677307255, "train_runtime": 12160.5208, "train_samples_per_second": 267.939, "train_steps_per_second": 4.187 } ], "logging_steps": 20, "max_steps": 50912, "num_train_epochs": 4, "save_steps": 10000, "total_flos": 2.524899799855159e+20, "trial_name": null, "trial_params": null }