diff --git "a/robin-7b/trainer_state.json" "b/robin-7b/trainer_state.json" deleted file mode 100644--- "a/robin-7b/trainer_state.json" +++ /dev/null @@ -1,13375 +0,0 @@ -{ - "best_metric": null, - "best_model_checkpoint": null, - "epoch": 5.0, - "global_step": 44500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.0, - "learning_rate": 1.4981273408239701e-06, - "loss": 1.6359, - "step": 20 - }, - { - "epoch": 0.0, - "learning_rate": 2.9962546816479402e-06, - "loss": 1.6528, - "step": 40 - }, - { - "epoch": 0.01, - "learning_rate": 4.49438202247191e-06, - "loss": 1.6445, - "step": 60 - }, - { - "epoch": 0.01, - "learning_rate": 5.9925093632958805e-06, - "loss": 1.6479, - "step": 80 - }, - { - "epoch": 0.01, - "learning_rate": 7.490636704119851e-06, - "loss": 1.608, - "step": 100 - }, - { - "epoch": 0.01, - "learning_rate": 8.98876404494382e-06, - "loss": 1.6177, - "step": 120 - }, - { - "epoch": 0.02, - "learning_rate": 1.0486891385767791e-05, - "loss": 1.5548, - "step": 140 - }, - { - "epoch": 0.02, - "learning_rate": 1.1985018726591761e-05, - "loss": 1.527, - "step": 160 - }, - { - "epoch": 0.02, - "learning_rate": 1.348314606741573e-05, - "loss": 1.4841, - "step": 180 - }, - { - "epoch": 0.02, - "learning_rate": 1.4981273408239702e-05, - "loss": 1.4651, - "step": 200 - }, - { - "epoch": 0.02, - "learning_rate": 1.647940074906367e-05, - "loss": 1.4259, - "step": 220 - }, - { - "epoch": 0.03, - "learning_rate": 1.797752808988764e-05, - "loss": 1.4087, - "step": 240 - }, - { - "epoch": 0.03, - "learning_rate": 1.9475655430711613e-05, - "loss": 1.4239, - "step": 260 - }, - { - "epoch": 0.03, - "learning_rate": 2.0973782771535582e-05, - "loss": 1.4119, - "step": 280 - }, - { - "epoch": 0.03, - "learning_rate": 2.2471910112359552e-05, - "loss": 1.3844, - "step": 300 - }, - { - "epoch": 0.04, - "learning_rate": 2.3970037453183522e-05, - "loss": 1.3582, - "step": 320 - }, - { - "epoch": 0.04, - "learning_rate": 2.546816479400749e-05, - "loss": 1.3717, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 2.696629213483146e-05, - "loss": 1.3852, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 2.846441947565543e-05, - "loss": 1.3896, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 2.9962546816479404e-05, - "loss": 1.3512, - "step": 400 - }, - { - "epoch": 0.05, - "learning_rate": 3.1460674157303374e-05, - "loss": 1.3857, - "step": 420 - }, - { - "epoch": 0.05, - "learning_rate": 3.295880149812734e-05, - "loss": 1.3396, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 3.445692883895131e-05, - "loss": 1.3324, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 3.595505617977528e-05, - "loss": 1.3522, - "step": 480 - }, - { - "epoch": 0.06, - "learning_rate": 3.745318352059925e-05, - "loss": 1.3429, - "step": 500 - }, - { - "epoch": 0.06, - "learning_rate": 3.8951310861423226e-05, - "loss": 1.3368, - "step": 520 - }, - { - "epoch": 0.06, - "learning_rate": 4.044943820224719e-05, - "loss": 1.3165, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 4.1947565543071165e-05, - "loss": 1.3356, - "step": 560 - }, - { - "epoch": 0.07, - "learning_rate": 4.344569288389513e-05, - "loss": 1.2966, - "step": 580 - }, - { - "epoch": 0.07, - "learning_rate": 4.4943820224719104e-05, - "loss": 1.3255, - "step": 600 - }, - { - "epoch": 0.07, - "learning_rate": 4.644194756554308e-05, - "loss": 1.2908, - "step": 620 - }, - { - "epoch": 0.07, - "learning_rate": 4.7940074906367044e-05, - "loss": 1.2983, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 4.943820224719101e-05, - "loss": 1.2979, - "step": 660 - }, - { - "epoch": 0.08, - "learning_rate": 5.093632958801498e-05, - "loss": 1.3171, - "step": 680 - }, - { - "epoch": 0.08, - "learning_rate": 5.243445692883895e-05, - "loss": 1.3026, - "step": 700 - }, - { - "epoch": 0.08, - "learning_rate": 5.393258426966292e-05, - "loss": 1.3105, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 5.5430711610486895e-05, - "loss": 1.3146, - "step": 740 - }, - { - "epoch": 0.09, - "learning_rate": 5.692883895131086e-05, - "loss": 1.2834, - "step": 760 - }, - { - "epoch": 0.09, - "learning_rate": 5.8426966292134835e-05, - "loss": 1.2993, - "step": 780 - }, - { - "epoch": 0.09, - "learning_rate": 5.992509363295881e-05, - "loss": 1.287, - "step": 800 - }, - { - "epoch": 0.09, - "learning_rate": 6.142322097378277e-05, - "loss": 1.293, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 6.292134831460675e-05, - "loss": 1.2556, - "step": 840 - }, - { - "epoch": 0.1, - "learning_rate": 6.441947565543071e-05, - "loss": 1.2979, - "step": 860 - }, - { - "epoch": 0.1, - "learning_rate": 6.591760299625468e-05, - "loss": 1.3002, - "step": 880 - }, - { - "epoch": 0.1, - "learning_rate": 6.741573033707866e-05, - "loss": 1.2408, - "step": 900 - }, - { - "epoch": 0.1, - "learning_rate": 6.891385767790263e-05, - "loss": 1.2753, - "step": 920 - }, - { - "epoch": 0.11, - "learning_rate": 7.04119850187266e-05, - "loss": 1.2671, - "step": 940 - }, - { - "epoch": 0.11, - "learning_rate": 7.191011235955056e-05, - "loss": 1.2925, - "step": 960 - }, - { - "epoch": 0.11, - "learning_rate": 7.340823970037454e-05, - "loss": 1.2709, - "step": 980 - }, - { - "epoch": 0.11, - "learning_rate": 7.49063670411985e-05, - "loss": 1.2614, - "step": 1000 - }, - { - "epoch": 0.11, - "learning_rate": 7.640449438202247e-05, - "loss": 1.2979, - "step": 1020 - }, - { - "epoch": 0.12, - "learning_rate": 7.790262172284645e-05, - "loss": 1.2534, - "step": 1040 - }, - { - "epoch": 0.12, - "learning_rate": 7.940074906367042e-05, - "loss": 1.2425, - "step": 1060 - }, - { - "epoch": 0.12, - "learning_rate": 8.089887640449438e-05, - "loss": 1.2671, - "step": 1080 - }, - { - "epoch": 0.12, - "learning_rate": 8.239700374531836e-05, - "loss": 1.247, - "step": 1100 - }, - { - "epoch": 0.13, - "learning_rate": 8.389513108614233e-05, - "loss": 1.2785, - "step": 1120 - }, - { - "epoch": 0.13, - "learning_rate": 8.53932584269663e-05, - "loss": 1.2611, - "step": 1140 - }, - { - "epoch": 0.13, - "learning_rate": 8.689138576779026e-05, - "loss": 1.238, - "step": 1160 - }, - { - "epoch": 0.13, - "learning_rate": 8.838951310861424e-05, - "loss": 1.2352, - "step": 1180 - }, - { - "epoch": 0.13, - "learning_rate": 8.988764044943821e-05, - "loss": 1.2389, - "step": 1200 - }, - { - "epoch": 0.14, - "learning_rate": 9.138576779026217e-05, - "loss": 1.2519, - "step": 1220 - }, - { - "epoch": 0.14, - "learning_rate": 9.288389513108615e-05, - "loss": 1.2466, - "step": 1240 - }, - { - "epoch": 0.14, - "learning_rate": 9.438202247191012e-05, - "loss": 1.2414, - "step": 1260 - }, - { - "epoch": 0.14, - "learning_rate": 9.588014981273409e-05, - "loss": 1.2565, - "step": 1280 - }, - { - "epoch": 0.15, - "learning_rate": 9.737827715355807e-05, - "loss": 1.2661, - "step": 1300 - }, - { - "epoch": 0.15, - "learning_rate": 9.887640449438202e-05, - "loss": 1.2708, - "step": 1320 - }, - { - "epoch": 0.15, - "learning_rate": 9.999999668932716e-05, - "loss": 1.2292, - "step": 1340 - }, - { - "epoch": 0.15, - "learning_rate": 9.999991723320065e-05, - "loss": 1.2506, - "step": 1360 - }, - { - "epoch": 0.16, - "learning_rate": 9.999973183573581e-05, - "loss": 1.2434, - "step": 1380 - }, - { - "epoch": 0.16, - "learning_rate": 9.999944049732545e-05, - "loss": 1.2425, - "step": 1400 - }, - { - "epoch": 0.16, - "learning_rate": 9.99990432185869e-05, - "loss": 1.2279, - "step": 1420 - }, - { - "epoch": 0.16, - "learning_rate": 9.999854000036192e-05, - "loss": 1.2039, - "step": 1440 - }, - { - "epoch": 0.16, - "learning_rate": 9.999793084371672e-05, - "loss": 1.2298, - "step": 1460 - }, - { - "epoch": 0.17, - "learning_rate": 9.999721574994201e-05, - "loss": 1.2467, - "step": 1480 - }, - { - "epoch": 0.17, - "learning_rate": 9.999639472055294e-05, - "loss": 1.2039, - "step": 1500 - }, - { - "epoch": 0.17, - "learning_rate": 9.999546775728917e-05, - "loss": 1.2668, - "step": 1520 - }, - { - "epoch": 0.17, - "learning_rate": 9.999443486211473e-05, - "loss": 1.2045, - "step": 1540 - }, - { - "epoch": 0.18, - "learning_rate": 9.99932960372182e-05, - "loss": 1.2156, - "step": 1560 - }, - { - "epoch": 0.18, - "learning_rate": 9.99920512850125e-05, - "loss": 1.2157, - "step": 1580 - }, - { - "epoch": 0.18, - "learning_rate": 9.999070060813509e-05, - "loss": 1.2027, - "step": 1600 - }, - { - "epoch": 0.18, - "learning_rate": 9.99892440094478e-05, - "loss": 1.222, - "step": 1620 - }, - { - "epoch": 0.18, - "learning_rate": 9.998768149203695e-05, - "loss": 1.2139, - "step": 1640 - }, - { - "epoch": 0.19, - "learning_rate": 9.998601305921322e-05, - "loss": 1.2042, - "step": 1660 - }, - { - "epoch": 0.19, - "learning_rate": 9.998423871451174e-05, - "loss": 1.2379, - "step": 1680 - }, - { - "epoch": 0.19, - "learning_rate": 9.998235846169204e-05, - "loss": 1.1764, - "step": 1700 - }, - { - "epoch": 0.19, - "learning_rate": 9.998037230473809e-05, - "loss": 1.2254, - "step": 1720 - }, - { - "epoch": 0.2, - "learning_rate": 9.997828024785817e-05, - "loss": 1.1891, - "step": 1740 - }, - { - "epoch": 0.2, - "learning_rate": 9.997608229548504e-05, - "loss": 1.1889, - "step": 1760 - }, - { - "epoch": 0.2, - "learning_rate": 9.997377845227576e-05, - "loss": 1.2035, - "step": 1780 - }, - { - "epoch": 0.2, - "learning_rate": 9.997136872311177e-05, - "loss": 1.2186, - "step": 1800 - }, - { - "epoch": 0.2, - "learning_rate": 9.996885311309891e-05, - "loss": 1.1837, - "step": 1820 - }, - { - "epoch": 0.21, - "learning_rate": 9.996623162756733e-05, - "loss": 1.183, - "step": 1840 - }, - { - "epoch": 0.21, - "learning_rate": 9.996350427207148e-05, - "loss": 1.1707, - "step": 1860 - }, - { - "epoch": 0.21, - "learning_rate": 9.99606710523902e-05, - "loss": 1.2172, - "step": 1880 - }, - { - "epoch": 0.21, - "learning_rate": 9.995773197452657e-05, - "loss": 1.1877, - "step": 1900 - }, - { - "epoch": 0.22, - "learning_rate": 9.995468704470802e-05, - "loss": 1.2208, - "step": 1920 - }, - { - "epoch": 0.22, - "learning_rate": 9.995153626938623e-05, - "loss": 1.1727, - "step": 1940 - }, - { - "epoch": 0.22, - "learning_rate": 9.994827965523716e-05, - "loss": 1.2127, - "step": 1960 - }, - { - "epoch": 0.22, - "learning_rate": 9.994491720916102e-05, - "loss": 1.1912, - "step": 1980 - }, - { - "epoch": 0.22, - "learning_rate": 9.994144893828226e-05, - "loss": 1.1852, - "step": 2000 - }, - { - "epoch": 0.23, - "learning_rate": 9.993787484994957e-05, - "loss": 1.1689, - "step": 2020 - }, - { - "epoch": 0.23, - "learning_rate": 9.993419495173582e-05, - "loss": 1.19, - "step": 2040 - }, - { - "epoch": 0.23, - "learning_rate": 9.99304092514381e-05, - "loss": 1.1833, - "step": 2060 - }, - { - "epoch": 0.23, - "learning_rate": 9.992651775707768e-05, - "loss": 1.1944, - "step": 2080 - }, - { - "epoch": 0.24, - "learning_rate": 9.992252047689997e-05, - "loss": 1.1592, - "step": 2100 - }, - { - "epoch": 0.24, - "learning_rate": 9.991841741937448e-05, - "loss": 1.1744, - "step": 2120 - }, - { - "epoch": 0.24, - "learning_rate": 9.991420859319496e-05, - "loss": 1.1884, - "step": 2140 - }, - { - "epoch": 0.24, - "learning_rate": 9.990989400727916e-05, - "loss": 1.1372, - "step": 2160 - }, - { - "epoch": 0.24, - "learning_rate": 9.990547367076896e-05, - "loss": 1.1767, - "step": 2180 - }, - { - "epoch": 0.25, - "learning_rate": 9.990094759303033e-05, - "loss": 1.1837, - "step": 2200 - }, - { - "epoch": 0.25, - "learning_rate": 9.989631578365322e-05, - "loss": 1.1564, - "step": 2220 - }, - { - "epoch": 0.25, - "learning_rate": 9.989157825245167e-05, - "loss": 1.1807, - "step": 2240 - }, - { - "epoch": 0.25, - "learning_rate": 9.98867350094637e-05, - "loss": 1.1898, - "step": 2260 - }, - { - "epoch": 0.26, - "learning_rate": 9.988178606495132e-05, - "loss": 1.2028, - "step": 2280 - }, - { - "epoch": 0.26, - "learning_rate": 9.98767314294005e-05, - "loss": 1.1629, - "step": 2300 - }, - { - "epoch": 0.26, - "learning_rate": 9.987157111352117e-05, - "loss": 1.1963, - "step": 2320 - }, - { - "epoch": 0.26, - "learning_rate": 9.986630512824715e-05, - "loss": 1.1642, - "step": 2340 - }, - { - "epoch": 0.27, - "learning_rate": 9.986093348473617e-05, - "loss": 1.1624, - "step": 2360 - }, - { - "epoch": 0.27, - "learning_rate": 9.985545619436984e-05, - "loss": 1.1595, - "step": 2380 - }, - { - "epoch": 0.27, - "learning_rate": 9.984987326875359e-05, - "loss": 1.1985, - "step": 2400 - }, - { - "epoch": 0.27, - "learning_rate": 9.984418471971671e-05, - "loss": 1.1912, - "step": 2420 - }, - { - "epoch": 0.27, - "learning_rate": 9.983839055931226e-05, - "loss": 1.2146, - "step": 2440 - }, - { - "epoch": 0.28, - "learning_rate": 9.983249079981709e-05, - "loss": 1.183, - "step": 2460 - }, - { - "epoch": 0.28, - "learning_rate": 9.982648545373177e-05, - "loss": 1.1561, - "step": 2480 - }, - { - "epoch": 0.28, - "learning_rate": 9.982037453378063e-05, - "loss": 1.1517, - "step": 2500 - }, - { - "epoch": 0.28, - "learning_rate": 9.981415805291168e-05, - "loss": 1.1485, - "step": 2520 - }, - { - "epoch": 0.29, - "learning_rate": 9.980783602429656e-05, - "loss": 1.1721, - "step": 2540 - }, - { - "epoch": 0.29, - "learning_rate": 9.98014084613306e-05, - "loss": 1.1825, - "step": 2560 - }, - { - "epoch": 0.29, - "learning_rate": 9.979487537763269e-05, - "loss": 1.1338, - "step": 2580 - }, - { - "epoch": 0.29, - "learning_rate": 9.978823678704533e-05, - "loss": 1.193, - "step": 2600 - }, - { - "epoch": 0.29, - "learning_rate": 9.978149270363462e-05, - "loss": 1.1809, - "step": 2620 - }, - { - "epoch": 0.3, - "learning_rate": 9.977464314169005e-05, - "loss": 1.1425, - "step": 2640 - }, - { - "epoch": 0.3, - "learning_rate": 9.976768811572473e-05, - "loss": 1.1428, - "step": 2660 - }, - { - "epoch": 0.3, - "learning_rate": 9.976062764047515e-05, - "loss": 1.1511, - "step": 2680 - }, - { - "epoch": 0.3, - "learning_rate": 9.975346173090128e-05, - "loss": 1.161, - "step": 2700 - }, - { - "epoch": 0.31, - "learning_rate": 9.974619040218644e-05, - "loss": 1.168, - "step": 2720 - }, - { - "epoch": 0.31, - "learning_rate": 9.973881366973738e-05, - "loss": 1.1474, - "step": 2740 - }, - { - "epoch": 0.31, - "learning_rate": 9.973133154918413e-05, - "loss": 1.1605, - "step": 2760 - }, - { - "epoch": 0.31, - "learning_rate": 9.972374405638e-05, - "loss": 1.1591, - "step": 2780 - }, - { - "epoch": 0.31, - "learning_rate": 9.971605120740166e-05, - "loss": 1.1494, - "step": 2800 - }, - { - "epoch": 0.32, - "learning_rate": 9.970825301854889e-05, - "loss": 1.1596, - "step": 2820 - }, - { - "epoch": 0.32, - "learning_rate": 9.970034950634478e-05, - "loss": 1.1543, - "step": 2840 - }, - { - "epoch": 0.32, - "learning_rate": 9.96923406875355e-05, - "loss": 1.128, - "step": 2860 - }, - { - "epoch": 0.32, - "learning_rate": 9.968422657909037e-05, - "loss": 1.1567, - "step": 2880 - }, - { - "epoch": 0.33, - "learning_rate": 9.967600719820183e-05, - "loss": 1.1439, - "step": 2900 - }, - { - "epoch": 0.33, - "learning_rate": 9.966768256228536e-05, - "loss": 1.1156, - "step": 2920 - }, - { - "epoch": 0.33, - "learning_rate": 9.965925268897942e-05, - "loss": 1.1695, - "step": 2940 - }, - { - "epoch": 0.33, - "learning_rate": 9.96507175961455e-05, - "loss": 1.1286, - "step": 2960 - }, - { - "epoch": 0.33, - "learning_rate": 9.964207730186804e-05, - "loss": 1.1687, - "step": 2980 - }, - { - "epoch": 0.34, - "learning_rate": 9.963333182445429e-05, - "loss": 1.1401, - "step": 3000 - }, - { - "epoch": 0.34, - "learning_rate": 9.962448118243451e-05, - "loss": 1.1419, - "step": 3020 - }, - { - "epoch": 0.34, - "learning_rate": 9.961552539456163e-05, - "loss": 1.1224, - "step": 3040 - }, - { - "epoch": 0.34, - "learning_rate": 9.96064644798115e-05, - "loss": 1.1506, - "step": 3060 - }, - { - "epoch": 0.35, - "learning_rate": 9.959729845738264e-05, - "loss": 1.143, - "step": 3080 - }, - { - "epoch": 0.35, - "learning_rate": 9.958802734669633e-05, - "loss": 1.1684, - "step": 3100 - }, - { - "epoch": 0.35, - "learning_rate": 9.957865116739641e-05, - "loss": 1.1226, - "step": 3120 - }, - { - "epoch": 0.35, - "learning_rate": 9.956916993934947e-05, - "loss": 1.1404, - "step": 3140 - }, - { - "epoch": 0.36, - "learning_rate": 9.95595836826446e-05, - "loss": 1.1483, - "step": 3160 - }, - { - "epoch": 0.36, - "learning_rate": 9.954989241759346e-05, - "loss": 1.137, - "step": 3180 - }, - { - "epoch": 0.36, - "learning_rate": 9.954009616473019e-05, - "loss": 1.1019, - "step": 3200 - }, - { - "epoch": 0.36, - "learning_rate": 9.95301949448114e-05, - "loss": 1.1063, - "step": 3220 - }, - { - "epoch": 0.36, - "learning_rate": 9.952018877881606e-05, - "loss": 1.1487, - "step": 3240 - }, - { - "epoch": 0.37, - "learning_rate": 9.951007768794558e-05, - "loss": 1.128, - "step": 3260 - }, - { - "epoch": 0.37, - "learning_rate": 9.949986169362362e-05, - "loss": 1.1343, - "step": 3280 - }, - { - "epoch": 0.37, - "learning_rate": 9.948954081749616e-05, - "loss": 1.1342, - "step": 3300 - }, - { - "epoch": 0.37, - "learning_rate": 9.947911508143135e-05, - "loss": 1.1387, - "step": 3320 - }, - { - "epoch": 0.38, - "learning_rate": 9.946858450751958e-05, - "loss": 1.1217, - "step": 3340 - }, - { - "epoch": 0.38, - "learning_rate": 9.945794911807334e-05, - "loss": 1.1276, - "step": 3360 - }, - { - "epoch": 0.38, - "learning_rate": 9.944720893562722e-05, - "loss": 1.1567, - "step": 3380 - }, - { - "epoch": 0.38, - "learning_rate": 9.943636398293785e-05, - "loss": 1.1307, - "step": 3400 - }, - { - "epoch": 0.38, - "learning_rate": 9.942541428298384e-05, - "loss": 1.1264, - "step": 3420 - }, - { - "epoch": 0.39, - "learning_rate": 9.941435985896573e-05, - "loss": 1.1148, - "step": 3440 - }, - { - "epoch": 0.39, - "learning_rate": 9.940320073430598e-05, - "loss": 1.1417, - "step": 3460 - }, - { - "epoch": 0.39, - "learning_rate": 9.93919369326489e-05, - "loss": 1.1565, - "step": 3480 - }, - { - "epoch": 0.39, - "learning_rate": 9.938056847786053e-05, - "loss": 1.1517, - "step": 3500 - }, - { - "epoch": 0.4, - "learning_rate": 9.936909539402874e-05, - "loss": 1.1505, - "step": 3520 - }, - { - "epoch": 0.4, - "learning_rate": 9.935751770546302e-05, - "loss": 1.1276, - "step": 3540 - }, - { - "epoch": 0.4, - "learning_rate": 9.934583543669453e-05, - "loss": 1.1169, - "step": 3560 - }, - { - "epoch": 0.4, - "learning_rate": 9.933404861247603e-05, - "loss": 1.1185, - "step": 3580 - }, - { - "epoch": 0.4, - "learning_rate": 9.93221572577818e-05, - "loss": 1.1566, - "step": 3600 - }, - { - "epoch": 0.41, - "learning_rate": 9.931016139780758e-05, - "loss": 1.1288, - "step": 3620 - }, - { - "epoch": 0.41, - "learning_rate": 9.929806105797058e-05, - "loss": 1.137, - "step": 3640 - }, - { - "epoch": 0.41, - "learning_rate": 9.928585626390935e-05, - "loss": 1.1266, - "step": 3660 - }, - { - "epoch": 0.41, - "learning_rate": 9.927354704148382e-05, - "loss": 1.1436, - "step": 3680 - }, - { - "epoch": 0.42, - "learning_rate": 9.926113341677507e-05, - "loss": 1.0938, - "step": 3700 - }, - { - "epoch": 0.42, - "learning_rate": 9.924861541608553e-05, - "loss": 1.1159, - "step": 3720 - }, - { - "epoch": 0.42, - "learning_rate": 9.92359930659387e-05, - "loss": 1.0964, - "step": 3740 - }, - { - "epoch": 0.42, - "learning_rate": 9.922326639307917e-05, - "loss": 1.1329, - "step": 3760 - }, - { - "epoch": 0.42, - "learning_rate": 9.921043542447264e-05, - "loss": 1.1393, - "step": 3780 - }, - { - "epoch": 0.43, - "learning_rate": 9.919750018730571e-05, - "loss": 1.1251, - "step": 3800 - }, - { - "epoch": 0.43, - "learning_rate": 9.918446070898601e-05, - "loss": 1.1018, - "step": 3820 - }, - { - "epoch": 0.43, - "learning_rate": 9.917131701714192e-05, - "loss": 1.1376, - "step": 3840 - }, - { - "epoch": 0.43, - "learning_rate": 9.915806913962274e-05, - "loss": 1.1901, - "step": 3860 - }, - { - "epoch": 0.44, - "learning_rate": 9.914471710449845e-05, - "loss": 1.1236, - "step": 3880 - }, - { - "epoch": 0.44, - "learning_rate": 9.913126094005976e-05, - "loss": 1.1188, - "step": 3900 - }, - { - "epoch": 0.44, - "learning_rate": 9.911770067481798e-05, - "loss": 1.1358, - "step": 3920 - }, - { - "epoch": 0.44, - "learning_rate": 9.910403633750502e-05, - "loss": 1.14, - "step": 3940 - }, - { - "epoch": 0.44, - "learning_rate": 9.909026795707331e-05, - "loss": 1.1145, - "step": 3960 - }, - { - "epoch": 0.45, - "learning_rate": 9.907639556269566e-05, - "loss": 1.1162, - "step": 3980 - }, - { - "epoch": 0.45, - "learning_rate": 9.906241918376537e-05, - "loss": 1.1131, - "step": 4000 - }, - { - "epoch": 0.45, - "learning_rate": 9.904833884989602e-05, - "loss": 1.1174, - "step": 4020 - }, - { - "epoch": 0.45, - "learning_rate": 9.90341545909214e-05, - "loss": 1.1196, - "step": 4040 - }, - { - "epoch": 0.46, - "learning_rate": 9.901986643689559e-05, - "loss": 1.1053, - "step": 4060 - }, - { - "epoch": 0.46, - "learning_rate": 9.900547441809272e-05, - "loss": 1.1168, - "step": 4080 - }, - { - "epoch": 0.46, - "learning_rate": 9.899097856500707e-05, - "loss": 1.101, - "step": 4100 - }, - { - "epoch": 0.46, - "learning_rate": 9.897637890835289e-05, - "loss": 1.1039, - "step": 4120 - }, - { - "epoch": 0.47, - "learning_rate": 9.896167547906437e-05, - "loss": 1.1027, - "step": 4140 - }, - { - "epoch": 0.47, - "learning_rate": 9.894686830829558e-05, - "loss": 1.1028, - "step": 4160 - }, - { - "epoch": 0.47, - "learning_rate": 9.89319574274204e-05, - "loss": 1.0957, - "step": 4180 - }, - { - "epoch": 0.47, - "learning_rate": 9.891694286803246e-05, - "loss": 1.1397, - "step": 4200 - }, - { - "epoch": 0.47, - "learning_rate": 9.890182466194505e-05, - "loss": 1.109, - "step": 4220 - }, - { - "epoch": 0.48, - "learning_rate": 9.88866028411911e-05, - "loss": 1.1414, - "step": 4240 - }, - { - "epoch": 0.48, - "learning_rate": 9.887127743802304e-05, - "loss": 1.136, - "step": 4260 - }, - { - "epoch": 0.48, - "learning_rate": 9.885584848491285e-05, - "loss": 1.1458, - "step": 4280 - }, - { - "epoch": 0.48, - "learning_rate": 9.884031601455179e-05, - "loss": 1.1379, - "step": 4300 - }, - { - "epoch": 0.49, - "learning_rate": 9.88246800598506e-05, - "loss": 1.1083, - "step": 4320 - }, - { - "epoch": 0.49, - "learning_rate": 9.880894065393915e-05, - "loss": 1.1063, - "step": 4340 - }, - { - "epoch": 0.49, - "learning_rate": 9.879309783016663e-05, - "loss": 1.101, - "step": 4360 - }, - { - "epoch": 0.49, - "learning_rate": 9.877715162210123e-05, - "loss": 1.1023, - "step": 4380 - }, - { - "epoch": 0.49, - "learning_rate": 9.876110206353033e-05, - "loss": 1.1223, - "step": 4400 - }, - { - "epoch": 0.5, - "learning_rate": 9.874494918846017e-05, - "loss": 1.1348, - "step": 4420 - }, - { - "epoch": 0.5, - "learning_rate": 9.872869303111595e-05, - "loss": 1.0998, - "step": 4440 - }, - { - "epoch": 0.5, - "learning_rate": 9.871233362594175e-05, - "loss": 1.0983, - "step": 4460 - }, - { - "epoch": 0.5, - "learning_rate": 9.869587100760034e-05, - "loss": 1.1114, - "step": 4480 - }, - { - "epoch": 0.51, - "learning_rate": 9.86793052109732e-05, - "loss": 1.0765, - "step": 4500 - }, - { - "epoch": 0.51, - "learning_rate": 9.866263627116049e-05, - "loss": 1.089, - "step": 4520 - }, - { - "epoch": 0.51, - "learning_rate": 9.864586422348081e-05, - "loss": 1.101, - "step": 4540 - }, - { - "epoch": 0.51, - "learning_rate": 9.862898910347132e-05, - "loss": 1.1247, - "step": 4560 - }, - { - "epoch": 0.51, - "learning_rate": 9.861201094688752e-05, - "loss": 1.113, - "step": 4580 - }, - { - "epoch": 0.52, - "learning_rate": 9.859492978970325e-05, - "loss": 1.1354, - "step": 4600 - }, - { - "epoch": 0.52, - "learning_rate": 9.857774566811058e-05, - "loss": 1.095, - "step": 4620 - }, - { - "epoch": 0.52, - "learning_rate": 9.856045861851975e-05, - "loss": 1.1188, - "step": 4640 - }, - { - "epoch": 0.52, - "learning_rate": 9.854306867755906e-05, - "loss": 1.0938, - "step": 4660 - }, - { - "epoch": 0.53, - "learning_rate": 9.852557588207487e-05, - "loss": 1.1113, - "step": 4680 - }, - { - "epoch": 0.53, - "learning_rate": 9.850798026913145e-05, - "loss": 1.088, - "step": 4700 - }, - { - "epoch": 0.53, - "learning_rate": 9.849028187601091e-05, - "loss": 1.1256, - "step": 4720 - }, - { - "epoch": 0.53, - "learning_rate": 9.847248074021312e-05, - "loss": 1.0979, - "step": 4740 - }, - { - "epoch": 0.53, - "learning_rate": 9.845457689945567e-05, - "loss": 1.1227, - "step": 4760 - }, - { - "epoch": 0.54, - "learning_rate": 9.84365703916738e-05, - "loss": 1.1265, - "step": 4780 - }, - { - "epoch": 0.54, - "learning_rate": 9.841846125502021e-05, - "loss": 1.0997, - "step": 4800 - }, - { - "epoch": 0.54, - "learning_rate": 9.840024952786508e-05, - "loss": 1.0951, - "step": 4820 - }, - { - "epoch": 0.54, - "learning_rate": 9.838193524879599e-05, - "loss": 1.1091, - "step": 4840 - }, - { - "epoch": 0.55, - "learning_rate": 9.836351845661777e-05, - "loss": 1.1148, - "step": 4860 - }, - { - "epoch": 0.55, - "learning_rate": 9.834499919035249e-05, - "loss": 1.1211, - "step": 4880 - }, - { - "epoch": 0.55, - "learning_rate": 9.832637748923934e-05, - "loss": 1.1033, - "step": 4900 - }, - { - "epoch": 0.55, - "learning_rate": 9.830765339273454e-05, - "loss": 1.0925, - "step": 4920 - }, - { - "epoch": 0.56, - "learning_rate": 9.828882694051124e-05, - "loss": 1.1061, - "step": 4940 - }, - { - "epoch": 0.56, - "learning_rate": 9.826989817245953e-05, - "loss": 1.1083, - "step": 4960 - }, - { - "epoch": 0.56, - "learning_rate": 9.825086712868625e-05, - "loss": 1.0864, - "step": 4980 - }, - { - "epoch": 0.56, - "learning_rate": 9.823173384951496e-05, - "loss": 1.1202, - "step": 5000 - }, - { - "epoch": 0.56, - "learning_rate": 9.821249837548582e-05, - "loss": 1.101, - "step": 5020 - }, - { - "epoch": 0.57, - "learning_rate": 9.819316074735554e-05, - "loss": 1.1191, - "step": 5040 - }, - { - "epoch": 0.57, - "learning_rate": 9.817372100609726e-05, - "loss": 1.0535, - "step": 5060 - }, - { - "epoch": 0.57, - "learning_rate": 9.81541791929005e-05, - "loss": 1.1082, - "step": 5080 - }, - { - "epoch": 0.57, - "learning_rate": 9.813453534917105e-05, - "loss": 1.1366, - "step": 5100 - }, - { - "epoch": 0.58, - "learning_rate": 9.811478951653088e-05, - "loss": 1.072, - "step": 5120 - }, - { - "epoch": 0.58, - "learning_rate": 9.809494173681804e-05, - "loss": 1.0954, - "step": 5140 - }, - { - "epoch": 0.58, - "learning_rate": 9.807499205208663e-05, - "loss": 1.1014, - "step": 5160 - }, - { - "epoch": 0.58, - "learning_rate": 9.805494050460666e-05, - "loss": 1.0966, - "step": 5180 - }, - { - "epoch": 0.58, - "learning_rate": 9.803478713686391e-05, - "loss": 1.1035, - "step": 5200 - }, - { - "epoch": 0.59, - "learning_rate": 9.801453199155996e-05, - "loss": 1.1061, - "step": 5220 - }, - { - "epoch": 0.59, - "learning_rate": 9.799417511161206e-05, - "loss": 1.0943, - "step": 5240 - }, - { - "epoch": 0.59, - "learning_rate": 9.797371654015296e-05, - "loss": 1.0943, - "step": 5260 - }, - { - "epoch": 0.59, - "learning_rate": 9.795315632053088e-05, - "loss": 1.0939, - "step": 5280 - }, - { - "epoch": 0.6, - "learning_rate": 9.793249449630946e-05, - "loss": 1.083, - "step": 5300 - }, - { - "epoch": 0.6, - "learning_rate": 9.791173111126759e-05, - "loss": 1.0958, - "step": 5320 - }, - { - "epoch": 0.6, - "learning_rate": 9.789086620939936e-05, - "loss": 1.0671, - "step": 5340 - }, - { - "epoch": 0.6, - "learning_rate": 9.786989983491397e-05, - "loss": 1.0766, - "step": 5360 - }, - { - "epoch": 0.6, - "learning_rate": 9.784883203223558e-05, - "loss": 1.1059, - "step": 5380 - }, - { - "epoch": 0.61, - "learning_rate": 9.782766284600332e-05, - "loss": 1.1136, - "step": 5400 - }, - { - "epoch": 0.61, - "learning_rate": 9.780639232107108e-05, - "loss": 1.1049, - "step": 5420 - }, - { - "epoch": 0.61, - "learning_rate": 9.778502050250749e-05, - "loss": 1.0934, - "step": 5440 - }, - { - "epoch": 0.61, - "learning_rate": 9.776354743559583e-05, - "loss": 1.0905, - "step": 5460 - }, - { - "epoch": 0.62, - "learning_rate": 9.774197316583387e-05, - "loss": 1.0722, - "step": 5480 - }, - { - "epoch": 0.62, - "learning_rate": 9.77202977389338e-05, - "loss": 1.0761, - "step": 5500 - }, - { - "epoch": 0.62, - "learning_rate": 9.769852120082222e-05, - "loss": 1.0705, - "step": 5520 - }, - { - "epoch": 0.62, - "learning_rate": 9.767664359763991e-05, - "loss": 1.0709, - "step": 5540 - }, - { - "epoch": 0.62, - "learning_rate": 9.765466497574175e-05, - "loss": 1.0927, - "step": 5560 - }, - { - "epoch": 0.63, - "learning_rate": 9.763258538169675e-05, - "loss": 1.0942, - "step": 5580 - }, - { - "epoch": 0.63, - "learning_rate": 9.761040486228783e-05, - "loss": 1.1113, - "step": 5600 - }, - { - "epoch": 0.63, - "learning_rate": 9.758812346451171e-05, - "loss": 1.0902, - "step": 5620 - }, - { - "epoch": 0.63, - "learning_rate": 9.756574123557893e-05, - "loss": 1.1216, - "step": 5640 - }, - { - "epoch": 0.64, - "learning_rate": 9.754325822291362e-05, - "loss": 1.1029, - "step": 5660 - }, - { - "epoch": 0.64, - "learning_rate": 9.752067447415342e-05, - "loss": 1.1237, - "step": 5680 - }, - { - "epoch": 0.64, - "learning_rate": 9.749799003714954e-05, - "loss": 1.0988, - "step": 5700 - }, - { - "epoch": 0.64, - "learning_rate": 9.747520495996641e-05, - "loss": 1.087, - "step": 5720 - }, - { - "epoch": 0.64, - "learning_rate": 9.745231929088174e-05, - "loss": 1.0668, - "step": 5740 - }, - { - "epoch": 0.65, - "learning_rate": 9.74293330783864e-05, - "loss": 1.0756, - "step": 5760 - }, - { - "epoch": 0.65, - "learning_rate": 9.740624637118425e-05, - "loss": 1.1091, - "step": 5780 - }, - { - "epoch": 0.65, - "learning_rate": 9.73830592181921e-05, - "loss": 1.0985, - "step": 5800 - }, - { - "epoch": 0.65, - "learning_rate": 9.735977166853962e-05, - "loss": 1.0667, - "step": 5820 - }, - { - "epoch": 0.66, - "learning_rate": 9.733638377156915e-05, - "loss": 1.0753, - "step": 5840 - }, - { - "epoch": 0.66, - "learning_rate": 9.731289557683567e-05, - "loss": 1.0869, - "step": 5860 - }, - { - "epoch": 0.66, - "learning_rate": 9.72893071341067e-05, - "loss": 1.0944, - "step": 5880 - }, - { - "epoch": 0.66, - "learning_rate": 9.726561849336216e-05, - "loss": 1.0817, - "step": 5900 - }, - { - "epoch": 0.67, - "learning_rate": 9.724182970479422e-05, - "loss": 1.1044, - "step": 5920 - }, - { - "epoch": 0.67, - "learning_rate": 9.72179408188073e-05, - "loss": 1.1296, - "step": 5940 - }, - { - "epoch": 0.67, - "learning_rate": 9.71939518860179e-05, - "loss": 1.0678, - "step": 5960 - }, - { - "epoch": 0.67, - "learning_rate": 9.71698629572545e-05, - "loss": 1.0837, - "step": 5980 - }, - { - "epoch": 0.67, - "learning_rate": 9.714567408355744e-05, - "loss": 1.0911, - "step": 6000 - }, - { - "epoch": 0.68, - "learning_rate": 9.712138531617883e-05, - "loss": 1.0979, - "step": 6020 - }, - { - "epoch": 0.68, - "learning_rate": 9.709699670658248e-05, - "loss": 1.0742, - "step": 6040 - }, - { - "epoch": 0.68, - "learning_rate": 9.707250830644367e-05, - "loss": 1.0689, - "step": 6060 - }, - { - "epoch": 0.68, - "learning_rate": 9.704792016764922e-05, - "loss": 1.1154, - "step": 6080 - }, - { - "epoch": 0.69, - "learning_rate": 9.702323234229717e-05, - "loss": 1.089, - "step": 6100 - }, - { - "epoch": 0.69, - "learning_rate": 9.699844488269687e-05, - "loss": 1.0787, - "step": 6120 - }, - { - "epoch": 0.69, - "learning_rate": 9.69735578413687e-05, - "loss": 1.0688, - "step": 6140 - }, - { - "epoch": 0.69, - "learning_rate": 9.69485712710441e-05, - "loss": 1.0665, - "step": 6160 - }, - { - "epoch": 0.69, - "learning_rate": 9.692348522466537e-05, - "loss": 1.0686, - "step": 6180 - }, - { - "epoch": 0.7, - "learning_rate": 9.689829975538559e-05, - "loss": 1.0872, - "step": 6200 - }, - { - "epoch": 0.7, - "learning_rate": 9.687301491656849e-05, - "loss": 1.0818, - "step": 6220 - }, - { - "epoch": 0.7, - "learning_rate": 9.684763076178836e-05, - "loss": 1.0844, - "step": 6240 - }, - { - "epoch": 0.7, - "learning_rate": 9.682214734482989e-05, - "loss": 1.0977, - "step": 6260 - }, - { - "epoch": 0.71, - "learning_rate": 9.679656471968814e-05, - "loss": 1.072, - "step": 6280 - }, - { - "epoch": 0.71, - "learning_rate": 9.677088294056833e-05, - "loss": 1.0869, - "step": 6300 - }, - { - "epoch": 0.71, - "learning_rate": 9.674510206188584e-05, - "loss": 1.0839, - "step": 6320 - }, - { - "epoch": 0.71, - "learning_rate": 9.671922213826589e-05, - "loss": 1.077, - "step": 6340 - }, - { - "epoch": 0.71, - "learning_rate": 9.669324322454373e-05, - "loss": 1.0974, - "step": 6360 - }, - { - "epoch": 0.72, - "learning_rate": 9.666716537576422e-05, - "loss": 1.1057, - "step": 6380 - }, - { - "epoch": 0.72, - "learning_rate": 9.664098864718191e-05, - "loss": 1.0543, - "step": 6400 - }, - { - "epoch": 0.72, - "learning_rate": 9.661471309426085e-05, - "loss": 1.0699, - "step": 6420 - }, - { - "epoch": 0.72, - "learning_rate": 9.658833877267448e-05, - "loss": 1.091, - "step": 6440 - }, - { - "epoch": 0.73, - "learning_rate": 9.65618657383055e-05, - "loss": 1.0425, - "step": 6460 - }, - { - "epoch": 0.73, - "learning_rate": 9.653529404724578e-05, - "loss": 1.0519, - "step": 6480 - }, - { - "epoch": 0.73, - "learning_rate": 9.650862375579622e-05, - "loss": 1.1099, - "step": 6500 - }, - { - "epoch": 0.73, - "learning_rate": 9.648185492046663e-05, - "loss": 1.1131, - "step": 6520 - }, - { - "epoch": 0.73, - "learning_rate": 9.645498759797566e-05, - "loss": 1.1014, - "step": 6540 - }, - { - "epoch": 0.74, - "learning_rate": 9.642802184525058e-05, - "loss": 1.0645, - "step": 6560 - }, - { - "epoch": 0.74, - "learning_rate": 9.640095771942725e-05, - "loss": 1.0989, - "step": 6580 - }, - { - "epoch": 0.74, - "learning_rate": 9.637379527784997e-05, - "loss": 1.1347, - "step": 6600 - }, - { - "epoch": 0.74, - "learning_rate": 9.634653457807135e-05, - "loss": 1.1031, - "step": 6620 - }, - { - "epoch": 0.75, - "learning_rate": 9.631917567785213e-05, - "loss": 1.1107, - "step": 6640 - }, - { - "epoch": 0.75, - "learning_rate": 9.629171863516126e-05, - "loss": 1.0845, - "step": 6660 - }, - { - "epoch": 0.75, - "learning_rate": 9.626416350817549e-05, - "loss": 1.0804, - "step": 6680 - }, - { - "epoch": 0.75, - "learning_rate": 9.623651035527947e-05, - "loss": 1.081, - "step": 6700 - }, - { - "epoch": 0.76, - "learning_rate": 9.620875923506556e-05, - "loss": 1.1133, - "step": 6720 - }, - { - "epoch": 0.76, - "learning_rate": 9.618091020633365e-05, - "loss": 1.0605, - "step": 6740 - }, - { - "epoch": 0.76, - "learning_rate": 9.615296332809112e-05, - "loss": 1.034, - "step": 6760 - }, - { - "epoch": 0.76, - "learning_rate": 9.612491865955265e-05, - "loss": 1.0704, - "step": 6780 - }, - { - "epoch": 0.76, - "learning_rate": 9.609677626014015e-05, - "loss": 1.0791, - "step": 6800 - }, - { - "epoch": 0.77, - "learning_rate": 9.606853618948256e-05, - "loss": 1.0931, - "step": 6820 - }, - { - "epoch": 0.77, - "learning_rate": 9.604019850741582e-05, - "loss": 1.0579, - "step": 6840 - }, - { - "epoch": 0.77, - "learning_rate": 9.601176327398267e-05, - "loss": 1.0252, - "step": 6860 - }, - { - "epoch": 0.77, - "learning_rate": 9.598323054943252e-05, - "loss": 1.0754, - "step": 6880 - }, - { - "epoch": 0.78, - "learning_rate": 9.59546003942214e-05, - "loss": 1.0665, - "step": 6900 - }, - { - "epoch": 0.78, - "learning_rate": 9.592587286901172e-05, - "loss": 1.1004, - "step": 6920 - }, - { - "epoch": 0.78, - "learning_rate": 9.589704803467225e-05, - "loss": 1.1064, - "step": 6940 - }, - { - "epoch": 0.78, - "learning_rate": 9.586812595227792e-05, - "loss": 1.0677, - "step": 6960 - }, - { - "epoch": 0.78, - "learning_rate": 9.583910668310971e-05, - "loss": 1.0727, - "step": 6980 - }, - { - "epoch": 0.79, - "learning_rate": 9.580999028865452e-05, - "loss": 1.0799, - "step": 7000 - }, - { - "epoch": 0.79, - "learning_rate": 9.578077683060507e-05, - "loss": 1.0665, - "step": 7020 - }, - { - "epoch": 0.79, - "learning_rate": 9.57514663708597e-05, - "loss": 1.0729, - "step": 7040 - }, - { - "epoch": 0.79, - "learning_rate": 9.572205897152229e-05, - "loss": 1.0472, - "step": 7060 - }, - { - "epoch": 0.8, - "learning_rate": 9.569255469490214e-05, - "loss": 1.0633, - "step": 7080 - }, - { - "epoch": 0.8, - "learning_rate": 9.566295360351383e-05, - "loss": 1.0578, - "step": 7100 - }, - { - "epoch": 0.8, - "learning_rate": 9.563325576007701e-05, - "loss": 1.0679, - "step": 7120 - }, - { - "epoch": 0.8, - "learning_rate": 9.560346122751638e-05, - "loss": 1.0878, - "step": 7140 - }, - { - "epoch": 0.8, - "learning_rate": 9.557357006896152e-05, - "loss": 1.0521, - "step": 7160 - }, - { - "epoch": 0.81, - "learning_rate": 9.554358234774669e-05, - "loss": 1.0543, - "step": 7180 - }, - { - "epoch": 0.81, - "learning_rate": 9.55134981274108e-05, - "loss": 1.0781, - "step": 7200 - }, - { - "epoch": 0.81, - "learning_rate": 9.548331747169719e-05, - "loss": 1.0668, - "step": 7220 - }, - { - "epoch": 0.81, - "learning_rate": 9.545304044455357e-05, - "loss": 1.0908, - "step": 7240 - }, - { - "epoch": 0.82, - "learning_rate": 9.542266711013182e-05, - "loss": 1.0535, - "step": 7260 - }, - { - "epoch": 0.82, - "learning_rate": 9.539219753278785e-05, - "loss": 1.0587, - "step": 7280 - }, - { - "epoch": 0.82, - "learning_rate": 9.536163177708155e-05, - "loss": 1.0406, - "step": 7300 - }, - { - "epoch": 0.82, - "learning_rate": 9.533096990777657e-05, - "loss": 1.0645, - "step": 7320 - }, - { - "epoch": 0.82, - "learning_rate": 9.530021198984019e-05, - "loss": 1.0425, - "step": 7340 - }, - { - "epoch": 0.83, - "learning_rate": 9.526935808844324e-05, - "loss": 1.0564, - "step": 7360 - }, - { - "epoch": 0.83, - "learning_rate": 9.523840826895988e-05, - "loss": 1.0811, - "step": 7380 - }, - { - "epoch": 0.83, - "learning_rate": 9.520736259696753e-05, - "loss": 1.0727, - "step": 7400 - }, - { - "epoch": 0.83, - "learning_rate": 9.51762211382467e-05, - "loss": 1.0687, - "step": 7420 - }, - { - "epoch": 0.84, - "learning_rate": 9.514498395878086e-05, - "loss": 1.0902, - "step": 7440 - }, - { - "epoch": 0.84, - "learning_rate": 9.51136511247563e-05, - "loss": 1.0613, - "step": 7460 - }, - { - "epoch": 0.84, - "learning_rate": 9.508222270256195e-05, - "loss": 1.0809, - "step": 7480 - }, - { - "epoch": 0.84, - "learning_rate": 9.505069875878934e-05, - "loss": 1.0682, - "step": 7500 - }, - { - "epoch": 0.84, - "learning_rate": 9.501907936023231e-05, - "loss": 1.07, - "step": 7520 - }, - { - "epoch": 0.85, - "learning_rate": 9.498736457388703e-05, - "loss": 1.0797, - "step": 7540 - }, - { - "epoch": 0.85, - "learning_rate": 9.495555446695175e-05, - "loss": 1.0468, - "step": 7560 - }, - { - "epoch": 0.85, - "learning_rate": 9.492364910682668e-05, - "loss": 1.0903, - "step": 7580 - }, - { - "epoch": 0.85, - "learning_rate": 9.489164856111387e-05, - "loss": 1.0929, - "step": 7600 - }, - { - "epoch": 0.86, - "learning_rate": 9.485955289761703e-05, - "loss": 1.0669, - "step": 7620 - }, - { - "epoch": 0.86, - "learning_rate": 9.482736218434143e-05, - "loss": 1.0938, - "step": 7640 - }, - { - "epoch": 0.86, - "learning_rate": 9.479507648949372e-05, - "loss": 1.0582, - "step": 7660 - }, - { - "epoch": 0.86, - "learning_rate": 9.476269588148186e-05, - "loss": 1.0793, - "step": 7680 - }, - { - "epoch": 0.87, - "learning_rate": 9.473022042891477e-05, - "loss": 1.0696, - "step": 7700 - }, - { - "epoch": 0.87, - "learning_rate": 9.469765020060251e-05, - "loss": 1.0599, - "step": 7720 - }, - { - "epoch": 0.87, - "learning_rate": 9.46649852655558e-05, - "loss": 1.0667, - "step": 7740 - }, - { - "epoch": 0.87, - "learning_rate": 9.463222569298615e-05, - "loss": 1.0126, - "step": 7760 - }, - { - "epoch": 0.87, - "learning_rate": 9.459937155230549e-05, - "loss": 1.0694, - "step": 7780 - }, - { - "epoch": 0.88, - "learning_rate": 9.456642291312618e-05, - "loss": 1.0566, - "step": 7800 - }, - { - "epoch": 0.88, - "learning_rate": 9.45333798452608e-05, - "loss": 1.0664, - "step": 7820 - }, - { - "epoch": 0.88, - "learning_rate": 9.450024241872201e-05, - "loss": 1.0408, - "step": 7840 - }, - { - "epoch": 0.88, - "learning_rate": 9.446701070372237e-05, - "loss": 1.0734, - "step": 7860 - }, - { - "epoch": 0.89, - "learning_rate": 9.443368477067427e-05, - "loss": 1.0604, - "step": 7880 - }, - { - "epoch": 0.89, - "learning_rate": 9.440026469018968e-05, - "loss": 1.0746, - "step": 7900 - }, - { - "epoch": 0.89, - "learning_rate": 9.43667505330801e-05, - "loss": 1.0358, - "step": 7920 - }, - { - "epoch": 0.89, - "learning_rate": 9.433314237035631e-05, - "loss": 1.0631, - "step": 7940 - }, - { - "epoch": 0.89, - "learning_rate": 9.429944027322834e-05, - "loss": 1.0296, - "step": 7960 - }, - { - "epoch": 0.9, - "learning_rate": 9.426564431310521e-05, - "loss": 1.0412, - "step": 7980 - }, - { - "epoch": 0.9, - "learning_rate": 9.42317545615948e-05, - "loss": 1.0554, - "step": 8000 - }, - { - "epoch": 0.9, - "learning_rate": 9.419777109050376e-05, - "loss": 1.0978, - "step": 8020 - }, - { - "epoch": 0.9, - "learning_rate": 9.416369397183728e-05, - "loss": 1.0215, - "step": 8040 - }, - { - "epoch": 0.91, - "learning_rate": 9.4129523277799e-05, - "loss": 1.0424, - "step": 8060 - }, - { - "epoch": 0.91, - "learning_rate": 9.409525908079082e-05, - "loss": 1.0453, - "step": 8080 - }, - { - "epoch": 0.91, - "learning_rate": 9.406090145341277e-05, - "loss": 1.0588, - "step": 8100 - }, - { - "epoch": 0.91, - "learning_rate": 9.402645046846281e-05, - "loss": 1.0439, - "step": 8120 - }, - { - "epoch": 0.91, - "learning_rate": 9.399190619893676e-05, - "loss": 1.0501, - "step": 8140 - }, - { - "epoch": 0.92, - "learning_rate": 9.395726871802804e-05, - "loss": 1.0565, - "step": 8160 - }, - { - "epoch": 0.92, - "learning_rate": 9.392253809912758e-05, - "loss": 1.0456, - "step": 8180 - }, - { - "epoch": 0.92, - "learning_rate": 9.388771441582369e-05, - "loss": 1.0547, - "step": 8200 - }, - { - "epoch": 0.92, - "learning_rate": 9.385279774190184e-05, - "loss": 1.0344, - "step": 8220 - }, - { - "epoch": 0.93, - "learning_rate": 9.381778815134455e-05, - "loss": 1.0286, - "step": 8240 - }, - { - "epoch": 0.93, - "learning_rate": 9.378268571833116e-05, - "loss": 1.1033, - "step": 8260 - }, - { - "epoch": 0.93, - "learning_rate": 9.374749051723781e-05, - "loss": 1.0667, - "step": 8280 - }, - { - "epoch": 0.93, - "learning_rate": 9.371220262263713e-05, - "loss": 1.0416, - "step": 8300 - }, - { - "epoch": 0.93, - "learning_rate": 9.36768221092982e-05, - "loss": 1.0674, - "step": 8320 - }, - { - "epoch": 0.94, - "learning_rate": 9.364134905218632e-05, - "loss": 1.0305, - "step": 8340 - }, - { - "epoch": 0.94, - "learning_rate": 9.360578352646285e-05, - "loss": 1.0604, - "step": 8360 - }, - { - "epoch": 0.94, - "learning_rate": 9.357012560748513e-05, - "loss": 1.061, - "step": 8380 - }, - { - "epoch": 0.94, - "learning_rate": 9.353437537080625e-05, - "loss": 1.0678, - "step": 8400 - }, - { - "epoch": 0.95, - "learning_rate": 9.349853289217485e-05, - "loss": 1.0767, - "step": 8420 - }, - { - "epoch": 0.95, - "learning_rate": 9.34625982475351e-05, - "loss": 1.0562, - "step": 8440 - }, - { - "epoch": 0.95, - "learning_rate": 9.342657151302637e-05, - "loss": 1.0301, - "step": 8460 - }, - { - "epoch": 0.95, - "learning_rate": 9.339045276498325e-05, - "loss": 1.063, - "step": 8480 - }, - { - "epoch": 0.96, - "learning_rate": 9.33542420799352e-05, - "loss": 1.0157, - "step": 8500 - }, - { - "epoch": 0.96, - "learning_rate": 9.331793953460653e-05, - "loss": 1.0564, - "step": 8520 - }, - { - "epoch": 0.96, - "learning_rate": 9.328154520591614e-05, - "loss": 1.0817, - "step": 8540 - }, - { - "epoch": 0.96, - "learning_rate": 9.324505917097749e-05, - "loss": 1.0453, - "step": 8560 - }, - { - "epoch": 0.96, - "learning_rate": 9.320848150709826e-05, - "loss": 1.0442, - "step": 8580 - }, - { - "epoch": 0.97, - "learning_rate": 9.317181229178031e-05, - "loss": 1.0379, - "step": 8600 - }, - { - "epoch": 0.97, - "learning_rate": 9.313505160271952e-05, - "loss": 1.0686, - "step": 8620 - }, - { - "epoch": 0.97, - "learning_rate": 9.30981995178055e-05, - "loss": 1.0417, - "step": 8640 - }, - { - "epoch": 0.97, - "learning_rate": 9.306125611512159e-05, - "loss": 1.0569, - "step": 8660 - }, - { - "epoch": 0.98, - "learning_rate": 9.302422147294458e-05, - "loss": 1.0646, - "step": 8680 - }, - { - "epoch": 0.98, - "learning_rate": 9.298709566974462e-05, - "loss": 1.0439, - "step": 8700 - }, - { - "epoch": 0.98, - "learning_rate": 9.294987878418495e-05, - "loss": 1.0529, - "step": 8720 - }, - { - "epoch": 0.98, - "learning_rate": 9.291257089512185e-05, - "loss": 1.0369, - "step": 8740 - }, - { - "epoch": 0.98, - "learning_rate": 9.287517208160439e-05, - "loss": 1.0509, - "step": 8760 - }, - { - "epoch": 0.99, - "learning_rate": 9.283768242287433e-05, - "loss": 1.0825, - "step": 8780 - }, - { - "epoch": 0.99, - "learning_rate": 9.280010199836588e-05, - "loss": 1.0583, - "step": 8800 - }, - { - "epoch": 0.99, - "learning_rate": 9.276243088770559e-05, - "loss": 1.0528, - "step": 8820 - }, - { - "epoch": 0.99, - "learning_rate": 9.272466917071216e-05, - "loss": 1.0307, - "step": 8840 - }, - { - "epoch": 1.0, - "learning_rate": 9.268681692739623e-05, - "loss": 1.0538, - "step": 8860 - }, - { - "epoch": 1.0, - "learning_rate": 9.264887423796029e-05, - "loss": 1.0459, - "step": 8880 - }, - { - "epoch": 1.0, - "learning_rate": 9.261084118279847e-05, - "loss": 1.052, - "step": 8900 - }, - { - "epoch": 1.0, - "learning_rate": 9.257271784249635e-05, - "loss": 0.9985, - "step": 8920 - }, - { - "epoch": 1.0, - "learning_rate": 9.253450429783081e-05, - "loss": 1.0312, - "step": 8940 - }, - { - "epoch": 1.01, - "learning_rate": 9.249620062976988e-05, - "loss": 1.0433, - "step": 8960 - }, - { - "epoch": 1.01, - "learning_rate": 9.245780691947252e-05, - "loss": 1.0061, - "step": 8980 - }, - { - "epoch": 1.01, - "learning_rate": 9.24193232482885e-05, - "loss": 1.0237, - "step": 9000 - }, - { - "epoch": 1.01, - "learning_rate": 9.238074969775818e-05, - "loss": 1.0311, - "step": 9020 - }, - { - "epoch": 1.02, - "learning_rate": 9.234208634961236e-05, - "loss": 1.0467, - "step": 9040 - }, - { - "epoch": 1.02, - "learning_rate": 9.230333328577212e-05, - "loss": 1.0217, - "step": 9060 - }, - { - "epoch": 1.02, - "learning_rate": 9.226449058834863e-05, - "loss": 1.024, - "step": 9080 - }, - { - "epoch": 1.02, - "learning_rate": 9.222555833964296e-05, - "loss": 1.0373, - "step": 9100 - }, - { - "epoch": 1.02, - "learning_rate": 9.218653662214593e-05, - "loss": 1.0248, - "step": 9120 - }, - { - "epoch": 1.03, - "learning_rate": 9.214742551853798e-05, - "loss": 1.0597, - "step": 9140 - }, - { - "epoch": 1.03, - "learning_rate": 9.210822511168884e-05, - "loss": 1.0138, - "step": 9160 - }, - { - "epoch": 1.03, - "learning_rate": 9.206893548465758e-05, - "loss": 1.0406, - "step": 9180 - }, - { - "epoch": 1.03, - "learning_rate": 9.20295567206922e-05, - "loss": 1.0399, - "step": 9200 - }, - { - "epoch": 1.04, - "learning_rate": 9.199008890322963e-05, - "loss": 1.0282, - "step": 9220 - }, - { - "epoch": 1.04, - "learning_rate": 9.19505321158955e-05, - "loss": 1.0156, - "step": 9240 - }, - { - "epoch": 1.04, - "learning_rate": 9.191088644250389e-05, - "loss": 1.0146, - "step": 9260 - }, - { - "epoch": 1.04, - "learning_rate": 9.187115196705731e-05, - "loss": 0.9898, - "step": 9280 - }, - { - "epoch": 1.04, - "learning_rate": 9.183132877374631e-05, - "loss": 1.0027, - "step": 9300 - }, - { - "epoch": 1.05, - "learning_rate": 9.17914169469495e-05, - "loss": 1.0047, - "step": 9320 - }, - { - "epoch": 1.05, - "learning_rate": 9.17514165712333e-05, - "loss": 1.0628, - "step": 9340 - }, - { - "epoch": 1.05, - "learning_rate": 9.171132773135165e-05, - "loss": 1.041, - "step": 9360 - }, - { - "epoch": 1.05, - "learning_rate": 9.167115051224606e-05, - "loss": 1.0387, - "step": 9380 - }, - { - "epoch": 1.06, - "learning_rate": 9.16308849990452e-05, - "loss": 1.0521, - "step": 9400 - }, - { - "epoch": 1.06, - "learning_rate": 9.159053127706487e-05, - "loss": 0.9995, - "step": 9420 - }, - { - "epoch": 1.06, - "learning_rate": 9.155008943180776e-05, - "loss": 1.0068, - "step": 9440 - }, - { - "epoch": 1.06, - "learning_rate": 9.150955954896327e-05, - "loss": 1.0396, - "step": 9460 - }, - { - "epoch": 1.07, - "learning_rate": 9.146894171440735e-05, - "loss": 0.9964, - "step": 9480 - }, - { - "epoch": 1.07, - "learning_rate": 9.14282360142023e-05, - "loss": 0.9995, - "step": 9500 - }, - { - "epoch": 1.07, - "learning_rate": 9.138744253459658e-05, - "loss": 1.0396, - "step": 9520 - }, - { - "epoch": 1.07, - "learning_rate": 9.134656136202466e-05, - "loss": 1.0167, - "step": 9540 - }, - { - "epoch": 1.07, - "learning_rate": 9.130559258310679e-05, - "loss": 1.0319, - "step": 9560 - }, - { - "epoch": 1.08, - "learning_rate": 9.126453628464888e-05, - "loss": 1.0222, - "step": 9580 - }, - { - "epoch": 1.08, - "learning_rate": 9.122339255364224e-05, - "loss": 0.9881, - "step": 9600 - }, - { - "epoch": 1.08, - "learning_rate": 9.118216147726347e-05, - "loss": 1.0193, - "step": 9620 - }, - { - "epoch": 1.08, - "learning_rate": 9.11408431428742e-05, - "loss": 1.0327, - "step": 9640 - }, - { - "epoch": 1.09, - "learning_rate": 9.109943763802097e-05, - "loss": 1.0149, - "step": 9660 - }, - { - "epoch": 1.09, - "learning_rate": 9.105794505043505e-05, - "loss": 0.9916, - "step": 9680 - }, - { - "epoch": 1.09, - "learning_rate": 9.101636546803218e-05, - "loss": 1.0443, - "step": 9700 - }, - { - "epoch": 1.09, - "learning_rate": 9.09746989789124e-05, - "loss": 1.0485, - "step": 9720 - }, - { - "epoch": 1.09, - "learning_rate": 9.093294567135998e-05, - "loss": 1.0536, - "step": 9740 - }, - { - "epoch": 1.1, - "learning_rate": 9.089110563384304e-05, - "loss": 1.0167, - "step": 9760 - }, - { - "epoch": 1.1, - "learning_rate": 9.084917895501357e-05, - "loss": 1.0192, - "step": 9780 - }, - { - "epoch": 1.1, - "learning_rate": 9.080716572370704e-05, - "loss": 1.0163, - "step": 9800 - }, - { - "epoch": 1.1, - "learning_rate": 9.07650660289424e-05, - "loss": 1.0041, - "step": 9820 - }, - { - "epoch": 1.11, - "learning_rate": 9.072287995992172e-05, - "loss": 1.0364, - "step": 9840 - }, - { - "epoch": 1.11, - "learning_rate": 9.068060760603014e-05, - "loss": 1.0567, - "step": 9860 - }, - { - "epoch": 1.11, - "learning_rate": 9.063824905683562e-05, - "loss": 1.0234, - "step": 9880 - }, - { - "epoch": 1.11, - "learning_rate": 9.059580440208869e-05, - "loss": 1.0011, - "step": 9900 - }, - { - "epoch": 1.11, - "learning_rate": 9.05532737317224e-05, - "loss": 1.0375, - "step": 9920 - }, - { - "epoch": 1.12, - "learning_rate": 9.051065713585203e-05, - "loss": 1.0142, - "step": 9940 - }, - { - "epoch": 1.12, - "learning_rate": 9.04679547047749e-05, - "loss": 1.0143, - "step": 9960 - }, - { - "epoch": 1.12, - "learning_rate": 9.04251665289702e-05, - "loss": 1.0232, - "step": 9980 - }, - { - "epoch": 1.12, - "learning_rate": 9.038229269909883e-05, - "loss": 1.0121, - "step": 10000 - }, - { - "epoch": 1.13, - "learning_rate": 9.033933330600316e-05, - "loss": 1.0262, - "step": 10020 - }, - { - "epoch": 1.13, - "learning_rate": 9.029628844070686e-05, - "loss": 1.0398, - "step": 10040 - }, - { - "epoch": 1.13, - "learning_rate": 9.025315819441467e-05, - "loss": 1.0101, - "step": 10060 - }, - { - "epoch": 1.13, - "learning_rate": 9.020994265851226e-05, - "loss": 1.0367, - "step": 10080 - }, - { - "epoch": 1.13, - "learning_rate": 9.016664192456604e-05, - "loss": 0.9912, - "step": 10100 - }, - { - "epoch": 1.14, - "learning_rate": 9.012325608432291e-05, - "loss": 1.0516, - "step": 10120 - }, - { - "epoch": 1.14, - "learning_rate": 9.00797852297101e-05, - "loss": 1.0297, - "step": 10140 - }, - { - "epoch": 1.14, - "learning_rate": 9.003622945283496e-05, - "loss": 1.0401, - "step": 10160 - }, - { - "epoch": 1.14, - "learning_rate": 8.99925888459848e-05, - "loss": 1.0365, - "step": 10180 - }, - { - "epoch": 1.15, - "learning_rate": 8.994886350162666e-05, - "loss": 1.0227, - "step": 10200 - }, - { - "epoch": 1.15, - "learning_rate": 8.990505351240714e-05, - "loss": 1.0176, - "step": 10220 - }, - { - "epoch": 1.15, - "learning_rate": 8.986115897115213e-05, - "loss": 1.0419, - "step": 10240 - }, - { - "epoch": 1.15, - "learning_rate": 8.981717997086674e-05, - "loss": 1.0206, - "step": 10260 - }, - { - "epoch": 1.16, - "learning_rate": 8.977311660473499e-05, - "loss": 0.9932, - "step": 10280 - }, - { - "epoch": 1.16, - "learning_rate": 8.972896896611971e-05, - "loss": 1.0083, - "step": 10300 - }, - { - "epoch": 1.16, - "learning_rate": 8.968473714856222e-05, - "loss": 1.0006, - "step": 10320 - }, - { - "epoch": 1.16, - "learning_rate": 8.964042124578224e-05, - "loss": 1.0136, - "step": 10340 - }, - { - "epoch": 1.16, - "learning_rate": 8.959602135167766e-05, - "loss": 1.0254, - "step": 10360 - }, - { - "epoch": 1.17, - "learning_rate": 8.955153756032428e-05, - "loss": 0.9969, - "step": 10380 - }, - { - "epoch": 1.17, - "learning_rate": 8.950696996597576e-05, - "loss": 0.9852, - "step": 10400 - }, - { - "epoch": 1.17, - "learning_rate": 8.946231866306325e-05, - "loss": 0.9993, - "step": 10420 - }, - { - "epoch": 1.17, - "learning_rate": 8.941758374619525e-05, - "loss": 1.019, - "step": 10440 - }, - { - "epoch": 1.18, - "learning_rate": 8.93727653101575e-05, - "loss": 1.0028, - "step": 10460 - }, - { - "epoch": 1.18, - "learning_rate": 8.932786344991264e-05, - "loss": 1.028, - "step": 10480 - }, - { - "epoch": 1.18, - "learning_rate": 8.928287826060009e-05, - "loss": 1.0152, - "step": 10500 - }, - { - "epoch": 1.18, - "learning_rate": 8.923780983753583e-05, - "loss": 1.0209, - "step": 10520 - }, - { - "epoch": 1.18, - "learning_rate": 8.919265827621218e-05, - "loss": 1.0409, - "step": 10540 - }, - { - "epoch": 1.19, - "learning_rate": 8.914742367229768e-05, - "loss": 1.0155, - "step": 10560 - }, - { - "epoch": 1.19, - "learning_rate": 8.910210612163673e-05, - "loss": 0.994, - "step": 10580 - }, - { - "epoch": 1.19, - "learning_rate": 8.905670572024958e-05, - "loss": 1.0145, - "step": 10600 - }, - { - "epoch": 1.19, - "learning_rate": 8.901122256433195e-05, - "loss": 1.0211, - "step": 10620 - }, - { - "epoch": 1.2, - "learning_rate": 8.89656567502549e-05, - "loss": 0.9977, - "step": 10640 - }, - { - "epoch": 1.2, - "learning_rate": 8.89200083745647e-05, - "loss": 0.9889, - "step": 10660 - }, - { - "epoch": 1.2, - "learning_rate": 8.887427753398248e-05, - "loss": 1.0104, - "step": 10680 - }, - { - "epoch": 1.2, - "learning_rate": 8.882846432540413e-05, - "loss": 1.0535, - "step": 10700 - }, - { - "epoch": 1.2, - "learning_rate": 8.87825688459001e-05, - "loss": 1.0043, - "step": 10720 - }, - { - "epoch": 1.21, - "learning_rate": 8.873659119271507e-05, - "loss": 1.0144, - "step": 10740 - }, - { - "epoch": 1.21, - "learning_rate": 8.869053146326793e-05, - "loss": 1.0118, - "step": 10760 - }, - { - "epoch": 1.21, - "learning_rate": 8.864438975515141e-05, - "loss": 1.0366, - "step": 10780 - }, - { - "epoch": 1.21, - "learning_rate": 8.859816616613194e-05, - "loss": 1.0125, - "step": 10800 - }, - { - "epoch": 1.22, - "learning_rate": 8.855186079414949e-05, - "loss": 0.9938, - "step": 10820 - }, - { - "epoch": 1.22, - "learning_rate": 8.850547373731727e-05, - "loss": 1.0046, - "step": 10840 - }, - { - "epoch": 1.22, - "learning_rate": 8.845900509392158e-05, - "loss": 1.0199, - "step": 10860 - }, - { - "epoch": 1.22, - "learning_rate": 8.841245496242157e-05, - "loss": 1.0019, - "step": 10880 - }, - { - "epoch": 1.22, - "learning_rate": 8.836582344144911e-05, - "loss": 0.9985, - "step": 10900 - }, - { - "epoch": 1.23, - "learning_rate": 8.831911062980845e-05, - "loss": 1.0138, - "step": 10920 - }, - { - "epoch": 1.23, - "learning_rate": 8.827231662647611e-05, - "loss": 1.0053, - "step": 10940 - }, - { - "epoch": 1.23, - "learning_rate": 8.822544153060064e-05, - "loss": 1.012, - "step": 10960 - }, - { - "epoch": 1.23, - "learning_rate": 8.817848544150243e-05, - "loss": 1.0178, - "step": 10980 - }, - { - "epoch": 1.24, - "learning_rate": 8.813144845867345e-05, - "loss": 1.0143, - "step": 11000 - }, - { - "epoch": 1.24, - "learning_rate": 8.808433068177708e-05, - "loss": 1.0087, - "step": 11020 - }, - { - "epoch": 1.24, - "learning_rate": 8.80371322106479e-05, - "loss": 1.0242, - "step": 11040 - }, - { - "epoch": 1.24, - "learning_rate": 8.798985314529146e-05, - "loss": 1.0364, - "step": 11060 - }, - { - "epoch": 1.24, - "learning_rate": 8.794249358588407e-05, - "loss": 0.9967, - "step": 11080 - }, - { - "epoch": 1.25, - "learning_rate": 8.789505363277259e-05, - "loss": 1.0144, - "step": 11100 - }, - { - "epoch": 1.25, - "learning_rate": 8.784753338647424e-05, - "loss": 1.016, - "step": 11120 - }, - { - "epoch": 1.25, - "learning_rate": 8.779993294767635e-05, - "loss": 1.0061, - "step": 11140 - }, - { - "epoch": 1.25, - "learning_rate": 8.77522524172362e-05, - "loss": 1.0166, - "step": 11160 - }, - { - "epoch": 1.26, - "learning_rate": 8.770449189618069e-05, - "loss": 1.0228, - "step": 11180 - }, - { - "epoch": 1.26, - "learning_rate": 8.76566514857063e-05, - "loss": 1.0037, - "step": 11200 - }, - { - "epoch": 1.26, - "learning_rate": 8.76087312871787e-05, - "loss": 1.033, - "step": 11220 - }, - { - "epoch": 1.26, - "learning_rate": 8.75607314021327e-05, - "loss": 1.0347, - "step": 11240 - }, - { - "epoch": 1.27, - "learning_rate": 8.751265193227189e-05, - "loss": 0.9958, - "step": 11260 - }, - { - "epoch": 1.27, - "learning_rate": 8.746449297946853e-05, - "loss": 1.0154, - "step": 11280 - }, - { - "epoch": 1.27, - "learning_rate": 8.741625464576322e-05, - "loss": 1.0308, - "step": 11300 - }, - { - "epoch": 1.27, - "learning_rate": 8.736793703336482e-05, - "loss": 1.0114, - "step": 11320 - }, - { - "epoch": 1.27, - "learning_rate": 8.731954024465017e-05, - "loss": 1.0231, - "step": 11340 - }, - { - "epoch": 1.28, - "learning_rate": 8.727106438216384e-05, - "loss": 1.01, - "step": 11360 - }, - { - "epoch": 1.28, - "learning_rate": 8.722250954861795e-05, - "loss": 0.983, - "step": 11380 - }, - { - "epoch": 1.28, - "learning_rate": 8.717387584689195e-05, - "loss": 0.9938, - "step": 11400 - }, - { - "epoch": 1.28, - "learning_rate": 8.712516338003241e-05, - "loss": 0.9918, - "step": 11420 - }, - { - "epoch": 1.29, - "learning_rate": 8.707637225125276e-05, - "loss": 1.0061, - "step": 11440 - }, - { - "epoch": 1.29, - "learning_rate": 8.702750256393316e-05, - "loss": 1.0493, - "step": 11460 - }, - { - "epoch": 1.29, - "learning_rate": 8.697855442162012e-05, - "loss": 0.9973, - "step": 11480 - }, - { - "epoch": 1.29, - "learning_rate": 8.692952792802651e-05, - "loss": 1.0343, - "step": 11500 - }, - { - "epoch": 1.29, - "learning_rate": 8.688042318703111e-05, - "loss": 1.0333, - "step": 11520 - }, - { - "epoch": 1.3, - "learning_rate": 8.683124030267855e-05, - "loss": 1.0276, - "step": 11540 - }, - { - "epoch": 1.3, - "learning_rate": 8.678197937917901e-05, - "loss": 1.0016, - "step": 11560 - }, - { - "epoch": 1.3, - "learning_rate": 8.673264052090801e-05, - "loss": 0.9907, - "step": 11580 - }, - { - "epoch": 1.3, - "learning_rate": 8.668322383240626e-05, - "loss": 1.0153, - "step": 11600 - }, - { - "epoch": 1.31, - "learning_rate": 8.663372941837929e-05, - "loss": 1.0084, - "step": 11620 - }, - { - "epoch": 1.31, - "learning_rate": 8.658415738369737e-05, - "loss": 1.0255, - "step": 11640 - }, - { - "epoch": 1.31, - "learning_rate": 8.653450783339523e-05, - "loss": 0.9996, - "step": 11660 - }, - { - "epoch": 1.31, - "learning_rate": 8.648478087267187e-05, - "loss": 0.9922, - "step": 11680 - }, - { - "epoch": 1.31, - "learning_rate": 8.643497660689024e-05, - "loss": 1.021, - "step": 11700 - }, - { - "epoch": 1.32, - "learning_rate": 8.638509514157715e-05, - "loss": 1.0102, - "step": 11720 - }, - { - "epoch": 1.32, - "learning_rate": 8.633513658242295e-05, - "loss": 1.0281, - "step": 11740 - }, - { - "epoch": 1.32, - "learning_rate": 8.628510103528134e-05, - "loss": 1.0185, - "step": 11760 - }, - { - "epoch": 1.32, - "learning_rate": 8.623498860616918e-05, - "loss": 0.9624, - "step": 11780 - }, - { - "epoch": 1.33, - "learning_rate": 8.618479940126617e-05, - "loss": 1.0486, - "step": 11800 - }, - { - "epoch": 1.33, - "learning_rate": 8.613453352691473e-05, - "loss": 1.0069, - "step": 11820 - }, - { - "epoch": 1.33, - "learning_rate": 8.608419108961971e-05, - "loss": 1.0449, - "step": 11840 - }, - { - "epoch": 1.33, - "learning_rate": 8.603377219604823e-05, - "loss": 0.9983, - "step": 11860 - }, - { - "epoch": 1.33, - "learning_rate": 8.59832769530293e-05, - "loss": 1.0164, - "step": 11880 - }, - { - "epoch": 1.34, - "learning_rate": 8.59327054675538e-05, - "loss": 1.0222, - "step": 11900 - }, - { - "epoch": 1.34, - "learning_rate": 8.588205784677415e-05, - "loss": 1.0372, - "step": 11920 - }, - { - "epoch": 1.34, - "learning_rate": 8.583133419800404e-05, - "loss": 1.0078, - "step": 11940 - }, - { - "epoch": 1.34, - "learning_rate": 8.578053462871827e-05, - "loss": 1.0216, - "step": 11960 - }, - { - "epoch": 1.35, - "learning_rate": 8.57296592465525e-05, - "loss": 1.0257, - "step": 11980 - }, - { - "epoch": 1.35, - "learning_rate": 8.567870815930305e-05, - "loss": 0.9813, - "step": 12000 - }, - { - "epoch": 1.35, - "learning_rate": 8.562768147492662e-05, - "loss": 0.9851, - "step": 12020 - }, - { - "epoch": 1.35, - "learning_rate": 8.557657930154007e-05, - "loss": 1.0091, - "step": 12040 - }, - { - "epoch": 1.36, - "learning_rate": 8.552540174742025e-05, - "loss": 0.9911, - "step": 12060 - }, - { - "epoch": 1.36, - "learning_rate": 8.547414892100373e-05, - "loss": 1.0127, - "step": 12080 - }, - { - "epoch": 1.36, - "learning_rate": 8.542282093088651e-05, - "loss": 0.9931, - "step": 12100 - }, - { - "epoch": 1.36, - "learning_rate": 8.537141788582393e-05, - "loss": 0.9987, - "step": 12120 - }, - { - "epoch": 1.36, - "learning_rate": 8.53199398947303e-05, - "loss": 1.0192, - "step": 12140 - }, - { - "epoch": 1.37, - "learning_rate": 8.526838706667873e-05, - "loss": 0.984, - "step": 12160 - }, - { - "epoch": 1.37, - "learning_rate": 8.521675951090094e-05, - "loss": 0.9841, - "step": 12180 - }, - { - "epoch": 1.37, - "learning_rate": 8.516505733678695e-05, - "loss": 1.0117, - "step": 12200 - }, - { - "epoch": 1.37, - "learning_rate": 8.511328065388488e-05, - "loss": 0.9758, - "step": 12220 - }, - { - "epoch": 1.38, - "learning_rate": 8.506142957190073e-05, - "loss": 1.0073, - "step": 12240 - }, - { - "epoch": 1.38, - "learning_rate": 8.500950420069817e-05, - "loss": 1.0178, - "step": 12260 - }, - { - "epoch": 1.38, - "learning_rate": 8.495750465029821e-05, - "loss": 1.0222, - "step": 12280 - }, - { - "epoch": 1.38, - "learning_rate": 8.490543103087912e-05, - "loss": 0.9812, - "step": 12300 - }, - { - "epoch": 1.38, - "learning_rate": 8.485328345277603e-05, - "loss": 1.0102, - "step": 12320 - }, - { - "epoch": 1.39, - "learning_rate": 8.48010620264808e-05, - "loss": 1.009, - "step": 12340 - }, - { - "epoch": 1.39, - "learning_rate": 8.47487668626418e-05, - "loss": 1.0279, - "step": 12360 - }, - { - "epoch": 1.39, - "learning_rate": 8.469639807206357e-05, - "loss": 1.019, - "step": 12380 - }, - { - "epoch": 1.39, - "learning_rate": 8.46439557657067e-05, - "loss": 1.0064, - "step": 12400 - }, - { - "epoch": 1.4, - "learning_rate": 8.459144005468756e-05, - "loss": 1.0037, - "step": 12420 - }, - { - "epoch": 1.4, - "learning_rate": 8.453885105027802e-05, - "loss": 0.9955, - "step": 12440 - }, - { - "epoch": 1.4, - "learning_rate": 8.448618886390522e-05, - "loss": 0.9949, - "step": 12460 - }, - { - "epoch": 1.4, - "learning_rate": 8.443345360715143e-05, - "loss": 0.9902, - "step": 12480 - }, - { - "epoch": 1.4, - "learning_rate": 8.43806453917537e-05, - "loss": 1.026, - "step": 12500 - }, - { - "epoch": 1.41, - "learning_rate": 8.432776432960366e-05, - "loss": 0.983, - "step": 12520 - }, - { - "epoch": 1.41, - "learning_rate": 8.427481053274734e-05, - "loss": 1.0039, - "step": 12540 - }, - { - "epoch": 1.41, - "learning_rate": 8.422178411338481e-05, - "loss": 0.99, - "step": 12560 - }, - { - "epoch": 1.41, - "learning_rate": 8.416868518387009e-05, - "loss": 1.0346, - "step": 12580 - }, - { - "epoch": 1.42, - "learning_rate": 8.411551385671077e-05, - "loss": 1.0002, - "step": 12600 - }, - { - "epoch": 1.42, - "learning_rate": 8.406227024456788e-05, - "loss": 0.9916, - "step": 12620 - }, - { - "epoch": 1.42, - "learning_rate": 8.400895446025558e-05, - "loss": 1.0303, - "step": 12640 - }, - { - "epoch": 1.42, - "learning_rate": 8.3955566616741e-05, - "loss": 0.982, - "step": 12660 - }, - { - "epoch": 1.42, - "learning_rate": 8.39021068271439e-05, - "loss": 0.996, - "step": 12680 - }, - { - "epoch": 1.43, - "learning_rate": 8.38485752047365e-05, - "loss": 1.0142, - "step": 12700 - }, - { - "epoch": 1.43, - "learning_rate": 8.379497186294322e-05, - "loss": 1.032, - "step": 12720 - }, - { - "epoch": 1.43, - "learning_rate": 8.374129691534046e-05, - "loss": 1.0146, - "step": 12740 - }, - { - "epoch": 1.43, - "learning_rate": 8.36875504756563e-05, - "loss": 1.0217, - "step": 12760 - }, - { - "epoch": 1.44, - "learning_rate": 8.363373265777034e-05, - "loss": 0.9988, - "step": 12780 - }, - { - "epoch": 1.44, - "learning_rate": 8.357984357571337e-05, - "loss": 1.0119, - "step": 12800 - }, - { - "epoch": 1.44, - "learning_rate": 8.352588334366728e-05, - "loss": 0.9801, - "step": 12820 - }, - { - "epoch": 1.44, - "learning_rate": 8.347185207596457e-05, - "loss": 0.9835, - "step": 12840 - }, - { - "epoch": 1.44, - "learning_rate": 8.341774988708837e-05, - "loss": 0.9979, - "step": 12860 - }, - { - "epoch": 1.45, - "learning_rate": 8.336357689167203e-05, - "loss": 1.0108, - "step": 12880 - }, - { - "epoch": 1.45, - "learning_rate": 8.33093332044989e-05, - "loss": 0.9943, - "step": 12900 - }, - { - "epoch": 1.45, - "learning_rate": 8.325501894050218e-05, - "loss": 0.9962, - "step": 12920 - }, - { - "epoch": 1.45, - "learning_rate": 8.320063421476454e-05, - "loss": 1.0326, - "step": 12940 - }, - { - "epoch": 1.46, - "learning_rate": 8.314617914251805e-05, - "loss": 0.9947, - "step": 12960 - }, - { - "epoch": 1.46, - "learning_rate": 8.30916538391437e-05, - "loss": 1.0253, - "step": 12980 - }, - { - "epoch": 1.46, - "learning_rate": 8.30370584201714e-05, - "loss": 0.9905, - "step": 13000 - }, - { - "epoch": 1.46, - "learning_rate": 8.298239300127954e-05, - "loss": 1.0028, - "step": 13020 - }, - { - "epoch": 1.47, - "learning_rate": 8.292765769829487e-05, - "loss": 1.0467, - "step": 13040 - }, - { - "epoch": 1.47, - "learning_rate": 8.287285262719224e-05, - "loss": 1.0042, - "step": 13060 - }, - { - "epoch": 1.47, - "learning_rate": 8.281797790409425e-05, - "loss": 1.0102, - "step": 13080 - }, - { - "epoch": 1.47, - "learning_rate": 8.276303364527116e-05, - "loss": 1.0038, - "step": 13100 - }, - { - "epoch": 1.47, - "learning_rate": 8.270801996714051e-05, - "loss": 1.0028, - "step": 13120 - }, - { - "epoch": 1.48, - "learning_rate": 8.265293698626694e-05, - "loss": 0.9969, - "step": 13140 - }, - { - "epoch": 1.48, - "learning_rate": 8.259778481936197e-05, - "loss": 0.9955, - "step": 13160 - }, - { - "epoch": 1.48, - "learning_rate": 8.254256358328365e-05, - "loss": 1.0106, - "step": 13180 - }, - { - "epoch": 1.48, - "learning_rate": 8.248727339503641e-05, - "loss": 1.0129, - "step": 13200 - }, - { - "epoch": 1.49, - "learning_rate": 8.243191437177077e-05, - "loss": 0.981, - "step": 13220 - }, - { - "epoch": 1.49, - "learning_rate": 8.237648663078314e-05, - "loss": 1.0168, - "step": 13240 - }, - { - "epoch": 1.49, - "learning_rate": 8.232099028951548e-05, - "loss": 0.9942, - "step": 13260 - }, - { - "epoch": 1.49, - "learning_rate": 8.22654254655551e-05, - "loss": 1.0028, - "step": 13280 - }, - { - "epoch": 1.49, - "learning_rate": 8.22097922766344e-05, - "loss": 1.0011, - "step": 13300 - }, - { - "epoch": 1.5, - "learning_rate": 8.215409084063075e-05, - "loss": 1.0141, - "step": 13320 - }, - { - "epoch": 1.5, - "learning_rate": 8.209832127556598e-05, - "loss": 0.9799, - "step": 13340 - }, - { - "epoch": 1.5, - "learning_rate": 8.204248369960634e-05, - "loss": 0.9972, - "step": 13360 - }, - { - "epoch": 1.5, - "learning_rate": 8.198657823106219e-05, - "loss": 0.9737, - "step": 13380 - }, - { - "epoch": 1.51, - "learning_rate": 8.193060498838774e-05, - "loss": 1.0, - "step": 13400 - }, - { - "epoch": 1.51, - "learning_rate": 8.187456409018074e-05, - "loss": 1.0246, - "step": 13420 - }, - { - "epoch": 1.51, - "learning_rate": 8.18184556551824e-05, - "loss": 1.0043, - "step": 13440 - }, - { - "epoch": 1.51, - "learning_rate": 8.176227980227694e-05, - "loss": 0.9859, - "step": 13460 - }, - { - "epoch": 1.51, - "learning_rate": 8.170603665049146e-05, - "loss": 0.9851, - "step": 13480 - }, - { - "epoch": 1.52, - "learning_rate": 8.164972631899566e-05, - "loss": 1.0025, - "step": 13500 - }, - { - "epoch": 1.52, - "learning_rate": 8.159334892710156e-05, - "loss": 0.9988, - "step": 13520 - }, - { - "epoch": 1.52, - "learning_rate": 8.15369045942633e-05, - "loss": 1.0531, - "step": 13540 - }, - { - "epoch": 1.52, - "learning_rate": 8.148039344007685e-05, - "loss": 0.983, - "step": 13560 - }, - { - "epoch": 1.53, - "learning_rate": 8.142381558427974e-05, - "loss": 1.0149, - "step": 13580 - }, - { - "epoch": 1.53, - "learning_rate": 8.136717114675083e-05, - "loss": 1.0205, - "step": 13600 - }, - { - "epoch": 1.53, - "learning_rate": 8.131046024751009e-05, - "loss": 1.015, - "step": 13620 - }, - { - "epoch": 1.53, - "learning_rate": 8.12536830067183e-05, - "loss": 1.0131, - "step": 13640 - }, - { - "epoch": 1.53, - "learning_rate": 8.119683954467677e-05, - "loss": 1.026, - "step": 13660 - }, - { - "epoch": 1.54, - "learning_rate": 8.113992998182715e-05, - "loss": 0.9792, - "step": 13680 - }, - { - "epoch": 1.54, - "learning_rate": 8.108295443875116e-05, - "loss": 1.0232, - "step": 13700 - }, - { - "epoch": 1.54, - "learning_rate": 8.102591303617031e-05, - "loss": 1.0185, - "step": 13720 - }, - { - "epoch": 1.54, - "learning_rate": 8.096880589494563e-05, - "loss": 0.9907, - "step": 13740 - }, - { - "epoch": 1.55, - "learning_rate": 8.091163313607749e-05, - "loss": 1.0145, - "step": 13760 - }, - { - "epoch": 1.55, - "learning_rate": 8.085439488070521e-05, - "loss": 0.9818, - "step": 13780 - }, - { - "epoch": 1.55, - "learning_rate": 8.079709125010699e-05, - "loss": 0.9885, - "step": 13800 - }, - { - "epoch": 1.55, - "learning_rate": 8.073972236569947e-05, - "loss": 1.0026, - "step": 13820 - }, - { - "epoch": 1.56, - "learning_rate": 8.06822883490376e-05, - "loss": 0.9577, - "step": 13840 - }, - { - "epoch": 1.56, - "learning_rate": 8.06247893218143e-05, - "loss": 1.0351, - "step": 13860 - }, - { - "epoch": 1.56, - "learning_rate": 8.056722540586024e-05, - "loss": 0.9972, - "step": 13880 - }, - { - "epoch": 1.56, - "learning_rate": 8.050959672314359e-05, - "loss": 1.0099, - "step": 13900 - }, - { - "epoch": 1.56, - "learning_rate": 8.045190339576978e-05, - "loss": 0.9896, - "step": 13920 - }, - { - "epoch": 1.57, - "learning_rate": 8.039414554598113e-05, - "loss": 1.0083, - "step": 13940 - }, - { - "epoch": 1.57, - "learning_rate": 8.033632329615676e-05, - "loss": 0.9907, - "step": 13960 - }, - { - "epoch": 1.57, - "learning_rate": 8.027843676881218e-05, - "loss": 1.0125, - "step": 13980 - }, - { - "epoch": 1.57, - "learning_rate": 8.022048608659913e-05, - "loss": 1.0153, - "step": 14000 - }, - { - "epoch": 1.58, - "learning_rate": 8.016247137230525e-05, - "loss": 0.9991, - "step": 14020 - }, - { - "epoch": 1.58, - "learning_rate": 8.010439274885391e-05, - "loss": 0.9889, - "step": 14040 - }, - { - "epoch": 1.58, - "learning_rate": 8.004625033930382e-05, - "loss": 1.0172, - "step": 14060 - }, - { - "epoch": 1.58, - "learning_rate": 7.998804426684889e-05, - "loss": 0.9886, - "step": 14080 - }, - { - "epoch": 1.58, - "learning_rate": 7.992977465481793e-05, - "loss": 1.0062, - "step": 14100 - }, - { - "epoch": 1.59, - "learning_rate": 7.987144162667431e-05, - "loss": 0.9952, - "step": 14120 - }, - { - "epoch": 1.59, - "learning_rate": 7.981304530601586e-05, - "loss": 1.0364, - "step": 14140 - }, - { - "epoch": 1.59, - "learning_rate": 7.975458581657446e-05, - "loss": 1.021, - "step": 14160 - }, - { - "epoch": 1.59, - "learning_rate": 7.969606328221583e-05, - "loss": 1.0118, - "step": 14180 - }, - { - "epoch": 1.6, - "learning_rate": 7.96374778269393e-05, - "loss": 1.0187, - "step": 14200 - }, - { - "epoch": 1.6, - "learning_rate": 7.95788295748775e-05, - "loss": 1.0267, - "step": 14220 - }, - { - "epoch": 1.6, - "learning_rate": 7.952011865029614e-05, - "loss": 1.0121, - "step": 14240 - }, - { - "epoch": 1.6, - "learning_rate": 7.946134517759368e-05, - "loss": 0.971, - "step": 14260 - }, - { - "epoch": 1.6, - "learning_rate": 7.940250928130116e-05, - "loss": 1.0182, - "step": 14280 - }, - { - "epoch": 1.61, - "learning_rate": 7.934361108608183e-05, - "loss": 0.9876, - "step": 14300 - }, - { - "epoch": 1.61, - "learning_rate": 7.9284650716731e-05, - "loss": 0.988, - "step": 14320 - }, - { - "epoch": 1.61, - "learning_rate": 7.922562829817564e-05, - "loss": 1.0128, - "step": 14340 - }, - { - "epoch": 1.61, - "learning_rate": 7.916654395547427e-05, - "loss": 1.029, - "step": 14360 - }, - { - "epoch": 1.62, - "learning_rate": 7.91073978138166e-05, - "loss": 0.9795, - "step": 14380 - }, - { - "epoch": 1.62, - "learning_rate": 7.904818999852323e-05, - "loss": 0.9923, - "step": 14400 - }, - { - "epoch": 1.62, - "learning_rate": 7.898892063504548e-05, - "loss": 1.0132, - "step": 14420 - }, - { - "epoch": 1.62, - "learning_rate": 7.89295898489651e-05, - "loss": 0.999, - "step": 14440 - }, - { - "epoch": 1.62, - "learning_rate": 7.887019776599391e-05, - "loss": 0.9659, - "step": 14460 - }, - { - "epoch": 1.63, - "learning_rate": 7.88107445119737e-05, - "loss": 0.9859, - "step": 14480 - }, - { - "epoch": 1.63, - "learning_rate": 7.875123021287579e-05, - "loss": 0.9863, - "step": 14500 - }, - { - "epoch": 1.63, - "learning_rate": 7.869165499480089e-05, - "loss": 1.0014, - "step": 14520 - }, - { - "epoch": 1.63, - "learning_rate": 7.863201898397878e-05, - "loss": 0.9914, - "step": 14540 - }, - { - "epoch": 1.64, - "learning_rate": 7.857232230676802e-05, - "loss": 0.9893, - "step": 14560 - }, - { - "epoch": 1.64, - "learning_rate": 7.851256508965577e-05, - "loss": 0.9932, - "step": 14580 - }, - { - "epoch": 1.64, - "learning_rate": 7.845274745925744e-05, - "loss": 1.0187, - "step": 14600 - }, - { - "epoch": 1.64, - "learning_rate": 7.83928695423164e-05, - "loss": 1.0001, - "step": 14620 - }, - { - "epoch": 1.64, - "learning_rate": 7.83329314657038e-05, - "loss": 0.991, - "step": 14640 - }, - { - "epoch": 1.65, - "learning_rate": 7.827293335641825e-05, - "loss": 1.0057, - "step": 14660 - }, - { - "epoch": 1.65, - "learning_rate": 7.82128753415856e-05, - "loss": 0.9592, - "step": 14680 - }, - { - "epoch": 1.65, - "learning_rate": 7.815275754845854e-05, - "loss": 1.0121, - "step": 14700 - }, - { - "epoch": 1.65, - "learning_rate": 7.809258010441649e-05, - "loss": 0.9967, - "step": 14720 - }, - { - "epoch": 1.66, - "learning_rate": 7.803234313696524e-05, - "loss": 0.9933, - "step": 14740 - }, - { - "epoch": 1.66, - "learning_rate": 7.79720467737367e-05, - "loss": 0.9841, - "step": 14760 - }, - { - "epoch": 1.66, - "learning_rate": 7.791169114248864e-05, - "loss": 1.0147, - "step": 14780 - }, - { - "epoch": 1.66, - "learning_rate": 7.785127637110438e-05, - "loss": 1.0291, - "step": 14800 - }, - { - "epoch": 1.67, - "learning_rate": 7.779080258759259e-05, - "loss": 0.9922, - "step": 14820 - }, - { - "epoch": 1.67, - "learning_rate": 7.773026992008692e-05, - "loss": 1.0002, - "step": 14840 - }, - { - "epoch": 1.67, - "learning_rate": 7.766967849684584e-05, - "loss": 1.0118, - "step": 14860 - }, - { - "epoch": 1.67, - "learning_rate": 7.760902844625228e-05, - "loss": 1.0237, - "step": 14880 - }, - { - "epoch": 1.67, - "learning_rate": 7.754831989681345e-05, - "loss": 1.0038, - "step": 14900 - }, - { - "epoch": 1.68, - "learning_rate": 7.74875529771604e-05, - "loss": 1.0016, - "step": 14920 - }, - { - "epoch": 1.68, - "learning_rate": 7.742672781604794e-05, - "loss": 0.9977, - "step": 14940 - }, - { - "epoch": 1.68, - "learning_rate": 7.736584454235427e-05, - "loss": 0.9864, - "step": 14960 - }, - { - "epoch": 1.68, - "learning_rate": 7.730490328508072e-05, - "loss": 0.9771, - "step": 14980 - }, - { - "epoch": 1.69, - "learning_rate": 7.724390417335144e-05, - "loss": 0.983, - "step": 15000 - }, - { - "epoch": 1.69, - "learning_rate": 7.718284733641323e-05, - "loss": 0.968, - "step": 15020 - }, - { - "epoch": 1.69, - "learning_rate": 7.712173290363514e-05, - "loss": 0.9642, - "step": 15040 - }, - { - "epoch": 1.69, - "learning_rate": 7.706056100450831e-05, - "loss": 1.0196, - "step": 15060 - }, - { - "epoch": 1.69, - "learning_rate": 7.699933176864558e-05, - "loss": 0.9708, - "step": 15080 - }, - { - "epoch": 1.7, - "learning_rate": 7.693804532578131e-05, - "loss": 0.9916, - "step": 15100 - }, - { - "epoch": 1.7, - "learning_rate": 7.687670180577109e-05, - "loss": 1.0076, - "step": 15120 - }, - { - "epoch": 1.7, - "learning_rate": 7.681530133859142e-05, - "loss": 0.9733, - "step": 15140 - }, - { - "epoch": 1.7, - "learning_rate": 7.675384405433947e-05, - "loss": 0.9965, - "step": 15160 - }, - { - "epoch": 1.71, - "learning_rate": 7.66923300832328e-05, - "loss": 0.9941, - "step": 15180 - }, - { - "epoch": 1.71, - "learning_rate": 7.663075955560906e-05, - "loss": 0.9851, - "step": 15200 - }, - { - "epoch": 1.71, - "learning_rate": 7.656913260192574e-05, - "loss": 0.9879, - "step": 15220 - }, - { - "epoch": 1.71, - "learning_rate": 7.650744935275992e-05, - "loss": 1.0127, - "step": 15240 - }, - { - "epoch": 1.71, - "learning_rate": 7.644570993880791e-05, - "loss": 0.9718, - "step": 15260 - }, - { - "epoch": 1.72, - "learning_rate": 7.63839144908851e-05, - "loss": 0.9495, - "step": 15280 - }, - { - "epoch": 1.72, - "learning_rate": 7.632206313992548e-05, - "loss": 0.9745, - "step": 15300 - }, - { - "epoch": 1.72, - "learning_rate": 7.626015601698163e-05, - "loss": 0.9862, - "step": 15320 - }, - { - "epoch": 1.72, - "learning_rate": 7.619819325322422e-05, - "loss": 0.9923, - "step": 15340 - }, - { - "epoch": 1.73, - "learning_rate": 7.613617497994178e-05, - "loss": 0.9779, - "step": 15360 - }, - { - "epoch": 1.73, - "learning_rate": 7.607410132854059e-05, - "loss": 0.9875, - "step": 15380 - }, - { - "epoch": 1.73, - "learning_rate": 7.60119724305441e-05, - "loss": 1.011, - "step": 15400 - }, - { - "epoch": 1.73, - "learning_rate": 7.594978841759297e-05, - "loss": 0.9933, - "step": 15420 - }, - { - "epoch": 1.73, - "learning_rate": 7.588754942144452e-05, - "loss": 0.9842, - "step": 15440 - }, - { - "epoch": 1.74, - "learning_rate": 7.582525557397264e-05, - "loss": 0.9784, - "step": 15460 - }, - { - "epoch": 1.74, - "learning_rate": 7.576290700716742e-05, - "loss": 0.9794, - "step": 15480 - }, - { - "epoch": 1.74, - "learning_rate": 7.570050385313487e-05, - "loss": 1.0136, - "step": 15500 - }, - { - "epoch": 1.74, - "learning_rate": 7.563804624409672e-05, - "loss": 1.0115, - "step": 15520 - }, - { - "epoch": 1.75, - "learning_rate": 7.557553431239002e-05, - "loss": 0.9926, - "step": 15540 - }, - { - "epoch": 1.75, - "learning_rate": 7.551296819046693e-05, - "loss": 0.9946, - "step": 15560 - }, - { - "epoch": 1.75, - "learning_rate": 7.545034801089448e-05, - "loss": 0.9707, - "step": 15580 - }, - { - "epoch": 1.75, - "learning_rate": 7.538767390635416e-05, - "loss": 0.9644, - "step": 15600 - }, - { - "epoch": 1.76, - "learning_rate": 7.53249460096418e-05, - "loss": 0.9909, - "step": 15620 - }, - { - "epoch": 1.76, - "learning_rate": 7.526216445366713e-05, - "loss": 0.994, - "step": 15640 - }, - { - "epoch": 1.76, - "learning_rate": 7.519932937145364e-05, - "loss": 0.9701, - "step": 15660 - }, - { - "epoch": 1.76, - "learning_rate": 7.513644089613818e-05, - "loss": 0.9868, - "step": 15680 - }, - { - "epoch": 1.76, - "learning_rate": 7.507349916097077e-05, - "loss": 1.018, - "step": 15700 - }, - { - "epoch": 1.77, - "learning_rate": 7.501050429931429e-05, - "loss": 0.9759, - "step": 15720 - }, - { - "epoch": 1.77, - "learning_rate": 7.49474564446441e-05, - "loss": 1.0035, - "step": 15740 - }, - { - "epoch": 1.77, - "learning_rate": 7.488435573054795e-05, - "loss": 0.9836, - "step": 15760 - }, - { - "epoch": 1.77, - "learning_rate": 7.482120229072552e-05, - "loss": 0.9725, - "step": 15780 - }, - { - "epoch": 1.78, - "learning_rate": 7.475799625898825e-05, - "loss": 0.9832, - "step": 15800 - }, - { - "epoch": 1.78, - "learning_rate": 7.469473776925897e-05, - "loss": 0.9895, - "step": 15820 - }, - { - "epoch": 1.78, - "learning_rate": 7.463142695557171e-05, - "loss": 0.979, - "step": 15840 - }, - { - "epoch": 1.78, - "learning_rate": 7.456806395207132e-05, - "loss": 0.987, - "step": 15860 - }, - { - "epoch": 1.78, - "learning_rate": 7.450464889301326e-05, - "loss": 0.9911, - "step": 15880 - }, - { - "epoch": 1.79, - "learning_rate": 7.444118191276326e-05, - "loss": 0.9616, - "step": 15900 - }, - { - "epoch": 1.79, - "learning_rate": 7.43776631457971e-05, - "loss": 0.9772, - "step": 15920 - }, - { - "epoch": 1.79, - "learning_rate": 7.431409272670027e-05, - "loss": 0.9851, - "step": 15940 - }, - { - "epoch": 1.79, - "learning_rate": 7.425047079016765e-05, - "loss": 0.9971, - "step": 15960 - }, - { - "epoch": 1.8, - "learning_rate": 7.418679747100339e-05, - "loss": 0.9858, - "step": 15980 - }, - { - "epoch": 1.8, - "learning_rate": 7.412307290412041e-05, - "loss": 0.9759, - "step": 16000 - }, - { - "epoch": 1.8, - "learning_rate": 7.405929722454026e-05, - "loss": 1.0255, - "step": 16020 - }, - { - "epoch": 1.8, - "learning_rate": 7.399547056739278e-05, - "loss": 0.9645, - "step": 16040 - }, - { - "epoch": 1.8, - "learning_rate": 7.39315930679158e-05, - "loss": 0.9821, - "step": 16060 - }, - { - "epoch": 1.81, - "learning_rate": 7.386766486145496e-05, - "loss": 0.9783, - "step": 16080 - }, - { - "epoch": 1.81, - "learning_rate": 7.380368608346322e-05, - "loss": 0.9899, - "step": 16100 - }, - { - "epoch": 1.81, - "learning_rate": 7.373965686950078e-05, - "loss": 0.9705, - "step": 16120 - }, - { - "epoch": 1.81, - "learning_rate": 7.367557735523467e-05, - "loss": 0.9869, - "step": 16140 - }, - { - "epoch": 1.82, - "learning_rate": 7.361144767643849e-05, - "loss": 0.983, - "step": 16160 - }, - { - "epoch": 1.82, - "learning_rate": 7.354726796899219e-05, - "loss": 1.0142, - "step": 16180 - }, - { - "epoch": 1.82, - "learning_rate": 7.348303836888163e-05, - "loss": 0.9991, - "step": 16200 - }, - { - "epoch": 1.82, - "learning_rate": 7.341875901219845e-05, - "loss": 0.9927, - "step": 16220 - }, - { - "epoch": 1.82, - "learning_rate": 7.33544300351397e-05, - "loss": 1.0073, - "step": 16240 - }, - { - "epoch": 1.83, - "learning_rate": 7.329005157400754e-05, - "loss": 0.998, - "step": 16260 - }, - { - "epoch": 1.83, - "learning_rate": 7.322562376520904e-05, - "loss": 0.9921, - "step": 16280 - }, - { - "epoch": 1.83, - "learning_rate": 7.316114674525578e-05, - "loss": 0.9735, - "step": 16300 - }, - { - "epoch": 1.83, - "learning_rate": 7.30966206507636e-05, - "loss": 0.9668, - "step": 16320 - }, - { - "epoch": 1.84, - "learning_rate": 7.303204561845236e-05, - "loss": 0.9921, - "step": 16340 - }, - { - "epoch": 1.84, - "learning_rate": 7.29674217851456e-05, - "loss": 0.9809, - "step": 16360 - }, - { - "epoch": 1.84, - "learning_rate": 7.290274928777024e-05, - "loss": 0.9915, - "step": 16380 - }, - { - "epoch": 1.84, - "learning_rate": 7.283802826335635e-05, - "loss": 0.9805, - "step": 16400 - }, - { - "epoch": 1.84, - "learning_rate": 7.277325884903674e-05, - "loss": 0.9791, - "step": 16420 - }, - { - "epoch": 1.85, - "learning_rate": 7.270844118204688e-05, - "loss": 0.9702, - "step": 16440 - }, - { - "epoch": 1.85, - "learning_rate": 7.264357539972434e-05, - "loss": 0.9717, - "step": 16460 - }, - { - "epoch": 1.85, - "learning_rate": 7.257866163950873e-05, - "loss": 0.9982, - "step": 16480 - }, - { - "epoch": 1.85, - "learning_rate": 7.251370003894133e-05, - "loss": 0.9703, - "step": 16500 - }, - { - "epoch": 1.86, - "learning_rate": 7.244869073566466e-05, - "loss": 0.9482, - "step": 16520 - }, - { - "epoch": 1.86, - "learning_rate": 7.238363386742249e-05, - "loss": 1.0209, - "step": 16540 - }, - { - "epoch": 1.86, - "learning_rate": 7.23185295720592e-05, - "loss": 0.9672, - "step": 16560 - }, - { - "epoch": 1.86, - "learning_rate": 7.225337798751981e-05, - "loss": 1.0046, - "step": 16580 - }, - { - "epoch": 1.87, - "learning_rate": 7.218817925184944e-05, - "loss": 1.0102, - "step": 16600 - }, - { - "epoch": 1.87, - "learning_rate": 7.212293350319313e-05, - "loss": 0.9578, - "step": 16620 - }, - { - "epoch": 1.87, - "learning_rate": 7.205764087979557e-05, - "loss": 0.9835, - "step": 16640 - }, - { - "epoch": 1.87, - "learning_rate": 7.199230152000074e-05, - "loss": 1.0221, - "step": 16660 - }, - { - "epoch": 1.87, - "learning_rate": 7.192691556225167e-05, - "loss": 0.9698, - "step": 16680 - }, - { - "epoch": 1.88, - "learning_rate": 7.186148314509008e-05, - "loss": 0.9918, - "step": 16700 - }, - { - "epoch": 1.88, - "learning_rate": 7.179600440715615e-05, - "loss": 0.9828, - "step": 16720 - }, - { - "epoch": 1.88, - "learning_rate": 7.173047948718826e-05, - "loss": 1.0046, - "step": 16740 - }, - { - "epoch": 1.88, - "learning_rate": 7.166490852402254e-05, - "loss": 0.9624, - "step": 16760 - }, - { - "epoch": 1.89, - "learning_rate": 7.159929165659277e-05, - "loss": 0.9675, - "step": 16780 - }, - { - "epoch": 1.89, - "learning_rate": 7.153362902392994e-05, - "loss": 0.9811, - "step": 16800 - }, - { - "epoch": 1.89, - "learning_rate": 7.146792076516202e-05, - "loss": 0.9725, - "step": 16820 - }, - { - "epoch": 1.89, - "learning_rate": 7.140216701951366e-05, - "loss": 0.9751, - "step": 16840 - }, - { - "epoch": 1.89, - "learning_rate": 7.13363679263059e-05, - "loss": 0.9734, - "step": 16860 - }, - { - "epoch": 1.9, - "learning_rate": 7.127052362495583e-05, - "loss": 0.9879, - "step": 16880 - }, - { - "epoch": 1.9, - "learning_rate": 7.120463425497637e-05, - "loss": 0.9824, - "step": 16900 - }, - { - "epoch": 1.9, - "learning_rate": 7.11386999559759e-05, - "loss": 0.9833, - "step": 16920 - }, - { - "epoch": 1.9, - "learning_rate": 7.1072720867658e-05, - "loss": 0.9703, - "step": 16940 - }, - { - "epoch": 1.91, - "learning_rate": 7.100669712982119e-05, - "loss": 0.9864, - "step": 16960 - }, - { - "epoch": 1.91, - "learning_rate": 7.094062888235852e-05, - "loss": 0.9809, - "step": 16980 - }, - { - "epoch": 1.91, - "learning_rate": 7.087451626525745e-05, - "loss": 0.9887, - "step": 17000 - }, - { - "epoch": 1.91, - "learning_rate": 7.080835941859932e-05, - "loss": 0.9817, - "step": 17020 - }, - { - "epoch": 1.91, - "learning_rate": 7.074215848255933e-05, - "loss": 0.9697, - "step": 17040 - }, - { - "epoch": 1.92, - "learning_rate": 7.067591359740599e-05, - "loss": 0.9717, - "step": 17060 - }, - { - "epoch": 1.92, - "learning_rate": 7.060962490350098e-05, - "loss": 0.9435, - "step": 17080 - }, - { - "epoch": 1.92, - "learning_rate": 7.05432925412988e-05, - "loss": 0.9578, - "step": 17100 - }, - { - "epoch": 1.92, - "learning_rate": 7.047691665134643e-05, - "loss": 0.9845, - "step": 17120 - }, - { - "epoch": 1.93, - "learning_rate": 7.041049737428316e-05, - "loss": 0.9647, - "step": 17140 - }, - { - "epoch": 1.93, - "learning_rate": 7.034403485084014e-05, - "loss": 0.9781, - "step": 17160 - }, - { - "epoch": 1.93, - "learning_rate": 7.027752922184017e-05, - "loss": 0.9348, - "step": 17180 - }, - { - "epoch": 1.93, - "learning_rate": 7.021098062819743e-05, - "loss": 0.9801, - "step": 17200 - }, - { - "epoch": 1.93, - "learning_rate": 7.014438921091703e-05, - "loss": 0.9987, - "step": 17220 - }, - { - "epoch": 1.94, - "learning_rate": 7.007775511109495e-05, - "loss": 0.9797, - "step": 17240 - }, - { - "epoch": 1.94, - "learning_rate": 7.001107846991751e-05, - "loss": 0.9882, - "step": 17260 - }, - { - "epoch": 1.94, - "learning_rate": 6.994435942866117e-05, - "loss": 0.9706, - "step": 17280 - }, - { - "epoch": 1.94, - "learning_rate": 6.98775981286923e-05, - "loss": 0.9733, - "step": 17300 - }, - { - "epoch": 1.95, - "learning_rate": 6.981079471146672e-05, - "loss": 0.9788, - "step": 17320 - }, - { - "epoch": 1.95, - "learning_rate": 6.974394931852956e-05, - "loss": 0.9797, - "step": 17340 - }, - { - "epoch": 1.95, - "learning_rate": 6.967706209151488e-05, - "loss": 0.9357, - "step": 17360 - }, - { - "epoch": 1.95, - "learning_rate": 6.96101331721453e-05, - "loss": 0.9818, - "step": 17380 - }, - { - "epoch": 1.96, - "learning_rate": 6.954316270223189e-05, - "loss": 0.9728, - "step": 17400 - }, - { - "epoch": 1.96, - "learning_rate": 6.94761508236737e-05, - "loss": 1.0087, - "step": 17420 - }, - { - "epoch": 1.96, - "learning_rate": 6.940909767845753e-05, - "loss": 0.9928, - "step": 17440 - }, - { - "epoch": 1.96, - "learning_rate": 6.934200340865761e-05, - "loss": 0.9464, - "step": 17460 - }, - { - "epoch": 1.96, - "learning_rate": 6.927486815643528e-05, - "loss": 0.9691, - "step": 17480 - }, - { - "epoch": 1.97, - "learning_rate": 6.920769206403881e-05, - "loss": 0.9875, - "step": 17500 - }, - { - "epoch": 1.97, - "learning_rate": 6.914047527380288e-05, - "loss": 0.9622, - "step": 17520 - }, - { - "epoch": 1.97, - "learning_rate": 6.907321792814848e-05, - "loss": 0.9906, - "step": 17540 - }, - { - "epoch": 1.97, - "learning_rate": 6.900592016958252e-05, - "loss": 0.9778, - "step": 17560 - }, - { - "epoch": 1.98, - "learning_rate": 6.89385821406975e-05, - "loss": 0.9606, - "step": 17580 - }, - { - "epoch": 1.98, - "learning_rate": 6.887120398417132e-05, - "loss": 0.9551, - "step": 17600 - }, - { - "epoch": 1.98, - "learning_rate": 6.880378584276682e-05, - "loss": 0.9555, - "step": 17620 - }, - { - "epoch": 1.98, - "learning_rate": 6.87363278593316e-05, - "loss": 0.9735, - "step": 17640 - }, - { - "epoch": 1.98, - "learning_rate": 6.866883017679772e-05, - "loss": 0.9791, - "step": 17660 - }, - { - "epoch": 1.99, - "learning_rate": 6.860129293818124e-05, - "loss": 0.9975, - "step": 17680 - }, - { - "epoch": 1.99, - "learning_rate": 6.853371628658217e-05, - "loss": 0.9816, - "step": 17700 - }, - { - "epoch": 1.99, - "learning_rate": 6.846610036518396e-05, - "loss": 0.985, - "step": 17720 - }, - { - "epoch": 1.99, - "learning_rate": 6.839844531725321e-05, - "loss": 0.9667, - "step": 17740 - }, - { - "epoch": 2.0, - "learning_rate": 6.833075128613955e-05, - "loss": 0.95, - "step": 17760 - }, - { - "epoch": 2.0, - "learning_rate": 6.826301841527512e-05, - "loss": 0.9984, - "step": 17780 - }, - { - "epoch": 2.0, - "learning_rate": 6.819524684817438e-05, - "loss": 0.9795, - "step": 17800 - }, - { - "epoch": 2.0, - "learning_rate": 6.812743672843378e-05, - "loss": 0.932, - "step": 17820 - }, - { - "epoch": 2.0, - "learning_rate": 6.805958819973144e-05, - "loss": 0.9332, - "step": 17840 - }, - { - "epoch": 2.01, - "learning_rate": 6.799170140582689e-05, - "loss": 0.9567, - "step": 17860 - }, - { - "epoch": 2.01, - "learning_rate": 6.792377649056071e-05, - "loss": 0.9452, - "step": 17880 - }, - { - "epoch": 2.01, - "learning_rate": 6.785581359785428e-05, - "loss": 0.9466, - "step": 17900 - }, - { - "epoch": 2.01, - "learning_rate": 6.778781287170946e-05, - "loss": 0.9355, - "step": 17920 - }, - { - "epoch": 2.02, - "learning_rate": 6.771977445620818e-05, - "loss": 0.9449, - "step": 17940 - }, - { - "epoch": 2.02, - "learning_rate": 6.765169849551235e-05, - "loss": 0.9313, - "step": 17960 - }, - { - "epoch": 2.02, - "learning_rate": 6.758358513386335e-05, - "loss": 0.9742, - "step": 17980 - }, - { - "epoch": 2.02, - "learning_rate": 6.751543451558186e-05, - "loss": 0.919, - "step": 18000 - }, - { - "epoch": 2.02, - "learning_rate": 6.744724678506746e-05, - "loss": 0.955, - "step": 18020 - }, - { - "epoch": 2.03, - "learning_rate": 6.737902208679837e-05, - "loss": 0.961, - "step": 18040 - }, - { - "epoch": 2.03, - "learning_rate": 6.731076056533114e-05, - "loss": 0.9699, - "step": 18060 - }, - { - "epoch": 2.03, - "learning_rate": 6.724246236530036e-05, - "loss": 0.9497, - "step": 18080 - }, - { - "epoch": 2.03, - "learning_rate": 6.717412763141832e-05, - "loss": 0.9476, - "step": 18100 - }, - { - "epoch": 2.04, - "learning_rate": 6.710575650847474e-05, - "loss": 0.9641, - "step": 18120 - }, - { - "epoch": 2.04, - "learning_rate": 6.70373491413364e-05, - "loss": 0.9534, - "step": 18140 - }, - { - "epoch": 2.04, - "learning_rate": 6.69689056749469e-05, - "loss": 0.9416, - "step": 18160 - }, - { - "epoch": 2.04, - "learning_rate": 6.690042625432635e-05, - "loss": 0.9359, - "step": 18180 - }, - { - "epoch": 2.04, - "learning_rate": 6.6831911024571e-05, - "loss": 0.9538, - "step": 18200 - }, - { - "epoch": 2.05, - "learning_rate": 6.676336013085302e-05, - "loss": 0.9375, - "step": 18220 - }, - { - "epoch": 2.05, - "learning_rate": 6.669477371842008e-05, - "loss": 0.9479, - "step": 18240 - }, - { - "epoch": 2.05, - "learning_rate": 6.662615193259519e-05, - "loss": 0.9275, - "step": 18260 - }, - { - "epoch": 2.05, - "learning_rate": 6.655749491877623e-05, - "loss": 0.9417, - "step": 18280 - }, - { - "epoch": 2.06, - "learning_rate": 6.648880282243579e-05, - "loss": 0.9558, - "step": 18300 - }, - { - "epoch": 2.06, - "learning_rate": 6.642007578912074e-05, - "loss": 0.9539, - "step": 18320 - }, - { - "epoch": 2.06, - "learning_rate": 6.635131396445199e-05, - "loss": 0.955, - "step": 18340 - }, - { - "epoch": 2.06, - "learning_rate": 6.628251749412421e-05, - "loss": 0.9321, - "step": 18360 - }, - { - "epoch": 2.07, - "learning_rate": 6.621368652390542e-05, - "loss": 0.9354, - "step": 18380 - }, - { - "epoch": 2.07, - "learning_rate": 6.614482119963677e-05, - "loss": 0.932, - "step": 18400 - }, - { - "epoch": 2.07, - "learning_rate": 6.607592166723219e-05, - "loss": 0.9318, - "step": 18420 - }, - { - "epoch": 2.07, - "learning_rate": 6.600698807267811e-05, - "loss": 0.9294, - "step": 18440 - }, - { - "epoch": 2.07, - "learning_rate": 6.59380205620331e-05, - "loss": 0.946, - "step": 18460 - }, - { - "epoch": 2.08, - "learning_rate": 6.586901928142761e-05, - "loss": 0.9338, - "step": 18480 - }, - { - "epoch": 2.08, - "learning_rate": 6.579998437706367e-05, - "loss": 0.9615, - "step": 18500 - }, - { - "epoch": 2.08, - "learning_rate": 6.573091599521448e-05, - "loss": 0.9363, - "step": 18520 - }, - { - "epoch": 2.08, - "learning_rate": 6.566181428222424e-05, - "loss": 0.9891, - "step": 18540 - }, - { - "epoch": 2.09, - "learning_rate": 6.559267938450778e-05, - "loss": 0.9679, - "step": 18560 - }, - { - "epoch": 2.09, - "learning_rate": 6.552351144855015e-05, - "loss": 0.9197, - "step": 18580 - }, - { - "epoch": 2.09, - "learning_rate": 6.545431062090653e-05, - "loss": 0.9464, - "step": 18600 - }, - { - "epoch": 2.09, - "learning_rate": 6.538507704820169e-05, - "loss": 0.9829, - "step": 18620 - }, - { - "epoch": 2.09, - "learning_rate": 6.531581087712984e-05, - "loss": 0.9383, - "step": 18640 - }, - { - "epoch": 2.1, - "learning_rate": 6.524651225445423e-05, - "loss": 0.941, - "step": 18660 - }, - { - "epoch": 2.1, - "learning_rate": 6.517718132700689e-05, - "loss": 0.9647, - "step": 18680 - }, - { - "epoch": 2.1, - "learning_rate": 6.510781824168828e-05, - "loss": 0.9517, - "step": 18700 - }, - { - "epoch": 2.1, - "learning_rate": 6.5038423145467e-05, - "loss": 0.9526, - "step": 18720 - }, - { - "epoch": 2.11, - "learning_rate": 6.496899618537947e-05, - "loss": 0.943, - "step": 18740 - }, - { - "epoch": 2.11, - "learning_rate": 6.489953750852966e-05, - "loss": 0.9427, - "step": 18760 - }, - { - "epoch": 2.11, - "learning_rate": 6.483004726208873e-05, - "loss": 0.9405, - "step": 18780 - }, - { - "epoch": 2.11, - "learning_rate": 6.476052559329467e-05, - "loss": 0.9578, - "step": 18800 - }, - { - "epoch": 2.11, - "learning_rate": 6.469097264945214e-05, - "loss": 0.967, - "step": 18820 - }, - { - "epoch": 2.12, - "learning_rate": 6.4621388577932e-05, - "loss": 0.958, - "step": 18840 - }, - { - "epoch": 2.12, - "learning_rate": 6.45517735261711e-05, - "loss": 0.9582, - "step": 18860 - }, - { - "epoch": 2.12, - "learning_rate": 6.448212764167191e-05, - "loss": 0.9493, - "step": 18880 - }, - { - "epoch": 2.12, - "learning_rate": 6.441245107200223e-05, - "loss": 0.9368, - "step": 18900 - }, - { - "epoch": 2.13, - "learning_rate": 6.43427439647949e-05, - "loss": 0.9792, - "step": 18920 - }, - { - "epoch": 2.13, - "learning_rate": 6.427300646774744e-05, - "loss": 0.9427, - "step": 18940 - }, - { - "epoch": 2.13, - "learning_rate": 6.420323872862179e-05, - "loss": 0.9504, - "step": 18960 - }, - { - "epoch": 2.13, - "learning_rate": 6.413344089524393e-05, - "loss": 0.9439, - "step": 18980 - }, - { - "epoch": 2.13, - "learning_rate": 6.406361311550361e-05, - "loss": 0.92, - "step": 19000 - }, - { - "epoch": 2.14, - "learning_rate": 6.399375553735407e-05, - "loss": 0.9736, - "step": 19020 - }, - { - "epoch": 2.14, - "learning_rate": 6.392386830881164e-05, - "loss": 0.9712, - "step": 19040 - }, - { - "epoch": 2.14, - "learning_rate": 6.385395157795552e-05, - "loss": 0.9777, - "step": 19060 - }, - { - "epoch": 2.14, - "learning_rate": 6.378400549292739e-05, - "loss": 0.9232, - "step": 19080 - }, - { - "epoch": 2.15, - "learning_rate": 6.371403020193109e-05, - "loss": 0.9597, - "step": 19100 - }, - { - "epoch": 2.15, - "learning_rate": 6.364402585323245e-05, - "loss": 0.9131, - "step": 19120 - }, - { - "epoch": 2.15, - "learning_rate": 6.357399259515877e-05, - "loss": 0.9555, - "step": 19140 - }, - { - "epoch": 2.15, - "learning_rate": 6.350393057609865e-05, - "loss": 0.9488, - "step": 19160 - }, - { - "epoch": 2.16, - "learning_rate": 6.343383994450158e-05, - "loss": 0.9597, - "step": 19180 - }, - { - "epoch": 2.16, - "learning_rate": 6.336372084887775e-05, - "loss": 0.9153, - "step": 19200 - }, - { - "epoch": 2.16, - "learning_rate": 6.329357343779763e-05, - "loss": 0.9319, - "step": 19220 - }, - { - "epoch": 2.16, - "learning_rate": 6.322339785989163e-05, - "loss": 0.9723, - "step": 19240 - }, - { - "epoch": 2.16, - "learning_rate": 6.315319426384993e-05, - "loss": 0.9426, - "step": 19260 - }, - { - "epoch": 2.17, - "learning_rate": 6.308296279842205e-05, - "loss": 0.9569, - "step": 19280 - }, - { - "epoch": 2.17, - "learning_rate": 6.301270361241649e-05, - "loss": 0.9376, - "step": 19300 - }, - { - "epoch": 2.17, - "learning_rate": 6.294241685470057e-05, - "loss": 0.953, - "step": 19320 - }, - { - "epoch": 2.17, - "learning_rate": 6.287210267420001e-05, - "loss": 0.9552, - "step": 19340 - }, - { - "epoch": 2.18, - "learning_rate": 6.280176121989861e-05, - "loss": 0.941, - "step": 19360 - }, - { - "epoch": 2.18, - "learning_rate": 6.273139264083798e-05, - "loss": 0.9632, - "step": 19380 - }, - { - "epoch": 2.18, - "learning_rate": 6.266099708611719e-05, - "loss": 0.9531, - "step": 19400 - }, - { - "epoch": 2.18, - "learning_rate": 6.259057470489246e-05, - "loss": 0.9426, - "step": 19420 - }, - { - "epoch": 2.18, - "learning_rate": 6.252012564637689e-05, - "loss": 0.9947, - "step": 19440 - }, - { - "epoch": 2.19, - "learning_rate": 6.244965005984008e-05, - "loss": 0.9713, - "step": 19460 - }, - { - "epoch": 2.19, - "learning_rate": 6.23791480946078e-05, - "loss": 0.9208, - "step": 19480 - }, - { - "epoch": 2.19, - "learning_rate": 6.23086199000618e-05, - "loss": 0.9401, - "step": 19500 - }, - { - "epoch": 2.19, - "learning_rate": 6.223806562563929e-05, - "loss": 0.9537, - "step": 19520 - }, - { - "epoch": 2.2, - "learning_rate": 6.216748542083286e-05, - "loss": 0.9889, - "step": 19540 - }, - { - "epoch": 2.2, - "learning_rate": 6.209687943518996e-05, - "loss": 0.9211, - "step": 19560 - }, - { - "epoch": 2.2, - "learning_rate": 6.202624781831268e-05, - "loss": 0.9332, - "step": 19580 - }, - { - "epoch": 2.2, - "learning_rate": 6.195559071985745e-05, - "loss": 0.9656, - "step": 19600 - }, - { - "epoch": 2.2, - "learning_rate": 6.188490828953465e-05, - "loss": 0.9292, - "step": 19620 - }, - { - "epoch": 2.21, - "learning_rate": 6.181420067710838e-05, - "loss": 0.9479, - "step": 19640 - }, - { - "epoch": 2.21, - "learning_rate": 6.174346803239604e-05, - "loss": 0.9307, - "step": 19660 - }, - { - "epoch": 2.21, - "learning_rate": 6.167271050526812e-05, - "loss": 0.9564, - "step": 19680 - }, - { - "epoch": 2.21, - "learning_rate": 6.160192824564778e-05, - "loss": 0.9316, - "step": 19700 - }, - { - "epoch": 2.22, - "learning_rate": 6.153112140351066e-05, - "loss": 0.9171, - "step": 19720 - }, - { - "epoch": 2.22, - "learning_rate": 6.14602901288844e-05, - "loss": 0.9369, - "step": 19740 - }, - { - "epoch": 2.22, - "learning_rate": 6.138943457184847e-05, - "loss": 0.9351, - "step": 19760 - }, - { - "epoch": 2.22, - "learning_rate": 6.131855488253379e-05, - "loss": 0.9421, - "step": 19780 - }, - { - "epoch": 2.22, - "learning_rate": 6.124765121112233e-05, - "loss": 0.9732, - "step": 19800 - }, - { - "epoch": 2.23, - "learning_rate": 6.1176723707847e-05, - "loss": 0.933, - "step": 19820 - }, - { - "epoch": 2.23, - "learning_rate": 6.110577252299108e-05, - "loss": 0.9656, - "step": 19840 - }, - { - "epoch": 2.23, - "learning_rate": 6.103479780688816e-05, - "loss": 0.9369, - "step": 19860 - }, - { - "epoch": 2.23, - "learning_rate": 6.096379970992157e-05, - "loss": 0.9743, - "step": 19880 - }, - { - "epoch": 2.24, - "learning_rate": 6.089277838252422e-05, - "loss": 0.9686, - "step": 19900 - }, - { - "epoch": 2.24, - "learning_rate": 6.0821733975178276e-05, - "loss": 0.927, - "step": 19920 - }, - { - "epoch": 2.24, - "learning_rate": 6.0750666638414765e-05, - "loss": 0.9462, - "step": 19940 - }, - { - "epoch": 2.24, - "learning_rate": 6.067957652281332e-05, - "loss": 0.9591, - "step": 19960 - }, - { - "epoch": 2.24, - "learning_rate": 6.060846377900182e-05, - "loss": 0.9595, - "step": 19980 - }, - { - "epoch": 2.25, - "learning_rate": 6.0537328557656105e-05, - "loss": 0.9518, - "step": 20000 - }, - { - "epoch": 2.25, - "learning_rate": 6.046617100949965e-05, - "loss": 0.9485, - "step": 20020 - }, - { - "epoch": 2.25, - "learning_rate": 6.0394991285303196e-05, - "loss": 0.961, - "step": 20040 - }, - { - "epoch": 2.25, - "learning_rate": 6.03237895358845e-05, - "loss": 0.9536, - "step": 20060 - }, - { - "epoch": 2.26, - "learning_rate": 6.025256591210799e-05, - "loss": 0.9449, - "step": 20080 - }, - { - "epoch": 2.26, - "learning_rate": 6.0181320564884444e-05, - "loss": 0.947, - "step": 20100 - }, - { - "epoch": 2.26, - "learning_rate": 6.011005364517068e-05, - "loss": 0.9491, - "step": 20120 - }, - { - "epoch": 2.26, - "learning_rate": 6.003876530396916e-05, - "loss": 0.9332, - "step": 20140 - }, - { - "epoch": 2.27, - "learning_rate": 5.99674556923278e-05, - "loss": 0.9612, - "step": 20160 - }, - { - "epoch": 2.27, - "learning_rate": 5.989612496133956e-05, - "loss": 0.9536, - "step": 20180 - }, - { - "epoch": 2.27, - "learning_rate": 5.9824773262142165e-05, - "loss": 0.9351, - "step": 20200 - }, - { - "epoch": 2.27, - "learning_rate": 5.975340074591774e-05, - "loss": 0.955, - "step": 20220 - }, - { - "epoch": 2.27, - "learning_rate": 5.968200756389255e-05, - "loss": 0.9472, - "step": 20240 - }, - { - "epoch": 2.28, - "learning_rate": 5.9610593867336614e-05, - "loss": 0.9185, - "step": 20260 - }, - { - "epoch": 2.28, - "learning_rate": 5.9539159807563437e-05, - "loss": 0.9523, - "step": 20280 - }, - { - "epoch": 2.28, - "learning_rate": 5.9467705535929686e-05, - "loss": 0.9177, - "step": 20300 - }, - { - "epoch": 2.28, - "learning_rate": 5.939623120383481e-05, - "loss": 0.9517, - "step": 20320 - }, - { - "epoch": 2.29, - "learning_rate": 5.9324736962720805e-05, - "loss": 0.9449, - "step": 20340 - }, - { - "epoch": 2.29, - "learning_rate": 5.925322296407181e-05, - "loss": 0.9656, - "step": 20360 - }, - { - "epoch": 2.29, - "learning_rate": 5.918168935941388e-05, - "loss": 0.9617, - "step": 20380 - }, - { - "epoch": 2.29, - "learning_rate": 5.911013630031457e-05, - "loss": 0.9479, - "step": 20400 - }, - { - "epoch": 2.29, - "learning_rate": 5.903856393838265e-05, - "loss": 0.9431, - "step": 20420 - }, - { - "epoch": 2.3, - "learning_rate": 5.896697242526785e-05, - "loss": 0.9583, - "step": 20440 - }, - { - "epoch": 2.3, - "learning_rate": 5.8895361912660374e-05, - "loss": 0.9749, - "step": 20460 - }, - { - "epoch": 2.3, - "learning_rate": 5.882373255229081e-05, - "loss": 0.9783, - "step": 20480 - }, - { - "epoch": 2.3, - "learning_rate": 5.875208449592957e-05, - "loss": 0.9316, - "step": 20500 - }, - { - "epoch": 2.31, - "learning_rate": 5.868041789538675e-05, - "loss": 0.9483, - "step": 20520 - }, - { - "epoch": 2.31, - "learning_rate": 5.8608732902511695e-05, - "loss": 0.9384, - "step": 20540 - }, - { - "epoch": 2.31, - "learning_rate": 5.853702966919275e-05, - "loss": 0.9093, - "step": 20560 - }, - { - "epoch": 2.31, - "learning_rate": 5.8465308347356895e-05, - "loss": 0.9331, - "step": 20580 - }, - { - "epoch": 2.31, - "learning_rate": 5.8393569088969425e-05, - "loss": 0.9641, - "step": 20600 - }, - { - "epoch": 2.32, - "learning_rate": 5.8321812046033666e-05, - "loss": 0.9628, - "step": 20620 - }, - { - "epoch": 2.32, - "learning_rate": 5.825003737059062e-05, - "loss": 0.9644, - "step": 20640 - }, - { - "epoch": 2.32, - "learning_rate": 5.81782452147186e-05, - "loss": 0.9429, - "step": 20660 - }, - { - "epoch": 2.32, - "learning_rate": 5.810643573053306e-05, - "loss": 0.9444, - "step": 20680 - }, - { - "epoch": 2.33, - "learning_rate": 5.803460907018607e-05, - "loss": 0.9412, - "step": 20700 - }, - { - "epoch": 2.33, - "learning_rate": 5.796276538586615e-05, - "loss": 0.9411, - "step": 20720 - }, - { - "epoch": 2.33, - "learning_rate": 5.7890904829797856e-05, - "loss": 0.9342, - "step": 20740 - }, - { - "epoch": 2.33, - "learning_rate": 5.781902755424151e-05, - "loss": 0.9188, - "step": 20760 - }, - { - "epoch": 2.33, - "learning_rate": 5.7747133711492895e-05, - "loss": 0.9423, - "step": 20780 - }, - { - "epoch": 2.34, - "learning_rate": 5.767522345388282e-05, - "loss": 0.9363, - "step": 20800 - }, - { - "epoch": 2.34, - "learning_rate": 5.760329693377693e-05, - "loss": 0.9369, - "step": 20820 - }, - { - "epoch": 2.34, - "learning_rate": 5.7531354303575324e-05, - "loss": 0.9655, - "step": 20840 - }, - { - "epoch": 2.34, - "learning_rate": 5.7459395715712205e-05, - "loss": 0.9417, - "step": 20860 - }, - { - "epoch": 2.35, - "learning_rate": 5.738742132265562e-05, - "loss": 0.9504, - "step": 20880 - }, - { - "epoch": 2.35, - "learning_rate": 5.731543127690709e-05, - "loss": 0.9594, - "step": 20900 - }, - { - "epoch": 2.35, - "learning_rate": 5.724342573100131e-05, - "loss": 0.9268, - "step": 20920 - }, - { - "epoch": 2.35, - "learning_rate": 5.7171404837505796e-05, - "loss": 0.9299, - "step": 20940 - }, - { - "epoch": 2.36, - "learning_rate": 5.709936874902061e-05, - "loss": 0.9372, - "step": 20960 - }, - { - "epoch": 2.36, - "learning_rate": 5.702731761817799e-05, - "loss": 0.9087, - "step": 20980 - }, - { - "epoch": 2.36, - "learning_rate": 5.695525159764206e-05, - "loss": 0.9338, - "step": 21000 - }, - { - "epoch": 2.36, - "learning_rate": 5.688317084010847e-05, - "loss": 0.9435, - "step": 21020 - }, - { - "epoch": 2.36, - "learning_rate": 5.681107549830414e-05, - "loss": 0.9552, - "step": 21040 - }, - { - "epoch": 2.37, - "learning_rate": 5.673896572498683e-05, - "loss": 0.9095, - "step": 21060 - }, - { - "epoch": 2.37, - "learning_rate": 5.6666841672944925e-05, - "loss": 0.9398, - "step": 21080 - }, - { - "epoch": 2.37, - "learning_rate": 5.659470349499707e-05, - "loss": 0.949, - "step": 21100 - }, - { - "epoch": 2.37, - "learning_rate": 5.652255134399178e-05, - "loss": 0.9129, - "step": 21120 - }, - { - "epoch": 2.38, - "learning_rate": 5.645038537280726e-05, - "loss": 0.9701, - "step": 21140 - }, - { - "epoch": 2.38, - "learning_rate": 5.6378205734350916e-05, - "loss": 0.9328, - "step": 21160 - }, - { - "epoch": 2.38, - "learning_rate": 5.630601258155917e-05, - "loss": 0.9371, - "step": 21180 - }, - { - "epoch": 2.38, - "learning_rate": 5.623380606739708e-05, - "loss": 0.9384, - "step": 21200 - }, - { - "epoch": 2.38, - "learning_rate": 5.616158634485793e-05, - "loss": 0.9557, - "step": 21220 - }, - { - "epoch": 2.39, - "learning_rate": 5.608935356696313e-05, - "loss": 0.9288, - "step": 21240 - }, - { - "epoch": 2.39, - "learning_rate": 5.6017107886761634e-05, - "loss": 0.9439, - "step": 21260 - }, - { - "epoch": 2.39, - "learning_rate": 5.5944849457329786e-05, - "loss": 0.9502, - "step": 21280 - }, - { - "epoch": 2.39, - "learning_rate": 5.5872578431770936e-05, - "loss": 0.9466, - "step": 21300 - }, - { - "epoch": 2.4, - "learning_rate": 5.5800294963215116e-05, - "loss": 0.9338, - "step": 21320 - }, - { - "epoch": 2.4, - "learning_rate": 5.5727999204818736e-05, - "loss": 0.9466, - "step": 21340 - }, - { - "epoch": 2.4, - "learning_rate": 5.565569130976422e-05, - "loss": 0.9392, - "step": 21360 - }, - { - "epoch": 2.4, - "learning_rate": 5.5583371431259745e-05, - "loss": 0.9885, - "step": 21380 - }, - { - "epoch": 2.4, - "learning_rate": 5.551103972253884e-05, - "loss": 0.9347, - "step": 21400 - }, - { - "epoch": 2.41, - "learning_rate": 5.543869633686013e-05, - "loss": 0.934, - "step": 21420 - }, - { - "epoch": 2.41, - "learning_rate": 5.536634142750699e-05, - "loss": 0.9347, - "step": 21440 - }, - { - "epoch": 2.41, - "learning_rate": 5.529397514778716e-05, - "loss": 0.9379, - "step": 21460 - }, - { - "epoch": 2.41, - "learning_rate": 5.522159765103251e-05, - "loss": 0.9509, - "step": 21480 - }, - { - "epoch": 2.42, - "learning_rate": 5.5149209090598686e-05, - "loss": 0.9212, - "step": 21500 - }, - { - "epoch": 2.42, - "learning_rate": 5.5076809619864754e-05, - "loss": 0.9608, - "step": 21520 - }, - { - "epoch": 2.42, - "learning_rate": 5.5004399392232906e-05, - "loss": 0.9088, - "step": 21540 - }, - { - "epoch": 2.42, - "learning_rate": 5.493197856112812e-05, - "loss": 0.9442, - "step": 21560 - }, - { - "epoch": 2.42, - "learning_rate": 5.485954727999785e-05, - "loss": 0.9389, - "step": 21580 - }, - { - "epoch": 2.43, - "learning_rate": 5.478710570231168e-05, - "loss": 0.9191, - "step": 21600 - }, - { - "epoch": 2.43, - "learning_rate": 5.4714653981561015e-05, - "loss": 0.9181, - "step": 21620 - }, - { - "epoch": 2.43, - "learning_rate": 5.464219227125877e-05, - "loss": 0.9255, - "step": 21640 - }, - { - "epoch": 2.43, - "learning_rate": 5.4569720724939025e-05, - "loss": 0.9249, - "step": 21660 - }, - { - "epoch": 2.44, - "learning_rate": 5.449723949615664e-05, - "loss": 0.9217, - "step": 21680 - }, - { - "epoch": 2.44, - "learning_rate": 5.442474873848706e-05, - "loss": 0.9316, - "step": 21700 - }, - { - "epoch": 2.44, - "learning_rate": 5.43522486055259e-05, - "loss": 0.9689, - "step": 21720 - }, - { - "epoch": 2.44, - "learning_rate": 5.427973925088865e-05, - "loss": 0.931, - "step": 21740 - }, - { - "epoch": 2.44, - "learning_rate": 5.42072208282103e-05, - "loss": 0.9415, - "step": 21760 - }, - { - "epoch": 2.45, - "learning_rate": 5.4134693491145085e-05, - "loss": 0.9149, - "step": 21780 - }, - { - "epoch": 2.45, - "learning_rate": 5.4062157393366134e-05, - "loss": 0.9355, - "step": 21800 - }, - { - "epoch": 2.45, - "learning_rate": 5.398961268856512e-05, - "loss": 0.9458, - "step": 21820 - }, - { - "epoch": 2.45, - "learning_rate": 5.391705953045195e-05, - "loss": 0.9411, - "step": 21840 - }, - { - "epoch": 2.46, - "learning_rate": 5.3844498072754476e-05, - "loss": 0.9501, - "step": 21860 - }, - { - "epoch": 2.46, - "learning_rate": 5.377192846921808e-05, - "loss": 0.9204, - "step": 21880 - }, - { - "epoch": 2.46, - "learning_rate": 5.369935087360547e-05, - "loss": 0.9033, - "step": 21900 - }, - { - "epoch": 2.46, - "learning_rate": 5.362676543969622e-05, - "loss": 0.9327, - "step": 21920 - }, - { - "epoch": 2.47, - "learning_rate": 5.3554172321286576e-05, - "loss": 0.9579, - "step": 21940 - }, - { - "epoch": 2.47, - "learning_rate": 5.348157167218901e-05, - "loss": 0.9313, - "step": 21960 - }, - { - "epoch": 2.47, - "learning_rate": 5.340896364623198e-05, - "loss": 0.913, - "step": 21980 - }, - { - "epoch": 2.47, - "learning_rate": 5.333634839725958e-05, - "loss": 0.9628, - "step": 22000 - }, - { - "epoch": 2.47, - "learning_rate": 5.3263726079131194e-05, - "loss": 0.9603, - "step": 22020 - }, - { - "epoch": 2.48, - "learning_rate": 5.319109684572118e-05, - "loss": 0.9116, - "step": 22040 - }, - { - "epoch": 2.48, - "learning_rate": 5.311846085091856e-05, - "loss": 0.9344, - "step": 22060 - }, - { - "epoch": 2.48, - "learning_rate": 5.3045818248626676e-05, - "loss": 0.9263, - "step": 22080 - }, - { - "epoch": 2.48, - "learning_rate": 5.29731691927629e-05, - "loss": 0.9622, - "step": 22100 - }, - { - "epoch": 2.49, - "learning_rate": 5.29005138372582e-05, - "loss": 0.9481, - "step": 22120 - }, - { - "epoch": 2.49, - "learning_rate": 5.282785233605698e-05, - "loss": 0.9256, - "step": 22140 - }, - { - "epoch": 2.49, - "learning_rate": 5.2755184843116635e-05, - "loss": 0.9808, - "step": 22160 - }, - { - "epoch": 2.49, - "learning_rate": 5.268251151240722e-05, - "loss": 0.968, - "step": 22180 - }, - { - "epoch": 2.49, - "learning_rate": 5.2609832497911215e-05, - "loss": 0.95, - "step": 22200 - }, - { - "epoch": 2.5, - "learning_rate": 5.253714795362309e-05, - "loss": 0.9662, - "step": 22220 - }, - { - "epoch": 2.5, - "learning_rate": 5.246445803354907e-05, - "loss": 0.9352, - "step": 22240 - }, - { - "epoch": 2.5, - "learning_rate": 5.2391762891706764e-05, - "loss": 0.9437, - "step": 22260 - }, - { - "epoch": 2.5, - "learning_rate": 5.231906268212483e-05, - "loss": 0.9409, - "step": 22280 - }, - { - "epoch": 2.51, - "learning_rate": 5.224635755884268e-05, - "loss": 0.9487, - "step": 22300 - }, - { - "epoch": 2.51, - "learning_rate": 5.217364767591014e-05, - "loss": 0.9401, - "step": 22320 - }, - { - "epoch": 2.51, - "learning_rate": 5.210093318738709e-05, - "loss": 0.952, - "step": 22340 - }, - { - "epoch": 2.51, - "learning_rate": 5.20282142473432e-05, - "loss": 0.9752, - "step": 22360 - }, - { - "epoch": 2.51, - "learning_rate": 5.195549100985756e-05, - "loss": 0.9655, - "step": 22380 - }, - { - "epoch": 2.52, - "learning_rate": 5.188276362901836e-05, - "loss": 0.9752, - "step": 22400 - }, - { - "epoch": 2.52, - "learning_rate": 5.1810032258922605e-05, - "loss": 0.9632, - "step": 22420 - }, - { - "epoch": 2.52, - "learning_rate": 5.173729705367568e-05, - "loss": 0.9166, - "step": 22440 - }, - { - "epoch": 2.52, - "learning_rate": 5.166455816739118e-05, - "loss": 0.9433, - "step": 22460 - }, - { - "epoch": 2.53, - "learning_rate": 5.159181575419043e-05, - "loss": 0.9459, - "step": 22480 - }, - { - "epoch": 2.53, - "learning_rate": 5.151906996820227e-05, - "loss": 0.9316, - "step": 22500 - }, - { - "epoch": 2.53, - "learning_rate": 5.144632096356269e-05, - "loss": 0.945, - "step": 22520 - }, - { - "epoch": 2.53, - "learning_rate": 5.137356889441444e-05, - "loss": 0.9192, - "step": 22540 - }, - { - "epoch": 2.53, - "learning_rate": 5.1300813914906853e-05, - "loss": 0.9338, - "step": 22560 - }, - { - "epoch": 2.54, - "learning_rate": 5.122805617919536e-05, - "loss": 0.9607, - "step": 22580 - }, - { - "epoch": 2.54, - "learning_rate": 5.115529584144125e-05, - "loss": 0.9123, - "step": 22600 - }, - { - "epoch": 2.54, - "learning_rate": 5.108253305581134e-05, - "loss": 0.9547, - "step": 22620 - }, - { - "epoch": 2.54, - "learning_rate": 5.100976797647761e-05, - "loss": 0.9258, - "step": 22640 - }, - { - "epoch": 2.55, - "learning_rate": 5.0937000757616934e-05, - "loss": 0.9028, - "step": 22660 - }, - { - "epoch": 2.55, - "learning_rate": 5.086423155341068e-05, - "loss": 0.9422, - "step": 22680 - }, - { - "epoch": 2.55, - "learning_rate": 5.079146051804444e-05, - "loss": 0.9331, - "step": 22700 - }, - { - "epoch": 2.55, - "learning_rate": 5.071868780570772e-05, - "loss": 0.9227, - "step": 22720 - }, - { - "epoch": 2.56, - "learning_rate": 5.0645913570593484e-05, - "loss": 0.9301, - "step": 22740 - }, - { - "epoch": 2.56, - "learning_rate": 5.057313796689804e-05, - "loss": 0.9385, - "step": 22760 - }, - { - "epoch": 2.56, - "learning_rate": 5.050036114882052e-05, - "loss": 0.9192, - "step": 22780 - }, - { - "epoch": 2.56, - "learning_rate": 5.042758327056265e-05, - "loss": 0.9302, - "step": 22800 - }, - { - "epoch": 2.56, - "learning_rate": 5.03548044863284e-05, - "loss": 0.9305, - "step": 22820 - }, - { - "epoch": 2.57, - "learning_rate": 5.028202495032366e-05, - "loss": 0.9267, - "step": 22840 - }, - { - "epoch": 2.57, - "learning_rate": 5.020924481675593e-05, - "loss": 0.9608, - "step": 22860 - }, - { - "epoch": 2.57, - "learning_rate": 5.013646423983392e-05, - "loss": 0.9333, - "step": 22880 - }, - { - "epoch": 2.57, - "learning_rate": 5.006368337376737e-05, - "loss": 0.9643, - "step": 22900 - }, - { - "epoch": 2.58, - "learning_rate": 4.999090237276657e-05, - "loss": 0.9209, - "step": 22920 - }, - { - "epoch": 2.58, - "learning_rate": 4.991812139104207e-05, - "loss": 0.9363, - "step": 22940 - }, - { - "epoch": 2.58, - "learning_rate": 4.984534058280445e-05, - "loss": 0.9439, - "step": 22960 - }, - { - "epoch": 2.58, - "learning_rate": 4.97725601022639e-05, - "loss": 0.9514, - "step": 22980 - }, - { - "epoch": 2.58, - "learning_rate": 4.969978010362989e-05, - "loss": 0.9453, - "step": 23000 - }, - { - "epoch": 2.59, - "learning_rate": 4.9627000741110865e-05, - "loss": 0.9394, - "step": 23020 - }, - { - "epoch": 2.59, - "learning_rate": 4.955422216891397e-05, - "loss": 0.9316, - "step": 23040 - }, - { - "epoch": 2.59, - "learning_rate": 4.9481444541244665e-05, - "loss": 0.9088, - "step": 23060 - }, - { - "epoch": 2.59, - "learning_rate": 4.9408668012306344e-05, - "loss": 0.9212, - "step": 23080 - }, - { - "epoch": 2.6, - "learning_rate": 4.933589273630013e-05, - "loss": 0.9114, - "step": 23100 - }, - { - "epoch": 2.6, - "learning_rate": 4.9263118867424515e-05, - "loss": 0.9269, - "step": 23120 - }, - { - "epoch": 2.6, - "learning_rate": 4.919034655987493e-05, - "loss": 0.9383, - "step": 23140 - }, - { - "epoch": 2.6, - "learning_rate": 4.911757596784357e-05, - "loss": 0.9495, - "step": 23160 - }, - { - "epoch": 2.6, - "learning_rate": 4.904480724551897e-05, - "loss": 0.9556, - "step": 23180 - }, - { - "epoch": 2.61, - "learning_rate": 4.89720405470857e-05, - "loss": 0.9101, - "step": 23200 - }, - { - "epoch": 2.61, - "learning_rate": 4.8899276026724034e-05, - "loss": 0.9385, - "step": 23220 - }, - { - "epoch": 2.61, - "learning_rate": 4.882651383860963e-05, - "loss": 0.9146, - "step": 23240 - }, - { - "epoch": 2.61, - "learning_rate": 4.875375413691327e-05, - "loss": 0.8875, - "step": 23260 - }, - { - "epoch": 2.62, - "learning_rate": 4.868099707580035e-05, - "loss": 0.9435, - "step": 23280 - }, - { - "epoch": 2.62, - "learning_rate": 4.8608242809430744e-05, - "loss": 0.9215, - "step": 23300 - }, - { - "epoch": 2.62, - "learning_rate": 4.8535491491958415e-05, - "loss": 0.9206, - "step": 23320 - }, - { - "epoch": 2.62, - "learning_rate": 4.846274327753107e-05, - "loss": 0.9159, - "step": 23340 - }, - { - "epoch": 2.62, - "learning_rate": 4.8389998320289785e-05, - "loss": 0.9272, - "step": 23360 - }, - { - "epoch": 2.63, - "learning_rate": 4.8317256774368815e-05, - "loss": 0.9164, - "step": 23380 - }, - { - "epoch": 2.63, - "learning_rate": 4.824451879389513e-05, - "loss": 0.9521, - "step": 23400 - }, - { - "epoch": 2.63, - "learning_rate": 4.8171784532988165e-05, - "loss": 0.9555, - "step": 23420 - }, - { - "epoch": 2.63, - "learning_rate": 4.809905414575947e-05, - "loss": 0.9317, - "step": 23440 - }, - { - "epoch": 2.64, - "learning_rate": 4.802632778631241e-05, - "loss": 0.9336, - "step": 23460 - }, - { - "epoch": 2.64, - "learning_rate": 4.795360560874181e-05, - "loss": 0.9299, - "step": 23480 - }, - { - "epoch": 2.64, - "learning_rate": 4.7880887767133565e-05, - "loss": 0.9365, - "step": 23500 - }, - { - "epoch": 2.64, - "learning_rate": 4.7808174415564484e-05, - "loss": 0.9178, - "step": 23520 - }, - { - "epoch": 2.64, - "learning_rate": 4.773546570810182e-05, - "loss": 0.931, - "step": 23540 - }, - { - "epoch": 2.65, - "learning_rate": 4.766276179880296e-05, - "loss": 0.9326, - "step": 23560 - }, - { - "epoch": 2.65, - "learning_rate": 4.759006284171515e-05, - "loss": 0.9195, - "step": 23580 - }, - { - "epoch": 2.65, - "learning_rate": 4.7517368990875146e-05, - "loss": 0.9536, - "step": 23600 - }, - { - "epoch": 2.65, - "learning_rate": 4.744468040030891e-05, - "loss": 0.9326, - "step": 23620 - }, - { - "epoch": 2.66, - "learning_rate": 4.737199722403117e-05, - "loss": 0.9336, - "step": 23640 - }, - { - "epoch": 2.66, - "learning_rate": 4.729931961604529e-05, - "loss": 0.9292, - "step": 23660 - }, - { - "epoch": 2.66, - "learning_rate": 4.722664773034278e-05, - "loss": 0.9481, - "step": 23680 - }, - { - "epoch": 2.66, - "learning_rate": 4.7153981720902997e-05, - "loss": 0.9314, - "step": 23700 - }, - { - "epoch": 2.67, - "learning_rate": 4.7081321741692904e-05, - "loss": 0.9054, - "step": 23720 - }, - { - "epoch": 2.67, - "learning_rate": 4.7008667946666674e-05, - "loss": 0.946, - "step": 23740 - }, - { - "epoch": 2.67, - "learning_rate": 4.693602048976537e-05, - "loss": 0.9389, - "step": 23760 - }, - { - "epoch": 2.67, - "learning_rate": 4.686337952491659e-05, - "loss": 0.9484, - "step": 23780 - }, - { - "epoch": 2.67, - "learning_rate": 4.679074520603423e-05, - "loss": 0.9318, - "step": 23800 - }, - { - "epoch": 2.68, - "learning_rate": 4.671811768701811e-05, - "loss": 0.9421, - "step": 23820 - }, - { - "epoch": 2.68, - "learning_rate": 4.6645497121753564e-05, - "loss": 0.9526, - "step": 23840 - }, - { - "epoch": 2.68, - "learning_rate": 4.657288366411127e-05, - "loss": 0.9352, - "step": 23860 - }, - { - "epoch": 2.68, - "learning_rate": 4.650027746794686e-05, - "loss": 0.9146, - "step": 23880 - }, - { - "epoch": 2.69, - "learning_rate": 4.642767868710045e-05, - "loss": 0.9241, - "step": 23900 - }, - { - "epoch": 2.69, - "learning_rate": 4.635508747539661e-05, - "loss": 0.9173, - "step": 23920 - }, - { - "epoch": 2.69, - "learning_rate": 4.6282503986643775e-05, - "loss": 0.9367, - "step": 23940 - }, - { - "epoch": 2.69, - "learning_rate": 4.6209928374634036e-05, - "loss": 0.939, - "step": 23960 - }, - { - "epoch": 2.69, - "learning_rate": 4.6137360793142794e-05, - "loss": 0.9138, - "step": 23980 - }, - { - "epoch": 2.7, - "learning_rate": 4.606480139592843e-05, - "loss": 0.9526, - "step": 24000 - }, - { - "epoch": 2.7, - "learning_rate": 4.599225033673203e-05, - "loss": 0.9391, - "step": 24020 - }, - { - "epoch": 2.7, - "learning_rate": 4.591970776927692e-05, - "loss": 0.9484, - "step": 24040 - }, - { - "epoch": 2.7, - "learning_rate": 4.584717384726853e-05, - "loss": 0.9413, - "step": 24060 - }, - { - "epoch": 2.71, - "learning_rate": 4.577464872439391e-05, - "loss": 0.9497, - "step": 24080 - }, - { - "epoch": 2.71, - "learning_rate": 4.57021325543215e-05, - "loss": 0.9306, - "step": 24100 - }, - { - "epoch": 2.71, - "learning_rate": 4.562962549070074e-05, - "loss": 0.9218, - "step": 24120 - }, - { - "epoch": 2.71, - "learning_rate": 4.555712768716179e-05, - "loss": 0.9342, - "step": 24140 - }, - { - "epoch": 2.71, - "learning_rate": 4.548463929731522e-05, - "loss": 0.9354, - "step": 24160 - }, - { - "epoch": 2.72, - "learning_rate": 4.5412160474751595e-05, - "loss": 0.9286, - "step": 24180 - }, - { - "epoch": 2.72, - "learning_rate": 4.5339691373041236e-05, - "loss": 0.9458, - "step": 24200 - }, - { - "epoch": 2.72, - "learning_rate": 4.526723214573389e-05, - "loss": 0.956, - "step": 24220 - }, - { - "epoch": 2.72, - "learning_rate": 4.519478294635837e-05, - "loss": 0.9322, - "step": 24240 - }, - { - "epoch": 2.73, - "learning_rate": 4.51223439284222e-05, - "loss": 0.9161, - "step": 24260 - }, - { - "epoch": 2.73, - "learning_rate": 4.504991524541138e-05, - "loss": 0.9273, - "step": 24280 - }, - { - "epoch": 2.73, - "learning_rate": 4.497749705079001e-05, - "loss": 0.9667, - "step": 24300 - }, - { - "epoch": 2.73, - "learning_rate": 4.490508949799993e-05, - "loss": 0.9419, - "step": 24320 - }, - { - "epoch": 2.73, - "learning_rate": 4.483269274046046e-05, - "loss": 0.9533, - "step": 24340 - }, - { - "epoch": 2.74, - "learning_rate": 4.4760306931568044e-05, - "loss": 0.9396, - "step": 24360 - }, - { - "epoch": 2.74, - "learning_rate": 4.468793222469596e-05, - "loss": 0.917, - "step": 24380 - }, - { - "epoch": 2.74, - "learning_rate": 4.461556877319385e-05, - "loss": 0.9475, - "step": 24400 - }, - { - "epoch": 2.74, - "learning_rate": 4.454321673038766e-05, - "loss": 0.9314, - "step": 24420 - }, - { - "epoch": 2.75, - "learning_rate": 4.447087624957906e-05, - "loss": 0.9221, - "step": 24440 - }, - { - "epoch": 2.75, - "learning_rate": 4.4398547484045245e-05, - "loss": 0.9088, - "step": 24460 - }, - { - "epoch": 2.75, - "learning_rate": 4.4326230587038594e-05, - "loss": 0.9398, - "step": 24480 - }, - { - "epoch": 2.75, - "learning_rate": 4.425392571178635e-05, - "loss": 0.9181, - "step": 24500 - }, - { - "epoch": 2.76, - "learning_rate": 4.418163301149027e-05, - "loss": 0.9506, - "step": 24520 - }, - { - "epoch": 2.76, - "learning_rate": 4.41093526393263e-05, - "loss": 0.9425, - "step": 24540 - }, - { - "epoch": 2.76, - "learning_rate": 4.4037084748444284e-05, - "loss": 0.931, - "step": 24560 - }, - { - "epoch": 2.76, - "learning_rate": 4.3964829491967655e-05, - "loss": 0.9209, - "step": 24580 - }, - { - "epoch": 2.76, - "learning_rate": 4.389258702299298e-05, - "loss": 0.9217, - "step": 24600 - }, - { - "epoch": 2.77, - "learning_rate": 4.3820357494589816e-05, - "loss": 0.9193, - "step": 24620 - }, - { - "epoch": 2.77, - "learning_rate": 4.3748141059800276e-05, - "loss": 0.9129, - "step": 24640 - }, - { - "epoch": 2.77, - "learning_rate": 4.367593787163875e-05, - "loss": 0.9262, - "step": 24660 - }, - { - "epoch": 2.77, - "learning_rate": 4.3603748083091495e-05, - "loss": 0.9432, - "step": 24680 - }, - { - "epoch": 2.78, - "learning_rate": 4.353157184711645e-05, - "loss": 0.942, - "step": 24700 - }, - { - "epoch": 2.78, - "learning_rate": 4.34594093166428e-05, - "loss": 0.94, - "step": 24720 - }, - { - "epoch": 2.78, - "learning_rate": 4.33872606445707e-05, - "loss": 0.9462, - "step": 24740 - }, - { - "epoch": 2.78, - "learning_rate": 4.331512598377092e-05, - "loss": 0.9453, - "step": 24760 - }, - { - "epoch": 2.78, - "learning_rate": 4.3243005487084595e-05, - "loss": 0.9504, - "step": 24780 - }, - { - "epoch": 2.79, - "learning_rate": 4.3170899307322826e-05, - "loss": 0.9104, - "step": 24800 - }, - { - "epoch": 2.79, - "learning_rate": 4.309880759726633e-05, - "loss": 0.9403, - "step": 24820 - }, - { - "epoch": 2.79, - "learning_rate": 4.302673050966523e-05, - "loss": 0.9555, - "step": 24840 - }, - { - "epoch": 2.79, - "learning_rate": 4.295466819723864e-05, - "loss": 0.9586, - "step": 24860 - }, - { - "epoch": 2.8, - "learning_rate": 4.288262081267435e-05, - "loss": 0.9811, - "step": 24880 - }, - { - "epoch": 2.8, - "learning_rate": 4.281058850862856e-05, - "loss": 0.929, - "step": 24900 - }, - { - "epoch": 2.8, - "learning_rate": 4.27385714377255e-05, - "loss": 0.9444, - "step": 24920 - }, - { - "epoch": 2.8, - "learning_rate": 4.266656975255709e-05, - "loss": 0.9514, - "step": 24940 - }, - { - "epoch": 2.8, - "learning_rate": 4.259458360568271e-05, - "loss": 0.9328, - "step": 24960 - }, - { - "epoch": 2.81, - "learning_rate": 4.252261314962878e-05, - "loss": 0.9218, - "step": 24980 - }, - { - "epoch": 2.81, - "learning_rate": 4.245065853688848e-05, - "loss": 0.9279, - "step": 25000 - }, - { - "epoch": 2.81, - "learning_rate": 4.237871991992142e-05, - "loss": 0.9317, - "step": 25020 - }, - { - "epoch": 2.81, - "learning_rate": 4.2306797451153314e-05, - "loss": 0.9481, - "step": 25040 - }, - { - "epoch": 2.82, - "learning_rate": 4.223489128297568e-05, - "loss": 0.9363, - "step": 25060 - }, - { - "epoch": 2.82, - "learning_rate": 4.216300156774548e-05, - "loss": 0.9445, - "step": 25080 - }, - { - "epoch": 2.82, - "learning_rate": 4.209112845778481e-05, - "loss": 0.917, - "step": 25100 - }, - { - "epoch": 2.82, - "learning_rate": 4.201927210538058e-05, - "loss": 0.9566, - "step": 25120 - }, - { - "epoch": 2.82, - "learning_rate": 4.194743266278426e-05, - "loss": 0.9185, - "step": 25140 - }, - { - "epoch": 2.83, - "learning_rate": 4.1875610282211364e-05, - "loss": 0.9006, - "step": 25160 - }, - { - "epoch": 2.83, - "learning_rate": 4.1803805115841366e-05, - "loss": 0.92, - "step": 25180 - }, - { - "epoch": 2.83, - "learning_rate": 4.173201731581724e-05, - "loss": 0.9291, - "step": 25200 - }, - { - "epoch": 2.83, - "learning_rate": 4.166024703424511e-05, - "loss": 0.9299, - "step": 25220 - }, - { - "epoch": 2.84, - "learning_rate": 4.1588494423194046e-05, - "loss": 0.9074, - "step": 25240 - }, - { - "epoch": 2.84, - "learning_rate": 4.151675963469565e-05, - "loss": 0.9473, - "step": 25260 - }, - { - "epoch": 2.84, - "learning_rate": 4.1445042820743764e-05, - "loss": 0.9326, - "step": 25280 - }, - { - "epoch": 2.84, - "learning_rate": 4.137334413329414e-05, - "loss": 0.9276, - "step": 25300 - }, - { - "epoch": 2.84, - "learning_rate": 4.130166372426412e-05, - "loss": 0.9333, - "step": 25320 - }, - { - "epoch": 2.85, - "learning_rate": 4.123000174553235e-05, - "loss": 0.9269, - "step": 25340 - }, - { - "epoch": 2.85, - "learning_rate": 4.1158358348938374e-05, - "loss": 0.9264, - "step": 25360 - }, - { - "epoch": 2.85, - "learning_rate": 4.1086733686282395e-05, - "loss": 0.9503, - "step": 25380 - }, - { - "epoch": 2.85, - "learning_rate": 4.1015127909324936e-05, - "loss": 0.9071, - "step": 25400 - }, - { - "epoch": 2.86, - "learning_rate": 4.094354116978647e-05, - "loss": 0.9654, - "step": 25420 - }, - { - "epoch": 2.86, - "learning_rate": 4.087197361934714e-05, - "loss": 0.949, - "step": 25440 - }, - { - "epoch": 2.86, - "learning_rate": 4.0800425409646456e-05, - "loss": 0.9102, - "step": 25460 - }, - { - "epoch": 2.86, - "learning_rate": 4.0728896692282926e-05, - "loss": 0.943, - "step": 25480 - }, - { - "epoch": 2.87, - "learning_rate": 4.065738761881375e-05, - "loss": 0.9392, - "step": 25500 - }, - { - "epoch": 2.87, - "learning_rate": 4.0585898340754506e-05, - "loss": 0.9151, - "step": 25520 - }, - { - "epoch": 2.87, - "learning_rate": 4.051442900957888e-05, - "loss": 0.937, - "step": 25540 - }, - { - "epoch": 2.87, - "learning_rate": 4.0442979776718237e-05, - "loss": 0.9283, - "step": 25560 - }, - { - "epoch": 2.87, - "learning_rate": 4.037155079356137e-05, - "loss": 0.92, - "step": 25580 - }, - { - "epoch": 2.88, - "learning_rate": 4.030014221145417e-05, - "loss": 0.9313, - "step": 25600 - }, - { - "epoch": 2.88, - "learning_rate": 4.022875418169931e-05, - "loss": 0.9367, - "step": 25620 - }, - { - "epoch": 2.88, - "learning_rate": 4.0157386855555906e-05, - "loss": 0.9642, - "step": 25640 - }, - { - "epoch": 2.88, - "learning_rate": 4.00860403842392e-05, - "loss": 0.9027, - "step": 25660 - }, - { - "epoch": 2.89, - "learning_rate": 4.001471491892026e-05, - "loss": 0.9215, - "step": 25680 - }, - { - "epoch": 2.89, - "learning_rate": 3.9943410610725665e-05, - "loss": 0.9546, - "step": 25700 - }, - { - "epoch": 2.89, - "learning_rate": 3.9872127610737095e-05, - "loss": 0.909, - "step": 25720 - }, - { - "epoch": 2.89, - "learning_rate": 3.9800866069991173e-05, - "loss": 0.9495, - "step": 25740 - }, - { - "epoch": 2.89, - "learning_rate": 3.9729626139478995e-05, - "loss": 0.9311, - "step": 25760 - }, - { - "epoch": 2.9, - "learning_rate": 3.965840797014586e-05, - "loss": 0.9387, - "step": 25780 - }, - { - "epoch": 2.9, - "learning_rate": 3.9587211712891005e-05, - "loss": 0.919, - "step": 25800 - }, - { - "epoch": 2.9, - "learning_rate": 3.9516037518567204e-05, - "loss": 0.938, - "step": 25820 - }, - { - "epoch": 2.9, - "learning_rate": 3.9444885537980526e-05, - "loss": 0.929, - "step": 25840 - }, - { - "epoch": 2.91, - "learning_rate": 3.9373755921889886e-05, - "loss": 0.9502, - "step": 25860 - }, - { - "epoch": 2.91, - "learning_rate": 3.93026488210069e-05, - "loss": 0.9294, - "step": 25880 - }, - { - "epoch": 2.91, - "learning_rate": 3.9231564385995476e-05, - "loss": 0.9643, - "step": 25900 - }, - { - "epoch": 2.91, - "learning_rate": 3.91605027674714e-05, - "loss": 0.9125, - "step": 25920 - }, - { - "epoch": 2.91, - "learning_rate": 3.908946411600222e-05, - "loss": 0.9356, - "step": 25940 - }, - { - "epoch": 2.92, - "learning_rate": 3.9018448582106795e-05, - "loss": 0.9298, - "step": 25960 - }, - { - "epoch": 2.92, - "learning_rate": 3.894745631625495e-05, - "loss": 0.9532, - "step": 25980 - }, - { - "epoch": 2.92, - "learning_rate": 3.887648746886727e-05, - "loss": 0.9326, - "step": 26000 - }, - { - "epoch": 2.92, - "learning_rate": 3.8805542190314705e-05, - "loss": 0.9003, - "step": 26020 - }, - { - "epoch": 2.93, - "learning_rate": 3.873462063091825e-05, - "loss": 0.9361, - "step": 26040 - }, - { - "epoch": 2.93, - "learning_rate": 3.866372294094864e-05, - "loss": 0.9186, - "step": 26060 - }, - { - "epoch": 2.93, - "learning_rate": 3.859284927062604e-05, - "loss": 0.9442, - "step": 26080 - }, - { - "epoch": 2.93, - "learning_rate": 3.8521999770119786e-05, - "loss": 0.929, - "step": 26100 - }, - { - "epoch": 2.93, - "learning_rate": 3.845117458954787e-05, - "loss": 0.9241, - "step": 26120 - }, - { - "epoch": 2.94, - "learning_rate": 3.838037387897688e-05, - "loss": 0.9246, - "step": 26140 - }, - { - "epoch": 2.94, - "learning_rate": 3.8309597788421474e-05, - "loss": 0.9379, - "step": 26160 - }, - { - "epoch": 2.94, - "learning_rate": 3.823884646784421e-05, - "loss": 0.9221, - "step": 26180 - }, - { - "epoch": 2.94, - "learning_rate": 3.8168120067155096e-05, - "loss": 0.9069, - "step": 26200 - }, - { - "epoch": 2.95, - "learning_rate": 3.809741873621138e-05, - "loss": 0.9144, - "step": 26220 - }, - { - "epoch": 2.95, - "learning_rate": 3.802674262481719e-05, - "loss": 0.9446, - "step": 26240 - }, - { - "epoch": 2.95, - "learning_rate": 3.79560918827232e-05, - "loss": 0.972, - "step": 26260 - }, - { - "epoch": 2.95, - "learning_rate": 3.7885466659626334e-05, - "loss": 0.9395, - "step": 26280 - }, - { - "epoch": 2.96, - "learning_rate": 3.781486710516948e-05, - "loss": 0.9471, - "step": 26300 - }, - { - "epoch": 2.96, - "learning_rate": 3.77442933689411e-05, - "loss": 0.9502, - "step": 26320 - }, - { - "epoch": 2.96, - "learning_rate": 3.767374560047495e-05, - "loss": 0.9296, - "step": 26340 - }, - { - "epoch": 2.96, - "learning_rate": 3.760322394924979e-05, - "loss": 0.9643, - "step": 26360 - }, - { - "epoch": 2.96, - "learning_rate": 3.753272856468903e-05, - "loss": 0.929, - "step": 26380 - }, - { - "epoch": 2.97, - "learning_rate": 3.746225959616042e-05, - "loss": 0.9233, - "step": 26400 - }, - { - "epoch": 2.97, - "learning_rate": 3.7391817192975745e-05, - "loss": 0.9169, - "step": 26420 - }, - { - "epoch": 2.97, - "learning_rate": 3.732140150439048e-05, - "loss": 0.891, - "step": 26440 - }, - { - "epoch": 2.97, - "learning_rate": 3.725101267960359e-05, - "loss": 0.9535, - "step": 26460 - }, - { - "epoch": 2.98, - "learning_rate": 3.718065086775695e-05, - "loss": 0.9363, - "step": 26480 - }, - { - "epoch": 2.98, - "learning_rate": 3.7110316217935357e-05, - "loss": 0.964, - "step": 26500 - }, - { - "epoch": 2.98, - "learning_rate": 3.7040008879166e-05, - "loss": 0.9386, - "step": 26520 - }, - { - "epoch": 2.98, - "learning_rate": 3.696972900041816e-05, - "loss": 0.9268, - "step": 26540 - }, - { - "epoch": 2.98, - "learning_rate": 3.6899476730603e-05, - "loss": 0.9139, - "step": 26560 - }, - { - "epoch": 2.99, - "learning_rate": 3.682925221857315e-05, - "loss": 0.9274, - "step": 26580 - }, - { - "epoch": 2.99, - "learning_rate": 3.675905561312244e-05, - "loss": 0.9195, - "step": 26600 - }, - { - "epoch": 2.99, - "learning_rate": 3.668888706298554e-05, - "loss": 0.9289, - "step": 26620 - }, - { - "epoch": 2.99, - "learning_rate": 3.66187467168377e-05, - "loss": 0.9429, - "step": 26640 - }, - { - "epoch": 3.0, - "learning_rate": 3.654863472329445e-05, - "loss": 0.9156, - "step": 26660 - }, - { - "epoch": 3.0, - "learning_rate": 3.647855123091115e-05, - "loss": 0.9355, - "step": 26680 - }, - { - "epoch": 3.0, - "learning_rate": 3.640849638818286e-05, - "loss": 0.9313, - "step": 26700 - }, - { - "epoch": 3.0, - "learning_rate": 3.633847034354389e-05, - "loss": 0.86, - "step": 26720 - }, - { - "epoch": 3.0, - "learning_rate": 3.626847324536755e-05, - "loss": 0.9101, - "step": 26740 - }, - { - "epoch": 3.01, - "learning_rate": 3.6198505241965806e-05, - "loss": 0.9232, - "step": 26760 - }, - { - "epoch": 3.01, - "learning_rate": 3.6128566481588977e-05, - "loss": 0.9325, - "step": 26780 - }, - { - "epoch": 3.01, - "learning_rate": 3.605865711242544e-05, - "loss": 0.9364, - "step": 26800 - }, - { - "epoch": 3.01, - "learning_rate": 3.598877728260127e-05, - "loss": 0.8978, - "step": 26820 - }, - { - "epoch": 3.02, - "learning_rate": 3.591892714017995e-05, - "loss": 0.892, - "step": 26840 - }, - { - "epoch": 3.02, - "learning_rate": 3.5849106833162124e-05, - "loss": 0.8974, - "step": 26860 - }, - { - "epoch": 3.02, - "learning_rate": 3.577931650948512e-05, - "loss": 0.8923, - "step": 26880 - }, - { - "epoch": 3.02, - "learning_rate": 3.5709556317022823e-05, - "loss": 0.9018, - "step": 26900 - }, - { - "epoch": 3.02, - "learning_rate": 3.563982640358523e-05, - "loss": 0.9101, - "step": 26920 - }, - { - "epoch": 3.03, - "learning_rate": 3.55701269169182e-05, - "loss": 0.8988, - "step": 26940 - }, - { - "epoch": 3.03, - "learning_rate": 3.550045800470311e-05, - "loss": 0.8896, - "step": 26960 - }, - { - "epoch": 3.03, - "learning_rate": 3.5430819814556544e-05, - "loss": 0.9021, - "step": 26980 - }, - { - "epoch": 3.03, - "learning_rate": 3.536121249403004e-05, - "loss": 0.8667, - "step": 27000 - }, - { - "epoch": 3.04, - "learning_rate": 3.5291636190609665e-05, - "loss": 0.8931, - "step": 27020 - }, - { - "epoch": 3.04, - "learning_rate": 3.52220910517158e-05, - "loss": 0.8927, - "step": 27040 - }, - { - "epoch": 3.04, - "learning_rate": 3.515257722470281e-05, - "loss": 0.9163, - "step": 27060 - }, - { - "epoch": 3.04, - "learning_rate": 3.50830948568587e-05, - "loss": 0.9169, - "step": 27080 - }, - { - "epoch": 3.04, - "learning_rate": 3.50136440954048e-05, - "loss": 0.9031, - "step": 27100 - }, - { - "epoch": 3.05, - "learning_rate": 3.494422508749547e-05, - "loss": 0.9213, - "step": 27120 - }, - { - "epoch": 3.05, - "learning_rate": 3.487483798021785e-05, - "loss": 0.9082, - "step": 27140 - }, - { - "epoch": 3.05, - "learning_rate": 3.480548292059139e-05, - "loss": 0.911, - "step": 27160 - }, - { - "epoch": 3.05, - "learning_rate": 3.473616005556773e-05, - "loss": 0.9075, - "step": 27180 - }, - { - "epoch": 3.06, - "learning_rate": 3.4666869532030224e-05, - "loss": 0.9072, - "step": 27200 - }, - { - "epoch": 3.06, - "learning_rate": 3.459761149679378e-05, - "loss": 0.9181, - "step": 27220 - }, - { - "epoch": 3.06, - "learning_rate": 3.4528386096604366e-05, - "loss": 0.908, - "step": 27240 - }, - { - "epoch": 3.06, - "learning_rate": 3.445919347813888e-05, - "loss": 0.8838, - "step": 27260 - }, - { - "epoch": 3.07, - "learning_rate": 3.439003378800475e-05, - "loss": 0.8977, - "step": 27280 - }, - { - "epoch": 3.07, - "learning_rate": 3.4320907172739594e-05, - "loss": 0.9024, - "step": 27300 - }, - { - "epoch": 3.07, - "learning_rate": 3.425181377881099e-05, - "loss": 0.9174, - "step": 27320 - }, - { - "epoch": 3.07, - "learning_rate": 3.4182753752616094e-05, - "loss": 0.916, - "step": 27340 - }, - { - "epoch": 3.07, - "learning_rate": 3.411372724048144e-05, - "loss": 0.9103, - "step": 27360 - }, - { - "epoch": 3.08, - "learning_rate": 3.4044734388662426e-05, - "loss": 0.8922, - "step": 27380 - }, - { - "epoch": 3.08, - "learning_rate": 3.3975775343343205e-05, - "loss": 0.8991, - "step": 27400 - }, - { - "epoch": 3.08, - "learning_rate": 3.390685025063633e-05, - "loss": 0.8822, - "step": 27420 - }, - { - "epoch": 3.08, - "learning_rate": 3.383795925658233e-05, - "loss": 0.9007, - "step": 27440 - }, - { - "epoch": 3.09, - "learning_rate": 3.376910250714955e-05, - "loss": 0.9058, - "step": 27460 - }, - { - "epoch": 3.09, - "learning_rate": 3.370028014823375e-05, - "loss": 0.9046, - "step": 27480 - }, - { - "epoch": 3.09, - "learning_rate": 3.363149232565785e-05, - "loss": 0.9123, - "step": 27500 - }, - { - "epoch": 3.09, - "learning_rate": 3.356273918517153e-05, - "loss": 0.8856, - "step": 27520 - }, - { - "epoch": 3.09, - "learning_rate": 3.349402087245104e-05, - "loss": 0.9146, - "step": 27540 - }, - { - "epoch": 3.1, - "learning_rate": 3.342533753309887e-05, - "loss": 0.9106, - "step": 27560 - }, - { - "epoch": 3.1, - "learning_rate": 3.335668931264327e-05, - "loss": 0.8902, - "step": 27580 - }, - { - "epoch": 3.1, - "learning_rate": 3.328807635653822e-05, - "loss": 0.8881, - "step": 27600 - }, - { - "epoch": 3.1, - "learning_rate": 3.321949881016293e-05, - "loss": 0.9172, - "step": 27620 - }, - { - "epoch": 3.11, - "learning_rate": 3.315095681882159e-05, - "loss": 0.9076, - "step": 27640 - }, - { - "epoch": 3.11, - "learning_rate": 3.3082450527743014e-05, - "loss": 0.8837, - "step": 27660 - }, - { - "epoch": 3.11, - "learning_rate": 3.301398008208042e-05, - "loss": 0.8725, - "step": 27680 - }, - { - "epoch": 3.11, - "learning_rate": 3.294554562691108e-05, - "loss": 0.9202, - "step": 27700 - }, - { - "epoch": 3.11, - "learning_rate": 3.287714730723596e-05, - "loss": 0.909, - "step": 27720 - }, - { - "epoch": 3.12, - "learning_rate": 3.280878526797948e-05, - "loss": 0.9094, - "step": 27740 - }, - { - "epoch": 3.12, - "learning_rate": 3.274045965398924e-05, - "loss": 0.8797, - "step": 27760 - }, - { - "epoch": 3.12, - "learning_rate": 3.267217061003562e-05, - "loss": 0.8962, - "step": 27780 - }, - { - "epoch": 3.12, - "learning_rate": 3.260391828081147e-05, - "loss": 0.8772, - "step": 27800 - }, - { - "epoch": 3.13, - "learning_rate": 3.253570281093192e-05, - "loss": 0.8907, - "step": 27820 - }, - { - "epoch": 3.13, - "learning_rate": 3.246752434493398e-05, - "loss": 0.8898, - "step": 27840 - }, - { - "epoch": 3.13, - "learning_rate": 3.239938302727622e-05, - "loss": 0.8992, - "step": 27860 - }, - { - "epoch": 3.13, - "learning_rate": 3.233127900233855e-05, - "loss": 0.8948, - "step": 27880 - }, - { - "epoch": 3.13, - "learning_rate": 3.2263212414421846e-05, - "loss": 0.9386, - "step": 27900 - }, - { - "epoch": 3.14, - "learning_rate": 3.219518340774763e-05, - "loss": 0.915, - "step": 27920 - }, - { - "epoch": 3.14, - "learning_rate": 3.2127192126457815e-05, - "loss": 0.9026, - "step": 27940 - }, - { - "epoch": 3.14, - "learning_rate": 3.205923871461442e-05, - "loss": 0.8793, - "step": 27960 - }, - { - "epoch": 3.14, - "learning_rate": 3.19913233161992e-05, - "loss": 0.9182, - "step": 27980 - }, - { - "epoch": 3.15, - "learning_rate": 3.192344607511329e-05, - "loss": 0.8803, - "step": 28000 - }, - { - "epoch": 3.15, - "learning_rate": 3.18556071351771e-05, - "loss": 0.872, - "step": 28020 - }, - { - "epoch": 3.15, - "learning_rate": 3.1787806640129826e-05, - "loss": 0.899, - "step": 28040 - }, - { - "epoch": 3.15, - "learning_rate": 3.1720044733629196e-05, - "loss": 0.9047, - "step": 28060 - }, - { - "epoch": 3.16, - "learning_rate": 3.165232155925118e-05, - "loss": 0.8979, - "step": 28080 - }, - { - "epoch": 3.16, - "learning_rate": 3.15846372604897e-05, - "loss": 0.8833, - "step": 28100 - }, - { - "epoch": 3.16, - "learning_rate": 3.151699198075633e-05, - "loss": 0.908, - "step": 28120 - }, - { - "epoch": 3.16, - "learning_rate": 3.1449385863379866e-05, - "loss": 0.8998, - "step": 28140 - }, - { - "epoch": 3.16, - "learning_rate": 3.138181905160625e-05, - "loss": 0.8975, - "step": 28160 - }, - { - "epoch": 3.17, - "learning_rate": 3.13142916885981e-05, - "loss": 0.8921, - "step": 28180 - }, - { - "epoch": 3.17, - "learning_rate": 3.124680391743438e-05, - "loss": 0.9263, - "step": 28200 - }, - { - "epoch": 3.17, - "learning_rate": 3.117935588111026e-05, - "loss": 0.9153, - "step": 28220 - }, - { - "epoch": 3.17, - "learning_rate": 3.111194772253668e-05, - "loss": 0.9274, - "step": 28240 - }, - { - "epoch": 3.18, - "learning_rate": 3.104457958454009e-05, - "loss": 0.9159, - "step": 28260 - }, - { - "epoch": 3.18, - "learning_rate": 3.097725160986212e-05, - "loss": 0.9314, - "step": 28280 - }, - { - "epoch": 3.18, - "learning_rate": 3.090996394115933e-05, - "loss": 0.9059, - "step": 28300 - }, - { - "epoch": 3.18, - "learning_rate": 3.0842716721002894e-05, - "loss": 0.9248, - "step": 28320 - }, - { - "epoch": 3.18, - "learning_rate": 3.077551009187821e-05, - "loss": 0.9125, - "step": 28340 - }, - { - "epoch": 3.19, - "learning_rate": 3.0708344196184756e-05, - "loss": 0.9084, - "step": 28360 - }, - { - "epoch": 3.19, - "learning_rate": 3.064121917623566e-05, - "loss": 0.9046, - "step": 28380 - }, - { - "epoch": 3.19, - "learning_rate": 3.0574135174257444e-05, - "loss": 0.8961, - "step": 28400 - }, - { - "epoch": 3.19, - "learning_rate": 3.050709233238972e-05, - "loss": 0.9001, - "step": 28420 - }, - { - "epoch": 3.2, - "learning_rate": 3.0440090792684884e-05, - "loss": 0.91, - "step": 28440 - }, - { - "epoch": 3.2, - "learning_rate": 3.037313069710784e-05, - "loss": 0.9225, - "step": 28460 - }, - { - "epoch": 3.2, - "learning_rate": 3.0306212187535653e-05, - "loss": 0.8991, - "step": 28480 - }, - { - "epoch": 3.2, - "learning_rate": 3.0239335405757275e-05, - "loss": 0.8773, - "step": 28500 - }, - { - "epoch": 3.2, - "learning_rate": 3.0172500493473294e-05, - "loss": 0.8922, - "step": 28520 - }, - { - "epoch": 3.21, - "learning_rate": 3.0105707592295528e-05, - "loss": 0.9035, - "step": 28540 - }, - { - "epoch": 3.21, - "learning_rate": 3.003895684374679e-05, - "loss": 0.9027, - "step": 28560 - }, - { - "epoch": 3.21, - "learning_rate": 2.9972248389260593e-05, - "loss": 0.9163, - "step": 28580 - }, - { - "epoch": 3.21, - "learning_rate": 2.9905582370180836e-05, - "loss": 0.909, - "step": 28600 - }, - { - "epoch": 3.22, - "learning_rate": 2.9838958927761477e-05, - "loss": 0.8952, - "step": 28620 - }, - { - "epoch": 3.22, - "learning_rate": 2.9772378203166307e-05, - "loss": 0.9269, - "step": 28640 - }, - { - "epoch": 3.22, - "learning_rate": 2.9705840337468554e-05, - "loss": 0.8917, - "step": 28660 - }, - { - "epoch": 3.22, - "learning_rate": 2.9639345471650716e-05, - "loss": 0.8882, - "step": 28680 - }, - { - "epoch": 3.22, - "learning_rate": 2.9572893746604052e-05, - "loss": 0.9008, - "step": 28700 - }, - { - "epoch": 3.23, - "learning_rate": 2.950648530312854e-05, - "loss": 0.9153, - "step": 28720 - }, - { - "epoch": 3.23, - "learning_rate": 2.9440120281932403e-05, - "loss": 0.8977, - "step": 28740 - }, - { - "epoch": 3.23, - "learning_rate": 2.937379882363183e-05, - "loss": 0.9006, - "step": 28760 - }, - { - "epoch": 3.23, - "learning_rate": 2.9307521068750748e-05, - "loss": 0.921, - "step": 28780 - }, - { - "epoch": 3.24, - "learning_rate": 2.924128715772047e-05, - "loss": 0.8782, - "step": 28800 - }, - { - "epoch": 3.24, - "learning_rate": 2.9175097230879423e-05, - "loss": 0.8994, - "step": 28820 - }, - { - "epoch": 3.24, - "learning_rate": 2.9108951428472804e-05, - "loss": 0.8945, - "step": 28840 - }, - { - "epoch": 3.24, - "learning_rate": 2.9042849890652352e-05, - "loss": 0.8867, - "step": 28860 - }, - { - "epoch": 3.24, - "learning_rate": 2.8976792757476013e-05, - "loss": 0.8793, - "step": 28880 - }, - { - "epoch": 3.25, - "learning_rate": 2.891078016890763e-05, - "loss": 0.9037, - "step": 28900 - }, - { - "epoch": 3.25, - "learning_rate": 2.8844812264816684e-05, - "loss": 0.9293, - "step": 28920 - }, - { - "epoch": 3.25, - "learning_rate": 2.8778889184977986e-05, - "loss": 0.8962, - "step": 28940 - }, - { - "epoch": 3.25, - "learning_rate": 2.8713011069071306e-05, - "loss": 0.886, - "step": 28960 - }, - { - "epoch": 3.26, - "learning_rate": 2.8647178056681194e-05, - "loss": 0.8791, - "step": 28980 - }, - { - "epoch": 3.26, - "learning_rate": 2.8581390287296672e-05, - "loss": 0.9162, - "step": 29000 - }, - { - "epoch": 3.26, - "learning_rate": 2.851564790031086e-05, - "loss": 0.9088, - "step": 29020 - }, - { - "epoch": 3.26, - "learning_rate": 2.8449951035020672e-05, - "loss": 0.9208, - "step": 29040 - }, - { - "epoch": 3.27, - "learning_rate": 2.8384299830626637e-05, - "loss": 0.8747, - "step": 29060 - }, - { - "epoch": 3.27, - "learning_rate": 2.8318694426232516e-05, - "loss": 0.8721, - "step": 29080 - }, - { - "epoch": 3.27, - "learning_rate": 2.825313496084503e-05, - "loss": 0.9281, - "step": 29100 - }, - { - "epoch": 3.27, - "learning_rate": 2.8187621573373544e-05, - "loss": 0.9045, - "step": 29120 - }, - { - "epoch": 3.27, - "learning_rate": 2.8122154402629818e-05, - "loss": 0.8925, - "step": 29140 - }, - { - "epoch": 3.28, - "learning_rate": 2.8056733587327694e-05, - "loss": 0.8958, - "step": 29160 - }, - { - "epoch": 3.28, - "learning_rate": 2.7991359266082717e-05, - "loss": 0.9155, - "step": 29180 - }, - { - "epoch": 3.28, - "learning_rate": 2.7926031577412038e-05, - "loss": 0.8971, - "step": 29200 - }, - { - "epoch": 3.28, - "learning_rate": 2.7860750659733938e-05, - "loss": 0.9249, - "step": 29220 - }, - { - "epoch": 3.29, - "learning_rate": 2.779551665136756e-05, - "loss": 0.9045, - "step": 29240 - }, - { - "epoch": 3.29, - "learning_rate": 2.773032969053273e-05, - "loss": 0.9207, - "step": 29260 - }, - { - "epoch": 3.29, - "learning_rate": 2.7665189915349533e-05, - "loss": 0.8938, - "step": 29280 - }, - { - "epoch": 3.29, - "learning_rate": 2.7600097463838114e-05, - "loss": 0.9088, - "step": 29300 - }, - { - "epoch": 3.29, - "learning_rate": 2.753505247391832e-05, - "loss": 0.9085, - "step": 29320 - }, - { - "epoch": 3.3, - "learning_rate": 2.7470055083409452e-05, - "loss": 0.8978, - "step": 29340 - }, - { - "epoch": 3.3, - "learning_rate": 2.740510543002996e-05, - "loss": 0.8963, - "step": 29360 - }, - { - "epoch": 3.3, - "learning_rate": 2.734020365139708e-05, - "loss": 0.9295, - "step": 29380 - }, - { - "epoch": 3.3, - "learning_rate": 2.727534988502673e-05, - "loss": 0.9076, - "step": 29400 - }, - { - "epoch": 3.31, - "learning_rate": 2.721054426833301e-05, - "loss": 0.9037, - "step": 29420 - }, - { - "epoch": 3.31, - "learning_rate": 2.7145786938628036e-05, - "loss": 0.8821, - "step": 29440 - }, - { - "epoch": 3.31, - "learning_rate": 2.7081078033121577e-05, - "loss": 0.8976, - "step": 29460 - }, - { - "epoch": 3.31, - "learning_rate": 2.7016417688920815e-05, - "loss": 0.9083, - "step": 29480 - }, - { - "epoch": 3.31, - "learning_rate": 2.695180604303007e-05, - "loss": 0.9198, - "step": 29500 - }, - { - "epoch": 3.32, - "learning_rate": 2.6887243232350434e-05, - "loss": 0.9052, - "step": 29520 - }, - { - "epoch": 3.32, - "learning_rate": 2.6822729393679558e-05, - "loss": 0.9033, - "step": 29540 - }, - { - "epoch": 3.32, - "learning_rate": 2.6758264663711306e-05, - "loss": 0.9058, - "step": 29560 - }, - { - "epoch": 3.32, - "learning_rate": 2.6693849179035513e-05, - "loss": 0.889, - "step": 29580 - }, - { - "epoch": 3.33, - "learning_rate": 2.662948307613764e-05, - "loss": 0.9118, - "step": 29600 - }, - { - "epoch": 3.33, - "learning_rate": 2.6565166491398553e-05, - "loss": 0.8994, - "step": 29620 - }, - { - "epoch": 3.33, - "learning_rate": 2.6500899561094184e-05, - "loss": 0.9048, - "step": 29640 - }, - { - "epoch": 3.33, - "learning_rate": 2.643668242139522e-05, - "loss": 0.9, - "step": 29660 - }, - { - "epoch": 3.33, - "learning_rate": 2.63725152083669e-05, - "loss": 0.9199, - "step": 29680 - }, - { - "epoch": 3.34, - "learning_rate": 2.630839805796863e-05, - "loss": 0.9024, - "step": 29700 - }, - { - "epoch": 3.34, - "learning_rate": 2.624433110605383e-05, - "loss": 0.9102, - "step": 29720 - }, - { - "epoch": 3.34, - "learning_rate": 2.6180314488369452e-05, - "loss": 0.8843, - "step": 29740 - }, - { - "epoch": 3.34, - "learning_rate": 2.611634834055585e-05, - "loss": 0.8905, - "step": 29760 - }, - { - "epoch": 3.35, - "learning_rate": 2.6052432798146436e-05, - "loss": 0.8834, - "step": 29780 - }, - { - "epoch": 3.35, - "learning_rate": 2.5988567996567402e-05, - "loss": 0.8973, - "step": 29800 - }, - { - "epoch": 3.35, - "learning_rate": 2.5924754071137415e-05, - "loss": 0.887, - "step": 29820 - }, - { - "epoch": 3.35, - "learning_rate": 2.5860991157067356e-05, - "loss": 0.9162, - "step": 29840 - }, - { - "epoch": 3.36, - "learning_rate": 2.5797279389460037e-05, - "loss": 0.8948, - "step": 29860 - }, - { - "epoch": 3.36, - "learning_rate": 2.5733618903309843e-05, - "loss": 0.897, - "step": 29880 - }, - { - "epoch": 3.36, - "learning_rate": 2.567000983350254e-05, - "loss": 0.875, - "step": 29900 - }, - { - "epoch": 3.36, - "learning_rate": 2.5606452314815e-05, - "loss": 0.9287, - "step": 29920 - }, - { - "epoch": 3.36, - "learning_rate": 2.554294648191477e-05, - "loss": 0.8996, - "step": 29940 - }, - { - "epoch": 3.37, - "learning_rate": 2.5479492469359944e-05, - "loss": 0.8989, - "step": 29960 - }, - { - "epoch": 3.37, - "learning_rate": 2.5416090411598813e-05, - "loss": 0.91, - "step": 29980 - }, - { - "epoch": 3.37, - "learning_rate": 2.535274044296957e-05, - "loss": 0.9104, - "step": 30000 - }, - { - "epoch": 3.37, - "learning_rate": 2.5289442697700043e-05, - "loss": 0.8902, - "step": 30020 - }, - { - "epoch": 3.38, - "learning_rate": 2.5226197309907418e-05, - "loss": 0.8907, - "step": 30040 - }, - { - "epoch": 3.38, - "learning_rate": 2.5163004413597955e-05, - "loss": 0.9099, - "step": 30060 - }, - { - "epoch": 3.38, - "learning_rate": 2.5099864142666642e-05, - "loss": 0.8979, - "step": 30080 - }, - { - "epoch": 3.38, - "learning_rate": 2.5036776630896985e-05, - "loss": 0.9008, - "step": 30100 - }, - { - "epoch": 3.38, - "learning_rate": 2.4973742011960775e-05, - "loss": 0.9105, - "step": 30120 - }, - { - "epoch": 3.39, - "learning_rate": 2.4910760419417616e-05, - "loss": 0.9075, - "step": 30140 - }, - { - "epoch": 3.39, - "learning_rate": 2.4847831986714837e-05, - "loss": 0.9141, - "step": 30160 - }, - { - "epoch": 3.39, - "learning_rate": 2.47849568471871e-05, - "loss": 0.9281, - "step": 30180 - }, - { - "epoch": 3.39, - "learning_rate": 2.472213513405615e-05, - "loss": 0.9085, - "step": 30200 - }, - { - "epoch": 3.4, - "learning_rate": 2.4659366980430547e-05, - "loss": 0.9308, - "step": 30220 - }, - { - "epoch": 3.4, - "learning_rate": 2.4596652519305346e-05, - "loss": 0.9147, - "step": 30240 - }, - { - "epoch": 3.4, - "learning_rate": 2.4533991883561868e-05, - "loss": 0.91, - "step": 30260 - }, - { - "epoch": 3.4, - "learning_rate": 2.4471385205967323e-05, - "loss": 0.8888, - "step": 30280 - }, - { - "epoch": 3.4, - "learning_rate": 2.4408832619174644e-05, - "loss": 0.894, - "step": 30300 - }, - { - "epoch": 3.41, - "learning_rate": 2.4346334255722168e-05, - "loss": 0.8859, - "step": 30320 - }, - { - "epoch": 3.41, - "learning_rate": 2.4283890248033337e-05, - "loss": 0.9136, - "step": 30340 - }, - { - "epoch": 3.41, - "learning_rate": 2.4221500728416356e-05, - "loss": 0.9353, - "step": 30360 - }, - { - "epoch": 3.41, - "learning_rate": 2.415916582906405e-05, - "loss": 0.9154, - "step": 30380 - }, - { - "epoch": 3.42, - "learning_rate": 2.409688568205349e-05, - "loss": 0.9022, - "step": 30400 - }, - { - "epoch": 3.42, - "learning_rate": 2.403466041934574e-05, - "loss": 0.9071, - "step": 30420 - }, - { - "epoch": 3.42, - "learning_rate": 2.3972490172785567e-05, - "loss": 0.8839, - "step": 30440 - }, - { - "epoch": 3.42, - "learning_rate": 2.3910375074101172e-05, - "loss": 0.9102, - "step": 30460 - }, - { - "epoch": 3.42, - "learning_rate": 2.3848315254903924e-05, - "loss": 0.9196, - "step": 30480 - }, - { - "epoch": 3.43, - "learning_rate": 2.3786310846688e-05, - "loss": 0.8863, - "step": 30500 - }, - { - "epoch": 3.43, - "learning_rate": 2.3724361980830257e-05, - "loss": 0.9043, - "step": 30520 - }, - { - "epoch": 3.43, - "learning_rate": 2.366246878858984e-05, - "loss": 0.8813, - "step": 30540 - }, - { - "epoch": 3.43, - "learning_rate": 2.3600631401107882e-05, - "loss": 0.9023, - "step": 30560 - }, - { - "epoch": 3.44, - "learning_rate": 2.353884994940732e-05, - "loss": 0.8872, - "step": 30580 - }, - { - "epoch": 3.44, - "learning_rate": 2.3477124564392572e-05, - "loss": 0.9056, - "step": 30600 - }, - { - "epoch": 3.44, - "learning_rate": 2.3415455376849248e-05, - "loss": 0.8708, - "step": 30620 - }, - { - "epoch": 3.44, - "learning_rate": 2.3353842517443898e-05, - "loss": 0.8727, - "step": 30640 - }, - { - "epoch": 3.44, - "learning_rate": 2.32922861167237e-05, - "loss": 0.8796, - "step": 30660 - }, - { - "epoch": 3.45, - "learning_rate": 2.3230786305116253e-05, - "loss": 0.9369, - "step": 30680 - }, - { - "epoch": 3.45, - "learning_rate": 2.316934321292915e-05, - "loss": 0.9044, - "step": 30700 - }, - { - "epoch": 3.45, - "learning_rate": 2.3107956970349942e-05, - "loss": 0.8945, - "step": 30720 - }, - { - "epoch": 3.45, - "learning_rate": 2.3046627707445635e-05, - "loss": 0.9097, - "step": 30740 - }, - { - "epoch": 3.46, - "learning_rate": 2.2985355554162546e-05, - "loss": 0.908, - "step": 30760 - }, - { - "epoch": 3.46, - "learning_rate": 2.292414064032593e-05, - "loss": 0.8604, - "step": 30780 - }, - { - "epoch": 3.46, - "learning_rate": 2.2862983095639823e-05, - "loss": 0.8698, - "step": 30800 - }, - { - "epoch": 3.46, - "learning_rate": 2.2801883049686678e-05, - "loss": 0.8969, - "step": 30820 - }, - { - "epoch": 3.47, - "learning_rate": 2.2740840631927118e-05, - "loss": 0.8927, - "step": 30840 - }, - { - "epoch": 3.47, - "learning_rate": 2.2679855971699676e-05, - "loss": 0.9017, - "step": 30860 - }, - { - "epoch": 3.47, - "learning_rate": 2.2618929198220513e-05, - "loss": 0.924, - "step": 30880 - }, - { - "epoch": 3.47, - "learning_rate": 2.2558060440583057e-05, - "loss": 0.8936, - "step": 30900 - }, - { - "epoch": 3.47, - "learning_rate": 2.2497249827757933e-05, - "loss": 0.9073, - "step": 30920 - }, - { - "epoch": 3.48, - "learning_rate": 2.2436497488592497e-05, - "loss": 0.9292, - "step": 30940 - }, - { - "epoch": 3.48, - "learning_rate": 2.2375803551810654e-05, - "loss": 0.9278, - "step": 30960 - }, - { - "epoch": 3.48, - "learning_rate": 2.2315168146012527e-05, - "loss": 0.8894, - "step": 30980 - }, - { - "epoch": 3.48, - "learning_rate": 2.225459139967426e-05, - "loss": 0.8936, - "step": 31000 - }, - { - "epoch": 3.49, - "learning_rate": 2.21940734411477e-05, - "loss": 0.9102, - "step": 31020 - }, - { - "epoch": 3.49, - "learning_rate": 2.213361439866013e-05, - "loss": 0.8849, - "step": 31040 - }, - { - "epoch": 3.49, - "learning_rate": 2.2073214400313997e-05, - "loss": 0.8884, - "step": 31060 - }, - { - "epoch": 3.49, - "learning_rate": 2.201287357408665e-05, - "loss": 0.8864, - "step": 31080 - }, - { - "epoch": 3.49, - "learning_rate": 2.1952592047830055e-05, - "loss": 0.94, - "step": 31100 - }, - { - "epoch": 3.5, - "learning_rate": 2.189236994927054e-05, - "loss": 0.8892, - "step": 31120 - }, - { - "epoch": 3.5, - "learning_rate": 2.1832207406008502e-05, - "loss": 0.8934, - "step": 31140 - }, - { - "epoch": 3.5, - "learning_rate": 2.1772104545518185e-05, - "loss": 0.8911, - "step": 31160 - }, - { - "epoch": 3.5, - "learning_rate": 2.171206149514731e-05, - "loss": 0.9029, - "step": 31180 - }, - { - "epoch": 3.51, - "learning_rate": 2.165207838211693e-05, - "loss": 0.9103, - "step": 31200 - }, - { - "epoch": 3.51, - "learning_rate": 2.159215533352106e-05, - "loss": 0.9166, - "step": 31220 - }, - { - "epoch": 3.51, - "learning_rate": 2.153229247632652e-05, - "loss": 0.8993, - "step": 31240 - }, - { - "epoch": 3.51, - "learning_rate": 2.14724899373725e-05, - "loss": 0.9114, - "step": 31260 - }, - { - "epoch": 3.51, - "learning_rate": 2.141274784337044e-05, - "loss": 0.9053, - "step": 31280 - }, - { - "epoch": 3.52, - "learning_rate": 2.1353066320903698e-05, - "loss": 0.8942, - "step": 31300 - }, - { - "epoch": 3.52, - "learning_rate": 2.1293445496427296e-05, - "loss": 0.8935, - "step": 31320 - }, - { - "epoch": 3.52, - "learning_rate": 2.1233885496267634e-05, - "loss": 0.8798, - "step": 31340 - }, - { - "epoch": 3.52, - "learning_rate": 2.117438644662226e-05, - "loss": 0.9204, - "step": 31360 - }, - { - "epoch": 3.53, - "learning_rate": 2.1114948473559554e-05, - "loss": 0.8907, - "step": 31380 - }, - { - "epoch": 3.53, - "learning_rate": 2.1055571703018474e-05, - "loss": 0.8935, - "step": 31400 - }, - { - "epoch": 3.53, - "learning_rate": 2.0996256260808316e-05, - "loss": 0.8761, - "step": 31420 - }, - { - "epoch": 3.53, - "learning_rate": 2.0937002272608493e-05, - "loss": 0.9049, - "step": 31440 - }, - { - "epoch": 3.53, - "learning_rate": 2.087780986396808e-05, - "loss": 0.9202, - "step": 31460 - }, - { - "epoch": 3.54, - "learning_rate": 2.0818679160305776e-05, - "loss": 0.8871, - "step": 31480 - }, - { - "epoch": 3.54, - "learning_rate": 2.0759610286909508e-05, - "loss": 0.8833, - "step": 31500 - }, - { - "epoch": 3.54, - "learning_rate": 2.0700603368936182e-05, - "loss": 0.917, - "step": 31520 - }, - { - "epoch": 3.54, - "learning_rate": 2.064165853141145e-05, - "loss": 0.9222, - "step": 31540 - }, - { - "epoch": 3.55, - "learning_rate": 2.058277589922942e-05, - "loss": 0.9001, - "step": 31560 - }, - { - "epoch": 3.55, - "learning_rate": 2.05239555971524e-05, - "loss": 0.906, - "step": 31580 - }, - { - "epoch": 3.55, - "learning_rate": 2.0465197749810604e-05, - "loss": 0.9021, - "step": 31600 - }, - { - "epoch": 3.55, - "learning_rate": 2.040650248170194e-05, - "loss": 0.8962, - "step": 31620 - }, - { - "epoch": 3.56, - "learning_rate": 2.034786991719174e-05, - "loss": 0.901, - "step": 31640 - }, - { - "epoch": 3.56, - "learning_rate": 2.0289300180512478e-05, - "loss": 0.9213, - "step": 31660 - }, - { - "epoch": 3.56, - "learning_rate": 2.0230793395763426e-05, - "loss": 0.9018, - "step": 31680 - }, - { - "epoch": 3.56, - "learning_rate": 2.0172349686910568e-05, - "loss": 0.8924, - "step": 31700 - }, - { - "epoch": 3.56, - "learning_rate": 2.011396917778619e-05, - "loss": 0.8989, - "step": 31720 - }, - { - "epoch": 3.57, - "learning_rate": 2.0055651992088692e-05, - "loss": 0.8872, - "step": 31740 - }, - { - "epoch": 3.57, - "learning_rate": 1.9997398253382287e-05, - "loss": 0.9258, - "step": 31760 - }, - { - "epoch": 3.57, - "learning_rate": 1.993920808509676e-05, - "loss": 0.9025, - "step": 31780 - }, - { - "epoch": 3.57, - "learning_rate": 1.988108161052722e-05, - "loss": 0.8995, - "step": 31800 - }, - { - "epoch": 3.58, - "learning_rate": 1.9823018952833748e-05, - "loss": 0.9158, - "step": 31820 - }, - { - "epoch": 3.58, - "learning_rate": 1.9765020235041322e-05, - "loss": 0.9092, - "step": 31840 - }, - { - "epoch": 3.58, - "learning_rate": 1.9707085580039365e-05, - "loss": 0.9003, - "step": 31860 - }, - { - "epoch": 3.58, - "learning_rate": 1.9649215110581553e-05, - "loss": 0.8768, - "step": 31880 - }, - { - "epoch": 3.58, - "learning_rate": 1.9591408949285605e-05, - "loss": 0.9147, - "step": 31900 - }, - { - "epoch": 3.59, - "learning_rate": 1.953366721863297e-05, - "loss": 0.9055, - "step": 31920 - }, - { - "epoch": 3.59, - "learning_rate": 1.947599004096856e-05, - "loss": 0.9076, - "step": 31940 - }, - { - "epoch": 3.59, - "learning_rate": 1.9418377538500543e-05, - "loss": 0.8803, - "step": 31960 - }, - { - "epoch": 3.59, - "learning_rate": 1.936082983330002e-05, - "loss": 0.9346, - "step": 31980 - }, - { - "epoch": 3.6, - "learning_rate": 1.9303347047300834e-05, - "loss": 0.9033, - "step": 32000 - }, - { - "epoch": 3.6, - "learning_rate": 1.9245929302299202e-05, - "loss": 0.8857, - "step": 32020 - }, - { - "epoch": 3.6, - "learning_rate": 1.9188576719953633e-05, - "loss": 0.9229, - "step": 32040 - }, - { - "epoch": 3.6, - "learning_rate": 1.913128942178451e-05, - "loss": 0.9216, - "step": 32060 - }, - { - "epoch": 3.6, - "learning_rate": 1.907406752917386e-05, - "loss": 0.8786, - "step": 32080 - }, - { - "epoch": 3.61, - "learning_rate": 1.9016911163365185e-05, - "loss": 0.896, - "step": 32100 - }, - { - "epoch": 3.61, - "learning_rate": 1.89598204454631e-05, - "loss": 0.9214, - "step": 32120 - }, - { - "epoch": 3.61, - "learning_rate": 1.89027954964332e-05, - "loss": 0.8923, - "step": 32140 - }, - { - "epoch": 3.61, - "learning_rate": 1.8845836437101622e-05, - "loss": 0.9045, - "step": 32160 - }, - { - "epoch": 3.62, - "learning_rate": 1.8788943388154962e-05, - "loss": 0.8815, - "step": 32180 - }, - { - "epoch": 3.62, - "learning_rate": 1.873211647013995e-05, - "loss": 0.9081, - "step": 32200 - }, - { - "epoch": 3.62, - "learning_rate": 1.867535580346313e-05, - "loss": 0.8995, - "step": 32220 - }, - { - "epoch": 3.62, - "learning_rate": 1.861866150839078e-05, - "loss": 0.9083, - "step": 32240 - }, - { - "epoch": 3.62, - "learning_rate": 1.856203370504846e-05, - "loss": 0.8916, - "step": 32260 - }, - { - "epoch": 3.63, - "learning_rate": 1.850547251342089e-05, - "loss": 0.9364, - "step": 32280 - }, - { - "epoch": 3.63, - "learning_rate": 1.844897805335162e-05, - "loss": 0.8781, - "step": 32300 - }, - { - "epoch": 3.63, - "learning_rate": 1.8392550444542793e-05, - "loss": 0.9038, - "step": 32320 - }, - { - "epoch": 3.63, - "learning_rate": 1.8336189806555014e-05, - "loss": 0.8806, - "step": 32340 - }, - { - "epoch": 3.64, - "learning_rate": 1.8279896258806844e-05, - "loss": 0.9133, - "step": 32360 - }, - { - "epoch": 3.64, - "learning_rate": 1.8223669920574772e-05, - "loss": 0.9114, - "step": 32380 - }, - { - "epoch": 3.64, - "learning_rate": 1.8167510910992875e-05, - "loss": 0.8787, - "step": 32400 - }, - { - "epoch": 3.64, - "learning_rate": 1.811141934905255e-05, - "loss": 0.8737, - "step": 32420 - }, - { - "epoch": 3.64, - "learning_rate": 1.8055395353602306e-05, - "loss": 0.9233, - "step": 32440 - }, - { - "epoch": 3.65, - "learning_rate": 1.7999439043347476e-05, - "loss": 0.9049, - "step": 32460 - }, - { - "epoch": 3.65, - "learning_rate": 1.7943550536850006e-05, - "loss": 0.893, - "step": 32480 - }, - { - "epoch": 3.65, - "learning_rate": 1.7887729952528117e-05, - "loss": 0.9078, - "step": 32500 - }, - { - "epoch": 3.65, - "learning_rate": 1.7831977408656153e-05, - "loss": 0.9135, - "step": 32520 - }, - { - "epoch": 3.66, - "learning_rate": 1.7776293023364325e-05, - "loss": 0.8959, - "step": 32540 - }, - { - "epoch": 3.66, - "learning_rate": 1.7720676914638407e-05, - "loss": 0.9199, - "step": 32560 - }, - { - "epoch": 3.66, - "learning_rate": 1.766512920031944e-05, - "loss": 0.9057, - "step": 32580 - }, - { - "epoch": 3.66, - "learning_rate": 1.7609649998103634e-05, - "loss": 0.8747, - "step": 32600 - }, - { - "epoch": 3.67, - "learning_rate": 1.755423942554199e-05, - "loss": 0.911, - "step": 32620 - }, - { - "epoch": 3.67, - "learning_rate": 1.749889760004012e-05, - "loss": 0.8998, - "step": 32640 - }, - { - "epoch": 3.67, - "learning_rate": 1.7443624638857954e-05, - "loss": 0.9014, - "step": 32660 - }, - { - "epoch": 3.67, - "learning_rate": 1.7388420659109515e-05, - "loss": 0.8887, - "step": 32680 - }, - { - "epoch": 3.67, - "learning_rate": 1.7333285777762682e-05, - "loss": 0.9168, - "step": 32700 - }, - { - "epoch": 3.68, - "learning_rate": 1.727822011163886e-05, - "loss": 0.9106, - "step": 32720 - }, - { - "epoch": 3.68, - "learning_rate": 1.7223223777412905e-05, - "loss": 0.907, - "step": 32740 - }, - { - "epoch": 3.68, - "learning_rate": 1.7168296891612707e-05, - "loss": 0.906, - "step": 32760 - }, - { - "epoch": 3.68, - "learning_rate": 1.711343957061899e-05, - "loss": 0.8978, - "step": 32780 - }, - { - "epoch": 3.69, - "learning_rate": 1.7058651930665114e-05, - "loss": 0.8732, - "step": 32800 - }, - { - "epoch": 3.69, - "learning_rate": 1.70039340878368e-05, - "loss": 0.909, - "step": 32820 - }, - { - "epoch": 3.69, - "learning_rate": 1.6949286158071858e-05, - "loss": 0.8888, - "step": 32840 - }, - { - "epoch": 3.69, - "learning_rate": 1.689470825715998e-05, - "loss": 0.9104, - "step": 32860 - }, - { - "epoch": 3.69, - "learning_rate": 1.6840200500742482e-05, - "loss": 0.878, - "step": 32880 - }, - { - "epoch": 3.7, - "learning_rate": 1.6785763004312055e-05, - "loss": 0.9154, - "step": 32900 - }, - { - "epoch": 3.7, - "learning_rate": 1.673139588321247e-05, - "loss": 0.8783, - "step": 32920 - }, - { - "epoch": 3.7, - "learning_rate": 1.6677099252638477e-05, - "loss": 0.8909, - "step": 32940 - }, - { - "epoch": 3.7, - "learning_rate": 1.6622873227635428e-05, - "loss": 0.908, - "step": 32960 - }, - { - "epoch": 3.71, - "learning_rate": 1.656871792309902e-05, - "loss": 0.8948, - "step": 32980 - }, - { - "epoch": 3.71, - "learning_rate": 1.651463345377518e-05, - "loss": 0.888, - "step": 33000 - }, - { - "epoch": 3.71, - "learning_rate": 1.6460619934259707e-05, - "loss": 0.9002, - "step": 33020 - }, - { - "epoch": 3.71, - "learning_rate": 1.6406677478998094e-05, - "loss": 0.9047, - "step": 33040 - }, - { - "epoch": 3.71, - "learning_rate": 1.6352806202285244e-05, - "loss": 0.8803, - "step": 33060 - }, - { - "epoch": 3.72, - "learning_rate": 1.6299006218265246e-05, - "loss": 0.8966, - "step": 33080 - }, - { - "epoch": 3.72, - "learning_rate": 1.624527764093115e-05, - "loss": 0.8901, - "step": 33100 - }, - { - "epoch": 3.72, - "learning_rate": 1.619162058412465e-05, - "loss": 0.9045, - "step": 33120 - }, - { - "epoch": 3.72, - "learning_rate": 1.6138035161535986e-05, - "loss": 0.8999, - "step": 33140 - }, - { - "epoch": 3.73, - "learning_rate": 1.608452148670356e-05, - "loss": 0.8836, - "step": 33160 - }, - { - "epoch": 3.73, - "learning_rate": 1.603107967301378e-05, - "loss": 0.9097, - "step": 33180 - }, - { - "epoch": 3.73, - "learning_rate": 1.597770983370075e-05, - "loss": 0.9269, - "step": 33200 - }, - { - "epoch": 3.73, - "learning_rate": 1.5924412081846113e-05, - "loss": 0.9053, - "step": 33220 - }, - { - "epoch": 3.73, - "learning_rate": 1.5871186530378763e-05, - "loss": 0.8981, - "step": 33240 - }, - { - "epoch": 3.74, - "learning_rate": 1.581803329207461e-05, - "loss": 0.8572, - "step": 33260 - }, - { - "epoch": 3.74, - "learning_rate": 1.5764952479556334e-05, - "loss": 0.8934, - "step": 33280 - }, - { - "epoch": 3.74, - "learning_rate": 1.5711944205293185e-05, - "loss": 0.8725, - "step": 33300 - }, - { - "epoch": 3.74, - "learning_rate": 1.565900858160068e-05, - "loss": 0.8948, - "step": 33320 - }, - { - "epoch": 3.75, - "learning_rate": 1.5606145720640442e-05, - "loss": 0.8983, - "step": 33340 - }, - { - "epoch": 3.75, - "learning_rate": 1.555335573441989e-05, - "loss": 0.9201, - "step": 33360 - }, - { - "epoch": 3.75, - "learning_rate": 1.5500638734792055e-05, - "loss": 0.8854, - "step": 33380 - }, - { - "epoch": 3.75, - "learning_rate": 1.5447994833455292e-05, - "loss": 0.8975, - "step": 33400 - }, - { - "epoch": 3.76, - "learning_rate": 1.53954241419531e-05, - "loss": 0.8831, - "step": 33420 - }, - { - "epoch": 3.76, - "learning_rate": 1.5342926771673842e-05, - "loss": 0.8813, - "step": 33440 - }, - { - "epoch": 3.76, - "learning_rate": 1.5290502833850578e-05, - "loss": 0.8986, - "step": 33460 - }, - { - "epoch": 3.76, - "learning_rate": 1.5238152439560693e-05, - "loss": 0.8652, - "step": 33480 - }, - { - "epoch": 3.76, - "learning_rate": 1.5185875699725793e-05, - "loss": 0.9025, - "step": 33500 - }, - { - "epoch": 3.77, - "learning_rate": 1.5133672725111425e-05, - "loss": 0.8958, - "step": 33520 - }, - { - "epoch": 3.77, - "learning_rate": 1.5081543626326833e-05, - "loss": 0.9204, - "step": 33540 - }, - { - "epoch": 3.77, - "learning_rate": 1.5029488513824724e-05, - "loss": 0.8805, - "step": 33560 - }, - { - "epoch": 3.77, - "learning_rate": 1.4977507497901055e-05, - "loss": 0.9058, - "step": 33580 - }, - { - "epoch": 3.78, - "learning_rate": 1.492560068869478e-05, - "loss": 0.9166, - "step": 33600 - }, - { - "epoch": 3.78, - "learning_rate": 1.4873768196187593e-05, - "loss": 0.9029, - "step": 33620 - }, - { - "epoch": 3.78, - "learning_rate": 1.482201013020375e-05, - "loss": 0.883, - "step": 33640 - }, - { - "epoch": 3.78, - "learning_rate": 1.4770326600409851e-05, - "loss": 0.9034, - "step": 33660 - }, - { - "epoch": 3.78, - "learning_rate": 1.471871771631448e-05, - "loss": 0.8594, - "step": 33680 - }, - { - "epoch": 3.79, - "learning_rate": 1.4667183587268118e-05, - "loss": 0.9064, - "step": 33700 - }, - { - "epoch": 3.79, - "learning_rate": 1.4615724322462838e-05, - "loss": 0.9083, - "step": 33720 - }, - { - "epoch": 3.79, - "learning_rate": 1.4564340030932083e-05, - "loss": 0.8579, - "step": 33740 - }, - { - "epoch": 3.79, - "learning_rate": 1.4513030821550449e-05, - "loss": 0.8899, - "step": 33760 - }, - { - "epoch": 3.8, - "learning_rate": 1.4461796803033445e-05, - "loss": 0.9189, - "step": 33780 - }, - { - "epoch": 3.8, - "learning_rate": 1.4410638083937272e-05, - "loss": 0.9012, - "step": 33800 - }, - { - "epoch": 3.8, - "learning_rate": 1.4359554772658552e-05, - "loss": 0.9, - "step": 33820 - }, - { - "epoch": 3.8, - "learning_rate": 1.4308546977434135e-05, - "loss": 0.957, - "step": 33840 - }, - { - "epoch": 3.8, - "learning_rate": 1.4257614806340919e-05, - "loss": 0.8913, - "step": 33860 - }, - { - "epoch": 3.81, - "learning_rate": 1.4206758367295537e-05, - "loss": 0.9182, - "step": 33880 - }, - { - "epoch": 3.81, - "learning_rate": 1.4155977768054113e-05, - "loss": 0.9013, - "step": 33900 - }, - { - "epoch": 3.81, - "learning_rate": 1.4105273116212136e-05, - "loss": 0.9113, - "step": 33920 - }, - { - "epoch": 3.81, - "learning_rate": 1.4054644519204157e-05, - "loss": 0.8801, - "step": 33940 - }, - { - "epoch": 3.82, - "learning_rate": 1.4004092084303583e-05, - "loss": 0.9287, - "step": 33960 - }, - { - "epoch": 3.82, - "learning_rate": 1.3953615918622443e-05, - "loss": 0.9068, - "step": 33980 - }, - { - "epoch": 3.82, - "learning_rate": 1.3903216129111174e-05, - "loss": 0.8831, - "step": 34000 - }, - { - "epoch": 3.82, - "learning_rate": 1.385289282255835e-05, - "loss": 0.9099, - "step": 34020 - }, - { - "epoch": 3.82, - "learning_rate": 1.380264610559051e-05, - "loss": 0.9004, - "step": 34040 - }, - { - "epoch": 3.83, - "learning_rate": 1.3752476084671962e-05, - "loss": 0.8964, - "step": 34060 - }, - { - "epoch": 3.83, - "learning_rate": 1.3702382866104457e-05, - "loss": 0.8801, - "step": 34080 - }, - { - "epoch": 3.83, - "learning_rate": 1.3652366556026996e-05, - "loss": 0.919, - "step": 34100 - }, - { - "epoch": 3.83, - "learning_rate": 1.3602427260415663e-05, - "loss": 0.8887, - "step": 34120 - }, - { - "epoch": 3.84, - "learning_rate": 1.3552565085083352e-05, - "loss": 0.9103, - "step": 34140 - }, - { - "epoch": 3.84, - "learning_rate": 1.350278013567955e-05, - "loss": 0.9001, - "step": 34160 - }, - { - "epoch": 3.84, - "learning_rate": 1.3453072517690107e-05, - "loss": 0.8922, - "step": 34180 - }, - { - "epoch": 3.84, - "learning_rate": 1.3403442336437039e-05, - "loss": 0.8959, - "step": 34200 - }, - { - "epoch": 3.84, - "learning_rate": 1.3353889697078287e-05, - "loss": 0.8825, - "step": 34220 - }, - { - "epoch": 3.85, - "learning_rate": 1.3304414704607443e-05, - "loss": 0.9026, - "step": 34240 - }, - { - "epoch": 3.85, - "learning_rate": 1.3255017463853659e-05, - "loss": 0.8999, - "step": 34260 - }, - { - "epoch": 3.85, - "learning_rate": 1.3205698079481298e-05, - "loss": 0.8748, - "step": 34280 - }, - { - "epoch": 3.85, - "learning_rate": 1.3156456655989746e-05, - "loss": 0.882, - "step": 34300 - }, - { - "epoch": 3.86, - "learning_rate": 1.3107293297713236e-05, - "loss": 0.8574, - "step": 34320 - }, - { - "epoch": 3.86, - "learning_rate": 1.3058208108820574e-05, - "loss": 0.9052, - "step": 34340 - }, - { - "epoch": 3.86, - "learning_rate": 1.3009201193314947e-05, - "loss": 0.9069, - "step": 34360 - }, - { - "epoch": 3.86, - "learning_rate": 1.2960272655033689e-05, - "loss": 0.8821, - "step": 34380 - }, - { - "epoch": 3.87, - "learning_rate": 1.2911422597648077e-05, - "loss": 0.8953, - "step": 34400 - }, - { - "epoch": 3.87, - "learning_rate": 1.2862651124663095e-05, - "loss": 0.8736, - "step": 34420 - }, - { - "epoch": 3.87, - "learning_rate": 1.2813958339417176e-05, - "loss": 0.9102, - "step": 34440 - }, - { - "epoch": 3.87, - "learning_rate": 1.2765344345082114e-05, - "loss": 0.8673, - "step": 34460 - }, - { - "epoch": 3.87, - "learning_rate": 1.2716809244662691e-05, - "loss": 0.9082, - "step": 34480 - }, - { - "epoch": 3.88, - "learning_rate": 1.266835314099657e-05, - "loss": 0.8893, - "step": 34500 - }, - { - "epoch": 3.88, - "learning_rate": 1.261997613675398e-05, - "loss": 0.8961, - "step": 34520 - }, - { - "epoch": 3.88, - "learning_rate": 1.2571678334437591e-05, - "loss": 0.8572, - "step": 34540 - }, - { - "epoch": 3.88, - "learning_rate": 1.252345983638225e-05, - "loss": 0.9225, - "step": 34560 - }, - { - "epoch": 3.89, - "learning_rate": 1.2475320744754776e-05, - "loss": 0.9017, - "step": 34580 - }, - { - "epoch": 3.89, - "learning_rate": 1.2427261161553732e-05, - "loss": 0.9022, - "step": 34600 - }, - { - "epoch": 3.89, - "learning_rate": 1.2379281188609209e-05, - "loss": 0.9, - "step": 34620 - }, - { - "epoch": 3.89, - "learning_rate": 1.2331380927582642e-05, - "loss": 0.8776, - "step": 34640 - }, - { - "epoch": 3.89, - "learning_rate": 1.2283560479966538e-05, - "loss": 0.8804, - "step": 34660 - }, - { - "epoch": 3.9, - "learning_rate": 1.223581994708432e-05, - "loss": 0.8881, - "step": 34680 - }, - { - "epoch": 3.9, - "learning_rate": 1.2188159430090085e-05, - "loss": 0.8949, - "step": 34700 - }, - { - "epoch": 3.9, - "learning_rate": 1.2140579029968352e-05, - "loss": 0.8953, - "step": 34720 - }, - { - "epoch": 3.9, - "learning_rate": 1.2093078847533922e-05, - "loss": 0.8937, - "step": 34740 - }, - { - "epoch": 3.91, - "learning_rate": 1.2045658983431612e-05, - "loss": 0.9329, - "step": 34760 - }, - { - "epoch": 3.91, - "learning_rate": 1.199831953813611e-05, - "loss": 0.8943, - "step": 34780 - }, - { - "epoch": 3.91, - "learning_rate": 1.1951060611951615e-05, - "loss": 0.8774, - "step": 34800 - }, - { - "epoch": 3.91, - "learning_rate": 1.1903882305011793e-05, - "loss": 0.9075, - "step": 34820 - }, - { - "epoch": 3.91, - "learning_rate": 1.1856784717279462e-05, - "loss": 0.8714, - "step": 34840 - }, - { - "epoch": 3.92, - "learning_rate": 1.1809767948546419e-05, - "loss": 0.8841, - "step": 34860 - }, - { - "epoch": 3.92, - "learning_rate": 1.1762832098433219e-05, - "loss": 0.8719, - "step": 34880 - }, - { - "epoch": 3.92, - "learning_rate": 1.1715977266388961e-05, - "loss": 0.8972, - "step": 34900 - }, - { - "epoch": 3.92, - "learning_rate": 1.1669203551691093e-05, - "loss": 0.8943, - "step": 34920 - }, - { - "epoch": 3.93, - "learning_rate": 1.1622511053445156e-05, - "loss": 0.8861, - "step": 34940 - }, - { - "epoch": 3.93, - "learning_rate": 1.1575899870584621e-05, - "loss": 0.9284, - "step": 34960 - }, - { - "epoch": 3.93, - "learning_rate": 1.1529370101870723e-05, - "loss": 0.8943, - "step": 34980 - }, - { - "epoch": 3.93, - "learning_rate": 1.1482921845892098e-05, - "loss": 0.8904, - "step": 35000 - }, - { - "epoch": 3.93, - "learning_rate": 1.143655520106473e-05, - "loss": 0.8703, - "step": 35020 - }, - { - "epoch": 3.94, - "learning_rate": 1.1390270265631675e-05, - "loss": 0.9096, - "step": 35040 - }, - { - "epoch": 3.94, - "learning_rate": 1.134406713766285e-05, - "loss": 0.9049, - "step": 35060 - }, - { - "epoch": 3.94, - "learning_rate": 1.1297945915054842e-05, - "loss": 0.8983, - "step": 35080 - }, - { - "epoch": 3.94, - "learning_rate": 1.1251906695530701e-05, - "loss": 0.9089, - "step": 35100 - }, - { - "epoch": 3.95, - "learning_rate": 1.1205949576639723e-05, - "loss": 0.8768, - "step": 35120 - }, - { - "epoch": 3.95, - "learning_rate": 1.116007465575722e-05, - "loss": 0.9167, - "step": 35140 - }, - { - "epoch": 3.95, - "learning_rate": 1.1114282030084361e-05, - "loss": 0.9169, - "step": 35160 - }, - { - "epoch": 3.95, - "learning_rate": 1.1068571796647992e-05, - "loss": 0.903, - "step": 35180 - }, - { - "epoch": 3.96, - "learning_rate": 1.1022944052300293e-05, - "loss": 0.8746, - "step": 35200 - }, - { - "epoch": 3.96, - "learning_rate": 1.0977398893718732e-05, - "loss": 0.9006, - "step": 35220 - }, - { - "epoch": 3.96, - "learning_rate": 1.0931936417405764e-05, - "loss": 0.8895, - "step": 35240 - }, - { - "epoch": 3.96, - "learning_rate": 1.0886556719688662e-05, - "loss": 0.8928, - "step": 35260 - }, - { - "epoch": 3.96, - "learning_rate": 1.0841259896719297e-05, - "loss": 0.9004, - "step": 35280 - }, - { - "epoch": 3.97, - "learning_rate": 1.0796046044473962e-05, - "loss": 0.9078, - "step": 35300 - }, - { - "epoch": 3.97, - "learning_rate": 1.0750915258753141e-05, - "loss": 0.8804, - "step": 35320 - }, - { - "epoch": 3.97, - "learning_rate": 1.0705867635181278e-05, - "loss": 0.902, - "step": 35340 - }, - { - "epoch": 3.97, - "learning_rate": 1.0660903269206652e-05, - "loss": 0.898, - "step": 35360 - }, - { - "epoch": 3.98, - "learning_rate": 1.0616022256101143e-05, - "loss": 0.8605, - "step": 35380 - }, - { - "epoch": 3.98, - "learning_rate": 1.0571224690960002e-05, - "loss": 0.8795, - "step": 35400 - }, - { - "epoch": 3.98, - "learning_rate": 1.0526510668701633e-05, - "loss": 0.8985, - "step": 35420 - }, - { - "epoch": 3.98, - "learning_rate": 1.0481880284067485e-05, - "loss": 0.9146, - "step": 35440 - }, - { - "epoch": 3.98, - "learning_rate": 1.0437333631621765e-05, - "loss": 0.8778, - "step": 35460 - }, - { - "epoch": 3.99, - "learning_rate": 1.0392870805751265e-05, - "loss": 0.9129, - "step": 35480 - }, - { - "epoch": 3.99, - "learning_rate": 1.0348491900665164e-05, - "loss": 0.9134, - "step": 35500 - }, - { - "epoch": 3.99, - "learning_rate": 1.030419701039484e-05, - "loss": 0.9043, - "step": 35520 - }, - { - "epoch": 3.99, - "learning_rate": 1.025998622879365e-05, - "loss": 0.8896, - "step": 35540 - }, - { - "epoch": 4.0, - "learning_rate": 1.0215859649536702e-05, - "loss": 0.9067, - "step": 35560 - }, - { - "epoch": 4.0, - "learning_rate": 1.0171817366120767e-05, - "loss": 0.9166, - "step": 35580 - }, - { - "epoch": 4.0, - "learning_rate": 1.012785947186397e-05, - "loss": 0.8699, - "step": 35600 - }, - { - "epoch": 4.0, - "learning_rate": 1.0083986059905598e-05, - "loss": 0.8923, - "step": 35620 - }, - { - "epoch": 4.0, - "learning_rate": 1.0040197223205978e-05, - "loss": 0.8432, - "step": 35640 - }, - { - "epoch": 4.01, - "learning_rate": 9.996493054546214e-06, - "loss": 0.8849, - "step": 35660 - }, - { - "epoch": 4.01, - "learning_rate": 9.95287364652806e-06, - "loss": 0.8686, - "step": 35680 - }, - { - "epoch": 4.01, - "learning_rate": 9.909339091573594e-06, - "loss": 0.8646, - "step": 35700 - }, - { - "epoch": 4.01, - "learning_rate": 9.865889481925167e-06, - "loss": 0.8976, - "step": 35720 - }, - { - "epoch": 4.02, - "learning_rate": 9.822524909645137e-06, - "loss": 0.8732, - "step": 35740 - }, - { - "epoch": 4.02, - "learning_rate": 9.779245466615639e-06, - "loss": 0.8954, - "step": 35760 - }, - { - "epoch": 4.02, - "learning_rate": 9.736051244538497e-06, - "loss": 0.912, - "step": 35780 - }, - { - "epoch": 4.02, - "learning_rate": 9.692942334934935e-06, - "loss": 0.891, - "step": 35800 - }, - { - "epoch": 4.02, - "learning_rate": 9.649918829145415e-06, - "loss": 0.8954, - "step": 35820 - }, - { - "epoch": 4.03, - "learning_rate": 9.60698081832943e-06, - "loss": 0.9018, - "step": 35840 - }, - { - "epoch": 4.03, - "learning_rate": 9.564128393465332e-06, - "loss": 0.8627, - "step": 35860 - }, - { - "epoch": 4.03, - "learning_rate": 9.52136164535018e-06, - "loss": 0.9076, - "step": 35880 - }, - { - "epoch": 4.03, - "learning_rate": 9.478680664599404e-06, - "loss": 0.93, - "step": 35900 - }, - { - "epoch": 4.04, - "learning_rate": 9.436085541646783e-06, - "loss": 0.8731, - "step": 35920 - }, - { - "epoch": 4.04, - "learning_rate": 9.393576366744162e-06, - "loss": 0.8885, - "step": 35940 - }, - { - "epoch": 4.04, - "learning_rate": 9.351153229961223e-06, - "loss": 0.8715, - "step": 35960 - }, - { - "epoch": 4.04, - "learning_rate": 9.308816221185441e-06, - "loss": 0.8789, - "step": 35980 - }, - { - "epoch": 4.04, - "learning_rate": 9.266565430121733e-06, - "loss": 0.886, - "step": 36000 - }, - { - "epoch": 4.05, - "learning_rate": 9.224400946292367e-06, - "loss": 0.8862, - "step": 36020 - }, - { - "epoch": 4.05, - "learning_rate": 9.182322859036702e-06, - "loss": 0.9107, - "step": 36040 - }, - { - "epoch": 4.05, - "learning_rate": 9.14033125751107e-06, - "loss": 0.877, - "step": 36060 - }, - { - "epoch": 4.05, - "learning_rate": 9.098426230688578e-06, - "loss": 0.8937, - "step": 36080 - }, - { - "epoch": 4.06, - "learning_rate": 9.056607867358829e-06, - "loss": 0.8663, - "step": 36100 - }, - { - "epoch": 4.06, - "learning_rate": 9.014876256127852e-06, - "loss": 0.8781, - "step": 36120 - }, - { - "epoch": 4.06, - "learning_rate": 8.973231485417849e-06, - "loss": 0.866, - "step": 36140 - }, - { - "epoch": 4.06, - "learning_rate": 8.931673643467014e-06, - "loss": 0.8693, - "step": 36160 - }, - { - "epoch": 4.07, - "learning_rate": 8.890202818329368e-06, - "loss": 0.8629, - "step": 36180 - }, - { - "epoch": 4.07, - "learning_rate": 8.84881909787455e-06, - "loss": 0.8731, - "step": 36200 - }, - { - "epoch": 4.07, - "learning_rate": 8.807522569787653e-06, - "loss": 0.8921, - "step": 36220 - }, - { - "epoch": 4.07, - "learning_rate": 8.76631332156898e-06, - "loss": 0.8864, - "step": 36240 - }, - { - "epoch": 4.07, - "learning_rate": 8.725191440533936e-06, - "loss": 0.8866, - "step": 36260 - }, - { - "epoch": 4.08, - "learning_rate": 8.684157013812839e-06, - "loss": 0.8669, - "step": 36280 - }, - { - "epoch": 4.08, - "learning_rate": 8.643210128350665e-06, - "loss": 0.8555, - "step": 36300 - }, - { - "epoch": 4.08, - "learning_rate": 8.602350870906895e-06, - "loss": 0.8809, - "step": 36320 - }, - { - "epoch": 4.08, - "learning_rate": 8.561579328055375e-06, - "loss": 0.865, - "step": 36340 - }, - { - "epoch": 4.09, - "learning_rate": 8.52089558618408e-06, - "loss": 0.888, - "step": 36360 - }, - { - "epoch": 4.09, - "learning_rate": 8.480299731494956e-06, - "loss": 0.8966, - "step": 36380 - }, - { - "epoch": 4.09, - "learning_rate": 8.439791850003726e-06, - "loss": 0.8947, - "step": 36400 - }, - { - "epoch": 4.09, - "learning_rate": 8.39937202753972e-06, - "loss": 0.8662, - "step": 36420 - }, - { - "epoch": 4.09, - "learning_rate": 8.35904034974569e-06, - "loss": 0.8828, - "step": 36440 - }, - { - "epoch": 4.1, - "learning_rate": 8.31879690207758e-06, - "loss": 0.8679, - "step": 36460 - }, - { - "epoch": 4.1, - "learning_rate": 8.278641769804469e-06, - "loss": 0.8961, - "step": 36480 - }, - { - "epoch": 4.1, - "learning_rate": 8.23857503800825e-06, - "loss": 0.8836, - "step": 36500 - }, - { - "epoch": 4.1, - "learning_rate": 8.198596791583523e-06, - "loss": 0.8672, - "step": 36520 - }, - { - "epoch": 4.11, - "learning_rate": 8.158707115237407e-06, - "loss": 0.8916, - "step": 36540 - }, - { - "epoch": 4.11, - "learning_rate": 8.118906093489358e-06, - "loss": 0.9051, - "step": 36560 - }, - { - "epoch": 4.11, - "learning_rate": 8.079193810670988e-06, - "loss": 0.8754, - "step": 36580 - }, - { - "epoch": 4.11, - "learning_rate": 8.039570350925878e-06, - "loss": 0.895, - "step": 36600 - }, - { - "epoch": 4.11, - "learning_rate": 8.000035798209421e-06, - "loss": 0.8794, - "step": 36620 - }, - { - "epoch": 4.12, - "learning_rate": 7.960590236288633e-06, - "loss": 0.8761, - "step": 36640 - }, - { - "epoch": 4.12, - "learning_rate": 7.921233748741934e-06, - "loss": 0.8677, - "step": 36660 - }, - { - "epoch": 4.12, - "learning_rate": 7.88196641895907e-06, - "loss": 0.8771, - "step": 36680 - }, - { - "epoch": 4.12, - "learning_rate": 7.842788330140838e-06, - "loss": 0.8851, - "step": 36700 - }, - { - "epoch": 4.13, - "learning_rate": 7.803699565298972e-06, - "loss": 0.9103, - "step": 36720 - }, - { - "epoch": 4.13, - "learning_rate": 7.764700207255903e-06, - "loss": 0.8621, - "step": 36740 - }, - { - "epoch": 4.13, - "learning_rate": 7.725790338644673e-06, - "loss": 0.8558, - "step": 36760 - }, - { - "epoch": 4.13, - "learning_rate": 7.686970041908675e-06, - "loss": 0.8793, - "step": 36780 - }, - { - "epoch": 4.13, - "learning_rate": 7.648239399301544e-06, - "loss": 0.9105, - "step": 36800 - }, - { - "epoch": 4.14, - "learning_rate": 7.6095984928869265e-06, - "loss": 0.879, - "step": 36820 - }, - { - "epoch": 4.14, - "learning_rate": 7.571047404538351e-06, - "loss": 0.8657, - "step": 36840 - }, - { - "epoch": 4.14, - "learning_rate": 7.532586215939025e-06, - "loss": 0.8624, - "step": 36860 - }, - { - "epoch": 4.14, - "learning_rate": 7.49421500858169e-06, - "loss": 0.8568, - "step": 36880 - }, - { - "epoch": 4.15, - "learning_rate": 7.45593386376841e-06, - "loss": 0.8805, - "step": 36900 - }, - { - "epoch": 4.15, - "learning_rate": 7.41774286261045e-06, - "loss": 0.8731, - "step": 36920 - }, - { - "epoch": 4.15, - "learning_rate": 7.379642086028038e-06, - "loss": 0.9025, - "step": 36940 - }, - { - "epoch": 4.15, - "learning_rate": 7.341631614750266e-06, - "loss": 0.867, - "step": 36960 - }, - { - "epoch": 4.16, - "learning_rate": 7.303711529314861e-06, - "loss": 0.877, - "step": 36980 - }, - { - "epoch": 4.16, - "learning_rate": 7.265881910068062e-06, - "loss": 0.8611, - "step": 37000 - }, - { - "epoch": 4.16, - "learning_rate": 7.228142837164404e-06, - "loss": 0.8895, - "step": 37020 - }, - { - "epoch": 4.16, - "learning_rate": 7.190494390566571e-06, - "loss": 0.9216, - "step": 37040 - }, - { - "epoch": 4.16, - "learning_rate": 7.152936650045245e-06, - "loss": 0.8817, - "step": 37060 - }, - { - "epoch": 4.17, - "learning_rate": 7.115469695178895e-06, - "loss": 0.8688, - "step": 37080 - }, - { - "epoch": 4.17, - "learning_rate": 7.078093605353642e-06, - "loss": 0.8903, - "step": 37100 - }, - { - "epoch": 4.17, - "learning_rate": 7.040808459763082e-06, - "loss": 0.8687, - "step": 37120 - }, - { - "epoch": 4.17, - "learning_rate": 7.003614337408099e-06, - "loss": 0.8761, - "step": 37140 - }, - { - "epoch": 4.18, - "learning_rate": 6.96651131709673e-06, - "loss": 0.88, - "step": 37160 - }, - { - "epoch": 4.18, - "learning_rate": 6.929499477443962e-06, - "loss": 0.8919, - "step": 37180 - }, - { - "epoch": 4.18, - "learning_rate": 6.892578896871643e-06, - "loss": 0.9064, - "step": 37200 - }, - { - "epoch": 4.18, - "learning_rate": 6.855749653608179e-06, - "loss": 0.8838, - "step": 37220 - }, - { - "epoch": 4.18, - "learning_rate": 6.819011825688498e-06, - "loss": 0.8945, - "step": 37240 - }, - { - "epoch": 4.19, - "learning_rate": 6.782365490953824e-06, - "loss": 0.8609, - "step": 37260 - }, - { - "epoch": 4.19, - "learning_rate": 6.745810727051521e-06, - "loss": 0.8978, - "step": 37280 - }, - { - "epoch": 4.19, - "learning_rate": 6.709347611434924e-06, - "loss": 0.8814, - "step": 37300 - }, - { - "epoch": 4.19, - "learning_rate": 6.672976221363186e-06, - "loss": 0.8896, - "step": 37320 - }, - { - "epoch": 4.2, - "learning_rate": 6.636696633901124e-06, - "loss": 0.9108, - "step": 37340 - }, - { - "epoch": 4.2, - "learning_rate": 6.600508925919008e-06, - "loss": 0.9018, - "step": 37360 - }, - { - "epoch": 4.2, - "learning_rate": 6.564413174092443e-06, - "loss": 0.9047, - "step": 37380 - }, - { - "epoch": 4.2, - "learning_rate": 6.528409454902235e-06, - "loss": 0.8608, - "step": 37400 - }, - { - "epoch": 4.2, - "learning_rate": 6.492497844634121e-06, - "loss": 0.8941, - "step": 37420 - }, - { - "epoch": 4.21, - "learning_rate": 6.4566784193787255e-06, - "loss": 0.8743, - "step": 37440 - }, - { - "epoch": 4.21, - "learning_rate": 6.4209512550313215e-06, - "loss": 0.8991, - "step": 37460 - }, - { - "epoch": 4.21, - "learning_rate": 6.38531642729171e-06, - "loss": 0.8968, - "step": 37480 - }, - { - "epoch": 4.21, - "learning_rate": 6.3497740116640396e-06, - "loss": 0.8719, - "step": 37500 - }, - { - "epoch": 4.22, - "learning_rate": 6.314324083456663e-06, - "loss": 0.9034, - "step": 37520 - }, - { - "epoch": 4.22, - "learning_rate": 6.2789667177819755e-06, - "loss": 0.8603, - "step": 37540 - }, - { - "epoch": 4.22, - "learning_rate": 6.2437019895561995e-06, - "loss": 0.9047, - "step": 37560 - }, - { - "epoch": 4.22, - "learning_rate": 6.208529973499316e-06, - "loss": 0.9057, - "step": 37580 - }, - { - "epoch": 4.22, - "learning_rate": 6.1734507441348785e-06, - "loss": 0.8938, - "step": 37600 - }, - { - "epoch": 4.23, - "learning_rate": 6.138464375789821e-06, - "loss": 0.8755, - "step": 37620 - }, - { - "epoch": 4.23, - "learning_rate": 6.1035709425943e-06, - "loss": 0.8896, - "step": 37640 - }, - { - "epoch": 4.23, - "learning_rate": 6.068770518481582e-06, - "loss": 0.8586, - "step": 37660 - }, - { - "epoch": 4.23, - "learning_rate": 6.034063177187865e-06, - "loss": 0.8803, - "step": 37680 - }, - { - "epoch": 4.24, - "learning_rate": 5.9994489922521155e-06, - "loss": 0.9121, - "step": 37700 - }, - { - "epoch": 4.24, - "learning_rate": 5.96492803701591e-06, - "loss": 0.8782, - "step": 37720 - }, - { - "epoch": 4.24, - "learning_rate": 5.9305003846233e-06, - "loss": 0.9016, - "step": 37740 - }, - { - "epoch": 4.24, - "learning_rate": 5.8961661080206454e-06, - "loss": 0.8687, - "step": 37760 - }, - { - "epoch": 4.24, - "learning_rate": 5.861925279956415e-06, - "loss": 0.885, - "step": 37780 - }, - { - "epoch": 4.25, - "learning_rate": 5.827777972981152e-06, - "loss": 0.853, - "step": 37800 - }, - { - "epoch": 4.25, - "learning_rate": 5.793724259447203e-06, - "loss": 0.8716, - "step": 37820 - }, - { - "epoch": 4.25, - "learning_rate": 5.759764211508578e-06, - "loss": 0.868, - "step": 37840 - }, - { - "epoch": 4.25, - "learning_rate": 5.7258979011208746e-06, - "loss": 0.8453, - "step": 37860 - }, - { - "epoch": 4.26, - "learning_rate": 5.692125400041049e-06, - "loss": 0.8788, - "step": 37880 - }, - { - "epoch": 4.26, - "learning_rate": 5.658446779827309e-06, - "loss": 0.8741, - "step": 37900 - }, - { - "epoch": 4.26, - "learning_rate": 5.624862111838919e-06, - "loss": 0.8998, - "step": 37920 - }, - { - "epoch": 4.26, - "learning_rate": 5.5913714672361065e-06, - "loss": 0.8717, - "step": 37940 - }, - { - "epoch": 4.27, - "learning_rate": 5.557974916979863e-06, - "loss": 0.8747, - "step": 37960 - }, - { - "epoch": 4.27, - "learning_rate": 5.5246725318317815e-06, - "loss": 0.8667, - "step": 37980 - }, - { - "epoch": 4.27, - "learning_rate": 5.491464382354e-06, - "loss": 0.8788, - "step": 38000 - }, - { - "epoch": 4.27, - "learning_rate": 5.458350538908946e-06, - "loss": 0.8948, - "step": 38020 - }, - { - "epoch": 4.27, - "learning_rate": 5.425331071659212e-06, - "loss": 0.8609, - "step": 38040 - }, - { - "epoch": 4.28, - "learning_rate": 5.392406050567455e-06, - "loss": 0.8659, - "step": 38060 - }, - { - "epoch": 4.28, - "learning_rate": 5.3595755453962115e-06, - "loss": 0.8705, - "step": 38080 - }, - { - "epoch": 4.28, - "learning_rate": 5.3268396257077465e-06, - "loss": 0.8849, - "step": 38100 - }, - { - "epoch": 4.28, - "learning_rate": 5.294198360863917e-06, - "loss": 0.8659, - "step": 38120 - }, - { - "epoch": 4.29, - "learning_rate": 5.26165182002602e-06, - "loss": 0.8674, - "step": 38140 - }, - { - "epoch": 4.29, - "learning_rate": 5.229200072154672e-06, - "loss": 0.8786, - "step": 38160 - }, - { - "epoch": 4.29, - "learning_rate": 5.196843186009581e-06, - "loss": 0.8893, - "step": 38180 - }, - { - "epoch": 4.29, - "learning_rate": 5.164581230149529e-06, - "loss": 0.899, - "step": 38200 - }, - { - "epoch": 4.29, - "learning_rate": 5.132414272932107e-06, - "loss": 0.8991, - "step": 38220 - }, - { - "epoch": 4.3, - "learning_rate": 5.100342382513662e-06, - "loss": 0.867, - "step": 38240 - }, - { - "epoch": 4.3, - "learning_rate": 5.068365626849058e-06, - "loss": 0.8965, - "step": 38260 - }, - { - "epoch": 4.3, - "learning_rate": 5.036484073691622e-06, - "loss": 0.9204, - "step": 38280 - }, - { - "epoch": 4.3, - "learning_rate": 5.004697790592961e-06, - "loss": 0.9037, - "step": 38300 - }, - { - "epoch": 4.31, - "learning_rate": 4.9730068449028e-06, - "loss": 0.8833, - "step": 38320 - }, - { - "epoch": 4.31, - "learning_rate": 4.941411303768889e-06, - "loss": 0.88, - "step": 38340 - }, - { - "epoch": 4.31, - "learning_rate": 4.90991123413681e-06, - "loss": 0.8873, - "step": 38360 - }, - { - "epoch": 4.31, - "learning_rate": 4.878506702749869e-06, - "loss": 0.8802, - "step": 38380 - }, - { - "epoch": 4.31, - "learning_rate": 4.847197776148932e-06, - "loss": 0.8771, - "step": 38400 - }, - { - "epoch": 4.32, - "learning_rate": 4.815984520672301e-06, - "loss": 0.8883, - "step": 38420 - }, - { - "epoch": 4.32, - "learning_rate": 4.784867002455584e-06, - "loss": 0.8629, - "step": 38440 - }, - { - "epoch": 4.32, - "learning_rate": 4.753845287431491e-06, - "loss": 0.8824, - "step": 38460 - }, - { - "epoch": 4.32, - "learning_rate": 4.722919441329782e-06, - "loss": 0.882, - "step": 38480 - }, - { - "epoch": 4.33, - "learning_rate": 4.692089529677074e-06, - "loss": 0.8704, - "step": 38500 - }, - { - "epoch": 4.33, - "learning_rate": 4.661355617796742e-06, - "loss": 0.8956, - "step": 38520 - }, - { - "epoch": 4.33, - "learning_rate": 4.630717770808696e-06, - "loss": 0.8867, - "step": 38540 - }, - { - "epoch": 4.33, - "learning_rate": 4.600176053629346e-06, - "loss": 0.8825, - "step": 38560 - }, - { - "epoch": 4.33, - "learning_rate": 4.569730530971411e-06, - "loss": 0.8755, - "step": 38580 - }, - { - "epoch": 4.34, - "learning_rate": 4.5393812673437844e-06, - "loss": 0.8778, - "step": 38600 - }, - { - "epoch": 4.34, - "learning_rate": 4.5091283270513985e-06, - "loss": 0.8921, - "step": 38620 - }, - { - "epoch": 4.34, - "learning_rate": 4.4789717741951065e-06, - "loss": 0.8814, - "step": 38640 - }, - { - "epoch": 4.34, - "learning_rate": 4.448911672671535e-06, - "loss": 0.8655, - "step": 38660 - }, - { - "epoch": 4.35, - "learning_rate": 4.418948086172914e-06, - "loss": 0.9148, - "step": 38680 - }, - { - "epoch": 4.35, - "learning_rate": 4.389081078186996e-06, - "loss": 0.8551, - "step": 38700 - }, - { - "epoch": 4.35, - "learning_rate": 4.359310711996939e-06, - "loss": 0.8897, - "step": 38720 - }, - { - "epoch": 4.35, - "learning_rate": 4.329637050681057e-06, - "loss": 0.859, - "step": 38740 - }, - { - "epoch": 4.36, - "learning_rate": 4.300060157112817e-06, - "loss": 0.8528, - "step": 38760 - }, - { - "epoch": 4.36, - "learning_rate": 4.270580093960641e-06, - "loss": 0.8661, - "step": 38780 - }, - { - "epoch": 4.36, - "learning_rate": 4.241196923687774e-06, - "loss": 0.8779, - "step": 38800 - }, - { - "epoch": 4.36, - "learning_rate": 4.2119107085521636e-06, - "loss": 0.8813, - "step": 38820 - }, - { - "epoch": 4.36, - "learning_rate": 4.18272151060633e-06, - "loss": 0.865, - "step": 38840 - }, - { - "epoch": 4.37, - "learning_rate": 4.153629391697244e-06, - "loss": 0.8753, - "step": 38860 - }, - { - "epoch": 4.37, - "learning_rate": 4.12463441346615e-06, - "loss": 0.9042, - "step": 38880 - }, - { - "epoch": 4.37, - "learning_rate": 4.09573663734848e-06, - "loss": 0.8962, - "step": 38900 - }, - { - "epoch": 4.37, - "learning_rate": 4.066936124573734e-06, - "loss": 0.8484, - "step": 38920 - }, - { - "epoch": 4.38, - "learning_rate": 4.03823293616532e-06, - "loss": 0.8848, - "step": 38940 - }, - { - "epoch": 4.38, - "learning_rate": 4.009627132940397e-06, - "loss": 0.8626, - "step": 38960 - }, - { - "epoch": 4.38, - "learning_rate": 3.981118775509812e-06, - "loss": 0.8792, - "step": 38980 - }, - { - "epoch": 4.38, - "learning_rate": 3.952707924277949e-06, - "loss": 0.8841, - "step": 39000 - }, - { - "epoch": 4.38, - "learning_rate": 3.9243946394425635e-06, - "loss": 0.8447, - "step": 39020 - }, - { - "epoch": 4.39, - "learning_rate": 3.896178980994714e-06, - "loss": 0.8927, - "step": 39040 - }, - { - "epoch": 4.39, - "learning_rate": 3.868061008718593e-06, - "loss": 0.8913, - "step": 39060 - }, - { - "epoch": 4.39, - "learning_rate": 3.840040782191401e-06, - "loss": 0.889, - "step": 39080 - }, - { - "epoch": 4.39, - "learning_rate": 3.8121183607832344e-06, - "loss": 0.869, - "step": 39100 - }, - { - "epoch": 4.4, - "learning_rate": 3.7842938036569854e-06, - "loss": 0.9043, - "step": 39120 - }, - { - "epoch": 4.4, - "learning_rate": 3.756567169768166e-06, - "loss": 0.8772, - "step": 39140 - }, - { - "epoch": 4.4, - "learning_rate": 3.728938517864794e-06, - "loss": 0.8899, - "step": 39160 - }, - { - "epoch": 4.4, - "learning_rate": 3.701407906487303e-06, - "loss": 0.8797, - "step": 39180 - }, - { - "epoch": 4.4, - "learning_rate": 3.6739753939683817e-06, - "loss": 0.8753, - "step": 39200 - }, - { - "epoch": 4.41, - "learning_rate": 3.6466410384328685e-06, - "loss": 0.8659, - "step": 39220 - }, - { - "epoch": 4.41, - "learning_rate": 3.6194048977976237e-06, - "loss": 0.875, - "step": 39240 - }, - { - "epoch": 4.41, - "learning_rate": 3.5922670297714124e-06, - "loss": 0.8816, - "step": 39260 - }, - { - "epoch": 4.41, - "learning_rate": 3.5652274918547724e-06, - "loss": 0.8792, - "step": 39280 - }, - { - "epoch": 4.42, - "learning_rate": 3.5382863413398694e-06, - "loss": 0.872, - "step": 39300 - }, - { - "epoch": 4.42, - "learning_rate": 3.5114436353104574e-06, - "loss": 0.8681, - "step": 39320 - }, - { - "epoch": 4.42, - "learning_rate": 3.4846994306416746e-06, - "loss": 0.855, - "step": 39340 - }, - { - "epoch": 4.42, - "learning_rate": 3.4580537839999374e-06, - "loss": 0.8619, - "step": 39360 - }, - { - "epoch": 4.42, - "learning_rate": 3.43150675184285e-06, - "loss": 0.8799, - "step": 39380 - }, - { - "epoch": 4.43, - "learning_rate": 3.405058390419069e-06, - "loss": 0.8626, - "step": 39400 - }, - { - "epoch": 4.43, - "learning_rate": 3.3787087557681895e-06, - "loss": 0.8733, - "step": 39420 - }, - { - "epoch": 4.43, - "learning_rate": 3.352457903720613e-06, - "loss": 0.8642, - "step": 39440 - }, - { - "epoch": 4.43, - "learning_rate": 3.326305889897435e-06, - "loss": 0.8841, - "step": 39460 - }, - { - "epoch": 4.44, - "learning_rate": 3.3002527697103435e-06, - "loss": 0.8708, - "step": 39480 - }, - { - "epoch": 4.44, - "learning_rate": 3.274298598361458e-06, - "loss": 0.896, - "step": 39500 - }, - { - "epoch": 4.44, - "learning_rate": 3.2484434308432843e-06, - "loss": 0.8654, - "step": 39520 - }, - { - "epoch": 4.44, - "learning_rate": 3.2226873219385224e-06, - "loss": 0.8616, - "step": 39540 - }, - { - "epoch": 4.44, - "learning_rate": 3.197030326220013e-06, - "loss": 0.8735, - "step": 39560 - }, - { - "epoch": 4.45, - "learning_rate": 3.1714724980505484e-06, - "loss": 0.8782, - "step": 39580 - }, - { - "epoch": 4.45, - "learning_rate": 3.146013891582844e-06, - "loss": 0.8788, - "step": 39600 - }, - { - "epoch": 4.45, - "learning_rate": 3.1206545607593616e-06, - "loss": 0.8564, - "step": 39620 - }, - { - "epoch": 4.45, - "learning_rate": 3.095394559312226e-06, - "loss": 0.8777, - "step": 39640 - }, - { - "epoch": 4.46, - "learning_rate": 3.070233940763084e-06, - "loss": 0.8988, - "step": 39660 - }, - { - "epoch": 4.46, - "learning_rate": 3.0451727584230207e-06, - "loss": 0.871, - "step": 39680 - }, - { - "epoch": 4.46, - "learning_rate": 3.020211065392431e-06, - "loss": 0.885, - "step": 39700 - }, - { - "epoch": 4.46, - "learning_rate": 2.995348914560897e-06, - "loss": 0.8604, - "step": 39720 - }, - { - "epoch": 4.47, - "learning_rate": 2.9705863586071027e-06, - "loss": 0.8535, - "step": 39740 - }, - { - "epoch": 4.47, - "learning_rate": 2.945923449998711e-06, - "loss": 0.9172, - "step": 39760 - }, - { - "epoch": 4.47, - "learning_rate": 2.921360240992216e-06, - "loss": 0.8873, - "step": 39780 - }, - { - "epoch": 4.47, - "learning_rate": 2.8968967836329077e-06, - "loss": 0.8798, - "step": 39800 - }, - { - "epoch": 4.47, - "learning_rate": 2.8725331297546786e-06, - "loss": 0.8649, - "step": 39820 - }, - { - "epoch": 4.48, - "learning_rate": 2.8482693309800136e-06, - "loss": 0.8986, - "step": 39840 - }, - { - "epoch": 4.48, - "learning_rate": 2.8241054387197487e-06, - "loss": 0.92, - "step": 39860 - }, - { - "epoch": 4.48, - "learning_rate": 2.8000415041730845e-06, - "loss": 0.8747, - "step": 39880 - }, - { - "epoch": 4.48, - "learning_rate": 2.7760775783274127e-06, - "loss": 0.8853, - "step": 39900 - }, - { - "epoch": 4.49, - "learning_rate": 2.7522137119582238e-06, - "loss": 0.8646, - "step": 39920 - }, - { - "epoch": 4.49, - "learning_rate": 2.7284499556290033e-06, - "loss": 0.8972, - "step": 39940 - }, - { - "epoch": 4.49, - "learning_rate": 2.7047863596911248e-06, - "loss": 0.8619, - "step": 39960 - }, - { - "epoch": 4.49, - "learning_rate": 2.681222974283715e-06, - "loss": 0.8512, - "step": 39980 - }, - { - "epoch": 4.49, - "learning_rate": 2.657759849333591e-06, - "loss": 0.868, - "step": 40000 - }, - { - "epoch": 4.5, - "learning_rate": 2.6343970345551363e-06, - "loss": 0.8721, - "step": 40020 - }, - { - "epoch": 4.5, - "learning_rate": 2.6111345794502027e-06, - "loss": 0.8967, - "step": 40040 - }, - { - "epoch": 4.5, - "learning_rate": 2.587972533307964e-06, - "loss": 0.8751, - "step": 40060 - }, - { - "epoch": 4.5, - "learning_rate": 2.5649109452048735e-06, - "loss": 0.8797, - "step": 40080 - }, - { - "epoch": 4.51, - "learning_rate": 2.541949864004528e-06, - "loss": 0.8924, - "step": 40100 - }, - { - "epoch": 4.51, - "learning_rate": 2.5190893383575498e-06, - "loss": 0.855, - "step": 40120 - }, - { - "epoch": 4.51, - "learning_rate": 2.4963294167015204e-06, - "loss": 0.8679, - "step": 40140 - }, - { - "epoch": 4.51, - "learning_rate": 2.473670147260848e-06, - "loss": 0.8751, - "step": 40160 - }, - { - "epoch": 4.51, - "learning_rate": 2.451111578046689e-06, - "loss": 0.8775, - "step": 40180 - }, - { - "epoch": 4.52, - "learning_rate": 2.4286537568568023e-06, - "loss": 0.8797, - "step": 40200 - }, - { - "epoch": 4.52, - "learning_rate": 2.4062967312755037e-06, - "loss": 0.8527, - "step": 40220 - }, - { - "epoch": 4.52, - "learning_rate": 2.3840405486735607e-06, - "loss": 0.8736, - "step": 40240 - }, - { - "epoch": 4.52, - "learning_rate": 2.3618852562080187e-06, - "loss": 0.8782, - "step": 40260 - }, - { - "epoch": 4.53, - "learning_rate": 2.339830900822193e-06, - "loss": 0.9045, - "step": 40280 - }, - { - "epoch": 4.53, - "learning_rate": 2.3178775292455214e-06, - "loss": 0.8947, - "step": 40300 - }, - { - "epoch": 4.53, - "learning_rate": 2.296025187993467e-06, - "loss": 0.8586, - "step": 40320 - }, - { - "epoch": 4.53, - "learning_rate": 2.274273923367437e-06, - "loss": 0.8814, - "step": 40340 - }, - { - "epoch": 4.53, - "learning_rate": 2.25262378145466e-06, - "loss": 0.8635, - "step": 40360 - }, - { - "epoch": 4.54, - "learning_rate": 2.231074808128131e-06, - "loss": 0.8544, - "step": 40380 - }, - { - "epoch": 4.54, - "learning_rate": 2.2096270490464476e-06, - "loss": 0.8556, - "step": 40400 - }, - { - "epoch": 4.54, - "learning_rate": 2.1882805496537705e-06, - "loss": 0.9008, - "step": 40420 - }, - { - "epoch": 4.54, - "learning_rate": 2.167035355179725e-06, - "loss": 0.8816, - "step": 40440 - }, - { - "epoch": 4.55, - "learning_rate": 2.1458915106392697e-06, - "loss": 0.8555, - "step": 40460 - }, - { - "epoch": 4.55, - "learning_rate": 2.1248490608326123e-06, - "loss": 0.8593, - "step": 40480 - }, - { - "epoch": 4.55, - "learning_rate": 2.1039080503451325e-06, - "loss": 0.8759, - "step": 40500 - }, - { - "epoch": 4.55, - "learning_rate": 2.083068523547288e-06, - "loss": 0.8855, - "step": 40520 - }, - { - "epoch": 4.56, - "learning_rate": 2.0623305245944913e-06, - "loss": 0.8577, - "step": 40540 - }, - { - "epoch": 4.56, - "learning_rate": 2.0416940974270384e-06, - "loss": 0.8959, - "step": 40560 - }, - { - "epoch": 4.56, - "learning_rate": 2.0211592857700303e-06, - "loss": 0.8824, - "step": 40580 - }, - { - "epoch": 4.56, - "learning_rate": 2.0007261331332404e-06, - "loss": 0.9085, - "step": 40600 - }, - { - "epoch": 4.56, - "learning_rate": 1.9803946828110375e-06, - "loss": 0.8982, - "step": 40620 - }, - { - "epoch": 4.57, - "learning_rate": 1.9601649778823337e-06, - "loss": 0.863, - "step": 40640 - }, - { - "epoch": 4.57, - "learning_rate": 1.940037061210437e-06, - "loss": 0.8765, - "step": 40660 - }, - { - "epoch": 4.57, - "learning_rate": 1.920010975442976e-06, - "loss": 0.8977, - "step": 40680 - }, - { - "epoch": 4.57, - "learning_rate": 1.9000867630118324e-06, - "loss": 0.8776, - "step": 40700 - }, - { - "epoch": 4.58, - "learning_rate": 1.8802644661330304e-06, - "loss": 0.862, - "step": 40720 - }, - { - "epoch": 4.58, - "learning_rate": 1.8605441268066625e-06, - "loss": 0.8683, - "step": 40740 - }, - { - "epoch": 4.58, - "learning_rate": 1.8409257868167718e-06, - "loss": 0.8883, - "step": 40760 - }, - { - "epoch": 4.58, - "learning_rate": 1.8214094877312849e-06, - "loss": 0.8897, - "step": 40780 - }, - { - "epoch": 4.58, - "learning_rate": 1.8019952709019404e-06, - "loss": 0.8771, - "step": 40800 - }, - { - "epoch": 4.59, - "learning_rate": 1.7826831774641617e-06, - "loss": 0.8785, - "step": 40820 - }, - { - "epoch": 4.59, - "learning_rate": 1.7634732483369943e-06, - "loss": 0.8497, - "step": 40840 - }, - { - "epoch": 4.59, - "learning_rate": 1.7443655242230238e-06, - "loss": 0.8961, - "step": 40860 - }, - { - "epoch": 4.59, - "learning_rate": 1.7253600456082764e-06, - "loss": 0.8995, - "step": 40880 - }, - { - "epoch": 4.6, - "learning_rate": 1.7064568527621228e-06, - "loss": 0.8858, - "step": 40900 - }, - { - "epoch": 4.6, - "learning_rate": 1.6876559857372243e-06, - "loss": 0.8618, - "step": 40920 - }, - { - "epoch": 4.6, - "learning_rate": 1.6689574843694433e-06, - "loss": 0.8773, - "step": 40940 - }, - { - "epoch": 4.6, - "learning_rate": 1.6503613882777101e-06, - "loss": 0.8614, - "step": 40960 - }, - { - "epoch": 4.6, - "learning_rate": 1.6318677368640067e-06, - "loss": 0.9059, - "step": 40980 - }, - { - "epoch": 4.61, - "learning_rate": 1.6134765693132382e-06, - "loss": 0.9023, - "step": 41000 - }, - { - "epoch": 4.61, - "learning_rate": 1.5951879245931723e-06, - "loss": 0.8583, - "step": 41020 - }, - { - "epoch": 4.61, - "learning_rate": 1.5770018414543342e-06, - "loss": 0.8845, - "step": 41040 - }, - { - "epoch": 4.61, - "learning_rate": 1.5589183584299504e-06, - "loss": 0.8818, - "step": 41060 - }, - { - "epoch": 4.62, - "learning_rate": 1.5409375138358663e-06, - "loss": 0.8754, - "step": 41080 - }, - { - "epoch": 4.62, - "learning_rate": 1.5230593457704168e-06, - "loss": 0.8513, - "step": 41100 - }, - { - "epoch": 4.62, - "learning_rate": 1.505283892114412e-06, - "loss": 0.8949, - "step": 41120 - }, - { - "epoch": 4.62, - "learning_rate": 1.4876111905310408e-06, - "loss": 0.8962, - "step": 41140 - }, - { - "epoch": 4.62, - "learning_rate": 1.4700412784657336e-06, - "loss": 0.8671, - "step": 41160 - }, - { - "epoch": 4.63, - "learning_rate": 1.4525741931461612e-06, - "loss": 0.8822, - "step": 41180 - }, - { - "epoch": 4.63, - "learning_rate": 1.4352099715821133e-06, - "loss": 0.8698, - "step": 41200 - }, - { - "epoch": 4.63, - "learning_rate": 1.4179486505654316e-06, - "loss": 0.8912, - "step": 41220 - }, - { - "epoch": 4.63, - "learning_rate": 1.4007902666699157e-06, - "loss": 0.9107, - "step": 41240 - }, - { - "epoch": 4.64, - "learning_rate": 1.3837348562512842e-06, - "loss": 0.8674, - "step": 41260 - }, - { - "epoch": 4.64, - "learning_rate": 1.3667824554470466e-06, - "loss": 0.8724, - "step": 41280 - }, - { - "epoch": 4.64, - "learning_rate": 1.3499331001764592e-06, - "loss": 0.8747, - "step": 41300 - }, - { - "epoch": 4.64, - "learning_rate": 1.3331868261404479e-06, - "loss": 0.8595, - "step": 41320 - }, - { - "epoch": 4.64, - "learning_rate": 1.316543668821535e-06, - "loss": 0.8836, - "step": 41340 - }, - { - "epoch": 4.65, - "learning_rate": 1.3000036634837398e-06, - "loss": 0.8632, - "step": 41360 - }, - { - "epoch": 4.65, - "learning_rate": 1.2835668451725292e-06, - "loss": 0.8968, - "step": 41380 - }, - { - "epoch": 4.65, - "learning_rate": 1.267233248714722e-06, - "loss": 0.8771, - "step": 41400 - }, - { - "epoch": 4.65, - "learning_rate": 1.251002908718446e-06, - "loss": 0.8829, - "step": 41420 - }, - { - "epoch": 4.66, - "learning_rate": 1.2348758595730314e-06, - "loss": 0.8628, - "step": 41440 - }, - { - "epoch": 4.66, - "learning_rate": 1.2188521354489613e-06, - "loss": 0.862, - "step": 41460 - }, - { - "epoch": 4.66, - "learning_rate": 1.2029317702977882e-06, - "loss": 0.8919, - "step": 41480 - }, - { - "epoch": 4.66, - "learning_rate": 1.187114797852068e-06, - "loss": 0.8733, - "step": 41500 - }, - { - "epoch": 4.67, - "learning_rate": 1.1714012516252648e-06, - "loss": 0.8723, - "step": 41520 - }, - { - "epoch": 4.67, - "learning_rate": 1.1557911649117293e-06, - "loss": 0.8721, - "step": 41540 - }, - { - "epoch": 4.67, - "learning_rate": 1.1402845707865928e-06, - "loss": 0.8917, - "step": 41560 - }, - { - "epoch": 4.67, - "learning_rate": 1.124881502105679e-06, - "loss": 0.871, - "step": 41580 - }, - { - "epoch": 4.67, - "learning_rate": 1.1095819915054872e-06, - "loss": 0.8686, - "step": 41600 - }, - { - "epoch": 4.68, - "learning_rate": 1.094386071403075e-06, - "loss": 0.8621, - "step": 41620 - }, - { - "epoch": 4.68, - "learning_rate": 1.0792937739960262e-06, - "loss": 0.8884, - "step": 41640 - }, - { - "epoch": 4.68, - "learning_rate": 1.0643051312623553e-06, - "loss": 0.8675, - "step": 41660 - }, - { - "epoch": 4.68, - "learning_rate": 1.0494201749604525e-06, - "loss": 0.8882, - "step": 41680 - }, - { - "epoch": 4.69, - "learning_rate": 1.0346389366290122e-06, - "loss": 0.8953, - "step": 41700 - }, - { - "epoch": 4.69, - "learning_rate": 1.0199614475869646e-06, - "loss": 0.8781, - "step": 41720 - }, - { - "epoch": 4.69, - "learning_rate": 1.0053877389334277e-06, - "loss": 0.8719, - "step": 41740 - }, - { - "epoch": 4.69, - "learning_rate": 9.909178415476116e-07, - "loss": 0.8847, - "step": 41760 - }, - { - "epoch": 4.69, - "learning_rate": 9.765517860887808e-07, - "loss": 0.8803, - "step": 41780 - }, - { - "epoch": 4.7, - "learning_rate": 9.62289602996158e-07, - "loss": 0.8687, - "step": 41800 - }, - { - "epoch": 4.7, - "learning_rate": 9.481313224888877e-07, - "loss": 0.8566, - "step": 41820 - }, - { - "epoch": 4.7, - "learning_rate": 9.340769745659672e-07, - "loss": 0.9047, - "step": 41840 - }, - { - "epoch": 4.7, - "learning_rate": 9.201265890061816e-07, - "loss": 0.8534, - "step": 41860 - }, - { - "epoch": 4.71, - "learning_rate": 9.062801953680145e-07, - "loss": 0.9076, - "step": 41880 - }, - { - "epoch": 4.71, - "learning_rate": 8.925378229896364e-07, - "loss": 0.9027, - "step": 41900 - }, - { - "epoch": 4.71, - "learning_rate": 8.788995009888002e-07, - "loss": 0.8772, - "step": 41920 - }, - { - "epoch": 4.71, - "learning_rate": 8.653652582627958e-07, - "loss": 0.8791, - "step": 41940 - }, - { - "epoch": 4.71, - "learning_rate": 8.519351234883787e-07, - "loss": 0.8871, - "step": 41960 - }, - { - "epoch": 4.72, - "learning_rate": 8.386091251217365e-07, - "loss": 0.8808, - "step": 41980 - }, - { - "epoch": 4.72, - "learning_rate": 8.253872913983884e-07, - "loss": 0.9046, - "step": 42000 - }, - { - "epoch": 4.72, - "learning_rate": 8.122696503331583e-07, - "loss": 0.9211, - "step": 42020 - }, - { - "epoch": 4.72, - "learning_rate": 7.992562297201023e-07, - "loss": 0.8927, - "step": 42040 - }, - { - "epoch": 4.73, - "learning_rate": 7.863470571324527e-07, - "loss": 0.8639, - "step": 42060 - }, - { - "epoch": 4.73, - "learning_rate": 7.735421599225467e-07, - "loss": 0.9165, - "step": 42080 - }, - { - "epoch": 4.73, - "learning_rate": 7.608415652217982e-07, - "loss": 0.8829, - "step": 42100 - }, - { - "epoch": 4.73, - "learning_rate": 7.482452999406087e-07, - "loss": 0.8938, - "step": 42120 - }, - { - "epoch": 4.73, - "learning_rate": 7.357533907683234e-07, - "loss": 0.8722, - "step": 42140 - }, - { - "epoch": 4.74, - "learning_rate": 7.23365864173181e-07, - "loss": 0.8738, - "step": 42160 - }, - { - "epoch": 4.74, - "learning_rate": 7.110827464022474e-07, - "loss": 0.8595, - "step": 42180 - }, - { - "epoch": 4.74, - "learning_rate": 6.989040634813648e-07, - "loss": 0.8597, - "step": 42200 - }, - { - "epoch": 4.74, - "learning_rate": 6.868298412150864e-07, - "loss": 0.9116, - "step": 42220 - }, - { - "epoch": 4.75, - "learning_rate": 6.748601051866532e-07, - "loss": 0.8424, - "step": 42240 - }, - { - "epoch": 4.75, - "learning_rate": 6.629948807579001e-07, - "loss": 0.8709, - "step": 42260 - }, - { - "epoch": 4.75, - "learning_rate": 6.512341930692167e-07, - "loss": 0.9012, - "step": 42280 - }, - { - "epoch": 4.75, - "learning_rate": 6.395780670395147e-07, - "loss": 0.869, - "step": 42300 - }, - { - "epoch": 4.76, - "learning_rate": 6.280265273661379e-07, - "loss": 0.8905, - "step": 42320 - }, - { - "epoch": 4.76, - "learning_rate": 6.165795985248413e-07, - "loss": 0.8933, - "step": 42340 - }, - { - "epoch": 4.76, - "learning_rate": 6.052373047697236e-07, - "loss": 0.8983, - "step": 42360 - }, - { - "epoch": 4.76, - "learning_rate": 5.939996701331884e-07, - "loss": 0.9021, - "step": 42380 - }, - { - "epoch": 4.76, - "learning_rate": 5.828667184258673e-07, - "loss": 0.889, - "step": 42400 - }, - { - "epoch": 4.77, - "learning_rate": 5.718384732365967e-07, - "loss": 0.8627, - "step": 42420 - }, - { - "epoch": 4.77, - "learning_rate": 5.609149579323513e-07, - "loss": 0.8778, - "step": 42440 - }, - { - "epoch": 4.77, - "learning_rate": 5.50096195658223e-07, - "loss": 0.8989, - "step": 42460 - }, - { - "epoch": 4.77, - "learning_rate": 5.393822093373135e-07, - "loss": 0.8895, - "step": 42480 - }, - { - "epoch": 4.78, - "learning_rate": 5.287730216707532e-07, - "loss": 0.8777, - "step": 42500 - }, - { - "epoch": 4.78, - "learning_rate": 5.182686551376048e-07, - "loss": 0.895, - "step": 42520 - }, - { - "epoch": 4.78, - "learning_rate": 5.07869131994837e-07, - "loss": 0.8736, - "step": 42540 - }, - { - "epoch": 4.78, - "learning_rate": 4.975744742772848e-07, - "loss": 0.8999, - "step": 42560 - }, - { - "epoch": 4.78, - "learning_rate": 4.873847037975665e-07, - "loss": 0.8983, - "step": 42580 - }, - { - "epoch": 4.79, - "learning_rate": 4.772998421460895e-07, - "loss": 0.8648, - "step": 42600 - }, - { - "epoch": 4.79, - "learning_rate": 4.6731991069094984e-07, - "loss": 0.8712, - "step": 42620 - }, - { - "epoch": 4.79, - "learning_rate": 4.574449305779327e-07, - "loss": 0.8866, - "step": 42640 - }, - { - "epoch": 4.79, - "learning_rate": 4.4767492273045665e-07, - "loss": 0.9028, - "step": 42660 - }, - { - "epoch": 4.8, - "learning_rate": 4.380099078495015e-07, - "loss": 0.88, - "step": 42680 - }, - { - "epoch": 4.8, - "learning_rate": 4.284499064135916e-07, - "loss": 0.8929, - "step": 42700 - }, - { - "epoch": 4.8, - "learning_rate": 4.189949386787462e-07, - "loss": 0.8876, - "step": 42720 - }, - { - "epoch": 4.8, - "learning_rate": 4.0964502467844e-07, - "loss": 0.9048, - "step": 42740 - }, - { - "epoch": 4.8, - "learning_rate": 4.0040018422355385e-07, - "loss": 0.8726, - "step": 42760 - }, - { - "epoch": 4.81, - "learning_rate": 3.9126043690234093e-07, - "loss": 0.8974, - "step": 42780 - }, - { - "epoch": 4.81, - "learning_rate": 3.82225802080366e-07, - "loss": 0.8804, - "step": 42800 - }, - { - "epoch": 4.81, - "learning_rate": 3.7329629890048846e-07, - "loss": 0.8947, - "step": 42820 - }, - { - "epoch": 4.81, - "learning_rate": 3.6447194628281276e-07, - "loss": 0.8923, - "step": 42840 - }, - { - "epoch": 4.82, - "learning_rate": 3.557527629246438e-07, - "loss": 0.8837, - "step": 42860 - }, - { - "epoch": 4.82, - "learning_rate": 3.471387673004534e-07, - "loss": 0.8695, - "step": 42880 - }, - { - "epoch": 4.82, - "learning_rate": 3.3862997766182515e-07, - "loss": 0.8827, - "step": 42900 - }, - { - "epoch": 4.82, - "learning_rate": 3.302264120374543e-07, - "loss": 0.8632, - "step": 42920 - }, - { - "epoch": 4.82, - "learning_rate": 3.219280882330644e-07, - "loss": 0.8822, - "step": 42940 - }, - { - "epoch": 4.83, - "learning_rate": 3.137350238313963e-07, - "loss": 0.9121, - "step": 42960 - }, - { - "epoch": 4.83, - "learning_rate": 3.0564723619215807e-07, - "loss": 0.9014, - "step": 42980 - }, - { - "epoch": 4.83, - "learning_rate": 2.9766474245200847e-07, - "loss": 0.8743, - "step": 43000 - }, - { - "epoch": 4.83, - "learning_rate": 2.8978755952448475e-07, - "loss": 0.8561, - "step": 43020 - }, - { - "epoch": 4.84, - "learning_rate": 2.8201570410000824e-07, - "loss": 0.8845, - "step": 43040 - }, - { - "epoch": 4.84, - "learning_rate": 2.74349192645823e-07, - "loss": 0.8676, - "step": 43060 - }, - { - "epoch": 4.84, - "learning_rate": 2.6678804140596315e-07, - "loss": 0.8449, - "step": 43080 - }, - { - "epoch": 4.84, - "learning_rate": 2.5933226640121875e-07, - "loss": 0.8725, - "step": 43100 - }, - { - "epoch": 4.84, - "learning_rate": 2.5198188342912543e-07, - "loss": 0.876, - "step": 43120 - }, - { - "epoch": 4.85, - "learning_rate": 2.447369080638806e-07, - "loss": 0.8781, - "step": 43140 - }, - { - "epoch": 4.85, - "learning_rate": 2.3759735565637155e-07, - "loss": 0.8812, - "step": 43160 - }, - { - "epoch": 4.85, - "learning_rate": 2.305632413340919e-07, - "loss": 0.8981, - "step": 43180 - }, - { - "epoch": 4.85, - "learning_rate": 2.236345800011308e-07, - "loss": 0.886, - "step": 43200 - }, - { - "epoch": 4.86, - "learning_rate": 2.1681138633816156e-07, - "loss": 0.8873, - "step": 43220 - }, - { - "epoch": 4.86, - "learning_rate": 2.1009367480235298e-07, - "loss": 0.8964, - "step": 43240 - }, - { - "epoch": 4.86, - "learning_rate": 2.0348145962740817e-07, - "loss": 0.8663, - "step": 43260 - }, - { - "epoch": 4.86, - "learning_rate": 1.9697475482349238e-07, - "loss": 0.868, - "step": 43280 - }, - { - "epoch": 4.87, - "learning_rate": 1.9057357417719968e-07, - "loss": 0.881, - "step": 43300 - }, - { - "epoch": 4.87, - "learning_rate": 1.8427793125154747e-07, - "loss": 0.8902, - "step": 43320 - }, - { - "epoch": 4.87, - "learning_rate": 1.7808783938593198e-07, - "loss": 0.8796, - "step": 43340 - }, - { - "epoch": 4.87, - "learning_rate": 1.7200331169611727e-07, - "loss": 0.8813, - "step": 43360 - }, - { - "epoch": 4.87, - "learning_rate": 1.6602436107417408e-07, - "loss": 0.8988, - "step": 43380 - }, - { - "epoch": 4.88, - "learning_rate": 1.6015100018849095e-07, - "loss": 0.8534, - "step": 43400 - }, - { - "epoch": 4.88, - "learning_rate": 1.5438324148371875e-07, - "loss": 0.8964, - "step": 43420 - }, - { - "epoch": 4.88, - "learning_rate": 1.487210971807651e-07, - "loss": 0.8934, - "step": 43440 - }, - { - "epoch": 4.88, - "learning_rate": 1.4316457927674444e-07, - "loss": 0.8765, - "step": 43460 - }, - { - "epoch": 4.89, - "learning_rate": 1.3771369954497793e-07, - "loss": 0.8458, - "step": 43480 - }, - { - "epoch": 4.89, - "learning_rate": 1.323684695349603e-07, - "loss": 0.8656, - "step": 43500 - }, - { - "epoch": 4.89, - "learning_rate": 1.2712890057232085e-07, - "loss": 0.8792, - "step": 43520 - }, - { - "epoch": 4.89, - "learning_rate": 1.2199500375881247e-07, - "loss": 0.8566, - "step": 43540 - }, - { - "epoch": 4.89, - "learning_rate": 1.1696678997230038e-07, - "loss": 0.8954, - "step": 43560 - }, - { - "epoch": 4.9, - "learning_rate": 1.1204426986671235e-07, - "loss": 0.8871, - "step": 43580 - }, - { - "epoch": 4.9, - "learning_rate": 1.0722745387203859e-07, - "loss": 0.875, - "step": 43600 - }, - { - "epoch": 4.9, - "learning_rate": 1.025163521942818e-07, - "loss": 0.8808, - "step": 43620 - }, - { - "epoch": 4.9, - "learning_rate": 9.791097481547939e-08, - "loss": 0.8818, - "step": 43640 - }, - { - "epoch": 4.91, - "learning_rate": 9.34113314936369e-08, - "loss": 0.884, - "step": 43660 - }, - { - "epoch": 4.91, - "learning_rate": 8.901743176273902e-08, - "loss": 0.8735, - "step": 43680 - }, - { - "epoch": 4.91, - "learning_rate": 8.472928493271082e-08, - "loss": 0.883, - "step": 43700 - }, - { - "epoch": 4.91, - "learning_rate": 8.054690008940657e-08, - "loss": 0.8736, - "step": 43720 - }, - { - "epoch": 4.91, - "learning_rate": 7.647028609459317e-08, - "loss": 0.8778, - "step": 43740 - }, - { - "epoch": 4.92, - "learning_rate": 7.249945158592231e-08, - "loss": 0.8444, - "step": 43760 - }, - { - "epoch": 4.92, - "learning_rate": 6.863440497691942e-08, - "loss": 0.8991, - "step": 43780 - }, - { - "epoch": 4.92, - "learning_rate": 6.487515445696146e-08, - "loss": 0.8613, - "step": 43800 - }, - { - "epoch": 4.92, - "learning_rate": 6.122170799126581e-08, - "loss": 0.8667, - "step": 43820 - }, - { - "epoch": 4.93, - "learning_rate": 5.7674073320856945e-08, - "loss": 0.886, - "step": 43840 - }, - { - "epoch": 4.93, - "learning_rate": 5.423225796257758e-08, - "loss": 0.9095, - "step": 43860 - }, - { - "epoch": 4.93, - "learning_rate": 5.089626920904422e-08, - "loss": 0.8674, - "step": 43880 - }, - { - "epoch": 4.93, - "learning_rate": 4.766611412865829e-08, - "loss": 0.9072, - "step": 43900 - }, - { - "epoch": 4.93, - "learning_rate": 4.4541799565567255e-08, - "loss": 0.8989, - "step": 43920 - }, - { - "epoch": 4.94, - "learning_rate": 4.1523332139664636e-08, - "loss": 0.885, - "step": 43940 - }, - { - "epoch": 4.94, - "learning_rate": 3.861071824656226e-08, - "loss": 0.8705, - "step": 43960 - }, - { - "epoch": 4.94, - "learning_rate": 3.5803964057606885e-08, - "loss": 0.8665, - "step": 43980 - }, - { - "epoch": 4.94, - "learning_rate": 3.310307551983027e-08, - "loss": 0.8746, - "step": 44000 - }, - { - "epoch": 4.95, - "learning_rate": 3.050805835594917e-08, - "loss": 0.8846, - "step": 44020 - }, - { - "epoch": 4.95, - "learning_rate": 2.8018918064376398e-08, - "loss": 0.8902, - "step": 44040 - }, - { - "epoch": 4.95, - "learning_rate": 2.563565991916539e-08, - "loss": 0.8422, - "step": 44060 - }, - { - "epoch": 4.95, - "learning_rate": 2.335828897004344e-08, - "loss": 0.8935, - "step": 44080 - }, - { - "epoch": 4.96, - "learning_rate": 2.1186810042372885e-08, - "loss": 0.8708, - "step": 44100 - }, - { - "epoch": 4.96, - "learning_rate": 1.912122773715108e-08, - "loss": 0.8663, - "step": 44120 - }, - { - "epoch": 4.96, - "learning_rate": 1.7161546430988217e-08, - "loss": 0.8972, - "step": 44140 - }, - { - "epoch": 4.96, - "learning_rate": 1.5307770276123956e-08, - "loss": 0.8774, - "step": 44160 - }, - { - "epoch": 4.96, - "learning_rate": 1.3559903200394131e-08, - "loss": 0.9105, - "step": 44180 - }, - { - "epoch": 4.97, - "learning_rate": 1.1917948907225196e-08, - "loss": 0.8849, - "step": 44200 - }, - { - "epoch": 4.97, - "learning_rate": 1.038191087565088e-08, - "loss": 0.8716, - "step": 44220 - }, - { - "epoch": 4.97, - "learning_rate": 8.951792360267775e-09, - "loss": 0.8811, - "step": 44240 - }, - { - "epoch": 4.97, - "learning_rate": 7.62759639125199e-09, - "loss": 0.8778, - "step": 44260 - }, - { - "epoch": 4.98, - "learning_rate": 6.409325774359154e-09, - "loss": 0.8776, - "step": 44280 - }, - { - "epoch": 4.98, - "learning_rate": 5.296983090891106e-09, - "loss": 0.8683, - "step": 44300 - }, - { - "epoch": 4.98, - "learning_rate": 4.2905706977181e-09, - "loss": 0.8893, - "step": 44320 - }, - { - "epoch": 4.98, - "learning_rate": 3.3900907272510497e-09, - "loss": 0.8707, - "step": 44340 - }, - { - "epoch": 4.98, - "learning_rate": 2.5955450874581845e-09, - "loss": 0.879, - "step": 44360 - }, - { - "epoch": 4.99, - "learning_rate": 1.90693546184284e-09, - "loss": 0.857, - "step": 44380 - }, - { - "epoch": 4.99, - "learning_rate": 1.3242633094545654e-09, - "loss": 0.8935, - "step": 44400 - }, - { - "epoch": 4.99, - "learning_rate": 8.475298648835672e-10, - "loss": 0.9124, - "step": 44420 - }, - { - "epoch": 4.99, - "learning_rate": 4.767361382329583e-10, - "loss": 0.9007, - "step": 44440 - }, - { - "epoch": 5.0, - "learning_rate": 2.1188291516316404e-10, - "loss": 0.8689, - "step": 44460 - }, - { - "epoch": 5.0, - "learning_rate": 5.297075684751463e-11, - "loss": 0.8644, - "step": 44480 - }, - { - "epoch": 5.0, - "learning_rate": 0.0, - "loss": 0.8527, - "step": 44500 - }, - { - "epoch": 5.0, - "step": 44500, - "total_flos": 1.4745575028791706e+19, - "train_loss": 0.9734652058462079, - "train_runtime": 41271.4588, - "train_samples_per_second": 17.251, - "train_steps_per_second": 1.078 - } - ], - "max_steps": 44500, - "num_train_epochs": 5, - "total_flos": 1.4745575028791706e+19, - "trial_name": null, - "trial_params": null -}