{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 39.92356514024432,
  "global_step": 585000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.07, "learning_rate": 1e-05, "loss": 8.7125, "step": 1000},
    {"epoch": 0.14, "learning_rate": 2e-05, "loss": 4.7856, "step": 2000},
    {"epoch": 0.2, "learning_rate": 3e-05, "loss": 3.6782, "step": 3000},
    {"epoch": 0.27, "learning_rate": 4e-05, "loss": 3.2033, "step": 4000},
    {"epoch": 0.34, "learning_rate": 5e-05, "loss": 3.0057, "step": 5000},
    {"epoch": 0.41, "learning_rate": 6e-05, "loss": 2.8619, "step": 6000},
    {"epoch": 0.48, "learning_rate": 7e-05, "loss": 2.7523, "step": 7000},
    {"epoch": 0.55, "learning_rate": 8e-05, "loss": 2.6541, "step": 8000},
    {"epoch": 0.61, "learning_rate": 9e-05, "loss": 2.6102, "step": 9000},
    {"epoch": 0.68, "learning_rate": 0.0001, "loss": 2.5355, "step": 10000},
    {"epoch": 0.75, "learning_rate": 9.98989898989899e-05, "loss": 2.465, "step": 11000},
    {"epoch": 0.82, "learning_rate": 9.97979797979798e-05, "loss": 2.4072, "step": 12000},
    {"epoch": 0.89, "learning_rate": 9.96969696969697e-05, "loss": 2.361, "step": 13000},
    {"epoch": 0.96, "learning_rate": 9.95959595959596e-05, "loss": 2.3297, "step": 14000},
    {"epoch": 1.02, "learning_rate": 9.94949494949495e-05, "loss": 2.3137, "step": 15000},
    {"epoch": 1.09, "learning_rate": 9.939393939393939e-05, "loss": 2.2747, "step": 16000},
    {"epoch": 1.16, "learning_rate": 9.92929292929293e-05, "loss": 2.2714, "step": 17000},
    {"epoch": 1.23, "learning_rate": 9.919191919191919e-05, "loss": 2.2657, "step": 18000},
    {"epoch": 1.3, "learning_rate": 9.909090909090911e-05, "loss": 2.2708, "step": 19000},
    {"epoch": 1.36, "learning_rate": 9.8989898989899e-05, "loss": 2.2574, "step": 20000},
    {"epoch": 1.43, "learning_rate": 9.888888888888889e-05, "loss": 2.246, "step": 21000},
    {"epoch": 1.5, "learning_rate": 9.87878787878788e-05, "loss": 2.2485, "step": 22000},
    {"epoch": 1.57, "learning_rate": 9.868686868686869e-05, "loss": 2.2516, "step": 23000},
    {"epoch": 1.64, "learning_rate": 9.85858585858586e-05, "loss": 2.235, "step": 24000},
    {"epoch": 1.71, "learning_rate": 9.848484848484849e-05, "loss": 2.2303, "step": 25000},
    {"epoch": 1.77, "learning_rate": 9.838383838383838e-05, "loss": 2.2393, "step": 26000},
    {"epoch": 1.84, "learning_rate": 9.828282828282829e-05, "loss": 2.2356, "step": 27000},
    {"epoch": 1.91, "learning_rate": 9.818181818181818e-05, "loss": 2.2279, "step": 28000},
    {"epoch": 1.98, "learning_rate": 9.808080808080809e-05, "loss": 2.2222, "step": 29000},
    {"epoch": 2.05, "learning_rate": 9.797979797979798e-05, "loss": 2.2134, "step": 30000},
    {"epoch": 2.12, "learning_rate": 9.787878787878789e-05, "loss": 2.2069, "step": 31000},
    {"epoch": 2.18, "learning_rate": 9.777777777777778e-05, "loss": 2.2067, "step": 32000},
    {"epoch": 2.25, "learning_rate": 9.767676767676767e-05, "loss": 2.1958, "step": 33000},
    {"epoch": 2.32, "learning_rate": 9.757575757575758e-05, "loss": 2.1923, "step": 34000},
    {"epoch": 2.39, "learning_rate": 9.747474747474747e-05, "loss": 2.2106, "step": 35000},
    {"epoch": 2.46, "learning_rate": 9.737373737373738e-05, "loss": 2.2019, "step": 36000},
    {"epoch": 2.53, "learning_rate": 9.727272727272728e-05, "loss": 2.1868, "step": 37000},
    {"epoch": 2.59, "learning_rate": 9.717171717171718e-05, "loss": 2.1874, "step": 38000},
    {"epoch": 2.66, "learning_rate": 9.707070707070708e-05, "loss": 2.1788, "step": 39000},
    {"epoch": 2.73, "learning_rate": 9.696969696969698e-05, "loss": 2.1841, "step": 40000},
    {"epoch": 2.8, "learning_rate": 9.686868686868688e-05, "loss": 2.191, "step": 41000},
    {"epoch": 2.87, "learning_rate": 9.676767676767677e-05, "loss": 2.1849, "step": 42000},
    {"epoch": 2.93, "learning_rate": 9.666666666666667e-05, "loss": 2.1881, "step": 43000},
    {"epoch": 3.0, "learning_rate": 9.656565656565657e-05, "loss": 2.1755, "step": 44000},
    {"epoch": 3.07, "learning_rate": 9.646464646464647e-05, "loss": 2.1639, "step": 45000},
    {"epoch": 3.14, "learning_rate": 9.636363636363637e-05, "loss": 2.1548, "step": 46000},
    {"epoch": 3.21, "learning_rate": 9.626262626262627e-05, "loss": 2.1594, "step": 47000},
    {"epoch": 3.28, "learning_rate": 9.616161616161616e-05, "loss": 2.164, "step": 48000},
    {"epoch": 3.34, "learning_rate": 9.606060606060606e-05, "loss": 2.1616, "step": 49000},
    {"epoch": 3.41, "learning_rate": 9.595959595959596e-05, "loss": 2.151, "step": 50000},
    {"epoch": 3.48, "learning_rate": 9.585858585858586e-05, "loss": 2.1597, "step": 51000},
    {"epoch": 3.55, "learning_rate": 9.575757575757576e-05, "loss": 2.1545, "step": 52000},
    {"epoch": 3.62, "learning_rate": 9.565656565656566e-05, "loss": 2.1496, "step": 53000},
    {"epoch": 3.69, "learning_rate": 9.555555555555557e-05, "loss": 2.1531, "step": 54000},
    {"epoch": 3.75, "learning_rate": 9.545454545454546e-05, "loss": 2.1487, "step": 55000},
    {"epoch": 3.82, "learning_rate": 9.535353535353537e-05, "loss": 2.1546, "step": 56000},
    {"epoch": 3.89, "learning_rate": 9.525252525252526e-05, "loss": 2.1565, "step": 57000},
    {"epoch": 3.96, "learning_rate": 9.515151515151515e-05, "loss": 2.15, "step": 58000},
    {"epoch": 4.03, "learning_rate": 9.505050505050506e-05, "loss": 2.1505, "step": 59000},
    {"epoch": 4.09, "learning_rate": 9.494949494949495e-05, "loss": 2.1334, "step": 60000},
    {"epoch": 4.16, "learning_rate": 9.484848484848486e-05, "loss": 2.1271, "step": 61000},
    {"epoch": 4.23, "learning_rate": 9.474747474747475e-05, "loss": 2.1305, "step": 62000},
    {"epoch": 4.3, "learning_rate": 9.464646464646464e-05, "loss": 2.1287, "step": 63000},
    {"epoch": 4.37, "learning_rate": 9.454545454545455e-05, "loss": 2.1251, "step": 64000},
    {"epoch": 4.44, "learning_rate": 9.444444444444444e-05, "loss": 2.1274, "step": 65000},
    {"epoch": 4.5, "learning_rate": 9.434343434343435e-05, "loss": 2.1326, "step": 66000},
    {"epoch": 4.57, "learning_rate": 9.424242424242424e-05, "loss": 2.1264, "step": 67000},
    {"epoch": 4.64, "learning_rate": 9.414141414141415e-05, "loss": 2.1338, "step": 68000},
    {"epoch": 4.71, "learning_rate": 9.404040404040404e-05, "loss": 2.1246, "step": 69000},
    {"epoch": 4.78, "learning_rate": 9.393939393939395e-05, "loss": 2.1268, "step": 70000},
    {"epoch": 4.85, "learning_rate": 9.383838383838385e-05, "loss": 2.1307, "step": 71000},
    {"epoch": 4.91, "learning_rate": 9.373737373737375e-05, "loss": 2.1208, "step": 72000},
    {"epoch": 4.98, "learning_rate": 9.363636363636364e-05, "loss": 2.1299, "step": 73000},
    {"epoch": 5.05, "learning_rate": 9.353535353535354e-05, "loss": 2.12, "step": 74000},
    {"epoch": 5.12, "learning_rate": 9.343434343434344e-05, "loss": 2.11, "step": 75000},
    {"epoch": 5.19, "learning_rate": 9.333333333333334e-05, "loss": 2.1066, "step": 76000},
    {"epoch": 5.25, "learning_rate": 9.323232323232324e-05, "loss": 2.1105, "step": 77000},
    {"epoch": 5.32, "learning_rate": 9.313131313131314e-05, "loss": 2.1213, "step": 78000},
    {"epoch": 5.39, "learning_rate": 9.303030303030303e-05, "loss": 2.1217, "step": 79000},
    {"epoch": 5.46, "learning_rate": 9.292929292929293e-05, "loss": 2.1217, "step": 80000},
    {"epoch": 5.53, "learning_rate": 9.282828282828283e-05, "loss": 2.1148, "step": 81000},
    {"epoch": 5.6, "learning_rate": 9.272727272727273e-05, "loss": 2.1155, "step": 82000},
    {"epoch": 5.66, "learning_rate": 9.262626262626263e-05, "loss": 2.1148, "step": 83000},
    {"epoch": 5.73, "learning_rate": 9.252525252525253e-05, "loss": 2.122, "step": 84000},
    {"epoch": 5.8, "learning_rate": 9.242424242424242e-05, "loss": 2.124, "step": 85000},
    {"epoch": 5.87, "learning_rate": 9.232323232323232e-05, "loss": 2.1017, "step": 86000},
    {"epoch": 5.94, "learning_rate": 9.222222222222223e-05, "loss": 2.109, "step": 87000},
    {"epoch": 6.01, "learning_rate": 9.212121212121214e-05, "loss": 2.1146, "step": 88000},
    {"epoch": 6.07, "learning_rate": 9.202020202020203e-05, "loss": 2.0845, "step": 89000},
    {"epoch": 6.14, "learning_rate": 9.191919191919192e-05, "loss": 2.0992, "step": 90000},
    {"epoch": 6.21, "learning_rate": 9.181818181818183e-05, "loss": 2.0965, "step": 91000},
    {"epoch": 6.28, "learning_rate": 9.171717171717172e-05, "loss": 2.0959, "step": 92000},
    {"epoch": 6.35, "learning_rate": 9.161616161616163e-05, "loss": 2.1125, "step": 93000},
    {"epoch": 6.42, "learning_rate": 9.151515151515152e-05, "loss": 2.1012, "step": 94000},
    {"epoch": 6.48, "learning_rate": 9.141414141414141e-05, "loss": 2.0981, "step": 95000},
    {"epoch": 6.55, "learning_rate": 9.131313131313132e-05, "loss": 2.1021, "step": 96000},
    {"epoch": 6.62, "learning_rate": 9.121212121212121e-05, "loss": 2.0956, "step": 97000},
    {"epoch": 6.69, "learning_rate": 9.111111111111112e-05, "loss": 2.0971, "step": 98000},
    {"epoch": 6.76, "learning_rate": 9.101010101010101e-05, "loss": 2.0998, "step": 99000},
    {"epoch": 6.82, "learning_rate": 9.090909090909092e-05, "loss": 2.0961, "step": 100000},
    {"epoch": 6.89, "learning_rate": 9.080808080808081e-05, "loss": 2.1015, "step": 101000},
    {"epoch": 6.96, "learning_rate": 9.07070707070707e-05, "loss": 2.1087, "step": 102000},
    {"epoch": 7.03, "learning_rate": 9.060606060606061e-05, "loss": 2.086, "step": 103000},
    {"epoch": 7.1, "learning_rate": 9.050505050505052e-05, "loss": 2.0822, "step": 104000},
    {"epoch": 7.17, "learning_rate": 9.040404040404041e-05, "loss": 2.0788, "step": 105000},
    {"epoch": 7.23, "learning_rate": 9.030303030303031e-05, "loss": 2.0808, "step": 106000},
    {"epoch": 7.3, "learning_rate": 9.02020202020202e-05, "loss": 2.0758, "step": 107000},
    {"epoch": 7.37, "learning_rate": 9.010101010101011e-05, "loss": 2.0863, "step": 108000},
    {"epoch": 7.44, "learning_rate": 9e-05, "loss": 2.0818, "step": 109000},
    {"epoch": 7.51, "learning_rate": 8.98989898989899e-05, "loss": 2.081, "step": 110000},
    {"epoch": 7.58, "learning_rate": 8.97979797979798e-05, "loss": 2.0777, "step": 111000},
    {"epoch": 7.64, "learning_rate": 8.96969696969697e-05, "loss": 2.0833, "step": 112000},
    {"epoch": 7.71, "learning_rate": 8.95959595959596e-05, "loss": 2.0804, "step": 113000},
    {"epoch": 7.78, "learning_rate": 8.94949494949495e-05, "loss": 2.083, "step": 114000},
    {"epoch": 7.85, "learning_rate": 8.93939393939394e-05, "loss": 2.0699, "step": 115000},
    {"epoch": 7.92, "learning_rate": 8.92929292929293e-05, "loss": 2.078, "step": 116000},
    {"epoch": 7.98, "learning_rate": 8.919191919191919e-05, "loss": 2.085, "step": 117000},
    {"epoch": 8.05, "learning_rate": 8.90909090909091e-05, "loss": 2.0635, "step": 118000},
    {"epoch": 8.12, "learning_rate": 8.898989898989899e-05, "loss": 2.0554, "step": 119000},
    {"epoch": 8.19, "learning_rate": 8.888888888888889e-05, "loss": 2.0632, "step": 120000},
    {"epoch": 8.26, "learning_rate": 8.87878787878788e-05, "loss": 2.0563, "step": 121000},
    {"epoch": 8.33, "learning_rate": 8.868686868686869e-05, "loss": 2.0661, "step": 122000},
    {"epoch": 8.39, "learning_rate": 8.85858585858586e-05, "loss": 2.0752, "step": 123000},
    {"epoch": 8.46, "learning_rate": 8.848484848484849e-05, "loss": 2.0623, "step": 124000},
    {"epoch": 8.53, "learning_rate": 8.83838383838384e-05, "loss": 2.0757, "step": 125000},
    {"epoch": 8.6, "learning_rate": 8.828282828282829e-05, "loss": 2.0676, "step": 126000},
    {"epoch": 8.67, "learning_rate": 8.818181818181818e-05, "loss": 2.0663, "step": 127000},
    {"epoch": 8.74, "learning_rate": 8.808080808080809e-05, "loss": 2.0558, "step": 128000},
    {"epoch": 8.8, "learning_rate": 8.797979797979798e-05, "loss": 2.0652, "step": 129000},
    {"epoch": 8.87, "learning_rate": 8.787878787878789e-05, "loss": 2.0695, "step": 130000},
    {"epoch": 8.94, "learning_rate": 8.777777777777778e-05, "loss": 2.0697, "step": 131000},
    {"epoch": 9.01, "learning_rate": 8.767676767676767e-05, "loss": 2.0678, "step": 132000},
    {"epoch": 9.08, "learning_rate": 8.757575757575758e-05, "loss": 2.0623, "step": 133000},
    {"epoch": 9.14, "learning_rate": 8.747474747474747e-05, "loss": 2.0422, "step": 134000},
    {"epoch": 9.21, "learning_rate": 8.737373737373738e-05, "loss": 2.0517, "step": 135000},
    {"epoch": 9.28, "learning_rate": 8.727272727272727e-05, "loss": 2.049, "step": 136000},
    {"epoch": 9.35, "learning_rate": 8.717171717171718e-05, "loss": 2.0583, "step": 137000},
    {"epoch": 9.42, "learning_rate": 8.707070707070707e-05, "loss": 2.0535, "step": 138000},
    {"epoch": 9.49, "learning_rate": 8.696969696969698e-05, "loss": 2.0547, "step": 139000},
    {"epoch": 9.55, "learning_rate": 8.686868686868688e-05, "loss": 2.0537, "step": 140000},
    {"epoch": 9.62, "learning_rate": 8.676767676767678e-05, "loss": 2.0546, "step": 141000},
    {"epoch": 9.69, "learning_rate": 8.666666666666667e-05, "loss": 2.047, "step": 142000},
    {"epoch": 9.76, "learning_rate": 8.656565656565657e-05, "loss": 2.0523, "step": 143000},
    {"epoch": 9.83, "learning_rate": 8.646464646464647e-05, "loss": 2.0412, "step": 144000},
    {"epoch": 9.9, "learning_rate": 8.636363636363637e-05, "loss": 2.0561, "step": 145000},
    {"epoch": 9.96, "learning_rate": 8.626262626262627e-05, "loss": 2.0521, "step": 146000},
    {"epoch": 10.03, "learning_rate": 8.616161616161616e-05, "loss": 2.0443, "step": 147000},
    {"epoch": 10.1, "learning_rate": 8.606060606060606e-05, "loss": 2.0359, "step": 148000},
    {"epoch": 10.17, "learning_rate": 8.595959595959596e-05, "loss": 2.0426, "step": 149000},
    {"epoch": 10.24, "learning_rate": 8.585858585858586e-05, "loss": 2.032, "step": 150000},
    {"epoch": 10.31, "learning_rate": 8.575757575757576e-05, "loss": 2.0389, "step": 151000},
    {"epoch": 10.37, "learning_rate": 8.565656565656566e-05, "loss": 2.0431, "step": 152000},
    {"epoch": 10.44, "learning_rate": 8.555555555555556e-05, "loss": 2.0374, "step": 153000},
    {"epoch": 10.51, "learning_rate": 8.545454545454545e-05, "loss": 2.0466, "step": 154000},
    {"epoch": 10.58, "learning_rate": 8.535353535353535e-05, "loss": 2.0332, "step": 155000},
    {"epoch": 10.65, "learning_rate": 8.525252525252526e-05, "loss": 2.0467, "step": 156000},
    {"epoch": 10.71, "learning_rate": 8.515151515151515e-05, "loss": 2.0389, "step": 157000},
    {"epoch": 10.78, "learning_rate": 8.505050505050506e-05, "loss": 2.0432, "step": 158000},
    {"epoch": 10.85, "learning_rate": 8.494949494949495e-05, "loss": 2.0386, "step": 159000},
    {"epoch": 10.92, "learning_rate": 8.484848484848486e-05, "loss": 2.0393, "step": 160000},
    {"epoch": 10.99, "learning_rate": 8.474747474747475e-05, "loss": 2.0373, "step": 161000},
    {"epoch": 11.06, "learning_rate": 8.464646464646466e-05, "loss": 2.0287, "step": 162000},
    {"epoch": 11.12, "learning_rate": 8.454545454545455e-05, "loss": 2.0278, "step": 163000},
    {"epoch": 11.19, "learning_rate": 8.444444444444444e-05, "loss": 2.0278, "step": 164000},
    {"epoch": 11.26, "learning_rate": 8.434343434343435e-05, "loss": 2.0263, "step": 165000},
    {"epoch": 11.33, "learning_rate": 8.424242424242424e-05, "loss": 2.029, "step": 166000},
    {"epoch": 11.4, "learning_rate": 8.414141414141415e-05, "loss": 2.0313, "step": 167000},
    {"epoch": 11.47, "learning_rate": 8.404040404040404e-05, "loss": 2.0263, "step": 168000},
    {"epoch": 11.53, "learning_rate": 8.393939393939393e-05, "loss": 2.0403, "step": 169000},
    {"epoch": 11.6, "learning_rate": 8.383838383838384e-05, "loss": 2.0248, "step": 170000},
    {"epoch": 11.67, "learning_rate": 8.373737373737373e-05, "loss": 2.0308, "step": 171000},
    {"epoch": 11.74, "learning_rate": 8.363636363636364e-05, "loss": 2.0331, "step": 172000},
    {"epoch": 11.81, "learning_rate": 8.353535353535355e-05, "loss": 2.0219, "step": 173000},
    {"epoch": 11.87, "learning_rate": 8.343434343434344e-05, "loss": 2.0305, "step": 174000},
    {"epoch": 11.94, "learning_rate": 8.333333333333334e-05, "loss": 2.0285, "step": 175000},
    {"epoch": 12.01, "learning_rate": 8.323232323232324e-05, "loss": 2.0276, "step": 176000},
    {"epoch": 12.08, "learning_rate": 8.313131313131314e-05, "loss": 2.0082, "step": 177000},
    {"epoch": 12.15, "learning_rate": 8.303030303030304e-05, "loss": 2.0207, "step": 178000},
    {"epoch": 12.22, "learning_rate": 8.292929292929293e-05, "loss": 2.0116, "step": 179000},
    {"epoch": 12.28, "learning_rate": 8.282828282828283e-05, "loss": 2.017, "step": 180000},
    {"epoch": 12.35, "learning_rate": 8.272727272727273e-05, "loss": 2.0176, "step": 181000},
    {"epoch": 12.42, "learning_rate": 8.262626262626263e-05, "loss": 2.0143, "step": 182000},
    {"epoch": 12.49, "learning_rate": 8.252525252525253e-05, "loss": 2.0222, "step": 183000},
    {"epoch": 12.56, "learning_rate": 8.242424242424243e-05, "loss": 2.0214, "step": 184000},
    {"epoch": 12.63, "learning_rate": 8.232323232323233e-05, "loss": 2.0192, "step": 185000},
    {"epoch": 12.69, "learning_rate": 8.222222222222222e-05, "loss": 2.0139, "step": 186000},
    {"epoch": 12.76, "learning_rate": 8.212121212121212e-05, "loss": 2.0328, "step": 187000},
    {"epoch": 12.83, "learning_rate": 8.202020202020202e-05, "loss": 2.0146, "step": 188000},
    {"epoch": 12.9, "learning_rate": 8.191919191919192e-05, "loss": 2.0265, "step": 189000},
    {"epoch": 12.97, "learning_rate": 8.181818181818183e-05, "loss": 2.0229, "step": 190000},
    {"epoch": 13.03, "learning_rate": 8.171717171717172e-05, "loss": 2.0149, "step": 191000},
    {"epoch": 13.1, "learning_rate": 8.161616161616163e-05, "loss": 1.998, "step": 192000},
    {"epoch": 13.17, "learning_rate": 8.151515151515152e-05, "loss": 2.0079, "step": 193000},
    {"epoch": 13.24, "learning_rate": 8.141414141414141e-05, "loss": 2.0109, "step": 194000},
    {"epoch": 13.31, "learning_rate": 8.131313131313132e-05, "loss": 2.0047, "step": 195000},
    {"epoch": 13.38, "learning_rate": 8.121212121212121e-05, "loss": 2.0052, "step": 196000},
    {"epoch": 13.44, "learning_rate": 8.111111111111112e-05, "loss": 2.0011, "step": 197000},
    {"epoch": 13.51, "learning_rate": 8.101010101010101e-05, "loss": 2.0056, "step": 198000},
    {"epoch": 13.58, "learning_rate": 8.090909090909092e-05, "loss": 2.0019, "step": 199000},
    {"epoch": 13.65, "learning_rate": 8.080808080808081e-05, "loss": 2.0061, "step": 200000},
    {"epoch": 13.72, "learning_rate": 8.07070707070707e-05, "loss": 2.0082, "step": 201000},
    {"epoch": 13.79, "learning_rate": 8.060606060606061e-05, "loss": 2.0041, "step": 202000},
    {"epoch": 13.85, "learning_rate": 8.05050505050505e-05, "loss": 2.0162, "step": 203000},
    {"epoch": 13.92, "learning_rate": 8.040404040404041e-05, "loss": 2.0059, "step": 204000},
    {"epoch": 13.99, "learning_rate": 8.03030303030303e-05, "loss": 2.0168, "step": 205000},
    {"epoch": 14.06, "learning_rate": 8.02020202020202e-05, "loss": 1.9914, "step": 206000},
    {"epoch": 14.13, "learning_rate": 8.010101010101011e-05, "loss": 1.9906, "step": 207000},
    {"epoch": 14.2, "learning_rate": 8e-05, "loss": 1.9841, "step": 208000},
    {"epoch": 14.26, "learning_rate": 7.989898989898991e-05, "loss": 2.0038, "step": 209000},
    {"epoch": 14.33, "learning_rate": 7.97979797979798e-05, "loss": 1.9909, "step": 210000},
    {"epoch": 14.4, "learning_rate": 7.96969696969697e-05, "loss": 1.9961, "step": 211000},
    {"epoch": 14.47, "learning_rate": 7.95959595959596e-05, "loss": 1.9962, "step": 212000},
    {"epoch": 14.54, "learning_rate": 7.94949494949495e-05, "loss": 2.003, "step": 213000},
    {"epoch": 14.6, "learning_rate": 7.93939393939394e-05, "loss": 1.9987, "step": 214000},
    {"epoch": 14.67, "learning_rate": 7.92929292929293e-05, "loss": 2.004, "step": 215000},
    {"epoch": 14.74, "learning_rate": 7.919191919191919e-05, "loss": 1.9994, "step": 216000},
    {"epoch": 14.81, "learning_rate": 7.90909090909091e-05, "loss": 2.001, "step": 217000},
    {"epoch": 14.88, "learning_rate": 7.898989898989899e-05, "loss": 1.998, "step": 218000},
    {"epoch": 14.95, "learning_rate": 7.88888888888889e-05, "loss": 2.0076, "step": 219000},
    {"epoch": 15.01, "learning_rate": 7.878787878787879e-05, "loss": 2.0047, "step": 220000},
    {"epoch": 15.08, "learning_rate": 7.868686868686869e-05, "loss": 1.9923, "step": 221000},
    {"epoch": 15.15, "learning_rate": 7.858585858585859e-05, "loss": 1.9828, "step": 222000},
    {"epoch": 15.22, "learning_rate": 7.848484848484848e-05, "loss": 1.9744, "step": 223000},
    {"epoch": 15.29, "learning_rate": 7.83838383838384e-05, "loss": 1.9851, "step": 224000},
    {"epoch": 15.36, "learning_rate": 7.828282828282829e-05, "loss": 1.9855, "step": 225000},
    {"epoch": 15.42, "learning_rate": 7.818181818181818e-05, "loss": 1.994, "step": 226000},
    {"epoch": 15.49, "learning_rate": 7.808080808080809e-05, "loss": 1.9845, "step": 227000},
    {"epoch": 15.56, "learning_rate": 7.797979797979798e-05, "loss": 1.9871, "step": 228000},
    {"epoch": 15.63, "learning_rate": 7.787878787878789e-05, "loss": 1.9895, "step": 229000},
    {"epoch": 15.7, "learning_rate": 7.777777777777778e-05, "loss": 2.0003, "step": 230000},
    {"epoch": 15.76, "learning_rate": 7.767676767676769e-05, "loss": 2.0049, "step": 231000},
    {"epoch": 15.83, "learning_rate": 7.757575757575758e-05, "loss": 1.9929, "step": 232000},
    {"epoch": 15.9, "learning_rate": 7.747474747474747e-05, "loss": 1.9928, "step": 233000},
    {"epoch": 15.97, "learning_rate": 7.737373737373738e-05, "loss": 1.9938, "step": 234000},
    {"epoch": 16.04, "learning_rate": 7.727272727272727e-05, "loss": 1.9929, "step": 235000},
    {"epoch": 16.11, "learning_rate": 7.717171717171718e-05, "loss": 1.9688, "step": 236000},
    {"epoch": 16.17, "learning_rate": 7.707070707070707e-05, "loss": 1.9788, "step": 237000},
    {"epoch": 16.24, "learning_rate": 7.696969696969696e-05, "loss": 1.9745, "step": 238000},
    {"epoch": 16.31, "learning_rate": 7.686868686868687e-05, "loss": 1.9982, "step": 239000},
    {"epoch": 16.38, "learning_rate": 7.676767676767676e-05, "loss": 1.9925, "step": 240000},
    {"epoch": 16.45, "learning_rate": 7.666666666666667e-05, "loss": 1.9876, "step": 241000},
    {"epoch": 16.52, "learning_rate": 7.656565656565658e-05, "loss": 1.9761, "step": 242000},
    {"epoch": 16.58, "learning_rate": 7.646464646464647e-05, "loss": 1.9833, "step": 243000},
    {"epoch": 16.65, "learning_rate": 7.636363636363637e-05, "loss": 1.9863, "step": 244000},
    {"epoch": 16.72, "learning_rate": 7.626262626262627e-05, "loss": 1.9841, "step": 245000},
    {"epoch": 16.79, "learning_rate": 7.616161616161617e-05, "loss": 1.9776, "step": 246000},
    {"epoch": 16.86, "learning_rate": 7.606060606060607e-05, "loss": 1.9947, "step": 247000},
    {"epoch": 16.92, "learning_rate": 7.595959595959596e-05, "loss": 1.9729, "step": 248000},
    {"epoch": 16.99, "learning_rate": 7.585858585858586e-05, "loss": 1.9813, "step": 249000},
    {"epoch": 17.06, "learning_rate": 7.575757575757576e-05, "loss": 1.9689, "step": 250000},
    {"epoch": 17.13, "learning_rate": 7.565656565656566e-05, "loss": 1.9626, "step": 251000},
    {"epoch": 17.2, "learning_rate": 7.555555555555556e-05, "loss": 1.9661, "step": 252000},
    {"epoch": 17.27, "learning_rate": 7.545454545454545e-05, "loss": 1.9777, "step": 253000},
    {"epoch": 17.33, "learning_rate": 7.535353535353536e-05, "loss": 1.9788, "step": 254000},
    {"epoch": 17.4, "learning_rate": 7.525252525252525e-05, "loss": 1.9721, "step": 255000},
    {"epoch": 17.47, "learning_rate": 7.515151515151515e-05, "loss": 1.9779, "step": 256000},
    {"epoch": 17.54, "learning_rate": 7.505050505050505e-05, "loss": 1.9763, "step": 257000},
    {"epoch": 17.61, "learning_rate": 7.494949494949495e-05, "loss": 1.9768, "step": 258000},
    {"epoch": 17.68, "learning_rate": 7.484848484848486e-05, "loss": 1.9626, "step": 259000},
    {"epoch": 17.74, "learning_rate": 7.474747474747475e-05, "loss": 1.9703, "step": 260000},
    {"epoch": 17.81, "learning_rate": 7.464646464646466e-05, "loss": 1.9747, "step": 261000},
    {"epoch": 17.88, "learning_rate": 7.454545454545455e-05, "loss": 1.9698, "step": 262000},
    {"epoch": 17.95, "learning_rate": 7.444444444444444e-05, "loss": 1.982, "step": 263000},
    {"epoch": 18.02, "learning_rate": 7.434343434343435e-05, "loss": 1.9721, "step": 264000},
    {"epoch": 18.09, "learning_rate": 7.424242424242424e-05, "loss": 1.9554, "step": 265000},
    {"epoch": 18.15, "learning_rate": 7.414141414141415e-05, "loss": 1.9615, "step": 266000},
    {"epoch": 18.22, "learning_rate": 7.404040404040404e-05, "loss": 1.9592, "step": 267000},
    {"epoch": 18.29, "learning_rate": 7.393939393939395e-05, "loss": 1.9548, "step": 268000},
    {"epoch": 18.36, "learning_rate": 7.383838383838384e-05, "loss": 1.9535, "step": 269000},
    {"epoch": 18.43, "learning_rate": 7.373737373737373e-05, "loss": 1.9606, "step": 270000},
    {"epoch": 18.49, "learning_rate": 7.363636363636364e-05, "loss": 1.9567, "step": 271000},
    {"epoch": 18.56, "learning_rate": 7.353535353535353e-05, "loss": 1.9641, "step": 272000},
    {"epoch": 18.63, "learning_rate": 7.343434343434344e-05, "loss": 1.9645, "step": 273000},
    {"epoch": 18.7, "learning_rate": 7.333333333333333e-05, "loss": 1.9537, "step": 274000},
    {"epoch": 18.77, "learning_rate": 7.323232323232324e-05, "loss": 1.9603, "step": 275000},
    {"epoch": 18.84, "learning_rate": 7.313131313131314e-05, "loss": 1.9613, "step": 276000},
    {"epoch": 18.9, "learning_rate": 7.303030303030304e-05, "loss": 1.9673, "step": 277000},
    {"epoch": 18.97, "learning_rate": 7.292929292929293e-05, "loss": 1.9685, "step": 278000},
    {"epoch": 19.04, "learning_rate": 7.282828282828284e-05, "loss": 1.953, "step": 279000},
    {"epoch": 19.11, "learning_rate": 7.272727272727273e-05, "loss": 1.9428, "step": 280000},
    {"epoch": 19.18, "learning_rate": 7.262626262626263e-05, "loss": 1.9426, "step": 281000},
    {"epoch": 19.25, "learning_rate": 7.252525252525253e-05, "loss": 1.9503, "step": 282000},
    {"epoch": 19.31, "learning_rate": 7.242424242424243e-05, "loss": 1.9517, "step": 283000},
    {"epoch": 19.38, "learning_rate": 7.232323232323233e-05, "loss": 1.9556, "step": 284000},
    {"epoch": 19.45, "learning_rate": 7.222222222222222e-05, "loss": 1.9592, "step": 285000},
    {"epoch": 19.52, "learning_rate": 7.212121212121213e-05, "loss": 1.9499, "step": 286000},
    {"epoch": 19.59, "learning_rate": 7.202020202020202e-05, "loss": 1.9604, "step": 287000},
    {"epoch": 19.65, "learning_rate": 7.191919191919192e-05, "loss": 1.9599, "step": 288000},
    {"epoch": 19.72, "learning_rate": 7.181818181818182e-05, "loss": 1.9466, "step": 289000},
    {"epoch": 19.79, "learning_rate": 7.171717171717171e-05, "loss": 1.9581, "step": 290000},
    {"epoch": 19.86, "learning_rate": 7.161616161616162e-05, "loss": 1.9576, "step": 291000},
    {"epoch": 19.93, "learning_rate": 7.151515151515152e-05, "loss": 1.9606, "step": 292000},
    {"epoch": 20.0, "learning_rate": 7.141414141414143e-05, "loss": 1.9705, "step": 293000},
    {"epoch": 20.06, "learning_rate": 7.131313131313132e-05, "loss": 1.9444, "step": 294000},
    {"epoch": 20.13, "learning_rate": 7.121212121212121e-05, "loss": 1.939, "step": 295000},
    {"epoch": 20.2, "learning_rate": 7.111111111111112e-05, "loss": 1.9402, "step": 296000},
    {"epoch": 20.27, "learning_rate": 7.101010101010101e-05, "loss": 1.9451, "step": 297000},
    {"epoch": 20.34, "learning_rate": 7.090909090909092e-05, "loss": 1.9454, "step": 298000},
    {"epoch": 20.41, "learning_rate": 7.080808080808081e-05, "loss": 1.9405, "step": 299000},
    {"epoch": 20.47, "learning_rate": 7.07070707070707e-05, "loss": 1.944, "step": 300000},
    {"epoch": 20.54, "learning_rate": 7.060606060606061e-05, "loss": 1.948, "step": 301000},
    {"epoch": 20.61, "learning_rate": 7.05050505050505e-05, "loss": 1.9418, "step": 302000},
    {"epoch": 20.68, "learning_rate": 7.040404040404041e-05, "loss": 1.9508, "step": 303000},
    {"epoch": 20.75, "learning_rate": 7.03030303030303e-05, "loss": 1.9472, "step": 304000},
    {"epoch": 20.81, "learning_rate": 7.020202020202021e-05, "loss": 1.9414, "step": 305000},
    {"epoch": 20.88, "learning_rate": 7.01010101010101e-05, "loss": 1.946, "step": 306000},
    {"epoch": 20.95, "learning_rate": 7e-05, "loss": 1.9558, "step": 307000},
    {"epoch": 21.02, "learning_rate": 6.98989898989899e-05, "loss": 1.9465, "step": 308000},
    {"epoch": 21.09, "learning_rate": 6.97979797979798e-05, "loss": 1.9275, "step": 309000},
    {"epoch": 21.16, "learning_rate": 6.96969696969697e-05, "loss": 1.9293, "step": 310000},
    {"epoch": 21.22, "learning_rate": 6.95959595959596e-05, "loss": 1.9273, "step": 311000},
    {"epoch": 21.29, "learning_rate": 6.94949494949495e-05, "loss": 1.9307, "step": 312000},
    {"epoch": 21.36, "learning_rate": 6.93939393939394e-05, "loss": 1.9341, "step": 313000},
    {"epoch": 21.43, "learning_rate": 6.92929292929293e-05, "loss": 1.9504, "step": 314000},
    {"epoch": 21.5, "learning_rate": 6.91919191919192e-05, "loss": 1.9366, "step": 315000},
    {"epoch": 21.57, "learning_rate": 6.90909090909091e-05, "loss": 1.9418, "step": 316000},
    {"epoch": 21.63, "learning_rate": 6.898989898989899e-05, "loss": 1.9473, "step": 317000},
    {"epoch": 21.7, "learning_rate": 6.88888888888889e-05, "loss": 1.9403, "step": 318000},
    {"epoch": 21.77, "learning_rate": 6.878787878787879e-05, "loss": 1.9409, "step": 319000},
    {"epoch": 21.84, "learning_rate": 6.86868686868687e-05, "loss": 1.9383, "step": 320000},
    {"epoch": 21.91, "learning_rate": 6.858585858585859e-05, "loss": 1.9433, "step": 321000},
    {"epoch": 21.98, "learning_rate": 6.848484848484848e-05, "loss": 1.9449, "step": 322000},
    {"epoch": 22.04, "learning_rate": 6.838383838383839e-05, "loss": 1.9287, "step": 323000},
    {"epoch": 22.11, "learning_rate": 6.828282828282828e-05, "loss": 1.9251, "step": 324000},
    {"epoch": 22.18, "learning_rate": 6.818181818181818e-05, "loss": 1.943, "step": 325000},
    {"epoch": 22.25, "learning_rate": 6.808080808080809e-05, "loss": 1.9316, "step": 326000},
    {"epoch": 22.32, "learning_rate": 6.797979797979798e-05, "loss": 1.935, "step": 327000},
    {"epoch": 22.38, "learning_rate": 6.787878787878789e-05, "loss": 1.9294, "step": 328000},
    {"epoch": 22.45, "learning_rate": 6.777777777777778e-05, "loss": 1.9458, "step": 329000},
    {"epoch": 22.52, "learning_rate": 6.767676767676769e-05, "loss": 1.9334, "step": 330000},
    {"epoch": 22.59, "learning_rate": 6.757575757575758e-05, "loss": 1.9595, "step": 331000},
    {"epoch": 22.66, "learning_rate": 6.747474747474747e-05, "loss": 1.9662, "step": 332000},
    {"epoch": 22.73, "learning_rate": 6.737373737373738e-05, "loss": 1.9576, "step": 333000},
    {"epoch": 22.79, "learning_rate": 6.727272727272727e-05, "loss": 1.9432, "step": 334000},
    {"epoch": 22.86, "learning_rate": 6.717171717171718e-05, "loss": 1.933, "step": 335000},
    {"epoch": 22.93, "learning_rate": 6.707070707070707e-05, "loss": 1.948, "step": 336000},
    {"epoch": 23.0, "learning_rate": 6.696969696969696e-05, "loss": 1.9438, "step": 337000},
    {"epoch": 23.07, "learning_rate": 6.686868686868687e-05, "loss": 1.9149, "step": 338000},
    {"epoch": 23.14, "learning_rate": 6.676767676767676e-05, "loss": 1.9162, "step": 339000},
    {"epoch": 23.2, "learning_rate": 6.666666666666667e-05, "loss": 1.9097, "step": 340000},
    {"epoch": 23.27, "learning_rate": 6.656565656565656e-05, "loss": 1.9195, "step": 341000},
    {"epoch": 23.34, "learning_rate": 6.646464646464647e-05, "loss": 1.925, "step": 342000},
    {"epoch": 23.41, "learning_rate": 6.636363636363638e-05, "loss": 1.9224, "step": 343000},
    {"epoch": 23.48, "learning_rate": 6.626262626262627e-05, "loss": 1.9144, "step": 344000},
    {"epoch": 23.54, "learning_rate": 6.616161616161617e-05, "loss": 1.9156, "step": 345000},
    {"epoch": 23.61, "learning_rate": 6.606060606060607e-05, "loss": 1.9179, "step": 346000},
    {"epoch": 23.68, "learning_rate": 6.595959595959596e-05, "loss": 1.9291, "step": 347000},
    {"epoch": 23.75, "learning_rate": 6.585858585858587e-05, "loss": 1.9177, "step": 348000},
    {"epoch": 23.82, "learning_rate": 6.575757575757576e-05, "loss": 1.92, "step": 349000},
    {"epoch": 23.89, "learning_rate": 6.565656565656566e-05, "loss": 1.9278, "step": 350000},
    {"epoch": 23.95, "learning_rate": 6.555555555555556e-05, "loss": 1.927, "step": 351000},
    {"epoch": 24.02, "learning_rate": 6.545454545454546e-05, "loss": 1.9263, "step": 352000},
    {"epoch": 24.09, "learning_rate": 6.535353535353536e-05, "loss": 1.9008, "step": 353000},
    {"epoch": 24.16, "learning_rate": 6.525252525252525e-05, "loss": 1.9071, "step": 354000},
    {"epoch": 24.23, "learning_rate": 6.515151515151516e-05, "loss": 1.9203, "step": 355000},
    {"epoch": 24.3, "learning_rate": 6.505050505050505e-05, "loss": 1.903, "step": 356000},
    {"epoch": 24.36, "learning_rate": 6.494949494949495e-05, "loss": 1.907, "step": 357000},
    {"epoch": 24.43, "learning_rate": 6.484848484848485e-05, "loss": 1.9137, "step": 358000},
    {"epoch": 24.5, "learning_rate": 6.474747474747474e-05, "loss": 1.9095, "step": 359000},
    {"epoch": 24.57, "learning_rate": 6.464646464646466e-05, "loss": 1.908, "step": 360000},
    {"epoch": 24.64, "learning_rate": 6.454545454545455e-05, "loss": 1.9124, "step": 361000},
    {"epoch": 24.7, "learning_rate": 6.444444444444446e-05, "loss": 1.9199, "step": 362000},
    {"epoch": 24.77, "learning_rate": 6.434343434343435e-05, "loss": 1.9144, "step": 363000},
    {"epoch": 24.84, "learning_rate": 6.424242424242424e-05, "loss": 1.9174, "step": 364000},
    {"epoch": 24.91, "learning_rate": 6.414141414141415e-05, "loss": 1.9205, "step": 365000},
    {"epoch": 24.98, "learning_rate": 6.404040404040404e-05, "loss": 1.9046, "step": 366000},
    {"epoch": 25.05, "learning_rate": 6.393939393939395e-05, "loss": 1.9036, "step": 367000},
    {"epoch": 25.11, "learning_rate": 6.383838383838384e-05, "loss": 1.8938, "step": 368000},
    {"epoch": 25.18, "learning_rate": 6.373737373737373e-05, "loss": 1.898, "step": 369000},
    {"epoch": 25.25, "learning_rate": 6.363636363636364e-05, "loss": 1.896, "step": 370000},
    {"epoch": 25.32, "learning_rate": 6.353535353535353e-05, "loss": 1.9085, "step": 371000},
    {"epoch": 25.39, "learning_rate": 6.343434343434344e-05, "loss": 1.9156, "step": 372000},
    {"epoch": 25.46, "learning_rate": 6.333333333333333e-05, "loss": 1.9199, "step": 373000},
    {"epoch": 25.52, "learning_rate": 6.323232323232323e-05, "loss": 1.9125, "step": 374000},
    {"epoch": 25.59, "learning_rate": 6.313131313131313e-05, "loss": 1.9183, "step": 375000},
    {"epoch": 25.66, "learning_rate": 6.303030303030302e-05, "loss": 1.913, "step": 376000},
    {"epoch": 25.73, "learning_rate": 6.292929292929294e-05, "loss": 1.9045, "step": 377000},
    {"epoch": 25.8, "learning_rate": 6.282828282828284e-05, "loss": 1.909, "step": 378000},
    {"epoch": 25.87, "learning_rate": 6.272727272727273e-05, "loss": 1.9037, "step": 379000},
    {"epoch": 25.93, "learning_rate": 6.262626262626264e-05, "loss": 1.9099, "step": 380000},
    {"epoch": 26.0, "learning_rate": 6.252525252525253e-05, "loss": 1.9088, "step": 381000},
    {"epoch": 26.07, "learning_rate": 6.242424242424243e-05, "loss": 1.8777, "step": 382000},
    {"epoch": 26.14, "learning_rate": 6.232323232323233e-05, "loss": 1.8821, "step": 383000},
    {"epoch": 26.21, "learning_rate": 6.222222222222222e-05, "loss": 1.8967, "step": 384000},
    {"epoch": 26.27, "learning_rate": 6.212121212121213e-05, "loss": 1.8927, "step": 385000},
    {"epoch": 26.34, "learning_rate": 6.202020202020202e-05, "loss": 1.8913, "step": 386000},
    {"epoch": 26.41, "learning_rate": 6.191919191919192e-05, "loss": 1.8924, "step": 387000},
    {"epoch": 26.48, "learning_rate": 6.181818181818182e-05, "loss": 1.8934, "step": 388000},
    {"epoch": 26.55, "learning_rate": 6.171717171717172e-05, "loss": 1.8967, "step": 389000},
    {"epoch": 26.62, "learning_rate": 6.161616161616162e-05, "loss": 1.8984, "step": 390000},
    {"epoch": 26.68, "learning_rate": 6.151515151515151e-05, "loss": 1.896, "step": 391000},
    {"epoch": 26.75, "learning_rate": 6.141414141414142e-05, "loss": 1.9051, "step": 392000},
    {"epoch": 26.82, "learning_rate": 6.131313131313131e-05, "loss": 1.9027, "step": 393000},
    {"epoch": 26.89, "learning_rate": 6.121212121212121e-05, "loss": 1.8934, "step": 394000},
    {"epoch": 26.96, "learning_rate": 6.111111111111112e-05, "loss": 1.9082, "step": 395000},
    {"epoch": 27.03, "learning_rate": 6.101010101010102e-05, "loss": 1.8962, "step": 396000},
    {"epoch": 27.09, "learning_rate": 6.090909090909091e-05, "loss": 1.8776, "step": 397000},
    {"epoch": 27.16, "learning_rate": 6.080808080808081e-05, "loss": 1.8818, "step": 398000},
    {"epoch": 27.23, "learning_rate": 6.070707070707071e-05, "loss": 1.8873, "step": 399000},
    {"epoch": 27.3, "learning_rate": 6.060606060606061e-05, "loss": 1.8836, "step": 400000},
    {"epoch": 27.37, "learning_rate": 6.050505050505051e-05, "loss": 1.8855, "step": 401000},
    {"epoch": 27.43, "learning_rate": 6.040404040404041e-05, "loss": 1.8858, "step": 402000},
    {"epoch": 27.5, "learning_rate": 6.03030303030303e-05, "loss": 1.8853, "step": 403000},
    {"epoch": 27.57, "learning_rate": 6.02020202020202e-05, "loss": 1.8919, "step": 404000},
    {"epoch": 27.64, "learning_rate": 6.01010101010101e-05, "loss": 1.896, "step": 405000},
    {"epoch": 27.71, "learning_rate": 6e-05, "loss": 1.896, "step": 406000},
    {"epoch": 27.78, "learning_rate": 5.98989898989899e-05, "loss": 1.8899, "step": 407000},
    {"epoch": 27.84, "learning_rate": 5.97979797979798e-05, "loss": 1.8864, "step": 408000},
    {"epoch": 27.91, "learning_rate": 5.969696969696969e-05, "loss": 1.8934, "step": 409000},
    {"epoch": 27.98, "learning_rate": 5.959595959595959e-05, "loss": 1.8943, "step": 410000},
    {"epoch": 28.05, "learning_rate": 5.949494949494949e-05, "loss": 1.8819, "step": 411000},
    {"epoch": 28.12, "learning_rate": 5.93939393939394e-05, "loss": 1.8619, "step": 412000},
    {"epoch": 28.19, "learning_rate": 5.92929292929293e-05, "loss": 1.8659, "step": 413000},
    {"epoch": 28.25, "learning_rate": 5.91919191919192e-05, "loss": 1.871, "step": 414000},
    {"epoch": 28.32, "learning_rate": 5.90909090909091e-05, "loss": 1.8764, "step": 415000},
    {"epoch": 28.39, "learning_rate": 5.8989898989898996e-05, "loss": 1.8786, "step": 416000},
    {"epoch": 28.46, "learning_rate": 5.8888888888888896e-05, "loss": 1.8813, "step": 417000},
    {"epoch": 28.53, "learning_rate": 5.878787878787879e-05, "loss": 1.8769, "step": 418000},
    {"epoch": 28.59, "learning_rate": 5.868686868686869e-05, "loss": 1.8842, "step": 419000},
    {"epoch": 28.66, "learning_rate": 5.858585858585859e-05, "loss": 1.8805, "step": 420000},
    {"epoch": 28.73, "learning_rate": 5.848484848484849e-05, "loss": 1.8778, "step": 421000},
    {"epoch": 28.8, "learning_rate": 5.8383838383838386e-05, "loss": 1.8847, "step": 422000},
    {"epoch": 28.87, "learning_rate": 5.8282828282828286e-05, "loss": 1.8862, "step": 423000},
    {"epoch": 28.94, "learning_rate": 5.818181818181818e-05, "loss": 1.8858, "step": 424000},
    {"epoch": 29.0, "learning_rate": 5.808080808080808e-05, "loss": 1.8838, "step": 425000},
    {"epoch": 29.07, "learning_rate": 5.797979797979798e-05, "loss": 1.8534, "step": 426000},
    {"epoch": 29.14, "learning_rate": 5.787878787878788e-05, "loss": 1.8667, "step": 427000},
    {"epoch": 29.21, "learning_rate": 5.7777777777777776e-05, "loss": 1.8682, "step": 428000},
    {"epoch": 29.28, "learning_rate": 5.767676767676768e-05, "loss": 1.8787, "step": 429000},
    {"epoch": 29.35, "learning_rate": 5.757575757575758e-05, "loss": 1.8761, "step": 430000},
    {"epoch": 29.41, "learning_rate": 5.747474747474748e-05, "loss": 1.88, "step": 431000},
    {"epoch": 29.48, "learning_rate": 5.737373737373738e-05, "loss": 1.8679, "step": 432000},
    {"epoch": 29.55, "learning_rate": 5.727272727272728e-05, "loss": 1.87, "step": 433000},
    {"epoch": 29.62, "learning_rate": 5.717171717171717e-05, "loss": 1.8662, "step": 434000},
    {"epoch": 29.69, "learning_rate": 5.707070707070707e-05, "loss": 1.8798, "step": 435000},
    {"epoch": 29.75, "learning_rate": 5.696969696969697e-05, "loss": 1.8779, "step": 436000},
    {"epoch": 29.82, "learning_rate": 5.686868686868687e-05, "loss": 1.8739, "step": 437000},
    {"epoch": 29.89, "learning_rate": 5.676767676767677e-05, "loss": 1.8757, "step": 438000},
    {"epoch": 29.96, "learning_rate": 5.666666666666667e-05, "loss": 1.8833, "step": 439000},
    {"epoch": 30.03, "learning_rate": 5.6565656565656563e-05, "loss": 1.8713, "step": 440000},
    {"epoch": 30.1, "learning_rate": 5.646464646464646e-05, "loss": 1.8733, "step": 441000},
    {"epoch": 30.16, "learning_rate": 5.636363636363636e-05, "loss": 1.8555, "step": 442000},
    {"epoch": 30.23, "learning_rate": 5.626262626262626e-05, "loss": 1.8639, "step": 443000},
    {"epoch": 30.3, "learning_rate": 5.616161616161616e-05, "loss": 1.8654, "step": 444000},
    {"epoch": 30.37, "learning_rate": 5.606060606060606e-05, "loss": 1.8597, "step": 445000},
    {"epoch": 30.44, "learning_rate": 5.595959595959597e-05, "loss": 1.8625, "step": 446000},
    {"epoch": 30.51, "learning_rate": 5.5858585858585867e-05, "loss": 1.8686, "step": 447000},
    {"epoch": 30.57, "learning_rate": 5.5757575757575766e-05, "loss": 1.8881, "step": 448000},
    {"epoch": 30.64, "learning_rate": 5.5656565656565666e-05, "loss": 1.9063, "step": 449000},
    {"epoch": 30.71, "learning_rate": 5.555555555555556e-05, "loss": 1.9019, "step": 450000},
    {"epoch": 30.78, "learning_rate": 5.545454545454546e-05, "loss": 1.93, "step": 451000},
    {"epoch": 30.85, "learning_rate": 5.535353535353536e-05, "loss": 1.9213, "step": 452000},
    {"epoch": 30.92, "learning_rate": 5.525252525252526e-05, "loss": 1.9301, "step": 453000},
    {"epoch": 30.98, "learning_rate": 5.5151515151515156e-05, "loss": 1.9074, "step": 454000},
    {"epoch": 31.05, "learning_rate": 5.5050505050505056e-05, "loss": 1.8867, "step": 455000},
    {"epoch": 31.12, "learning_rate": 5.494949494949495e-05, "loss": 1.8717, "step": 456000},
    {"epoch": 31.19, "learning_rate": 5.484848484848485e-05, "loss": 1.8738, "step": 457000},
    {"epoch": 31.26, "learning_rate": 5.474747474747475e-05, "loss": 1.8762, "step": 458000},
    {"epoch": 31.32, "learning_rate": 5.464646464646465e-05, "loss": 1.8793, "step": 459000},
    {"epoch": 31.39, "learning_rate": 5.4545454545454546e-05, "loss": 1.8792, "step": 460000},
    {"epoch": 31.46, "learning_rate": 5.4444444444444446e-05, "loss": 1.8754, "step": 461000},
    {"epoch": 31.53, "learning_rate": 5.434343434343434e-05, "loss": 1.889, "step": 462000},
    {"epoch": 31.6, "learning_rate": 5.424242424242425e-05, "loss": 1.888, "step": 463000},
    {"epoch": 31.67, "learning_rate": 5.414141414141415e-05, "loss": 1.8847, "step": 464000},
    {"epoch": 31.73, "learning_rate": 5.4040404040404044e-05, "loss": 1.8827, "step": 465000},
    {"epoch": 31.8, "learning_rate": 5.393939393939394e-05, "loss": 1.8847, "step": 466000},
    {"epoch": 31.87, "learning_rate": 5.383838383838384e-05, "loss": 1.8872, "step": 467000},
    {"epoch": 31.94, "learning_rate": 5.373737373737374e-05, "loss": 1.8853, "step": 468000},
    {"epoch": 32.01, "learning_rate": 5.363636363636364e-05, "loss": 1.8751, "step": 469000},
    {"epoch": 32.08, "learning_rate": 5.353535353535354e-05, "loss": 1.8578, "step": 470000},
    {"epoch": 32.14, "learning_rate": 5.3434343434343434e-05, "loss": 1.8641, "step": 471000},
    {"epoch": 32.21, "learning_rate": 5.333333333333333e-05, "loss": 1.8604, "step": 472000},
    {"epoch": 32.28, "learning_rate": 5.323232323232323e-05, "loss": 1.8562, "step": 473000},
    {"epoch": 32.35, "learning_rate": 5.313131313131313e-05, "loss": 1.8676, "step": 474000},
    {"epoch": 32.42, "learning_rate": 5.303030303030303e-05, "loss": 1.8777, "step": 475000},
    {"epoch": 32.48, "learning_rate": 5.292929292929293e-05,
| "loss": 1.871, | |
| "step": 476000 | |
| }, | |
| { | |
| "epoch": 32.55, | |
| "learning_rate": 5.2828282828282824e-05, | |
| "loss": 1.8754, | |
| "step": 477000 | |
| }, | |
| { | |
| "epoch": 32.62, | |
| "learning_rate": 5.272727272727272e-05, | |
| "loss": 1.8745, | |
| "step": 478000 | |
| }, | |
| { | |
| "epoch": 32.69, | |
| "learning_rate": 5.262626262626262e-05, | |
| "loss": 1.862, | |
| "step": 479000 | |
| }, | |
| { | |
| "epoch": 32.76, | |
| "learning_rate": 5.2525252525252536e-05, | |
| "loss": 1.8573, | |
| "step": 480000 | |
| }, | |
| { | |
| "epoch": 32.83, | |
| "learning_rate": 5.242424242424243e-05, | |
| "loss": 1.8736, | |
| "step": 481000 | |
| }, | |
| { | |
| "epoch": 32.89, | |
| "learning_rate": 5.232323232323233e-05, | |
| "loss": 1.8789, | |
| "step": 482000 | |
| }, | |
| { | |
| "epoch": 32.96, | |
| "learning_rate": 5.222222222222223e-05, | |
| "loss": 1.8713, | |
| "step": 483000 | |
| }, | |
| { | |
| "epoch": 33.03, | |
| "learning_rate": 5.212121212121213e-05, | |
| "loss": 1.8554, | |
| "step": 484000 | |
| }, | |
| { | |
| "epoch": 33.1, | |
| "learning_rate": 5.2020202020202026e-05, | |
| "loss": 1.8639, | |
| "step": 485000 | |
| }, | |
| { | |
| "epoch": 33.17, | |
| "learning_rate": 5.1919191919191926e-05, | |
| "loss": 1.8765, | |
| "step": 486000 | |
| }, | |
| { | |
| "epoch": 33.24, | |
| "learning_rate": 5.181818181818182e-05, | |
| "loss": 1.8711, | |
| "step": 487000 | |
| }, | |
| { | |
| "epoch": 33.3, | |
| "learning_rate": 5.171717171717172e-05, | |
| "loss": 1.8657, | |
| "step": 488000 | |
| }, | |
| { | |
| "epoch": 33.37, | |
| "learning_rate": 5.161616161616162e-05, | |
| "loss": 1.8547, | |
| "step": 489000 | |
| }, | |
| { | |
| "epoch": 33.44, | |
| "learning_rate": 5.151515151515152e-05, | |
| "loss": 1.8535, | |
| "step": 490000 | |
| }, | |
| { | |
| "epoch": 33.51, | |
| "learning_rate": 5.1414141414141416e-05, | |
| "loss": 1.8606, | |
| "step": 491000 | |
| }, | |
| { | |
| "epoch": 33.58, | |
| "learning_rate": 5.1313131313131316e-05, | |
| "loss": 1.8564, | |
| "step": 492000 | |
| }, | |
| { | |
| "epoch": 33.64, | |
| "learning_rate": 5.121212121212121e-05, | |
| "loss": 1.8529, | |
| "step": 493000 | |
| }, | |
| { | |
| "epoch": 33.71, | |
| "learning_rate": 5.111111111111111e-05, | |
| "loss": 1.8678, | |
| "step": 494000 | |
| }, | |
| { | |
| "epoch": 33.78, | |
| "learning_rate": 5.101010101010101e-05, | |
| "loss": 1.869, | |
| "step": 495000 | |
| }, | |
| { | |
| "epoch": 33.85, | |
| "learning_rate": 5.090909090909091e-05, | |
| "loss": 1.8669, | |
| "step": 496000 | |
| }, | |
| { | |
| "epoch": 33.92, | |
| "learning_rate": 5.080808080808081e-05, | |
| "loss": 1.8564, | |
| "step": 497000 | |
| }, | |
| { | |
| "epoch": 33.99, | |
| "learning_rate": 5.070707070707071e-05, | |
| "loss": 1.8517, | |
| "step": 498000 | |
| }, | |
| { | |
| "epoch": 34.05, | |
| "learning_rate": 5.060606060606061e-05, | |
| "loss": 1.842, | |
| "step": 499000 | |
| }, | |
| { | |
| "epoch": 34.12, | |
| "learning_rate": 5.050505050505051e-05, | |
| "loss": 1.8367, | |
| "step": 500000 | |
| }, | |
| { | |
| "epoch": 34.19, | |
| "learning_rate": 5.040404040404041e-05, | |
| "loss": 1.8419, | |
| "step": 501000 | |
| }, | |
| { | |
| "epoch": 34.26, | |
| "learning_rate": 5.030303030303031e-05, | |
| "loss": 1.859, | |
| "step": 502000 | |
| }, | |
| { | |
| "epoch": 34.33, | |
| "learning_rate": 5.0202020202020203e-05, | |
| "loss": 1.8429, | |
| "step": 503000 | |
| }, | |
| { | |
| "epoch": 34.4, | |
| "learning_rate": 5.01010101010101e-05, | |
| "loss": 1.8482, | |
| "step": 504000 | |
| }, | |
| { | |
| "epoch": 34.46, | |
| "learning_rate": 5e-05, | |
| "loss": 1.8443, | |
| "step": 505000 | |
| }, | |
| { | |
| "epoch": 34.53, | |
| "learning_rate": 4.98989898989899e-05, | |
| "loss": 1.8703, | |
| "step": 506000 | |
| }, | |
| { | |
| "epoch": 34.6, | |
| "learning_rate": 4.97979797979798e-05, | |
| "loss": 1.8624, | |
| "step": 507000 | |
| }, | |
| { | |
| "epoch": 34.67, | |
| "learning_rate": 4.9696969696969694e-05, | |
| "loss": 1.8535, | |
| "step": 508000 | |
| }, | |
| { | |
| "epoch": 34.74, | |
| "learning_rate": 4.9595959595959594e-05, | |
| "loss": 1.853, | |
| "step": 509000 | |
| }, | |
| { | |
| "epoch": 34.81, | |
| "learning_rate": 4.94949494949495e-05, | |
| "loss": 1.8532, | |
| "step": 510000 | |
| }, | |
| { | |
| "epoch": 34.87, | |
| "learning_rate": 4.93939393939394e-05, | |
| "loss": 1.8566, | |
| "step": 511000 | |
| }, | |
| { | |
| "epoch": 34.94, | |
| "learning_rate": 4.92929292929293e-05, | |
| "loss": 1.8706, | |
| "step": 512000 | |
| }, | |
| { | |
| "epoch": 35.01, | |
| "learning_rate": 4.919191919191919e-05, | |
| "loss": 1.8669, | |
| "step": 513000 | |
| }, | |
| { | |
| "epoch": 35.08, | |
| "learning_rate": 4.909090909090909e-05, | |
| "loss": 1.8427, | |
| "step": 514000 | |
| }, | |
| { | |
| "epoch": 35.15, | |
| "learning_rate": 4.898989898989899e-05, | |
| "loss": 1.8566, | |
| "step": 515000 | |
| }, | |
| { | |
| "epoch": 35.21, | |
| "learning_rate": 4.888888888888889e-05, | |
| "loss": 1.8456, | |
| "step": 516000 | |
| }, | |
| { | |
| "epoch": 35.28, | |
| "learning_rate": 4.878787878787879e-05, | |
| "loss": 1.8461, | |
| "step": 517000 | |
| }, | |
| { | |
| "epoch": 35.35, | |
| "learning_rate": 4.868686868686869e-05, | |
| "loss": 1.8392, | |
| "step": 518000 | |
| }, | |
| { | |
| "epoch": 35.42, | |
| "learning_rate": 4.858585858585859e-05, | |
| "loss": 1.8446, | |
| "step": 519000 | |
| }, | |
| { | |
| "epoch": 35.49, | |
| "learning_rate": 4.848484848484849e-05, | |
| "loss": 1.8494, | |
| "step": 520000 | |
| }, | |
| { | |
| "epoch": 35.56, | |
| "learning_rate": 4.838383838383839e-05, | |
| "loss": 1.8426, | |
| "step": 521000 | |
| }, | |
| { | |
| "epoch": 35.62, | |
| "learning_rate": 4.828282828282829e-05, | |
| "loss": 1.8472, | |
| "step": 522000 | |
| }, | |
| { | |
| "epoch": 35.69, | |
| "learning_rate": 4.8181818181818186e-05, | |
| "loss": 1.8524, | |
| "step": 523000 | |
| }, | |
| { | |
| "epoch": 35.76, | |
| "learning_rate": 4.808080808080808e-05, | |
| "loss": 1.8464, | |
| "step": 524000 | |
| }, | |
| { | |
| "epoch": 35.83, | |
| "learning_rate": 4.797979797979798e-05, | |
| "loss": 1.8522, | |
| "step": 525000 | |
| }, | |
| { | |
| "epoch": 35.9, | |
| "learning_rate": 4.787878787878788e-05, | |
| "loss": 1.852, | |
| "step": 526000 | |
| }, | |
| { | |
| "epoch": 35.97, | |
| "learning_rate": 4.7777777777777784e-05, | |
| "loss": 1.8598, | |
| "step": 527000 | |
| }, | |
| { | |
| "epoch": 36.03, | |
| "learning_rate": 4.7676767676767684e-05, | |
| "loss": 1.8685, | |
| "step": 528000 | |
| }, | |
| { | |
| "epoch": 36.1, | |
| "learning_rate": 4.7575757575757576e-05, | |
| "loss": 1.8371, | |
| "step": 529000 | |
| }, | |
| { | |
| "epoch": 36.17, | |
| "learning_rate": 4.7474747474747476e-05, | |
| "loss": 1.8453, | |
| "step": 530000 | |
| }, | |
| { | |
| "epoch": 36.24, | |
| "learning_rate": 4.7373737373737375e-05, | |
| "loss": 1.837, | |
| "step": 531000 | |
| }, | |
| { | |
| "epoch": 36.31, | |
| "learning_rate": 4.7272727272727275e-05, | |
| "loss": 1.8441, | |
| "step": 532000 | |
| }, | |
| { | |
| "epoch": 36.37, | |
| "learning_rate": 4.7171717171717174e-05, | |
| "loss": 1.8605, | |
| "step": 533000 | |
| }, | |
| { | |
| "epoch": 36.44, | |
| "learning_rate": 4.7070707070707074e-05, | |
| "loss": 1.852, | |
| "step": 534000 | |
| }, | |
| { | |
| "epoch": 36.51, | |
| "learning_rate": 4.696969696969697e-05, | |
| "loss": 1.862, | |
| "step": 535000 | |
| }, | |
| { | |
| "epoch": 36.58, | |
| "learning_rate": 4.686868686868687e-05, | |
| "loss": 1.8512, | |
| "step": 536000 | |
| }, | |
| { | |
| "epoch": 36.65, | |
| "learning_rate": 4.676767676767677e-05, | |
| "loss": 1.8574, | |
| "step": 537000 | |
| }, | |
| { | |
| "epoch": 36.72, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 1.8617, | |
| "step": 538000 | |
| }, | |
| { | |
| "epoch": 36.78, | |
| "learning_rate": 4.656565656565657e-05, | |
| "loss": 1.8561, | |
| "step": 539000 | |
| }, | |
| { | |
| "epoch": 36.85, | |
| "learning_rate": 4.6464646464646464e-05, | |
| "loss": 1.8459, | |
| "step": 540000 | |
| }, | |
| { | |
| "epoch": 36.92, | |
| "learning_rate": 4.636363636363636e-05, | |
| "loss": 1.8547, | |
| "step": 541000 | |
| }, | |
| { | |
| "epoch": 36.99, | |
| "learning_rate": 4.626262626262626e-05, | |
| "loss": 1.8442, | |
| "step": 542000 | |
| }, | |
| { | |
| "epoch": 37.06, | |
| "learning_rate": 4.616161616161616e-05, | |
| "loss": 1.8388, | |
| "step": 543000 | |
| }, | |
| { | |
| "epoch": 37.13, | |
| "learning_rate": 4.606060606060607e-05, | |
| "loss": 1.8371, | |
| "step": 544000 | |
| }, | |
| { | |
| "epoch": 37.19, | |
| "learning_rate": 4.595959595959596e-05, | |
| "loss": 1.8328, | |
| "step": 545000 | |
| }, | |
| { | |
| "epoch": 37.26, | |
| "learning_rate": 4.585858585858586e-05, | |
| "loss": 1.831, | |
| "step": 546000 | |
| }, | |
| { | |
| "epoch": 37.33, | |
| "learning_rate": 4.575757575757576e-05, | |
| "loss": 1.8389, | |
| "step": 547000 | |
| }, | |
| { | |
| "epoch": 37.4, | |
| "learning_rate": 4.565656565656566e-05, | |
| "loss": 1.8457, | |
| "step": 548000 | |
| }, | |
| { | |
| "epoch": 37.47, | |
| "learning_rate": 4.555555555555556e-05, | |
| "loss": 1.8297, | |
| "step": 549000 | |
| }, | |
| { | |
| "epoch": 37.53, | |
| "learning_rate": 4.545454545454546e-05, | |
| "loss": 1.8298, | |
| "step": 550000 | |
| }, | |
| { | |
| "epoch": 37.6, | |
| "learning_rate": 4.535353535353535e-05, | |
| "loss": 1.8342, | |
| "step": 551000 | |
| }, | |
| { | |
| "epoch": 37.67, | |
| "learning_rate": 4.525252525252526e-05, | |
| "loss": 1.8237, | |
| "step": 552000 | |
| }, | |
| { | |
| "epoch": 37.74, | |
| "learning_rate": 4.515151515151516e-05, | |
| "loss": 1.8435, | |
| "step": 553000 | |
| }, | |
| { | |
| "epoch": 37.81, | |
| "learning_rate": 4.5050505050505056e-05, | |
| "loss": 1.8286, | |
| "step": 554000 | |
| }, | |
| { | |
| "epoch": 37.88, | |
| "learning_rate": 4.494949494949495e-05, | |
| "loss": 1.8331, | |
| "step": 555000 | |
| }, | |
| { | |
| "epoch": 37.94, | |
| "learning_rate": 4.484848484848485e-05, | |
| "loss": 1.8346, | |
| "step": 556000 | |
| }, | |
| { | |
| "epoch": 38.01, | |
| "learning_rate": 4.474747474747475e-05, | |
| "loss": 1.8207, | |
| "step": 557000 | |
| }, | |
| { | |
| "epoch": 38.08, | |
| "learning_rate": 4.464646464646465e-05, | |
| "loss": 1.8037, | |
| "step": 558000 | |
| }, | |
| { | |
| "epoch": 38.15, | |
| "learning_rate": 4.454545454545455e-05, | |
| "loss": 1.8067, | |
| "step": 559000 | |
| }, | |
| { | |
| "epoch": 38.22, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 1.7991, | |
| "step": 560000 | |
| }, | |
| { | |
| "epoch": 38.29, | |
| "learning_rate": 4.4343434343434346e-05, | |
| "loss": 1.799, | |
| "step": 561000 | |
| }, | |
| { | |
| "epoch": 38.35, | |
| "learning_rate": 4.4242424242424246e-05, | |
| "loss": 1.8069, | |
| "step": 562000 | |
| }, | |
| { | |
| "epoch": 38.42, | |
| "learning_rate": 4.4141414141414145e-05, | |
| "loss": 1.8146, | |
| "step": 563000 | |
| }, | |
| { | |
| "epoch": 38.49, | |
| "learning_rate": 4.4040404040404044e-05, | |
| "loss": 1.8102, | |
| "step": 564000 | |
| }, | |
| { | |
| "epoch": 38.56, | |
| "learning_rate": 4.3939393939393944e-05, | |
| "loss": 1.8188, | |
| "step": 565000 | |
| }, | |
| { | |
| "epoch": 38.63, | |
| "learning_rate": 4.383838383838384e-05, | |
| "loss": 1.813, | |
| "step": 566000 | |
| }, | |
| { | |
| "epoch": 38.7, | |
| "learning_rate": 4.3737373737373736e-05, | |
| "loss": 1.8334, | |
| "step": 567000 | |
| }, | |
| { | |
| "epoch": 38.76, | |
| "learning_rate": 4.3636363636363636e-05, | |
| "loss": 1.8117, | |
| "step": 568000 | |
| }, | |
| { | |
| "epoch": 38.83, | |
| "learning_rate": 4.3535353535353535e-05, | |
| "loss": 1.804, | |
| "step": 569000 | |
| }, | |
| { | |
| "epoch": 38.9, | |
| "learning_rate": 4.343434343434344e-05, | |
| "loss": 1.8222, | |
| "step": 570000 | |
| }, | |
| { | |
| "epoch": 38.97, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "loss": 1.816, | |
| "step": 571000 | |
| }, | |
| { | |
| "epoch": 39.04, | |
| "learning_rate": 4.3232323232323234e-05, | |
| "loss": 1.8037, | |
| "step": 572000 | |
| }, | |
| { | |
| "epoch": 39.1, | |
| "learning_rate": 4.313131313131313e-05, | |
| "loss": 1.796, | |
| "step": 573000 | |
| }, | |
| { | |
| "epoch": 39.17, | |
| "learning_rate": 4.303030303030303e-05, | |
| "loss": 1.8026, | |
| "step": 574000 | |
| }, | |
| { | |
| "epoch": 39.24, | |
| "learning_rate": 4.292929292929293e-05, | |
| "loss": 1.8142, | |
| "step": 575000 | |
| }, | |
| { | |
| "epoch": 39.31, | |
| "learning_rate": 4.282828282828283e-05, | |
| "loss": 1.8008, | |
| "step": 576000 | |
| }, | |
| { | |
| "epoch": 39.38, | |
| "learning_rate": 4.2727272727272724e-05, | |
| "loss": 1.8023, | |
| "step": 577000 | |
| }, | |
| { | |
| "epoch": 39.45, | |
| "learning_rate": 4.262626262626263e-05, | |
| "loss": 1.8107, | |
| "step": 578000 | |
| }, | |
| { | |
| "epoch": 39.51, | |
| "learning_rate": 4.252525252525253e-05, | |
| "loss": 1.8136, | |
| "step": 579000 | |
| }, | |
| { | |
| "epoch": 39.58, | |
| "learning_rate": 4.242424242424243e-05, | |
| "loss": 1.8202, | |
| "step": 580000 | |
| }, | |
| { | |
| "epoch": 39.65, | |
| "learning_rate": 4.232323232323233e-05, | |
| "loss": 1.8042, | |
| "step": 581000 | |
| }, | |
| { | |
| "epoch": 39.72, | |
| "learning_rate": 4.222222222222222e-05, | |
| "loss": 1.8204, | |
| "step": 582000 | |
| }, | |
| { | |
| "epoch": 39.79, | |
| "learning_rate": 4.212121212121212e-05, | |
| "loss": 1.8157, | |
| "step": 583000 | |
| }, | |
| { | |
| "epoch": 39.86, | |
| "learning_rate": 4.202020202020202e-05, | |
| "loss": 1.8028, | |
| "step": 584000 | |
| }, | |
| { | |
| "epoch": 39.92, | |
| "learning_rate": 4.191919191919192e-05, | |
| "loss": 1.8196, | |
| "step": 585000 | |
| } | |
| ], | |
| "max_steps": 1000000, | |
| "num_train_epochs": 69, | |
| "total_flos": 7.31875459270527e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
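
A minimal sketch of how one might consume this log, assuming the JSON above is saved as `trainer_state.json` (the usual Transformers Trainer checkpoint filename) and that the logged learning rates follow a linear warmup to 1e-4 over the first 10,000 steps and then a linear decay toward 0 at `max_steps` = 1,000,000; both the filename and the schedule shape are inferred from the values in the log, not stated by its author.

```python
# Sketch (not part of the original log): load the trainer state, summarize
# the final entry, and check the logged learning rates against the schedule
# reconstructed above. Filename and schedule parameters are assumptions.
import json

with open("trainer_state.json") as f:  # assumed path to the JSON shown above
    state = json.load(f)

history = state["log_history"]
steps = [h["step"] for h in history]
losses = [h["loss"] for h in history]
lrs = [h["learning_rate"] for h in history]

# Reconstructed schedule: linear warmup to 1e-4 over 10,000 steps,
# then linear decay to 0 at max_steps (1,000,000 in this run).
peak_lr, warmup_steps, max_steps = 1e-4, 10_000, state["max_steps"]

def expected_lr(step: int) -> float:
    if step <= warmup_steps:
        return peak_lr * step / warmup_steps
    return peak_lr * (max_steps - step) / (max_steps - warmup_steps)

# Logged and reconstructed values should agree to floating-point precision.
worst = max(abs(lr - expected_lr(s)) for s, lr in zip(steps, lrs))
print(f"final step: {steps[-1]}, final loss: {losses[-1]}")
print(f"max |logged - reconstructed| learning rate: {worst:.3e}")
```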