| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "global_step": 5000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 2.8375, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 2.6779, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 3e-06, | |
| "loss": 2.57, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 2.5656, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 5e-06, | |
| "loss": 2.3681, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 6e-06, | |
| "loss": 2.3631, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 2.388, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 2.2585, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 9e-06, | |
| "loss": 2.3164, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1e-05, | |
| "loss": 2.3083, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 2.2434, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.2e-05, | |
| "loss": 2.2638, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 2.2401, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 2.2564, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.5e-05, | |
| "loss": 2.2993, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 2.3755, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.7000000000000003e-05, | |
| "loss": 2.3019, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 1.8e-05, | |
| "loss": 2.2788, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9e-05, | |
| "loss": 2.2605, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2e-05, | |
| "loss": 2.2447, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2.1e-05, | |
| "loss": 2.2964, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 2.1982, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 2.3000000000000003e-05, | |
| "loss": 2.2083, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.4e-05, | |
| "loss": 2.2543, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.5e-05, | |
| "loss": 2.2566, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 2.1604, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 2.3027, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 2.2066, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 2.9e-05, | |
| "loss": 2.2323, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 3e-05, | |
| "loss": 2.1687, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 3.1e-05, | |
| "loss": 2.2966, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 2.2202, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 3.3e-05, | |
| "loss": 2.2124, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 3.4000000000000007e-05, | |
| "loss": 2.1902, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 3.5e-05, | |
| "loss": 2.2963, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 3.6e-05, | |
| "loss": 2.181, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 3.7e-05, | |
| "loss": 2.2098, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 3.8e-05, | |
| "loss": 2.2389, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 3.9000000000000006e-05, | |
| "loss": 2.2249, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4e-05, | |
| "loss": 2.1545, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.1e-05, | |
| "loss": 2.2302, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.2e-05, | |
| "loss": 2.1801, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.3e-05, | |
| "loss": 2.2626, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 2.2543, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.5e-05, | |
| "loss": 2.1455, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 2.1994, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.7e-05, | |
| "loss": 2.2642, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.8e-05, | |
| "loss": 2.2493, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.9e-05, | |
| "loss": 2.2332, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 5e-05, | |
| "loss": 2.1859, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.9888888888888894e-05, | |
| "loss": 2.205, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.977777777777778e-05, | |
| "loss": 2.3042, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.966666666666667e-05, | |
| "loss": 2.3318, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.955555555555556e-05, | |
| "loss": 2.249, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.9444444444444446e-05, | |
| "loss": 2.2936, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.933333333333334e-05, | |
| "loss": 2.189, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.922222222222222e-05, | |
| "loss": 2.2387, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.9111111111111114e-05, | |
| "loss": 2.1974, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.9e-05, | |
| "loss": 2.2088, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.888888888888889e-05, | |
| "loss": 2.2264, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.8777777777777775e-05, | |
| "loss": 2.2152, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.866666666666667e-05, | |
| "loss": 2.2981, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.855555555555556e-05, | |
| "loss": 2.1741, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.844444444444445e-05, | |
| "loss": 2.228, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.8333333333333334e-05, | |
| "loss": 2.2239, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.8222222222222225e-05, | |
| "loss": 2.1791, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.811111111111111e-05, | |
| "loss": 2.1563, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.8e-05, | |
| "loss": 2.1668, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.7888888888888886e-05, | |
| "loss": 2.1757, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.7777777777777784e-05, | |
| "loss": 2.0375, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.766666666666667e-05, | |
| "loss": 2.2324, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.755555555555556e-05, | |
| "loss": 2.2474, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.7444444444444445e-05, | |
| "loss": 2.2136, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.7333333333333336e-05, | |
| "loss": 2.2169, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.722222222222222e-05, | |
| "loss": 2.2344, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.711111111111111e-05, | |
| "loss": 2.2769, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.7e-05, | |
| "loss": 2.1766, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.6888888888888895e-05, | |
| "loss": 2.287, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.677777777777778e-05, | |
| "loss": 2.2396, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 2.2257, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.6555555555555556e-05, | |
| "loss": 2.1978, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.644444444444445e-05, | |
| "loss": 2.1719, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.633333333333333e-05, | |
| "loss": 2.2309, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.6222222222222224e-05, | |
| "loss": 2.0824, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.6111111111111115e-05, | |
| "loss": 2.2039, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 2.1636, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.588888888888889e-05, | |
| "loss": 2.2237, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.577777777777778e-05, | |
| "loss": 2.2943, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.566666666666667e-05, | |
| "loss": 2.2324, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.555555555555556e-05, | |
| "loss": 2.2258, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.5444444444444444e-05, | |
| "loss": 2.1927, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.5333333333333335e-05, | |
| "loss": 2.2067, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.522222222222223e-05, | |
| "loss": 2.2127, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.511111111111112e-05, | |
| "loss": 2.1303, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.5e-05, | |
| "loss": 2.2569, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.4888888888888894e-05, | |
| "loss": 2.0968, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.477777777777778e-05, | |
| "loss": 2.1282, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.466666666666667e-05, | |
| "loss": 2.1771, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.4555555555555555e-05, | |
| "loss": 2.2062, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.445555555555555e-05, | |
| "loss": 2.1468, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.4344444444444444e-05, | |
| "loss": 2.1912, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.4233333333333336e-05, | |
| "loss": 2.2167, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.412222222222223e-05, | |
| "loss": 2.2232, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.401111111111111e-05, | |
| "loss": 2.1965, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.39e-05, | |
| "loss": 2.1658, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.378888888888889e-05, | |
| "loss": 2.3051, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.367777777777778e-05, | |
| "loss": 2.1915, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.3566666666666664e-05, | |
| "loss": 2.1081, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.3455555555555555e-05, | |
| "loss": 2.2553, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.334444444444445e-05, | |
| "loss": 2.1794, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.323333333333334e-05, | |
| "loss": 2.1691, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.312222222222222e-05, | |
| "loss": 2.1764, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.3011111111111115e-05, | |
| "loss": 2.1462, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.29e-05, | |
| "loss": 2.1322, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.278888888888889e-05, | |
| "loss": 2.1283, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.2677777777777775e-05, | |
| "loss": 2.1366, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.2566666666666674e-05, | |
| "loss": 2.2162, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.245555555555556e-05, | |
| "loss": 2.1199, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.234444444444445e-05, | |
| "loss": 2.206, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.2233333333333334e-05, | |
| "loss": 2.115, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.2122222222222226e-05, | |
| "loss": 2.1905, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.201111111111111e-05, | |
| "loss": 2.1426, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.19e-05, | |
| "loss": 2.2061, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.178888888888889e-05, | |
| "loss": 2.2047, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.1677777777777785e-05, | |
| "loss": 2.2049, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.156666666666667e-05, | |
| "loss": 2.157, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.145555555555556e-05, | |
| "loss": 2.1282, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.1344444444444446e-05, | |
| "loss": 2.1219, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.123333333333334e-05, | |
| "loss": 2.2332, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.112222222222222e-05, | |
| "loss": 2.1688, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.101111111111111e-05, | |
| "loss": 2.0697, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.09e-05, | |
| "loss": 2.1763, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.0788888888888896e-05, | |
| "loss": 2.1581, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.067777777777778e-05, | |
| "loss": 2.1462, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.056666666666667e-05, | |
| "loss": 2.2394, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.045555555555556e-05, | |
| "loss": 2.129, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.034444444444445e-05, | |
| "loss": 2.1997, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.023333333333333e-05, | |
| "loss": 2.1919, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.0122222222222225e-05, | |
| "loss": 2.1456, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.001111111111111e-05, | |
| "loss": 2.1603, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 3.99e-05, | |
| "loss": 2.1268, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 3.978888888888889e-05, | |
| "loss": 2.1839, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 3.9677777777777784e-05, | |
| "loss": 2.1678, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 3.956666666666667e-05, | |
| "loss": 2.1552, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 3.945555555555556e-05, | |
| "loss": 2.1212, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 3.9344444444444445e-05, | |
| "loss": 2.1712, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 3.9233333333333336e-05, | |
| "loss": 2.1182, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 3.912222222222223e-05, | |
| "loss": 2.0769, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.901111111111111e-05, | |
| "loss": 2.1975, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.8900000000000004e-05, | |
| "loss": 2.1386, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.878888888888889e-05, | |
| "loss": 2.1822, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.867777777777778e-05, | |
| "loss": 2.1114, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.8566666666666664e-05, | |
| "loss": 2.0906, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.8455555555555556e-05, | |
| "loss": 2.2494, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.834444444444444e-05, | |
| "loss": 2.1981, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.823333333333334e-05, | |
| "loss": 2.1289, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.8122222222222224e-05, | |
| "loss": 2.2029, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.8011111111111115e-05, | |
| "loss": 2.2259, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.79e-05, | |
| "loss": 2.1545, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.778888888888889e-05, | |
| "loss": 2.1687, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.7677777777777776e-05, | |
| "loss": 2.2138, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.756666666666667e-05, | |
| "loss": 2.1843, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.745555555555555e-05, | |
| "loss": 2.0983, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.734444444444445e-05, | |
| "loss": 2.128, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.7233333333333335e-05, | |
| "loss": 2.0945, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.7122222222222226e-05, | |
| "loss": 2.2031, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.701111111111111e-05, | |
| "loss": 2.1492, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.69e-05, | |
| "loss": 2.1556, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.678888888888889e-05, | |
| "loss": 2.2145, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.667777777777778e-05, | |
| "loss": 2.1574, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.656666666666666e-05, | |
| "loss": 2.1781, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.645555555555556e-05, | |
| "loss": 2.1396, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.6344444444444446e-05, | |
| "loss": 2.1376, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.623333333333334e-05, | |
| "loss": 2.1554, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.612222222222222e-05, | |
| "loss": 2.162, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.6011111111111114e-05, | |
| "loss": 2.1683, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.59e-05, | |
| "loss": 2.0821, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.578888888888889e-05, | |
| "loss": 2.189, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.567777777777778e-05, | |
| "loss": 2.193, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.556666666666667e-05, | |
| "loss": 2.1326, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.545555555555556e-05, | |
| "loss": 2.0713, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.534444444444445e-05, | |
| "loss": 2.2754, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.5233333333333334e-05, | |
| "loss": 2.0774, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.5122222222222225e-05, | |
| "loss": 2.1025, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.501111111111111e-05, | |
| "loss": 2.0258, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.49e-05, | |
| "loss": 2.1155, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.478888888888889e-05, | |
| "loss": 2.1176, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.4677777777777784e-05, | |
| "loss": 2.1447, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.456666666666667e-05, | |
| "loss": 2.1083, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.445555555555556e-05, | |
| "loss": 2.0821, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.4344444444444445e-05, | |
| "loss": 2.2, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.4233333333333336e-05, | |
| "loss": 2.1913, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.412222222222222e-05, | |
| "loss": 2.1596, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.401111111111111e-05, | |
| "loss": 2.1381, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.3900000000000004e-05, | |
| "loss": 2.1225, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.378888888888889e-05, | |
| "loss": 2.1596, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.367777777777778e-05, | |
| "loss": 2.196, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.356666666666667e-05, | |
| "loss": 2.1491, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.3455555555555556e-05, | |
| "loss": 2.143, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.334444444444445e-05, | |
| "loss": 2.1908, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.323333333333333e-05, | |
| "loss": 2.224, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.3122222222222224e-05, | |
| "loss": 2.0906, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.3011111111111115e-05, | |
| "loss": 2.0705, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.29e-05, | |
| "loss": 2.1297, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.278888888888889e-05, | |
| "loss": 2.0697, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.2677777777777776e-05, | |
| "loss": 2.1413, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.256666666666667e-05, | |
| "loss": 2.1995, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.245555555555555e-05, | |
| "loss": 2.2242, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.2344444444444444e-05, | |
| "loss": 2.1759, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.2233333333333335e-05, | |
| "loss": 2.1406, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.212222222222223e-05, | |
| "loss": 2.1722, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.201111111111111e-05, | |
| "loss": 2.1181, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.19e-05, | |
| "loss": 2.0824, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.178888888888889e-05, | |
| "loss": 2.1826, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.167777777777778e-05, | |
| "loss": 2.0846, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.1566666666666664e-05, | |
| "loss": 2.1772, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.1455555555555555e-05, | |
| "loss": 2.1471, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.134444444444445e-05, | |
| "loss": 2.1125, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.123333333333334e-05, | |
| "loss": 2.055, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.112222222222222e-05, | |
| "loss": 2.0922, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.1011111111111114e-05, | |
| "loss": 2.2003, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.09e-05, | |
| "loss": 2.1425, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.078888888888889e-05, | |
| "loss": 2.1195, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.0677777777777775e-05, | |
| "loss": 2.1251, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.0566666666666667e-05, | |
| "loss": 2.1828, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.0455555555555558e-05, | |
| "loss": 2.1316, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.0344444444444446e-05, | |
| "loss": 2.1339, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.0233333333333334e-05, | |
| "loss": 2.1252, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.0122222222222226e-05, | |
| "loss": 2.2308, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.0011111111111114e-05, | |
| "loss": 2.1462, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.9900000000000002e-05, | |
| "loss": 2.1068, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.978888888888889e-05, | |
| "loss": 2.0946, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.9677777777777778e-05, | |
| "loss": 2.0563, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.956666666666667e-05, | |
| "loss": 2.1172, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.9455555555555557e-05, | |
| "loss": 2.0987, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.9344444444444445e-05, | |
| "loss": 2.1107, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.9233333333333334e-05, | |
| "loss": 2.061, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.912222222222222e-05, | |
| "loss": 2.0903, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.901111111111111e-05, | |
| "loss": 2.1508, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.8899999999999998e-05, | |
| "loss": 2.1378, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.8788888888888893e-05, | |
| "loss": 2.117, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.867777777777778e-05, | |
| "loss": 2.1878, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.856666666666667e-05, | |
| "loss": 2.11, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.8455555555555557e-05, | |
| "loss": 2.0339, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.8344444444444445e-05, | |
| "loss": 2.1127, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.8233333333333333e-05, | |
| "loss": 2.1542, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.812222222222222e-05, | |
| "loss": 2.1369, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.801111111111111e-05, | |
| "loss": 2.1702, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.7900000000000004e-05, | |
| "loss": 2.0552, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.7788888888888892e-05, | |
| "loss": 2.2228, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.767777777777778e-05, | |
| "loss": 1.9165, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 2.7566666666666668e-05, | |
| "loss": 1.9713, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 2.7455555555555556e-05, | |
| "loss": 1.8864, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 2.7344444444444444e-05, | |
| "loss": 1.8848, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 2.7233333333333332e-05, | |
| "loss": 1.962, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 2.712222222222222e-05, | |
| "loss": 1.9398, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 2.7011111111111115e-05, | |
| "loss": 1.9051, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 2.6900000000000003e-05, | |
| "loss": 2.0315, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 2.678888888888889e-05, | |
| "loss": 1.9723, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 2.667777777777778e-05, | |
| "loss": 1.9358, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 2.6566666666666668e-05, | |
| "loss": 1.9036, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 2.6455555555555556e-05, | |
| "loss": 1.902, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 2.6344444444444444e-05, | |
| "loss": 1.911, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 2.6233333333333332e-05, | |
| "loss": 1.9689, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 2.6122222222222227e-05, | |
| "loss": 1.9162, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 2.6011111111111115e-05, | |
| "loss": 1.8957, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 2.5900000000000003e-05, | |
| "loss": 1.9308, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 2.578888888888889e-05, | |
| "loss": 1.9296, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 2.567777777777778e-05, | |
| "loss": 1.8809, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 2.5566666666666667e-05, | |
| "loss": 1.8844, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 2.5455555555555555e-05, | |
| "loss": 1.9367, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 2.534444444444445e-05, | |
| "loss": 1.9344, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 2.5233333333333338e-05, | |
| "loss": 1.897, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 2.5122222222222226e-05, | |
| "loss": 1.8671, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 2.5011111111111114e-05, | |
| "loss": 1.9589, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 2.4900000000000002e-05, | |
| "loss": 2.0053, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 2.478888888888889e-05, | |
| "loss": 1.8778, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 2.467777777777778e-05, | |
| "loss": 1.9399, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 2.456666666666667e-05, | |
| "loss": 1.9006, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 2.4455555555555558e-05, | |
| "loss": 1.9952, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 2.4344444444444446e-05, | |
| "loss": 1.97, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 2.4233333333333337e-05, | |
| "loss": 1.9419, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 2.4122222222222225e-05, | |
| "loss": 1.947, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 2.4011111111111113e-05, | |
| "loss": 1.852, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 2.39e-05, | |
| "loss": 1.9214, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 2.378888888888889e-05, | |
| "loss": 1.9193, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 2.3677777777777778e-05, | |
| "loss": 1.9713, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 2.3566666666666666e-05, | |
| "loss": 1.9001, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 2.3455555555555557e-05, | |
| "loss": 1.9345, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 2.3344444444444445e-05, | |
| "loss": 1.9964, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 2.3233333333333333e-05, | |
| "loss": 1.9469, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 2.312222222222222e-05, | |
| "loss": 1.9957, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 2.301111111111111e-05, | |
| "loss": 1.8821, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 2.29e-05, | |
| "loss": 1.9346, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 2.278888888888889e-05, | |
| "loss": 1.9556, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 2.2677777777777777e-05, | |
| "loss": 1.9467, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 2.2566666666666665e-05, | |
| "loss": 1.8833, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 2.2455555555555557e-05, | |
| "loss": 2.0343, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.2344444444444445e-05, | |
| "loss": 1.9408, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.2233333333333333e-05, | |
| "loss": 1.9366, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.212222222222222e-05, | |
| "loss": 1.9199, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.2011111111111112e-05, | |
| "loss": 1.8901, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.19e-05, | |
| "loss": 2.0184, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.178888888888889e-05, | |
| "loss": 1.9105, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.167777777777778e-05, | |
| "loss": 1.9003, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.1566666666666668e-05, | |
| "loss": 1.8537, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.1455555555555556e-05, | |
| "loss": 1.9618, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.1344444444444444e-05, | |
| "loss": 1.9464, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.1233333333333336e-05, | |
| "loss": 1.9511, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.1122222222222224e-05, | |
| "loss": 1.9676, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.1011111111111112e-05, | |
| "loss": 1.8459, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.09e-05, | |
| "loss": 1.8821, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.078888888888889e-05, | |
| "loss": 1.9443, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.067777777777778e-05, | |
| "loss": 2.0075, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.0566666666666667e-05, | |
| "loss": 1.9962, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.0455555555555555e-05, | |
| "loss": 1.936, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.0344444444444447e-05, | |
| "loss": 1.9004, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.0233333333333335e-05, | |
| "loss": 1.9157, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.0122222222222223e-05, | |
| "loss": 2.0501, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.001111111111111e-05, | |
| "loss": 1.9606, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.9900000000000003e-05, | |
| "loss": 1.9151, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.978888888888889e-05, | |
| "loss": 1.9383, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.967777777777778e-05, | |
| "loss": 1.9378, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.9566666666666667e-05, | |
| "loss": 1.8672, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.9455555555555558e-05, | |
| "loss": 1.905, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.9344444444444446e-05, | |
| "loss": 1.9974, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.9233333333333334e-05, | |
| "loss": 1.9442, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.9122222222222222e-05, | |
| "loss": 1.9567, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 1.9011111111111114e-05, | |
| "loss": 1.9023, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 1.8900000000000002e-05, | |
| "loss": 1.9873, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 1.878888888888889e-05, | |
| "loss": 1.9145, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 1.8677777777777778e-05, | |
| "loss": 1.8977, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 1.856666666666667e-05, | |
| "loss": 1.966, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 1.8455555555555558e-05, | |
| "loss": 1.827, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 1.8344444444444446e-05, | |
| "loss": 1.8427, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 1.8233333333333334e-05, | |
| "loss": 1.909, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.8122222222222225e-05, | |
| "loss": 1.8733, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.8011111111111113e-05, | |
| "loss": 1.8588, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.79e-05, | |
| "loss": 1.9429, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.778888888888889e-05, | |
| "loss": 1.9621, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.7677777777777778e-05, | |
| "loss": 1.8835, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 1.756666666666667e-05, | |
| "loss": 1.8782, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 1.7455555555555557e-05, | |
| "loss": 1.9371, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.7344444444444445e-05, | |
| "loss": 1.9396, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.7233333333333333e-05, | |
| "loss": 1.9371, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.712222222222222e-05, | |
| "loss": 1.9918, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.701111111111111e-05, | |
| "loss": 1.9009, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.69e-05, | |
| "loss": 1.9499, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.678888888888889e-05, | |
| "loss": 1.9332, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.6677777777777777e-05, | |
| "loss": 2.0289, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.6566666666666665e-05, | |
| "loss": 1.9174, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.6455555555555556e-05, | |
| "loss": 1.9847, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.6344444444444445e-05, | |
| "loss": 1.8906, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.6233333333333333e-05, | |
| "loss": 1.8828, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.612222222222222e-05, | |
| "loss": 1.9662, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.6011111111111112e-05, | |
| "loss": 1.9673, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.59e-05, | |
| "loss": 1.9257, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.5788888888888888e-05, | |
| "loss": 1.8924, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.5677777777777776e-05, | |
| "loss": 1.8919, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.5566666666666668e-05, | |
| "loss": 2.0188, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.5455555555555556e-05, | |
| "loss": 1.9547, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.5344444444444444e-05, | |
| "loss": 1.9254, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.5233333333333332e-05, | |
| "loss": 1.8957, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.5122222222222224e-05, | |
| "loss": 2.0042, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.5011111111111112e-05, | |
| "loss": 1.9171, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.49e-05, | |
| "loss": 1.9193, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.4788888888888888e-05, | |
| "loss": 1.9595, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.467777777777778e-05, | |
| "loss": 1.9526, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.4566666666666667e-05, | |
| "loss": 1.8556, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.4455555555555555e-05, | |
| "loss": 1.9649, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.4344444444444447e-05, | |
| "loss": 1.9939, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.4233333333333335e-05, | |
| "loss": 1.9012, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.4122222222222223e-05, | |
| "loss": 1.9954, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.4011111111111111e-05, | |
| "loss": 1.8724, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.3900000000000002e-05, | |
| "loss": 1.8799, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.378888888888889e-05, | |
| "loss": 1.9449, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 1.3677777777777779e-05, | |
| "loss": 1.9504, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 1.3566666666666667e-05, | |
| "loss": 1.8637, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.3455555555555558e-05, | |
| "loss": 1.9266, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.3344444444444446e-05, | |
| "loss": 1.8858, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.3233333333333334e-05, | |
| "loss": 1.9453, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.3122222222222222e-05, | |
| "loss": 1.911, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.3011111111111112e-05, | |
| "loss": 1.9267, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.29e-05, | |
| "loss": 1.9917, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.278888888888889e-05, | |
| "loss": 1.8827, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.2677777777777778e-05, | |
| "loss": 1.9137, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 1.2566666666666668e-05, | |
| "loss": 1.9071, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 1.2455555555555556e-05, | |
| "loss": 1.9658, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 1.2344444444444444e-05, | |
| "loss": 1.8949, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 1.2233333333333334e-05, | |
| "loss": 1.9165, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 1.2122222222222222e-05, | |
| "loss": 1.8986, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 1.2011111111111111e-05, | |
| "loss": 1.9468, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 1.19e-05, | |
| "loss": 1.9018, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.178888888888889e-05, | |
| "loss": 1.9663, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.1677777777777777e-05, | |
| "loss": 1.9203, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.1566666666666667e-05, | |
| "loss": 1.9288, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 1.1455555555555555e-05, | |
| "loss": 1.8868, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 1.1344444444444445e-05, | |
| "loss": 1.9763, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 1.1233333333333333e-05, | |
| "loss": 1.854, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 1.1122222222222223e-05, | |
| "loss": 1.8515, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 1.1011111111111113e-05, | |
| "loss": 1.8175, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 1.09e-05, | |
| "loss": 1.9691, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 1.078888888888889e-05, | |
| "loss": 1.9804, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 1.0677777777777779e-05, | |
| "loss": 1.9356, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 1.0566666666666668e-05, | |
| "loss": 1.9108, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 1.0466666666666668e-05, | |
| "loss": 1.8432, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 1.0355555555555556e-05, | |
| "loss": 1.9151, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 1.0244444444444445e-05, | |
| "loss": 1.8866, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 1.0133333333333333e-05, | |
| "loss": 1.943, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 1.0022222222222223e-05, | |
| "loss": 1.8325, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 9.911111111111111e-06, | |
| "loss": 1.8784, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 9.800000000000001e-06, | |
| "loss": 1.8412, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 9.688888888888889e-06, | |
| "loss": 1.8959, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 9.577777777777779e-06, | |
| "loss": 1.8586, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 9.466666666666667e-06, | |
| "loss": 1.9707, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 9.355555555555557e-06, | |
| "loss": 1.9178, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 9.244444444444445e-06, | |
| "loss": 1.9418, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 9.133333333333335e-06, | |
| "loss": 1.9177, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 9.022222222222223e-06, | |
| "loss": 1.9082, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 8.911111111111112e-06, | |
| "loss": 1.9093, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 8.8e-06, | |
| "loss": 1.863, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 8.68888888888889e-06, | |
| "loss": 1.9436, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 8.577777777777778e-06, | |
| "loss": 1.852, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 8.466666666666666e-06, | |
| "loss": 1.9117, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 8.355555555555556e-06, | |
| "loss": 1.8644, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 8.244444444444444e-06, | |
| "loss": 1.9091, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 8.133333333333332e-06, | |
| "loss": 1.9222, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 8.022222222222222e-06, | |
| "loss": 1.9054, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 7.91111111111111e-06, | |
| "loss": 1.9646, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 7.8e-06, | |
| "loss": 2.0081, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 7.68888888888889e-06, | |
| "loss": 1.9646, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 7.577777777777778e-06, | |
| "loss": 1.993, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 7.4666666666666675e-06, | |
| "loss": 1.9562, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 7.3555555555555555e-06, | |
| "loss": 1.8728, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 7.244444444444445e-06, | |
| "loss": 1.9075, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 7.133333333333333e-06, | |
| "loss": 1.9145, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 7.022222222222223e-06, | |
| "loss": 1.9148, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 6.911111111111111e-06, | |
| "loss": 1.8758, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 6.800000000000001e-06, | |
| "loss": 1.9491, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 6.688888888888889e-06, | |
| "loss": 1.8994, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 6.577777777777779e-06, | |
| "loss": 1.8458, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 6.466666666666667e-06, | |
| "loss": 1.9138, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 6.355555555555557e-06, | |
| "loss": 1.9492, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 6.244444444444445e-06, | |
| "loss": 1.9269, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 6.133333333333334e-06, | |
| "loss": 1.9292, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 6.0222222222222225e-06, | |
| "loss": 1.9449, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 5.9111111111111115e-06, | |
| "loss": 1.8984, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 5.8e-06, | |
| "loss": 1.9091, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 5.688888888888889e-06, | |
| "loss": 1.8666, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 5.577777777777778e-06, | |
| "loss": 1.8918, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 5.466666666666667e-06, | |
| "loss": 1.9444, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 5.355555555555556e-06, | |
| "loss": 1.9556, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 5.244444444444445e-06, | |
| "loss": 1.9307, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 5.133333333333334e-06, | |
| "loss": 1.9893, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 5.022222222222223e-06, | |
| "loss": 1.9046, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 4.911111111111112e-06, | |
| "loss": 1.909, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 1.9228, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 4.6888888888888895e-06, | |
| "loss": 1.911, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 4.5777777777777785e-06, | |
| "loss": 1.8263, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 4.4666666666666665e-06, | |
| "loss": 1.8926, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 4.3555555555555555e-06, | |
| "loss": 1.9381, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 4.244444444444444e-06, | |
| "loss": 1.9395, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 4.133333333333333e-06, | |
| "loss": 1.9294, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 4.022222222222222e-06, | |
| "loss": 1.9132, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.911111111111111e-06, | |
| "loss": 1.9346, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.8e-06, | |
| "loss": 1.9006, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.688888888888889e-06, | |
| "loss": 1.911, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.577777777777778e-06, | |
| "loss": 1.8575, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.466666666666667e-06, | |
| "loss": 1.9116, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.3555555555555557e-06, | |
| "loss": 1.941, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.244444444444444e-06, | |
| "loss": 1.9793, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.133333333333333e-06, | |
| "loss": 1.8679, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.0222222222222225e-06, | |
| "loss": 1.953, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.9111111111111114e-06, | |
| "loss": 1.8952, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.8000000000000003e-06, | |
| "loss": 1.8668, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.6888888888888892e-06, | |
| "loss": 1.9272, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 2.5777777777777777e-06, | |
| "loss": 1.9028, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 2.4666666666666666e-06, | |
| "loss": 1.9104, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 2.3555555555555555e-06, | |
| "loss": 1.93, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 2.2444444444444445e-06, | |
| "loss": 1.8212, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 2.1333333333333334e-06, | |
| "loss": 1.9506, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 2.0222222222222223e-06, | |
| "loss": 1.9462, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.9111111111111112e-06, | |
| "loss": 1.9526, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.8e-06, | |
| "loss": 1.8744, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.6888888888888888e-06, | |
| "loss": 1.882, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.577777777777778e-06, | |
| "loss": 1.8525, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.4666666666666667e-06, | |
| "loss": 1.9561, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.3555555555555556e-06, | |
| "loss": 1.9858, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.2444444444444445e-06, | |
| "loss": 1.8857, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.1333333333333334e-06, | |
| "loss": 1.8852, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.0222222222222223e-06, | |
| "loss": 1.9136, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 9.111111111111112e-07, | |
| "loss": 1.8647, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 1.9747, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 6.888888888888889e-07, | |
| "loss": 1.844, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 5.777777777777778e-07, | |
| "loss": 1.9402, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 4.666666666666667e-07, | |
| "loss": 1.8838, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.555555555555556e-07, | |
| "loss": 1.8683, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 2.4444444444444445e-07, | |
| "loss": 1.9806, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.3333333333333334e-07, | |
| "loss": 1.8673, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 2.2222222222222224e-08, | |
| "loss": 1.9773, | |
| "step": 5000 | |
| } | |
| ], | |
| "max_steps": 5000, | |
| "num_train_epochs": 2, | |
| "total_flos": 1.7336836816896e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |