{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1000.0,
  "eval_steps": 500,
  "global_step": 13000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 0.003992,
      "loss": 10.6341,
      "step": 13
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.003984,
      "loss": 8.0261,
      "step": 26
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.003976,
      "loss": 7.6356,
      "step": 39
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.003968,
      "loss": 7.489,
      "step": 52
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.00396,
      "loss": 7.3955,
      "step": 65
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.003952,
      "loss": 7.3814,
      "step": 78
    },
    {
      "epoch": 7.0,
      "learning_rate": 0.0039440000000000005,
      "loss": 7.3919,
      "step": 91
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.003936,
      "loss": 7.2877,
      "step": 104
    },
    {
      "epoch": 9.0,
      "learning_rate": 0.003928,
      "loss": 7.0588,
      "step": 117
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.00392,
      "loss": 6.9853,
      "step": 130
    },
    {
      "epoch": 11.0,
      "learning_rate": 0.003912,
      "loss": 6.9981,
      "step": 143
    },
    {
      "epoch": 12.0,
      "learning_rate": 0.003904,
      "loss": 6.8759,
      "step": 156
    },
    {
      "epoch": 13.0,
      "learning_rate": 0.003896,
      "loss": 6.8897,
      "step": 169
    },
    {
      "epoch": 14.0,
      "learning_rate": 0.003888,
      "loss": 7.1851,
      "step": 182
    },
    {
      "epoch": 15.0,
      "learning_rate": 0.0038799999999999998,
      "loss": 7.3121,
      "step": 195
    },
    {
      "epoch": 16.0,
      "learning_rate": 0.003872,
      "loss": 7.2602,
      "step": 208
    },
    {
      "epoch": 17.0,
      "learning_rate": 0.003864,
      "loss": 7.2026,
      "step": 221
    },
    {
      "epoch": 18.0,
      "learning_rate": 0.003856,
      "loss": 7.2713,
      "step": 234
    },
    {
      "epoch": 19.0,
      "learning_rate": 0.003848,
      "loss": 7.1885,
      "step": 247
    },
    {
      "epoch": 20.0,
      "learning_rate": 0.00384,
      "loss": 7.2042,
      "step": 260
    },
    {
      "epoch": 21.0,
      "learning_rate": 0.003832,
      "loss": 7.1744,
      "step": 273
    },
    {
      "epoch": 22.0,
      "learning_rate": 0.0038239999999999997,
      "loss": 7.0481,
      "step": 286
    },
    {
      "epoch": 23.0,
      "learning_rate": 0.003816,
      "loss": 6.8698,
      "step": 299
    },
    {
      "epoch": 24.0,
      "learning_rate": 0.0038079999999999998,
      "loss": 6.7722,
      "step": 312
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.0038,
      "loss": 6.7018,
      "step": 325
    },
    {
      "epoch": 26.0,
      "learning_rate": 0.003792,
      "loss": 6.6881,
      "step": 338
    },
    {
      "epoch": 27.0,
      "learning_rate": 0.003784,
      "loss": 6.7485,
      "step": 351
    },
    {
      "epoch": 28.0,
      "learning_rate": 0.003776,
      "loss": 6.5876,
      "step": 364
    },
    {
      "epoch": 29.0,
      "learning_rate": 0.003768,
      "loss": 6.5597,
      "step": 377
    },
    {
      "epoch": 30.0,
      "learning_rate": 0.00376,
      "loss": 6.5379,
      "step": 390
    },
    {
      "epoch": 31.0,
      "learning_rate": 0.0037519999999999997,
      "loss": 6.3772,
      "step": 403
    },
    {
      "epoch": 32.0,
      "learning_rate": 0.0037440000000000004,
      "loss": 6.3651,
      "step": 416
    },
    {
      "epoch": 33.0,
      "learning_rate": 0.003736,
      "loss": 6.305,
      "step": 429
    },
    {
      "epoch": 34.0,
      "learning_rate": 0.0037280000000000004,
      "loss": 6.2724,
      "step": 442
    },
    {
      "epoch": 35.0,
      "learning_rate": 0.00372,
      "loss": 6.183,
      "step": 455
    },
    {
      "epoch": 36.0,
      "learning_rate": 0.0037120000000000005,
      "loss": 6.2141,
      "step": 468
    },
    {
      "epoch": 37.0,
      "learning_rate": 0.0037040000000000003,
      "loss": 6.1447,
      "step": 481
    },
    {
      "epoch": 38.0,
      "learning_rate": 0.003696,
      "loss": 6.3683,
      "step": 494
    },
    {
      "epoch": 39.0,
      "learning_rate": 0.0036880000000000003,
      "loss": 6.2738,
      "step": 507
    },
    {
      "epoch": 40.0,
      "learning_rate": 0.00368,
      "loss": 6.0499,
      "step": 520
    },
    {
      "epoch": 41.0,
      "learning_rate": 0.0036720000000000004,
      "loss": 5.9005,
      "step": 533
    },
    {
      "epoch": 42.0,
      "learning_rate": 0.003664,
      "loss": 5.8533,
      "step": 546
    },
    {
      "epoch": 43.0,
      "learning_rate": 0.0036560000000000004,
      "loss": 5.8199,
      "step": 559
    },
    {
      "epoch": 44.0,
      "learning_rate": 0.003648,
      "loss": 6.051,
      "step": 572
    },
    {
      "epoch": 45.0,
      "learning_rate": 0.00364,
      "loss": 5.8496,
      "step": 585
    },
    {
      "epoch": 46.0,
      "learning_rate": 0.0036320000000000002,
      "loss": 5.7252,
      "step": 598
    },
    {
      "epoch": 47.0,
      "learning_rate": 0.003624,
      "loss": 5.6958,
      "step": 611
    },
    {
      "epoch": 48.0,
      "learning_rate": 0.0036160000000000003,
      "loss": 5.7218,
      "step": 624
    },
    {
      "epoch": 49.0,
      "learning_rate": 0.003608,
      "loss": 5.6656,
      "step": 637
    },
    {
      "epoch": 50.0,
      "learning_rate": 0.0036000000000000003,
      "loss": 5.612,
      "step": 650
    },
    {
      "epoch": 51.0,
      "learning_rate": 0.003592,
      "loss": 5.5532,
      "step": 663
    },
    {
      "epoch": 52.0,
      "learning_rate": 0.003584,
      "loss": 5.4327,
      "step": 676
    },
    {
      "epoch": 53.0,
      "learning_rate": 0.003576,
      "loss": 5.3979,
      "step": 689
    },
    {
      "epoch": 54.0,
      "learning_rate": 0.003568,
      "loss": 5.2903,
      "step": 702
    },
    {
      "epoch": 55.0,
      "learning_rate": 0.0035600000000000002,
      "loss": 5.4521,
      "step": 715
    },
    {
      "epoch": 56.0,
      "learning_rate": 0.003552,
      "loss": 5.6021,
      "step": 728
    },
    {
      "epoch": 57.0,
      "learning_rate": 0.0035440000000000003,
      "loss": 5.5058,
      "step": 741
    },
    {
      "epoch": 58.0,
      "learning_rate": 0.003536,
      "loss": 5.2167,
      "step": 754
    },
    {
      "epoch": 59.0,
      "learning_rate": 0.003528,
      "loss": 5.2102,
      "step": 767
    },
    {
      "epoch": 60.0,
      "learning_rate": 0.00352,
      "loss": 5.2617,
      "step": 780
    },
    {
      "epoch": 61.0,
      "learning_rate": 0.003512,
      "loss": 5.3012,
      "step": 793
    },
    {
      "epoch": 62.0,
      "learning_rate": 0.003504,
      "loss": 5.2158,
      "step": 806
    },
    {
      "epoch": 63.0,
      "learning_rate": 0.003496,
      "loss": 5.1959,
      "step": 819
    },
    {
      "epoch": 64.0,
      "learning_rate": 0.003488,
      "loss": 5.1716,
      "step": 832
    },
    {
      "epoch": 65.0,
      "learning_rate": 0.00348,
      "loss": 5.0796,
      "step": 845
    },
    {
      "epoch": 66.0,
      "learning_rate": 0.0034720000000000003,
      "loss": 4.9764,
      "step": 858
    },
    {
      "epoch": 67.0,
      "learning_rate": 0.003464,
      "loss": 4.974,
      "step": 871
    },
    {
      "epoch": 68.0,
      "learning_rate": 0.003456,
      "loss": 4.876,
      "step": 884
    },
    {
      "epoch": 69.0,
      "learning_rate": 0.003448,
      "loss": 4.8596,
      "step": 897
    },
    {
      "epoch": 70.0,
      "learning_rate": 0.00344,
      "loss": 4.7792,
      "step": 910
    },
    {
      "epoch": 71.0,
      "learning_rate": 0.003432,
      "loss": 4.765,
      "step": 923
    },
    {
      "epoch": 72.0,
      "learning_rate": 0.003424,
      "loss": 4.7933,
      "step": 936
    },
    {
      "epoch": 73.0,
      "learning_rate": 0.003416,
      "loss": 4.7636,
      "step": 949
    },
    {
      "epoch": 74.0,
      "learning_rate": 0.003408,
      "loss": 4.7114,
      "step": 962
    },
    {
      "epoch": 75.0,
      "learning_rate": 0.0034,
      "loss": 4.7079,
      "step": 975
    },
    {
      "epoch": 76.0,
      "learning_rate": 0.003392,
      "loss": 4.6745,
      "step": 988
    },
    {
      "epoch": 77.0,
      "learning_rate": 0.003384,
      "loss": 4.6765,
      "step": 1001
    },
    {
      "epoch": 78.0,
      "learning_rate": 0.003376,
      "loss": 4.5913,
      "step": 1014
    },
    {
      "epoch": 79.0,
      "learning_rate": 0.003368,
      "loss": 4.7949,
      "step": 1027
    },
    {
      "epoch": 80.0,
      "learning_rate": 0.00336,
      "loss": 4.6311,
      "step": 1040
    },
    {
      "epoch": 81.0,
      "learning_rate": 0.003352,
      "loss": 4.4818,
      "step": 1053
    },
    {
      "epoch": 82.0,
      "learning_rate": 0.0033439999999999998,
      "loss": 4.4462,
      "step": 1066
    },
    {
      "epoch": 83.0,
      "learning_rate": 0.003336,
      "loss": 4.5129,
      "step": 1079
    },
    {
      "epoch": 84.0,
      "learning_rate": 0.003328,
      "loss": 4.4626,
      "step": 1092
    },
    {
      "epoch": 85.0,
      "learning_rate": 0.00332,
      "loss": 4.3505,
      "step": 1105
    },
    {
      "epoch": 86.0,
      "learning_rate": 0.003312,
      "loss": 4.3377,
      "step": 1118
    },
    {
      "epoch": 87.0,
      "learning_rate": 0.003304,
      "loss": 4.4076,
      "step": 1131
    },
    {
      "epoch": 88.0,
      "learning_rate": 0.003296,
      "loss": 4.3765,
      "step": 1144
    },
    {
      "epoch": 89.0,
      "learning_rate": 0.0032879999999999997,
      "loss": 4.2473,
      "step": 1157
    },
    {
      "epoch": 90.0,
      "learning_rate": 0.00328,
      "loss": 4.2142,
      "step": 1170
    },
    {
      "epoch": 91.0,
      "learning_rate": 0.0032719999999999997,
      "loss": 4.1567,
      "step": 1183
    },
    {
      "epoch": 92.0,
      "learning_rate": 0.003264,
      "loss": 4.1569,
      "step": 1196
    },
    {
      "epoch": 93.0,
      "learning_rate": 0.0032559999999999998,
      "loss": 4.1347,
      "step": 1209
    },
    {
      "epoch": 94.0,
      "learning_rate": 0.0032480000000000005,
      "loss": 4.0786,
      "step": 1222
    },
    {
      "epoch": 95.0,
      "learning_rate": 0.0032400000000000003,
      "loss": 4.0796,
      "step": 1235
    },
    {
      "epoch": 96.0,
      "learning_rate": 0.003232,
      "loss": 4.0432,
      "step": 1248
    },
    {
      "epoch": 97.0,
      "learning_rate": 0.0032240000000000003,
      "loss": 4.033,
      "step": 1261
    },
    {
      "epoch": 98.0,
      "learning_rate": 0.003216,
      "loss": 3.952,
      "step": 1274
    },
    {
      "epoch": 99.0,
      "learning_rate": 0.0032080000000000003,
      "loss": 4.0043,
      "step": 1287
    },
    {
      "epoch": 100.0,
      "learning_rate": 0.0032,
      "loss": 4.2161,
      "step": 1300
    },
    {
      "epoch": 101.0,
      "learning_rate": 0.0031920000000000004,
      "loss": 4.1006,
      "step": 1313
    },
    {
      "epoch": 102.0,
      "learning_rate": 0.003184,
      "loss": 4.1527,
      "step": 1326
    },
    {
      "epoch": 103.0,
      "learning_rate": 0.0031760000000000004,
      "loss": 3.9791,
      "step": 1339
    },
    {
      "epoch": 104.0,
      "learning_rate": 0.0031680000000000002,
      "loss": 3.9599,
      "step": 1352
    },
    {
      "epoch": 105.0,
      "learning_rate": 0.00316,
      "loss": 3.9998,
      "step": 1365
    },
    {
      "epoch": 106.0,
      "learning_rate": 0.0031520000000000003,
      "loss": 3.973,
      "step": 1378
    },
    {
      "epoch": 107.0,
      "learning_rate": 0.003144,
      "loss": 3.9976,
      "step": 1391
    },
    {
      "epoch": 108.0,
      "learning_rate": 0.0031360000000000003,
      "loss": 3.9862,
      "step": 1404
    },
    {
      "epoch": 109.0,
      "learning_rate": 0.003128,
      "loss": 3.8562,
      "step": 1417
    },
    {
      "epoch": 110.0,
      "learning_rate": 0.0031200000000000004,
      "loss": 3.8322,
      "step": 1430
    },
    {
      "epoch": 111.0,
      "learning_rate": 0.003112,
      "loss": 3.8451,
      "step": 1443
    },
    {
      "epoch": 112.0,
      "learning_rate": 0.003104,
      "loss": 3.8274,
      "step": 1456
    },
    {
      "epoch": 113.0,
      "learning_rate": 0.0030960000000000002,
      "loss": 3.8483,
      "step": 1469
    },
    {
      "epoch": 114.0,
      "learning_rate": 0.003088,
      "loss": 3.7911,
      "step": 1482
    },
    {
      "epoch": 115.0,
      "learning_rate": 0.0030800000000000003,
      "loss": 3.8203,
      "step": 1495
    },
    {
      "epoch": 116.0,
      "learning_rate": 0.003072,
      "loss": 3.7111,
      "step": 1508
    },
    {
      "epoch": 117.0,
      "learning_rate": 0.0030640000000000003,
      "loss": 3.7186,
      "step": 1521
    },
    {
      "epoch": 118.0,
      "learning_rate": 0.003056,
      "loss": 3.6357,
      "step": 1534
    },
    {
      "epoch": 119.0,
      "learning_rate": 0.003048,
      "loss": 3.6484,
      "step": 1547
    },
    {
      "epoch": 120.0,
      "learning_rate": 0.00304,
      "loss": 3.7188,
      "step": 1560
    },
    {
      "epoch": 121.0,
      "learning_rate": 0.003032,
      "loss": 3.6217,
      "step": 1573
    },
    {
      "epoch": 122.0,
      "learning_rate": 0.003024,
      "loss": 3.5853,
      "step": 1586
    },
    {
      "epoch": 123.0,
      "learning_rate": 0.003016,
      "loss": 3.6381,
      "step": 1599
    },
    {
      "epoch": 124.0,
      "learning_rate": 0.0030080000000000003,
      "loss": 3.6051,
      "step": 1612
    },
    {
      "epoch": 125.0,
      "learning_rate": 0.003,
      "loss": 3.6293,
      "step": 1625
    },
    {
      "epoch": 126.0,
      "learning_rate": 0.002992,
      "loss": 3.626,
      "step": 1638
    },
    {
      "epoch": 127.0,
      "learning_rate": 0.002984,
      "loss": 3.6121,
      "step": 1651
    },
    {
      "epoch": 128.0,
      "learning_rate": 0.002976,
      "loss": 3.5777,
      "step": 1664
    },
    {
      "epoch": 129.0,
      "learning_rate": 0.002968,
      "loss": 3.551,
      "step": 1677
    },
    {
      "epoch": 130.0,
      "learning_rate": 0.00296,
      "loss": 3.534,
      "step": 1690
    },
    {
      "epoch": 131.0,
      "learning_rate": 0.002952,
      "loss": 3.5946,
      "step": 1703
    },
    {
      "epoch": 132.0,
      "learning_rate": 0.002944,
      "loss": 3.6511,
      "step": 1716
    },
    {
      "epoch": 133.0,
      "learning_rate": 0.002936,
      "loss": 3.5556,
      "step": 1729
    },
    {
      "epoch": 134.0,
      "learning_rate": 0.002928,
      "loss": 3.5453,
      "step": 1742
    },
    {
      "epoch": 135.0,
      "learning_rate": 0.00292,
      "loss": 3.5641,
      "step": 1755
    },
    {
      "epoch": 136.0,
      "learning_rate": 0.002912,
      "loss": 3.5357,
      "step": 1768
    },
    {
      "epoch": 137.0,
      "learning_rate": 0.002904,
      "loss": 3.5738,
      "step": 1781
    },
    {
      "epoch": 138.0,
      "learning_rate": 0.002896,
      "loss": 3.4697,
      "step": 1794
    },
    {
      "epoch": 139.0,
      "learning_rate": 0.002888,
      "loss": 3.4405,
      "step": 1807
    },
    {
      "epoch": 140.0,
      "learning_rate": 0.0028799999999999997,
      "loss": 3.3998,
      "step": 1820
    },
    {
      "epoch": 141.0,
      "learning_rate": 0.002872,
      "loss": 3.4035,
      "step": 1833
    },
    {
      "epoch": 142.0,
      "learning_rate": 0.002864,
      "loss": 3.4335,
      "step": 1846
    },
    {
      "epoch": 143.0,
      "learning_rate": 0.002856,
      "loss": 3.4105,
      "step": 1859
    },
    {
      "epoch": 144.0,
      "learning_rate": 0.002848,
      "loss": 3.3161,
      "step": 1872
    },
    {
      "epoch": 145.0,
      "learning_rate": 0.00284,
      "loss": 3.2802,
      "step": 1885
    },
    {
      "epoch": 146.0,
      "learning_rate": 0.002832,
      "loss": 3.2573,
      "step": 1898
    },
    {
      "epoch": 147.0,
      "learning_rate": 0.0028239999999999997,
      "loss": 3.265,
      "step": 1911
    },
    {
      "epoch": 148.0,
      "learning_rate": 0.002816,
      "loss": 3.3362,
      "step": 1924
    },
    {
      "epoch": 149.0,
      "learning_rate": 0.0028079999999999997,
      "loss": 3.2085,
      "step": 1937
    },
    {
      "epoch": 150.0,
      "learning_rate": 0.0028,
      "loss": 3.2445,
      "step": 1950
    },
    {
      "epoch": 151.0,
      "learning_rate": 0.0027919999999999998,
      "loss": 3.2212,
      "step": 1963
    },
    {
      "epoch": 152.0,
      "learning_rate": 0.002784,
      "loss": 3.2135,
      "step": 1976
    },
    {
      "epoch": 153.0,
      "learning_rate": 0.002776,
      "loss": 3.173,
      "step": 1989
    },
    {
      "epoch": 154.0,
      "learning_rate": 0.002768,
      "loss": 3.1946,
      "step": 2002
    },
    {
      "epoch": 155.0,
      "learning_rate": 0.00276,
      "loss": 3.1739,
      "step": 2015
    },
    {
      "epoch": 156.0,
      "learning_rate": 0.0027519999999999997,
      "loss": 3.1975,
      "step": 2028
    },
    {
      "epoch": 157.0,
      "learning_rate": 0.0027440000000000003,
      "loss": 3.148,
      "step": 2041
    },
    {
      "epoch": 158.0,
      "learning_rate": 0.002736,
      "loss": 3.1124,
      "step": 2054
    },
    {
      "epoch": 159.0,
      "learning_rate": 0.0027280000000000004,
      "loss": 3.1101,
      "step": 2067
    },
    {
      "epoch": 160.0,
      "learning_rate": 0.00272,
      "loss": 3.155,
      "step": 2080
    },
    {
      "epoch": 161.0,
      "learning_rate": 0.0027120000000000004,
      "loss": 3.091,
      "step": 2093
    },
    {
      "epoch": 162.0,
      "learning_rate": 0.0027040000000000002,
      "loss": 3.0156,
      "step": 2106
    },
    {
      "epoch": 163.0,
      "learning_rate": 0.002696,
      "loss": 3.031,
      "step": 2119
    },
    {
      "epoch": 164.0,
      "learning_rate": 0.0026880000000000003,
      "loss": 3.0426,
      "step": 2132
    },
    {
      "epoch": 165.0,
      "learning_rate": 0.00268,
      "loss": 2.9667,
      "step": 2145
    },
    {
      "epoch": 166.0,
      "learning_rate": 0.0026720000000000003,
      "loss": 2.9496,
      "step": 2158
    },
    {
      "epoch": 167.0,
      "learning_rate": 0.002664,
      "loss": 3.0151,
      "step": 2171
    },
    {
      "epoch": 168.0,
      "learning_rate": 0.0026560000000000004,
      "loss": 3.0202,
      "step": 2184
    },
    {
      "epoch": 169.0,
      "learning_rate": 0.002648,
      "loss": 3.1202,
      "step": 2197
    },
    {
      "epoch": 170.0,
      "learning_rate": 0.00264,
      "loss": 3.0814,
      "step": 2210
    },
    {
      "epoch": 171.0,
      "learning_rate": 0.0026320000000000002,
      "loss": 2.9501,
      "step": 2223
    },
    {
      "epoch": 172.0,
      "learning_rate": 0.002624,
      "loss": 2.8994,
      "step": 2236
    },
    {
      "epoch": 173.0,
      "learning_rate": 0.0026160000000000003,
      "loss": 2.8437,
      "step": 2249
    },
    {
      "epoch": 174.0,
      "learning_rate": 0.002608,
      "loss": 2.8867,
      "step": 2262
    },
    {
      "epoch": 175.0,
      "learning_rate": 0.0026000000000000003,
      "loss": 2.8977,
      "step": 2275
    },
    {
      "epoch": 176.0,
      "learning_rate": 0.002592,
      "loss": 2.8601,
      "step": 2288
    },
    {
      "epoch": 177.0,
      "learning_rate": 0.002584,
      "loss": 2.9511,
      "step": 2301
    },
    {
      "epoch": 178.0,
      "learning_rate": 0.002576,
      "loss": 2.8396,
      "step": 2314
    },
    {
      "epoch": 179.0,
      "learning_rate": 0.002568,
      "loss": 2.8238,
      "step": 2327
    },
    {
      "epoch": 180.0,
      "learning_rate": 0.00256,
      "loss": 2.8048,
      "step": 2340
    },
    {
      "epoch": 181.0,
      "learning_rate": 0.002552,
      "loss": 2.7583,
      "step": 2353
    },
    {
      "epoch": 182.0,
      "learning_rate": 0.0025440000000000003,
      "loss": 2.7443,
      "step": 2366
    },
    {
      "epoch": 183.0,
      "learning_rate": 0.002536,
      "loss": 2.7362,
      "step": 2379
    },
    {
      "epoch": 184.0,
      "learning_rate": 0.002528,
      "loss": 2.7878,
      "step": 2392
    },
    {
      "epoch": 185.0,
      "learning_rate": 0.00252,
      "loss": 2.7811,
      "step": 2405
    },
    {
      "epoch": 186.0,
      "learning_rate": 0.002512,
      "loss": 2.7213,
      "step": 2418
    },
    {
      "epoch": 187.0,
      "learning_rate": 0.002504,
      "loss": 2.7716,
      "step": 2431
    },
    {
      "epoch": 188.0,
      "learning_rate": 0.002496,
      "loss": 2.7761,
      "step": 2444
    },
    {
      "epoch": 189.0,
      "learning_rate": 0.002488,
      "loss": 2.7456,
      "step": 2457
    },
    {
      "epoch": 190.0,
      "learning_rate": 0.00248,
      "loss": 2.9211,
      "step": 2470
    },
    {
      "epoch": 191.0,
      "learning_rate": 0.0024720000000000002,
      "loss": 2.9644,
      "step": 2483
    },
    {
      "epoch": 192.0,
      "learning_rate": 0.002464,
      "loss": 2.7444,
      "step": 2496
    },
    {
      "epoch": 193.0,
      "learning_rate": 0.002456,
      "loss": 2.7094,
      "step": 2509
    },
    {
      "epoch": 194.0,
      "learning_rate": 0.002448,
      "loss": 2.6593,
      "step": 2522
    },
    {
      "epoch": 195.0,
      "learning_rate": 0.00244,
      "loss": 2.6424,
      "step": 2535
    },
    {
      "epoch": 196.0,
      "learning_rate": 0.002432,
      "loss": 2.5913,
      "step": 2548
    },
    {
      "epoch": 197.0,
      "learning_rate": 0.002424,
      "loss": 2.6003,
      "step": 2561
    },
    {
      "epoch": 198.0,
      "learning_rate": 0.002416,
      "loss": 2.6317,
      "step": 2574
    },
    {
      "epoch": 199.0,
      "learning_rate": 0.002408,
      "loss": 2.6468,
      "step": 2587
    },
    {
      "epoch": 200.0,
      "learning_rate": 0.0024,
      "loss": 2.5951,
      "step": 2600
    },
    {
      "epoch": 201.0,
      "learning_rate": 0.002392,
      "loss": 2.5915,
      "step": 2613
    },
    {
      "epoch": 202.0,
      "learning_rate": 0.002384,
      "loss": 2.568,
      "step": 2626
    },
    {
      "epoch": 203.0,
      "learning_rate": 0.002376,
      "loss": 2.5466,
      "step": 2639
    },
    {
      "epoch": 204.0,
      "learning_rate": 0.002368,
      "loss": 2.6858,
      "step": 2652
    },
    {
      "epoch": 205.0,
      "learning_rate": 0.00236,
      "loss": 2.5551,
      "step": 2665
    },
    {
      "epoch": 206.0,
      "learning_rate": 0.002352,
      "loss": 2.5618,
      "step": 2678
    },
    {
      "epoch": 207.0,
      "learning_rate": 0.0023439999999999997,
      "loss": 2.5309,
      "step": 2691
    },
    {
      "epoch": 208.0,
      "learning_rate": 0.002336,
      "loss": 2.5307,
      "step": 2704
    },
    {
      "epoch": 209.0,
      "learning_rate": 0.0023279999999999998,
      "loss": 2.5008,
      "step": 2717
    },
    {
      "epoch": 210.0,
      "learning_rate": 0.00232,
      "loss": 2.5485,
      "step": 2730
    },
    {
      "epoch": 211.0,
      "learning_rate": 0.002312,
      "loss": 2.547,
      "step": 2743
    },
    {
      "epoch": 212.0,
      "learning_rate": 0.002304,
      "loss": 2.461,
      "step": 2756
    },
    {
      "epoch": 213.0,
      "learning_rate": 0.002296,
      "loss": 2.4375,
      "step": 2769
    },
    {
      "epoch": 214.0,
      "learning_rate": 0.0022879999999999997,
      "loss": 2.4417,
      "step": 2782
    },
    {
      "epoch": 215.0,
      "learning_rate": 0.00228,
      "loss": 2.4427,
      "step": 2795
    },
    {
      "epoch": 216.0,
      "learning_rate": 0.0022719999999999997,
      "loss": 2.4756,
      "step": 2808
    },
    {
      "epoch": 217.0,
      "learning_rate": 0.002264,
      "loss": 2.4662,
      "step": 2821
    },
    {
      "epoch": 218.0,
      "learning_rate": 0.0022559999999999998,
      "loss": 2.4931,
      "step": 2834
    },
    {
      "epoch": 219.0,
      "learning_rate": 0.0022480000000000004,
      "loss": 2.4438,
      "step": 2847
    },
    {
      "epoch": 220.0,
      "learning_rate": 0.0022400000000000002,
      "loss": 2.3834,
      "step": 2860
    },
    {
      "epoch": 221.0,
      "learning_rate": 0.002232,
      "loss": 2.4078,
      "step": 2873
    },
    {
      "epoch": 222.0,
      "learning_rate": 0.0022240000000000003,
      "loss": 2.3813,
      "step": 2886
    },
    {
      "epoch": 223.0,
      "learning_rate": 0.002216,
      "loss": 2.382,
      "step": 2899
    },
    {
      "epoch": 224.0,
      "learning_rate": 0.0022080000000000003,
      "loss": 2.361,
      "step": 2912
    },
    {
      "epoch": 225.0,
      "learning_rate": 0.0022,
      "loss": 2.3106,
      "step": 2925
    },
    {
      "epoch": 226.0,
      "learning_rate": 0.0021920000000000004,
      "loss": 2.2991,
      "step": 2938
    },
    {
      "epoch": 227.0,
      "learning_rate": 0.002184,
      "loss": 2.231,
      "step": 2951
    },
    {
      "epoch": 228.0,
      "learning_rate": 0.0021760000000000004,
      "loss": 2.2748,
      "step": 2964
    },
    {
      "epoch": 229.0,
      "learning_rate": 0.0021680000000000002,
      "loss": 2.2974,
      "step": 2977
    },
    {
      "epoch": 230.0,
      "learning_rate": 0.00216,
      "loss": 2.2974,
      "step": 2990
    },
    {
      "epoch": 231.0,
      "learning_rate": 0.0021520000000000003,
      "loss": 2.2755,
      "step": 3003
    },
    {
      "epoch": 232.0,
      "learning_rate": 0.002144,
      "loss": 2.287,
      "step": 3016
    },
    {
      "epoch": 233.0,
      "learning_rate": 0.0021360000000000003,
      "loss": 2.2462,
      "step": 3029
    },
    {
      "epoch": 234.0,
      "learning_rate": 0.002128,
      "loss": 2.2528,
      "step": 3042
    },
    {
      "epoch": 235.0,
      "learning_rate": 0.0021200000000000004,
      "loss": 2.2052,
      "step": 3055
    },
    {
      "epoch": 236.0,
      "learning_rate": 0.002112,
      "loss": 2.2461,
      "step": 3068
    },
    {
      "epoch": 237.0,
      "learning_rate": 0.002104,
      "loss": 2.2099,
      "step": 3081
    },
    {
      "epoch": 238.0,
      "learning_rate": 0.002096,
      "loss": 2.1273,
      "step": 3094
    },
    {
      "epoch": 239.0,
      "learning_rate": 0.002088,
      "loss": 2.1668,
      "step": 3107
    },
    {
      "epoch": 240.0,
      "learning_rate": 0.0020800000000000003,
      "loss": 2.1719,
      "step": 3120
    },
    {
      "epoch": 241.0,
      "learning_rate": 0.002072,
      "loss": 2.171,
      "step": 3133
    },
    {
      "epoch": 242.0,
      "learning_rate": 0.0020640000000000003,
      "loss": 2.1436,
      "step": 3146
    },
    {
      "epoch": 243.0,
      "learning_rate": 0.002056,
      "loss": 2.1698,
      "step": 3159
    },
    {
      "epoch": 244.0,
      "learning_rate": 0.002048,
      "loss": 2.1576,
      "step": 3172
    },
    {
      "epoch": 245.0,
      "learning_rate": 0.00204,
      "loss": 2.1641,
      "step": 3185
    },
    {
      "epoch": 246.0,
      "learning_rate": 0.002032,
      "loss": 2.1721,
      "step": 3198
    },
    {
      "epoch": 247.0,
      "learning_rate": 0.002024,
      "loss": 2.1615,
      "step": 3211
    },
    {
      "epoch": 248.0,
      "learning_rate": 0.002016,
      "loss": 2.0983,
      "step": 3224
    },
    {
      "epoch": 249.0,
      "learning_rate": 0.0020080000000000002,
      "loss": 2.108,
      "step": 3237
    },
    {
      "epoch": 250.0,
      "learning_rate": 0.002,
      "loss": 2.1167,
      "step": 3250
    },
    {
      "epoch": 251.0,
      "learning_rate": 0.001992,
      "loss": 2.0951,
      "step": 3263
    },
    {
      "epoch": 252.0,
      "learning_rate": 0.001984,
      "loss": 2.0415,
      "step": 3276
    },
    {
      "epoch": 253.0,
      "learning_rate": 0.001976,
      "loss": 2.101,
      "step": 3289
    },
    {
      "epoch": 254.0,
      "learning_rate": 0.001968,
      "loss": 2.1233,
      "step": 3302
    },
    {
      "epoch": 255.0,
      "learning_rate": 0.00196,
      "loss": 2.0782,
      "step": 3315
    },
    {
      "epoch": 256.0,
      "learning_rate": 0.001952,
      "loss": 2.0033,
      "step": 3328
    },
    {
      "epoch": 257.0,
      "learning_rate": 0.001944,
      "loss": 2.051,
      "step": 3341
    },
    {
      "epoch": 258.0,
      "learning_rate": 0.001936,
      "loss": 2.0587,
      "step": 3354
    },
    {
      "epoch": 259.0,
      "learning_rate": 0.001928,
      "loss": 1.9981,
      "step": 3367
    },
    {
      "epoch": 260.0,
      "learning_rate": 0.00192,
      "loss": 2.0506,
      "step": 3380
    },
    {
      "epoch": 261.0,
      "learning_rate": 0.0019119999999999999,
      "loss": 2.0815,
      "step": 3393
    },
    {
      "epoch": 262.0,
      "learning_rate": 0.0019039999999999999,
      "loss": 2.0054,
      "step": 3406
    },
    {
      "epoch": 263.0,
      "learning_rate": 0.001896,
      "loss": 1.9923,
      "step": 3419
    },
    {
      "epoch": 264.0,
      "learning_rate": 0.001888,
      "loss": 1.9892,
      "step": 3432
    },
    {
      "epoch": 265.0,
      "learning_rate": 0.00188,
      "loss": 1.9406,
      "step": 3445
    },
    {
      "epoch": 266.0,
      "learning_rate": 0.0018720000000000002,
      "loss": 1.9295,
      "step": 3458
    },
    {
      "epoch": 267.0,
      "learning_rate": 0.0018640000000000002,
      "loss": 1.9791,
      "step": 3471
    },
    {
      "epoch": 268.0,
      "learning_rate": 0.0018560000000000002,
      "loss": 1.9413,
      "step": 3484
    },
    {
      "epoch": 269.0,
      "learning_rate": 0.001848,
      "loss": 1.9363,
      "step": 3497
    },
    {
      "epoch": 270.0,
      "learning_rate": 0.00184,
      "loss": 2.0056,
      "step": 3510
    },
    {
      "epoch": 271.0,
      "learning_rate": 0.001832,
      "loss": 1.9298,
      "step": 3523
    },
    {
      "epoch": 272.0,
      "learning_rate": 0.001824,
      "loss": 1.9045,
      "step": 3536
    },
    {
      "epoch": 273.0,
      "learning_rate": 0.0018160000000000001,
      "loss": 1.9165,
      "step": 3549
    },
    {
      "epoch": 274.0,
      "learning_rate": 0.0018080000000000001,
      "loss": 1.9214,
      "step": 3562
    },
    {
      "epoch": 275.0,
      "learning_rate": 0.0018000000000000002,
      "loss": 1.9063,
      "step": 3575
    },
    {
      "epoch": 276.0,
      "learning_rate": 0.001792,
      "loss": 1.9016,
      "step": 3588
    },
    {
      "epoch": 277.0,
      "learning_rate": 0.001784,
      "loss": 1.8091,
      "step": 3601
    },
    {
      "epoch": 278.0,
      "learning_rate": 0.001776,
      "loss": 1.8626,
      "step": 3614
    },
    {
      "epoch": 279.0,
      "learning_rate": 0.001768,
      "loss": 1.8663,
      "step": 3627
    },
    {
      "epoch": 280.0,
      "learning_rate": 0.00176,
      "loss": 1.9432,
      "step": 3640
    },
    {
      "epoch": 281.0,
      "learning_rate": 0.001752,
      "loss": 1.8664,
      "step": 3653
    },
    {
      "epoch": 282.0,
      "learning_rate": 0.001744,
      "loss": 1.8603,
      "step": 3666
    },
    {
      "epoch": 283.0,
      "learning_rate": 0.0017360000000000001,
      "loss": 1.8335,
      "step": 3679
    },
    {
      "epoch": 284.0,
      "learning_rate": 0.001728,
      "loss": 1.8625,
      "step": 3692
    },
    {
      "epoch": 285.0,
      "learning_rate": 0.00172,
      "loss": 1.8043,
      "step": 3705
    },
    {
      "epoch": 286.0,
      "learning_rate": 0.001712,
      "loss": 1.8061,
      "step": 3718
    },
    {
      "epoch": 287.0,
      "learning_rate": 0.001704,
      "loss": 1.835,
      "step": 3731
    },
    {
      "epoch": 288.0,
      "learning_rate": 0.001696,
      "loss": 1.7944,
      "step": 3744
    },
    {
      "epoch": 289.0,
      "learning_rate": 0.001688,
      "loss": 1.8492,
      "step": 3757
    },
    {
      "epoch": 290.0,
      "learning_rate": 0.00168,
      "loss": 1.812,
      "step": 3770
    },
    {
      "epoch": 291.0,
      "learning_rate": 0.0016719999999999999,
      "loss": 1.8175,
      "step": 3783
    },
    {
      "epoch": 292.0,
      "learning_rate": 0.001664,
      "loss": 1.7943,
| "step": 3796 | |
| }, | |
| { | |
| "epoch": 293.0, | |
| "learning_rate": 0.001656, | |
| "loss": 1.8063, | |
| "step": 3809 | |
| }, | |
| { | |
| "epoch": 294.0, | |
| "learning_rate": 0.001648, | |
| "loss": 1.7992, | |
| "step": 3822 | |
| }, | |
| { | |
| "epoch": 295.0, | |
| "learning_rate": 0.00164, | |
| "loss": 1.7959, | |
| "step": 3835 | |
| }, | |
| { | |
| "epoch": 296.0, | |
| "learning_rate": 0.001632, | |
| "loss": 1.7256, | |
| "step": 3848 | |
| }, | |
| { | |
| "epoch": 297.0, | |
| "learning_rate": 0.0016240000000000002, | |
| "loss": 1.7673, | |
| "step": 3861 | |
| }, | |
| { | |
| "epoch": 298.0, | |
| "learning_rate": 0.001616, | |
| "loss": 1.8299, | |
| "step": 3874 | |
| }, | |
| { | |
| "epoch": 299.0, | |
| "learning_rate": 0.001608, | |
| "loss": 1.8147, | |
| "step": 3887 | |
| }, | |
| { | |
| "epoch": 300.0, | |
| "learning_rate": 0.0016, | |
| "loss": 1.7495, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 301.0, | |
| "learning_rate": 0.001592, | |
| "loss": 1.8001, | |
| "step": 3913 | |
| }, | |
| { | |
| "epoch": 302.0, | |
| "learning_rate": 0.0015840000000000001, | |
| "loss": 1.7707, | |
| "step": 3926 | |
| }, | |
| { | |
| "epoch": 303.0, | |
| "learning_rate": 0.0015760000000000001, | |
| "loss": 1.7283, | |
| "step": 3939 | |
| }, | |
| { | |
| "epoch": 304.0, | |
| "learning_rate": 0.0015680000000000002, | |
| "loss": 1.7133, | |
| "step": 3952 | |
| }, | |
| { | |
| "epoch": 305.0, | |
| "learning_rate": 0.0015600000000000002, | |
| "loss": 1.71, | |
| "step": 3965 | |
| }, | |
| { | |
| "epoch": 306.0, | |
| "learning_rate": 0.001552, | |
| "loss": 1.6685, | |
| "step": 3978 | |
| }, | |
| { | |
| "epoch": 307.0, | |
| "learning_rate": 0.001544, | |
| "loss": 1.6526, | |
| "step": 3991 | |
| }, | |
| { | |
| "epoch": 308.0, | |
| "learning_rate": 0.001536, | |
| "loss": 1.6433, | |
| "step": 4004 | |
| }, | |
| { | |
| "epoch": 309.0, | |
| "learning_rate": 0.001528, | |
| "loss": 1.6823, | |
| "step": 4017 | |
| }, | |
| { | |
| "epoch": 310.0, | |
| "learning_rate": 0.00152, | |
| "loss": 1.6843, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 311.0, | |
| "learning_rate": 0.001512, | |
| "loss": 1.7029, | |
| "step": 4043 | |
| }, | |
| { | |
| "epoch": 312.0, | |
| "learning_rate": 0.0015040000000000001, | |
| "loss": 1.6362, | |
| "step": 4056 | |
| }, | |
| { | |
| "epoch": 313.0, | |
| "learning_rate": 0.001496, | |
| "loss": 1.6648, | |
| "step": 4069 | |
| }, | |
| { | |
| "epoch": 314.0, | |
| "learning_rate": 0.001488, | |
| "loss": 1.7202, | |
| "step": 4082 | |
| }, | |
| { | |
| "epoch": 315.0, | |
| "learning_rate": 0.00148, | |
| "loss": 1.677, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 316.0, | |
| "learning_rate": 0.001472, | |
| "loss": 1.6187, | |
| "step": 4108 | |
| }, | |
| { | |
| "epoch": 317.0, | |
| "learning_rate": 0.001464, | |
| "loss": 1.6398, | |
| "step": 4121 | |
| }, | |
| { | |
| "epoch": 318.0, | |
| "learning_rate": 0.001456, | |
| "loss": 1.6371, | |
| "step": 4134 | |
| }, | |
| { | |
| "epoch": 319.0, | |
| "learning_rate": 0.001448, | |
| "loss": 1.6081, | |
| "step": 4147 | |
| }, | |
| { | |
| "epoch": 320.0, | |
| "learning_rate": 0.0014399999999999999, | |
| "loss": 1.5936, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 321.0, | |
| "learning_rate": 0.001432, | |
| "loss": 1.6336, | |
| "step": 4173 | |
| }, | |
| { | |
| "epoch": 322.0, | |
| "learning_rate": 0.001424, | |
| "loss": 1.6022, | |
| "step": 4186 | |
| }, | |
| { | |
| "epoch": 323.0, | |
| "learning_rate": 0.001416, | |
| "loss": 1.6336, | |
| "step": 4199 | |
| }, | |
| { | |
| "epoch": 324.0, | |
| "learning_rate": 0.001408, | |
| "loss": 1.5898, | |
| "step": 4212 | |
| }, | |
| { | |
| "epoch": 325.0, | |
| "learning_rate": 0.0014, | |
| "loss": 1.5528, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 326.0, | |
| "learning_rate": 0.001392, | |
| "loss": 1.5734, | |
| "step": 4238 | |
| }, | |
| { | |
| "epoch": 327.0, | |
| "learning_rate": 0.001384, | |
| "loss": 1.618, | |
| "step": 4251 | |
| }, | |
| { | |
| "epoch": 328.0, | |
| "learning_rate": 0.0013759999999999998, | |
| "loss": 1.6529, | |
| "step": 4264 | |
| }, | |
| { | |
| "epoch": 329.0, | |
| "learning_rate": 0.001368, | |
| "loss": 1.5824, | |
| "step": 4277 | |
| }, | |
| { | |
| "epoch": 330.0, | |
| "learning_rate": 0.00136, | |
| "loss": 1.609, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 331.0, | |
| "learning_rate": 0.0013520000000000001, | |
| "loss": 1.5796, | |
| "step": 4303 | |
| }, | |
| { | |
| "epoch": 332.0, | |
| "learning_rate": 0.0013440000000000001, | |
| "loss": 1.5924, | |
| "step": 4316 | |
| }, | |
| { | |
| "epoch": 333.0, | |
| "learning_rate": 0.0013360000000000002, | |
| "loss": 1.5841, | |
| "step": 4329 | |
| }, | |
| { | |
| "epoch": 334.0, | |
| "learning_rate": 0.0013280000000000002, | |
| "loss": 1.5487, | |
| "step": 4342 | |
| }, | |
| { | |
| "epoch": 335.0, | |
| "learning_rate": 0.00132, | |
| "loss": 1.4625, | |
| "step": 4355 | |
| }, | |
| { | |
| "epoch": 336.0, | |
| "learning_rate": 0.001312, | |
| "loss": 1.5241, | |
| "step": 4368 | |
| }, | |
| { | |
| "epoch": 337.0, | |
| "learning_rate": 0.001304, | |
| "loss": 1.4823, | |
| "step": 4381 | |
| }, | |
| { | |
| "epoch": 338.0, | |
| "learning_rate": 0.001296, | |
| "loss": 1.5027, | |
| "step": 4394 | |
| }, | |
| { | |
| "epoch": 339.0, | |
| "learning_rate": 0.001288, | |
| "loss": 1.5211, | |
| "step": 4407 | |
| }, | |
| { | |
| "epoch": 340.0, | |
| "learning_rate": 0.00128, | |
| "loss": 1.4912, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 341.0, | |
| "learning_rate": 0.0012720000000000001, | |
| "loss": 1.4792, | |
| "step": 4433 | |
| }, | |
| { | |
| "epoch": 342.0, | |
| "learning_rate": 0.001264, | |
| "loss": 1.4932, | |
| "step": 4446 | |
| }, | |
| { | |
| "epoch": 343.0, | |
| "learning_rate": 0.001256, | |
| "loss": 1.4861, | |
| "step": 4459 | |
| }, | |
| { | |
| "epoch": 344.0, | |
| "learning_rate": 0.001248, | |
| "loss": 1.5171, | |
| "step": 4472 | |
| }, | |
| { | |
| "epoch": 345.0, | |
| "learning_rate": 0.00124, | |
| "loss": 1.494, | |
| "step": 4485 | |
| }, | |
| { | |
| "epoch": 346.0, | |
| "learning_rate": 0.001232, | |
| "loss": 1.4992, | |
| "step": 4498 | |
| }, | |
| { | |
| "epoch": 347.0, | |
| "learning_rate": 0.001224, | |
| "loss": 1.5033, | |
| "step": 4511 | |
| }, | |
| { | |
| "epoch": 348.0, | |
| "learning_rate": 0.001216, | |
| "loss": 1.5039, | |
| "step": 4524 | |
| }, | |
| { | |
| "epoch": 349.0, | |
| "learning_rate": 0.001208, | |
| "loss": 1.5341, | |
| "step": 4537 | |
| }, | |
| { | |
| "epoch": 350.0, | |
| "learning_rate": 0.0012, | |
| "loss": 1.5049, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 351.0, | |
| "learning_rate": 0.001192, | |
| "loss": 1.5104, | |
| "step": 4563 | |
| }, | |
| { | |
| "epoch": 352.0, | |
| "learning_rate": 0.001184, | |
| "loss": 1.4569, | |
| "step": 4576 | |
| }, | |
| { | |
| "epoch": 353.0, | |
| "learning_rate": 0.001176, | |
| "loss": 1.3996, | |
| "step": 4589 | |
| }, | |
| { | |
| "epoch": 354.0, | |
| "learning_rate": 0.001168, | |
| "loss": 1.4337, | |
| "step": 4602 | |
| }, | |
| { | |
| "epoch": 355.0, | |
| "learning_rate": 0.00116, | |
| "loss": 1.4572, | |
| "step": 4615 | |
| }, | |
| { | |
| "epoch": 356.0, | |
| "learning_rate": 0.001152, | |
| "loss": 1.4668, | |
| "step": 4628 | |
| }, | |
| { | |
| "epoch": 357.0, | |
| "learning_rate": 0.0011439999999999998, | |
| "loss": 1.4298, | |
| "step": 4641 | |
| }, | |
| { | |
| "epoch": 358.0, | |
| "learning_rate": 0.0011359999999999999, | |
| "loss": 1.4187, | |
| "step": 4654 | |
| }, | |
| { | |
| "epoch": 359.0, | |
| "learning_rate": 0.0011279999999999999, | |
| "loss": 1.4026, | |
| "step": 4667 | |
| }, | |
| { | |
| "epoch": 360.0, | |
| "learning_rate": 0.0011200000000000001, | |
| "loss": 1.4461, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 361.0, | |
| "learning_rate": 0.0011120000000000001, | |
| "loss": 1.4497, | |
| "step": 4693 | |
| }, | |
| { | |
| "epoch": 362.0, | |
| "learning_rate": 0.0011040000000000002, | |
| "loss": 1.3667, | |
| "step": 4706 | |
| }, | |
| { | |
| "epoch": 363.0, | |
| "learning_rate": 0.0010960000000000002, | |
| "loss": 1.4237, | |
| "step": 4719 | |
| }, | |
| { | |
| "epoch": 364.0, | |
| "learning_rate": 0.0010880000000000002, | |
| "loss": 1.485, | |
| "step": 4732 | |
| }, | |
| { | |
| "epoch": 365.0, | |
| "learning_rate": 0.00108, | |
| "loss": 1.4271, | |
| "step": 4745 | |
| }, | |
| { | |
| "epoch": 366.0, | |
| "learning_rate": 0.001072, | |
| "loss": 1.4046, | |
| "step": 4758 | |
| }, | |
| { | |
| "epoch": 367.0, | |
| "learning_rate": 0.001064, | |
| "loss": 1.3771, | |
| "step": 4771 | |
| }, | |
| { | |
| "epoch": 368.0, | |
| "learning_rate": 0.001056, | |
| "loss": 1.4054, | |
| "step": 4784 | |
| }, | |
| { | |
| "epoch": 369.0, | |
| "learning_rate": 0.001048, | |
| "loss": 1.3886, | |
| "step": 4797 | |
| }, | |
| { | |
| "epoch": 370.0, | |
| "learning_rate": 0.0010400000000000001, | |
| "loss": 1.3583, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 371.0, | |
| "learning_rate": 0.0010320000000000001, | |
| "loss": 1.3606, | |
| "step": 4823 | |
| }, | |
| { | |
| "epoch": 372.0, | |
| "learning_rate": 0.001024, | |
| "loss": 1.3619, | |
| "step": 4836 | |
| }, | |
| { | |
| "epoch": 373.0, | |
| "learning_rate": 0.001016, | |
| "loss": 1.3723, | |
| "step": 4849 | |
| }, | |
| { | |
| "epoch": 374.0, | |
| "learning_rate": 0.001008, | |
| "loss": 1.3604, | |
| "step": 4862 | |
| }, | |
| { | |
| "epoch": 375.0, | |
| "learning_rate": 0.001, | |
| "loss": 1.3745, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 376.0, | |
| "learning_rate": 0.000992, | |
| "loss": 1.393, | |
| "step": 4888 | |
| }, | |
| { | |
| "epoch": 377.0, | |
| "learning_rate": 0.000984, | |
| "loss": 1.3846, | |
| "step": 4901 | |
| }, | |
| { | |
| "epoch": 378.0, | |
| "learning_rate": 0.000976, | |
| "loss": 1.4033, | |
| "step": 4914 | |
| }, | |
| { | |
| "epoch": 379.0, | |
| "learning_rate": 0.000968, | |
| "loss": 1.3204, | |
| "step": 4927 | |
| }, | |
| { | |
| "epoch": 380.0, | |
| "learning_rate": 0.00096, | |
| "loss": 1.3257, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 381.0, | |
| "learning_rate": 0.0009519999999999999, | |
| "loss": 1.3274, | |
| "step": 4953 | |
| }, | |
| { | |
| "epoch": 382.0, | |
| "learning_rate": 0.000944, | |
| "loss": 1.3177, | |
| "step": 4966 | |
| }, | |
| { | |
| "epoch": 383.0, | |
| "learning_rate": 0.0009360000000000001, | |
| "loss": 1.3204, | |
| "step": 4979 | |
| }, | |
| { | |
| "epoch": 384.0, | |
| "learning_rate": 0.0009280000000000001, | |
| "loss": 1.3349, | |
| "step": 4992 | |
| }, | |
| { | |
| "epoch": 385.0, | |
| "learning_rate": 0.00092, | |
| "loss": 1.3149, | |
| "step": 5005 | |
| }, | |
| { | |
| "epoch": 386.0, | |
| "learning_rate": 0.000912, | |
| "loss": 1.2994, | |
| "step": 5018 | |
| }, | |
| { | |
| "epoch": 387.0, | |
| "learning_rate": 0.0009040000000000001, | |
| "loss": 1.3295, | |
| "step": 5031 | |
| }, | |
| { | |
| "epoch": 388.0, | |
| "learning_rate": 0.000896, | |
| "loss": 1.2975, | |
| "step": 5044 | |
| }, | |
| { | |
| "epoch": 389.0, | |
| "learning_rate": 0.000888, | |
| "loss": 1.3118, | |
| "step": 5057 | |
| }, | |
| { | |
| "epoch": 390.0, | |
| "learning_rate": 0.00088, | |
| "loss": 1.2712, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 391.0, | |
| "learning_rate": 0.000872, | |
| "loss": 1.3184, | |
| "step": 5083 | |
| }, | |
| { | |
| "epoch": 392.0, | |
| "learning_rate": 0.000864, | |
| "loss": 1.2687, | |
| "step": 5096 | |
| }, | |
| { | |
| "epoch": 393.0, | |
| "learning_rate": 0.000856, | |
| "loss": 1.2826, | |
| "step": 5109 | |
| }, | |
| { | |
| "epoch": 394.0, | |
| "learning_rate": 0.000848, | |
| "loss": 1.2766, | |
| "step": 5122 | |
| }, | |
| { | |
| "epoch": 395.0, | |
| "learning_rate": 0.00084, | |
| "loss": 1.2935, | |
| "step": 5135 | |
| }, | |
| { | |
| "epoch": 396.0, | |
| "learning_rate": 0.000832, | |
| "loss": 1.288, | |
| "step": 5148 | |
| }, | |
| { | |
| "epoch": 397.0, | |
| "learning_rate": 0.000824, | |
| "loss": 1.2617, | |
| "step": 5161 | |
| }, | |
| { | |
| "epoch": 398.0, | |
| "learning_rate": 0.000816, | |
| "loss": 1.2675, | |
| "step": 5174 | |
| }, | |
| { | |
| "epoch": 399.0, | |
| "learning_rate": 0.000808, | |
| "loss": 1.2895, | |
| "step": 5187 | |
| }, | |
| { | |
| "epoch": 400.0, | |
| "learning_rate": 0.0008, | |
| "loss": 1.2721, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 401.0, | |
| "learning_rate": 0.0007920000000000001, | |
| "loss": 1.2897, | |
| "step": 5213 | |
| }, | |
| { | |
| "epoch": 402.0, | |
| "learning_rate": 0.0007840000000000001, | |
| "loss": 1.2608, | |
| "step": 5226 | |
| }, | |
| { | |
| "epoch": 403.0, | |
| "learning_rate": 0.000776, | |
| "loss": 1.271, | |
| "step": 5239 | |
| }, | |
| { | |
| "epoch": 404.0, | |
| "learning_rate": 0.000768, | |
| "loss": 1.2581, | |
| "step": 5252 | |
| }, | |
| { | |
| "epoch": 405.0, | |
| "learning_rate": 0.00076, | |
| "loss": 1.2497, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 406.0, | |
| "learning_rate": 0.0007520000000000001, | |
| "loss": 1.2846, | |
| "step": 5278 | |
| }, | |
| { | |
| "epoch": 407.0, | |
| "learning_rate": 0.000744, | |
| "loss": 1.2718, | |
| "step": 5291 | |
| }, | |
| { | |
| "epoch": 408.0, | |
| "learning_rate": 0.000736, | |
| "loss": 1.2733, | |
| "step": 5304 | |
| }, | |
| { | |
| "epoch": 409.0, | |
| "learning_rate": 0.000728, | |
| "loss": 1.2918, | |
| "step": 5317 | |
| }, | |
| { | |
| "epoch": 410.0, | |
| "learning_rate": 0.0007199999999999999, | |
| "loss": 1.2659, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 411.0, | |
| "learning_rate": 0.000712, | |
| "loss": 1.2946, | |
| "step": 5343 | |
| }, | |
| { | |
| "epoch": 412.0, | |
| "learning_rate": 0.000704, | |
| "loss": 1.2425, | |
| "step": 5356 | |
| }, | |
| { | |
| "epoch": 413.0, | |
| "learning_rate": 0.000696, | |
| "loss": 1.2293, | |
| "step": 5369 | |
| }, | |
| { | |
| "epoch": 414.0, | |
| "learning_rate": 0.0006879999999999999, | |
| "loss": 1.2847, | |
| "step": 5382 | |
| }, | |
| { | |
| "epoch": 415.0, | |
| "learning_rate": 0.00068, | |
| "loss": 1.2318, | |
| "step": 5395 | |
| }, | |
| { | |
| "epoch": 416.0, | |
| "learning_rate": 0.0006720000000000001, | |
| "loss": 1.237, | |
| "step": 5408 | |
| }, | |
| { | |
| "epoch": 417.0, | |
| "learning_rate": 0.0006640000000000001, | |
| "loss": 1.1875, | |
| "step": 5421 | |
| }, | |
| { | |
| "epoch": 418.0, | |
| "learning_rate": 0.000656, | |
| "loss": 1.2204, | |
| "step": 5434 | |
| }, | |
| { | |
| "epoch": 419.0, | |
| "learning_rate": 0.000648, | |
| "loss": 1.1848, | |
| "step": 5447 | |
| }, | |
| { | |
| "epoch": 420.0, | |
| "learning_rate": 0.00064, | |
| "loss": 1.2146, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 421.0, | |
| "learning_rate": 0.000632, | |
| "loss": 1.1621, | |
| "step": 5473 | |
| }, | |
| { | |
| "epoch": 422.0, | |
| "learning_rate": 0.000624, | |
| "loss": 1.1883, | |
| "step": 5486 | |
| }, | |
| { | |
| "epoch": 423.0, | |
| "learning_rate": 0.000616, | |
| "loss": 1.183, | |
| "step": 5499 | |
| }, | |
| { | |
| "epoch": 424.0, | |
| "learning_rate": 0.000608, | |
| "loss": 1.1649, | |
| "step": 5512 | |
| }, | |
| { | |
| "epoch": 425.0, | |
| "learning_rate": 0.0006, | |
| "loss": 1.1824, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 426.0, | |
| "learning_rate": 0.000592, | |
| "loss": 1.2073, | |
| "step": 5538 | |
| }, | |
| { | |
| "epoch": 427.0, | |
| "learning_rate": 0.000584, | |
| "loss": 1.147, | |
| "step": 5551 | |
| }, | |
| { | |
| "epoch": 428.0, | |
| "learning_rate": 0.000576, | |
| "loss": 1.1798, | |
| "step": 5564 | |
| }, | |
| { | |
| "epoch": 429.0, | |
| "learning_rate": 0.0005679999999999999, | |
| "loss": 1.14, | |
| "step": 5577 | |
| }, | |
| { | |
| "epoch": 430.0, | |
| "learning_rate": 0.0005600000000000001, | |
| "loss": 1.1585, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 431.0, | |
| "learning_rate": 0.0005520000000000001, | |
| "loss": 1.1687, | |
| "step": 5603 | |
| }, | |
| { | |
| "epoch": 432.0, | |
| "learning_rate": 0.0005440000000000001, | |
| "loss": 1.1285, | |
| "step": 5616 | |
| }, | |
| { | |
| "epoch": 433.0, | |
| "learning_rate": 0.000536, | |
| "loss": 1.1472, | |
| "step": 5629 | |
| }, | |
| { | |
| "epoch": 434.0, | |
| "learning_rate": 0.000528, | |
| "loss": 1.1894, | |
| "step": 5642 | |
| }, | |
| { | |
| "epoch": 435.0, | |
| "learning_rate": 0.0005200000000000001, | |
| "loss": 1.1606, | |
| "step": 5655 | |
| }, | |
| { | |
| "epoch": 436.0, | |
| "learning_rate": 0.000512, | |
| "loss": 1.1294, | |
| "step": 5668 | |
| }, | |
| { | |
| "epoch": 437.0, | |
| "learning_rate": 0.000504, | |
| "loss": 1.1597, | |
| "step": 5681 | |
| }, | |
| { | |
| "epoch": 438.0, | |
| "learning_rate": 0.000496, | |
| "loss": 1.1772, | |
| "step": 5694 | |
| }, | |
| { | |
| "epoch": 439.0, | |
| "learning_rate": 0.000488, | |
| "loss": 1.2044, | |
| "step": 5707 | |
| }, | |
| { | |
| "epoch": 440.0, | |
| "learning_rate": 0.00048, | |
| "loss": 1.1543, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 441.0, | |
| "learning_rate": 0.000472, | |
| "loss": 1.1868, | |
| "step": 5733 | |
| }, | |
| { | |
| "epoch": 442.0, | |
| "learning_rate": 0.00046400000000000006, | |
| "loss": 1.1821, | |
| "step": 5746 | |
| }, | |
| { | |
| "epoch": 443.0, | |
| "learning_rate": 0.000456, | |
| "loss": 1.0897, | |
| "step": 5759 | |
| }, | |
| { | |
| "epoch": 444.0, | |
| "learning_rate": 0.000448, | |
| "loss": 1.0977, | |
| "step": 5772 | |
| }, | |
| { | |
| "epoch": 445.0, | |
| "learning_rate": 0.00044, | |
| "loss": 1.1695, | |
| "step": 5785 | |
| }, | |
| { | |
| "epoch": 446.0, | |
| "learning_rate": 0.000432, | |
| "loss": 1.1332, | |
| "step": 5798 | |
| }, | |
| { | |
| "epoch": 447.0, | |
| "learning_rate": 0.000424, | |
| "loss": 1.1321, | |
| "step": 5811 | |
| }, | |
| { | |
| "epoch": 448.0, | |
| "learning_rate": 0.000416, | |
| "loss": 1.1315, | |
| "step": 5824 | |
| }, | |
| { | |
| "epoch": 449.0, | |
| "learning_rate": 0.000408, | |
| "loss": 1.1178, | |
| "step": 5837 | |
| }, | |
| { | |
| "epoch": 450.0, | |
| "learning_rate": 0.0004, | |
| "loss": 1.1163, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 451.0, | |
| "learning_rate": 0.00039200000000000004, | |
| "loss": 1.1414, | |
| "step": 5863 | |
| }, | |
| { | |
| "epoch": 452.0, | |
| "learning_rate": 0.000384, | |
| "loss": 1.1274, | |
| "step": 5876 | |
| }, | |
| { | |
| "epoch": 453.0, | |
| "learning_rate": 0.00037600000000000003, | |
| "loss": 1.1067, | |
| "step": 5889 | |
| }, | |
| { | |
| "epoch": 454.0, | |
| "learning_rate": 0.000368, | |
| "loss": 1.0889, | |
| "step": 5902 | |
| }, | |
| { | |
| "epoch": 455.0, | |
| "learning_rate": 0.00035999999999999997, | |
| "loss": 1.0844, | |
| "step": 5915 | |
| }, | |
| { | |
| "epoch": 456.0, | |
| "learning_rate": 0.000352, | |
| "loss": 1.1341, | |
| "step": 5928 | |
| }, | |
| { | |
| "epoch": 457.0, | |
| "learning_rate": 0.00034399999999999996, | |
| "loss": 1.0644, | |
| "step": 5941 | |
| }, | |
| { | |
| "epoch": 458.0, | |
| "learning_rate": 0.00033600000000000004, | |
| "loss": 1.0991, | |
| "step": 5954 | |
| }, | |
| { | |
| "epoch": 459.0, | |
| "learning_rate": 0.000328, | |
| "loss": 1.1176, | |
| "step": 5967 | |
| }, | |
| { | |
| "epoch": 460.0, | |
| "learning_rate": 0.00032, | |
| "loss": 1.0997, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 461.0, | |
| "learning_rate": 0.000312, | |
| "loss": 1.0997, | |
| "step": 5993 | |
| }, | |
| { | |
| "epoch": 462.0, | |
| "learning_rate": 0.000304, | |
| "loss": 1.0763, | |
| "step": 6006 | |
| }, | |
| { | |
| "epoch": 463.0, | |
| "learning_rate": 0.000296, | |
| "loss": 1.1102, | |
| "step": 6019 | |
| }, | |
| { | |
| "epoch": 464.0, | |
| "learning_rate": 0.000288, | |
| "loss": 1.1236, | |
| "step": 6032 | |
| }, | |
| { | |
| "epoch": 465.0, | |
| "learning_rate": 0.00028000000000000003, | |
| "loss": 1.0941, | |
| "step": 6045 | |
| }, | |
| { | |
| "epoch": 466.0, | |
| "learning_rate": 0.00027200000000000005, | |
| "loss": 1.0976, | |
| "step": 6058 | |
| }, | |
| { | |
| "epoch": 467.0, | |
| "learning_rate": 0.000264, | |
| "loss": 1.0688, | |
| "step": 6071 | |
| }, | |
| { | |
| "epoch": 468.0, | |
| "learning_rate": 0.000256, | |
| "loss": 1.0591, | |
| "step": 6084 | |
| }, | |
| { | |
| "epoch": 469.0, | |
| "learning_rate": 0.000248, | |
| "loss": 1.0695, | |
| "step": 6097 | |
| }, | |
| { | |
| "epoch": 470.0, | |
| "learning_rate": 0.00024, | |
| "loss": 1.071, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 471.0, | |
| "learning_rate": 0.00023200000000000003, | |
| "loss": 1.0709, | |
| "step": 6123 | |
| }, | |
| { | |
| "epoch": 472.0, | |
| "learning_rate": 0.000224, | |
| "loss": 1.0767, | |
| "step": 6136 | |
| }, | |
| { | |
| "epoch": 473.0, | |
| "learning_rate": 0.000216, | |
| "loss": 1.0741, | |
| "step": 6149 | |
| }, | |
| { | |
| "epoch": 474.0, | |
| "learning_rate": 0.000208, | |
| "loss": 1.0644, | |
| "step": 6162 | |
| }, | |
| { | |
| "epoch": 475.0, | |
| "learning_rate": 0.0002, | |
| "loss": 1.0699, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 476.0, | |
| "learning_rate": 0.000192, | |
| "loss": 1.0727, | |
| "step": 6188 | |
| }, | |
| { | |
| "epoch": 477.0, | |
| "learning_rate": 0.000184, | |
| "loss": 1.06, | |
| "step": 6201 | |
| }, | |
| { | |
| "epoch": 478.0, | |
| "learning_rate": 0.000176, | |
| "loss": 1.0568, | |
| "step": 6214 | |
| }, | |
| { | |
| "epoch": 479.0, | |
| "learning_rate": 0.00016800000000000002, | |
| "loss": 1.0616, | |
| "step": 6227 | |
| }, | |
| { | |
| "epoch": 480.0, | |
| "learning_rate": 0.00016, | |
| "loss": 1.0491, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 481.0, | |
| "learning_rate": 0.000152, | |
| "loss": 1.0502, | |
| "step": 6253 | |
| }, | |
| { | |
| "epoch": 482.0, | |
| "learning_rate": 0.000144, | |
| "loss": 1.0742, | |
| "step": 6266 | |
| }, | |
| { | |
| "epoch": 483.0, | |
| "learning_rate": 0.00013600000000000003, | |
| "loss": 1.0582, | |
| "step": 6279 | |
| }, | |
| { | |
| "epoch": 484.0, | |
| "learning_rate": 0.000128, | |
| "loss": 1.0803, | |
| "step": 6292 | |
| }, | |
| { | |
| "epoch": 485.0, | |
| "learning_rate": 0.00012, | |
| "loss": 1.067, | |
| "step": 6305 | |
| }, | |
| { | |
| "epoch": 486.0, | |
| "learning_rate": 0.000112, | |
| "loss": 1.0397, | |
| "step": 6318 | |
| }, | |
| { | |
| "epoch": 487.0, | |
| "learning_rate": 0.000104, | |
| "loss": 1.0489, | |
| "step": 6331 | |
| }, | |
| { | |
| "epoch": 488.0, | |
| "learning_rate": 9.6e-05, | |
| "loss": 1.0378, | |
| "step": 6344 | |
| }, | |
| { | |
| "epoch": 489.0, | |
| "learning_rate": 8.8e-05, | |
| "loss": 1.0418, | |
| "step": 6357 | |
| }, | |
| { | |
| "epoch": 490.0, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0344, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 491.0, | |
| "learning_rate": 7.2e-05, | |
| "loss": 1.0768, | |
| "step": 6383 | |
| }, | |
| { | |
| "epoch": 492.0, | |
| "learning_rate": 6.4e-05, | |
| "loss": 1.0296, | |
| "step": 6396 | |
| }, | |
| { | |
| "epoch": 493.0, | |
| "learning_rate": 5.6e-05, | |
| "loss": 1.0336, | |
| "step": 6409 | |
| }, | |
| { | |
| "epoch": 494.0, | |
| "learning_rate": 4.8e-05, | |
| "loss": 1.0568, | |
| "step": 6422 | |
| }, | |
| { | |
| "epoch": 495.0, | |
| "learning_rate": 4e-05, | |
| "loss": 1.0647, | |
| "step": 6435 | |
| }, | |
| { | |
| "epoch": 496.0, | |
| "learning_rate": 3.2e-05, | |
| "loss": 1.0448, | |
| "step": 6448 | |
| }, | |
| { | |
| "epoch": 497.0, | |
| "learning_rate": 2.4e-05, | |
| "loss": 1.0602, | |
| "step": 6461 | |
| }, | |
| { | |
| "epoch": 498.0, | |
| "learning_rate": 1.6e-05, | |
| "loss": 1.0615, | |
| "step": 6474 | |
| }, | |
| { | |
| "epoch": 499.0, | |
| "learning_rate": 8e-06, | |
| "loss": 1.0389, | |
| "step": 6487 | |
| }, | |
| { | |
| "epoch": 500.0, | |
| "learning_rate": 0.0, | |
| "loss": 1.0629, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 500.0, | |
| "step": 6500, | |
| "total_flos": 284798065115136.0, | |
| "train_loss": 2.748476623535156, | |
| "train_runtime": 71445.8185, | |
| "train_samples_per_second": 0.7, | |
| "train_steps_per_second": 0.091 | |
| }, | |
| { | |
| "epoch": 500.0, | |
| "step": 6500, | |
| "total_flos": 284798065115136.0, | |
| "train_loss": 0.0, | |
| "train_runtime": 1.3574, | |
| "train_samples_per_second": 36834.222, | |
| "train_steps_per_second": 4788.449 | |
| }, | |
| { | |
| "epoch": 501.0, | |
| "learning_rate": 0.003996, | |
| "loss": 1.4863, | |
| "step": 6513 | |
| }, | |
| { | |
| "epoch": 502.0, | |
| "learning_rate": 0.003992, | |
| "loss": 1.6645, | |
| "step": 6526 | |
| }, | |
| { | |
| "epoch": 503.0, | |
| "learning_rate": 0.003988, | |
| "loss": 1.7012, | |
| "step": 6539 | |
| }, | |
| { | |
| "epoch": 504.0, | |
| "learning_rate": 0.003984, | |
| "loss": 1.6185, | |
| "step": 6552 | |
| }, | |
| { | |
| "epoch": 505.0, | |
| "learning_rate": 0.00398, | |
| "loss": 1.5629, | |
| "step": 6565 | |
| }, | |
| { | |
| "epoch": 506.0, | |
| "learning_rate": 0.003976, | |
| "loss": 1.5867, | |
| "step": 6578 | |
| }, | |
| { | |
| "epoch": 507.0, | |
| "learning_rate": 0.003972, | |
| "loss": 1.6144, | |
| "step": 6591 | |
| }, | |
| { | |
| "epoch": 508.0, | |
| "learning_rate": 0.003968, | |
| "loss": 1.7844, | |
| "step": 6604 | |
| }, | |
| { | |
| "epoch": 509.0, | |
| "learning_rate": 0.003964, | |
| "loss": 1.7508, | |
| "step": 6617 | |
| }, | |
| { | |
| "epoch": 510.0, | |
| "learning_rate": 0.00396, | |
| "loss": 1.7693, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 511.0, | |
| "learning_rate": 0.003956, | |
| "loss": 1.8884, | |
| "step": 6643 | |
| }, | |
| { | |
| "epoch": 512.0, | |
| "learning_rate": 0.003952, | |
| "loss": 1.8287, | |
| "step": 6656 | |
| }, | |
| { | |
| "epoch": 513.0, | |
| "learning_rate": 0.003948, | |
| "loss": 1.8228, | |
| "step": 6669 | |
| }, | |
| { | |
| "epoch": 514.0, | |
| "learning_rate": 0.0039440000000000005, | |
| "loss": 1.7632, | |
| "step": 6682 | |
| }, | |
| { | |
| "epoch": 515.0, | |
| "learning_rate": 0.00394, | |
| "loss": 1.7943, | |
| "step": 6695 | |
| }, | |
| { | |
| "epoch": 516.0, | |
| "learning_rate": 0.003936, | |
| "loss": 1.7451, | |
| "step": 6708 | |
| }, | |
| { | |
| "epoch": 517.0, | |
| "learning_rate": 0.003932, | |
| "loss": 1.8542, | |
| "step": 6721 | |
| }, | |
| { | |
| "epoch": 518.0, | |
| "learning_rate": 0.003928, | |
| "loss": 2.0283, | |
| "step": 6734 | |
| }, | |
| { | |
| "epoch": 519.0, | |
| "learning_rate": 0.003924, | |
| "loss": 2.0074, | |
| "step": 6747 | |
| }, | |
| { | |
| "epoch": 520.0, | |
| "learning_rate": 0.00392, | |
| "loss": 2.1644, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 521.0, | |
| "learning_rate": 0.003916, | |
| "loss": 1.9558, | |
| "step": 6773 | |
| }, | |
| { | |
| "epoch": 522.0, | |
| "learning_rate": 0.003912, | |
| "loss": 1.9104, | |
| "step": 6786 | |
| }, | |
| { | |
| "epoch": 523.0, | |
| "learning_rate": 0.003908, | |
| "loss": 1.9961, | |
| "step": 6799 | |
| }, | |
| { | |
| "epoch": 524.0, | |
| "learning_rate": 0.003904, | |
| "loss": 2.0827, | |
| "step": 6812 | |
| }, | |
| { | |
| "epoch": 525.0, | |
| "learning_rate": 0.0039, | |
| "loss": 2.0293, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 526.0, | |
| "learning_rate": 0.003896, | |
| "loss": 1.9904, | |
| "step": 6838 | |
| }, | |
| { | |
| "epoch": 527.0, | |
| "learning_rate": 0.003892, | |
| "loss": 1.9175, | |
| "step": 6851 | |
| }, | |
| { | |
| "epoch": 528.0, | |
| "learning_rate": 0.003888, | |
| "loss": 1.8658, | |
| "step": 6864 | |
| }, | |
| { | |
| "epoch": 529.0, | |
| "learning_rate": 0.003884, | |
| "loss": 1.8219, | |
| "step": 6877 | |
| }, | |
| { | |
| "epoch": 530.0, | |
| "learning_rate": 0.0038799999999999998, | |
| "loss": 1.884, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 531.0, | |
| "learning_rate": 0.003876, | |
| "loss": 1.9361, | |
| "step": 6903 | |
| }, | |
| { | |
| "epoch": 532.0, | |
| "learning_rate": 0.003872, | |
| "loss": 1.8961, | |
| "step": 6916 | |
| }, | |
| { | |
| "epoch": 533.0, | |
| "learning_rate": 0.003868, | |
| "loss": 1.9082, | |
| "step": 6929 | |
| }, | |
| { | |
| "epoch": 534.0, | |
| "learning_rate": 0.003864, | |
| "loss": 2.0034, | |
| "step": 6942 | |
| }, | |
| { | |
| "epoch": 535.0, | |
| "learning_rate": 0.00386, | |
| "loss": 2.0058, | |
| "step": 6955 | |
| }, | |
| { | |
| "epoch": 536.0, | |
| "learning_rate": 0.003856, | |
| "loss": 1.9934, | |
| "step": 6968 | |
| }, | |
| { | |
| "epoch": 537.0, | |
| "learning_rate": 0.003852, | |
| "loss": 1.9674, | |
| "step": 6981 | |
| }, | |
| { | |
| "epoch": 538.0, | |
| "learning_rate": 0.003848, | |
| "loss": 1.9737, | |
| "step": 6994 | |
| }, | |
| { | |
| "epoch": 539.0, | |
| "learning_rate": 0.0038439999999999998, | |
| "loss": 1.9184, | |
| "step": 7007 | |
| }, | |
| { | |
| "epoch": 540.0, | |
| "learning_rate": 0.00384, | |
| "loss": 1.9147, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 541.0, | |
| "learning_rate": 0.003836, | |
| "loss": 1.9792, | |
| "step": 7033 | |
| }, | |
| { | |
| "epoch": 542.0, | |
| "learning_rate": 0.003832, | |
| "loss": 1.9448, | |
| "step": 7046 | |
| }, | |
| { | |
| "epoch": 543.0, | |
| "learning_rate": 0.003828, | |
| "loss": 1.8897, | |
| "step": 7059 | |
| }, | |
| { | |
| "epoch": 544.0, | |
| "learning_rate": 0.0038239999999999997, | |
| "loss": 1.9048, | |
| "step": 7072 | |
| }, | |
| { | |
| "epoch": 545.0, | |
| "learning_rate": 0.00382, | |
| "loss": 1.9577, | |
| "step": 7085 | |
| }, | |
| { | |
| "epoch": 546.0, | |
| "learning_rate": 0.003816, | |
| "loss": 1.9996, | |
| "step": 7098 | |
| }, | |
| { | |
| "epoch": 547.0, | |
| "learning_rate": 0.003812, | |
| "loss": 1.9895, | |
| "step": 7111 | |
| }, | |
| { | |
| "epoch": 548.0, | |
| "learning_rate": 0.0038079999999999998, | |
| "loss": 2.0381, | |
| "step": 7124 | |
| }, | |
| { | |
| "epoch": 549.0, | |
| "learning_rate": 0.003804, | |
| "loss": 1.9362, | |
| "step": 7137 | |
| }, | |
| { | |
| "epoch": 550.0, | |
| "learning_rate": 0.0038, | |
| "loss": 1.9544, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 551.0, | |
| "learning_rate": 0.003796, | |
| "loss": 1.9497, | |
| "step": 7163 | |
| }, | |
| { | |
| "epoch": 552.0, | |
| "learning_rate": 0.003792, | |
| "loss": 1.9105, | |
| "step": 7176 | |
| }, | |
| { | |
| "epoch": 553.0, | |
| "learning_rate": 0.0037879999999999997, | |
| "loss": 1.896, | |
| "step": 7189 | |
| }, | |
| { | |
| "epoch": 554.0, | |
| "learning_rate": 0.003784, | |
| "loss": 2.0015, | |
| "step": 7202 | |
| }, | |
| { | |
| "epoch": 555.0, | |
| "learning_rate": 0.00378, | |
| "loss": 1.8753, | |
| "step": 7215 | |
| }, | |
| { | |
| "epoch": 556.0, | |
| "learning_rate": 0.003776, | |
| "loss": 1.9087, | |
| "step": 7228 | |
| }, | |
| { | |
| "epoch": 557.0, | |
| "learning_rate": 0.0037719999999999997, | |
| "loss": 1.959, | |
| "step": 7241 | |
| }, | |
| { | |
| "epoch": 558.0, | |
| "learning_rate": 0.003768, | |
| "loss": 2.0541, | |
| "step": 7254 | |
| }, | |
| { | |
| "epoch": 559.0, | |
| "learning_rate": 0.003764, | |
| "loss": 2.0614, | |
| "step": 7267 | |
| }, | |
| { | |
| "epoch": 560.0, | |
| "learning_rate": 0.00376, | |
| "loss": 2.0349, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 561.0, | |
| "learning_rate": 0.003756, | |
| "loss": 1.9517, | |
| "step": 7293 | |
| }, | |
| { | |
| "epoch": 562.0, | |
| "learning_rate": 0.0037519999999999997, | |
| "loss": 2.0094, | |
| "step": 7306 | |
| }, | |
| { | |
| "epoch": 563.0, | |
| "learning_rate": 0.0037480000000000005, | |
| "loss": 2.0459, | |
| "step": 7319 | |
| }, | |
| { | |
| "epoch": 564.0, | |
| "learning_rate": 0.0037440000000000004, | |
| "loss": 2.172, | |
| "step": 7332 | |
| }, | |
| { | |
| "epoch": 565.0, | |
| "learning_rate": 0.0037400000000000003, | |
| "loss": 2.0798, | |
| "step": 7345 | |
| }, | |
| { | |
| "epoch": 566.0, | |
| "learning_rate": 0.003736, | |
| "loss": 2.0222, | |
| "step": 7358 | |
| }, | |
| { | |
| "epoch": 567.0, | |
| "learning_rate": 0.003732, | |
| "loss": 2.0051, | |
| "step": 7371 | |
| }, | |
| { | |
| "epoch": 568.0, | |
| "learning_rate": 0.0037280000000000004, | |
| "loss": 2.2096, | |
| "step": 7384 | |
| }, | |
| { | |
| "epoch": 569.0, | |
| "learning_rate": 0.0037240000000000003, | |
| "loss": 2.2197, | |
| "step": 7397 | |
| }, | |
| { | |
| "epoch": 570.0, | |
| "learning_rate": 0.00372, | |
| "loss": 2.1259, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 571.0, | |
| "learning_rate": 0.003716, | |
| "loss": 2.1098, | |
| "step": 7423 | |
| }, | |
| { | |
| "epoch": 572.0, | |
| "learning_rate": 0.0037120000000000005, | |
| "loss": 2.0734, | |
| "step": 7436 | |
| }, | |
| { | |
| "epoch": 573.0, | |
| "learning_rate": 0.0037080000000000004, | |
| "loss": 2.0822, | |
| "step": 7449 | |
| }, | |
| { | |
| "epoch": 574.0, | |
| "learning_rate": 0.0037040000000000003, | |
| "loss": 2.0771, | |
| "step": 7462 | |
| }, | |
| { | |
| "epoch": 575.0, | |
| "learning_rate": 0.0037, | |
| "loss": 2.0217, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 576.0, | |
| "learning_rate": 0.003696, | |
| "loss": 1.9762, | |
| "step": 7488 | |
| }, | |
| { | |
| "epoch": 577.0, | |
| "learning_rate": 0.0036920000000000004, | |
| "loss": 1.9341, | |
| "step": 7501 | |
| }, | |
| { | |
| "epoch": 578.0, | |
| "learning_rate": 0.0036880000000000003, | |
| "loss": 1.9837, | |
| "step": 7514 | |
| }, | |
| { | |
| "epoch": 579.0, | |
| "learning_rate": 0.003684, | |
| "loss": 1.9337, | |
| "step": 7527 | |
| }, | |
| { | |
| "epoch": 580.0, | |
| "learning_rate": 0.00368, | |
| "loss": 1.8968, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 581.0, | |
| "learning_rate": 0.0036760000000000004, | |
| "loss": 1.8705, | |
| "step": 7553 | |
| }, | |
| { | |
| "epoch": 582.0, | |
| "learning_rate": 0.0036720000000000004, | |
| "loss": 1.8261, | |
| "step": 7566 | |
| }, | |
| { | |
| "epoch": 583.0, | |
| "learning_rate": 0.0036680000000000003, | |
| "loss": 1.9411, | |
| "step": 7579 | |
| }, | |
| { | |
| "epoch": 584.0, | |
| "learning_rate": 0.003664, | |
| "loss": 1.9961, | |
| "step": 7592 | |
| }, | |
| { | |
| "epoch": 585.0, | |
| "learning_rate": 0.00366, | |
| "loss": 1.8865, | |
| "step": 7605 | |
| }, | |
| { | |
| "epoch": 586.0, | |
| "learning_rate": 0.0036560000000000004, | |
| "loss": 1.829, | |
| "step": 7618 | |
| }, | |
| { | |
| "epoch": 587.0, | |
| "learning_rate": 0.0036520000000000003, | |
| "loss": 1.8424, | |
| "step": 7631 | |
| }, | |
| { | |
| "epoch": 588.0, | |
| "learning_rate": 0.003648, | |
| "loss": 1.8463, | |
| "step": 7644 | |
| }, | |
| { | |
| "epoch": 589.0, | |
| "learning_rate": 0.003644, | |
| "loss": 1.8452, | |
| "step": 7657 | |
| }, | |
| { | |
| "epoch": 590.0, | |
| "learning_rate": 0.00364, | |
| "loss": 1.7974, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 591.0, | |
| "learning_rate": 0.0036360000000000003, | |
| "loss": 1.7995, | |
| "step": 7683 | |
| }, | |
| { | |
| "epoch": 592.0, | |
| "learning_rate": 0.0036320000000000002, | |
| "loss": 1.7664, | |
| "step": 7696 | |
| }, | |
| { | |
| "epoch": 593.0, | |
| "learning_rate": 0.003628, | |
| "loss": 1.7451, | |
| "step": 7709 | |
| }, | |
| { | |
| "epoch": 594.0, | |
| "learning_rate": 0.003624, | |
| "loss": 1.7978, | |
| "step": 7722 | |
| }, | |
| { | |
| "epoch": 595.0, | |
| "learning_rate": 0.0036200000000000004, | |
| "loss": 1.9067, | |
| "step": 7735 | |
| }, | |
| { | |
| "epoch": 596.0, | |
| "learning_rate": 0.0036160000000000003, | |
| "loss": 1.8932, | |
| "step": 7748 | |
| }, | |
| { | |
| "epoch": 597.0, | |
| "learning_rate": 0.003612, | |
| "loss": 1.9407, | |
| "step": 7761 | |
| }, | |
| { | |
| "epoch": 598.0, | |
| "learning_rate": 0.003608, | |
| "loss": 1.8776, | |
| "step": 7774 | |
| }, | |
| { | |
| "epoch": 599.0, | |
| "learning_rate": 0.003604, | |
| "loss": 1.8223, | |
| "step": 7787 | |
| }, | |
| { | |
| "epoch": 600.0, | |
| "learning_rate": 0.0036000000000000003, | |
| "loss": 1.7761, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 601.0, | |
| "learning_rate": 0.0035960000000000002, | |
| "loss": 1.7768, | |
| "step": 7813 | |
| }, | |
| { | |
| "epoch": 602.0, | |
| "learning_rate": 0.003592, | |
| "loss": 1.8109, | |
| "step": 7826 | |
| }, | |
| { | |
| "epoch": 603.0, | |
| "learning_rate": 0.003588, | |
| "loss": 1.7787, | |
| "step": 7839 | |
| }, | |
| { | |
| "epoch": 604.0, | |
| "learning_rate": 0.003584, | |
| "loss": 1.9842, | |
| "step": 7852 | |
| }, | |
| { | |
| "epoch": 605.0, | |
| "learning_rate": 0.0035800000000000003, | |
| "loss": 1.9262, | |
| "step": 7865 | |
| }, | |
| { | |
| "epoch": 606.0, | |
| "learning_rate": 0.003576, | |
| "loss": 1.9124, | |
| "step": 7878 | |
| }, | |
| { | |
| "epoch": 607.0, | |
| "learning_rate": 0.003572, | |
| "loss": 1.8407, | |
| "step": 7891 | |
| }, | |
| { | |
| "epoch": 608.0, | |
| "learning_rate": 0.003568, | |
| "loss": 1.8722, | |
| "step": 7904 | |
| }, | |
| { | |
| "epoch": 609.0, | |
| "learning_rate": 0.0035640000000000003, | |
| "loss": 1.7409, | |
| "step": 7917 | |
| }, | |
| { | |
| "epoch": 610.0, | |
| "learning_rate": 0.0035600000000000002, | |
| "loss": 1.712, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 611.0, | |
| "learning_rate": 0.003556, | |
| "loss": 1.6115, | |
| "step": 7943 | |
| }, | |
| { | |
| "epoch": 612.0, | |
| "learning_rate": 0.003552, | |
| "loss": 1.6805, | |
| "step": 7956 | |
| }, | |
| { | |
| "epoch": 613.0, | |
| "learning_rate": 0.003548, | |
| "loss": 1.7829, | |
| "step": 7969 | |
| }, | |
| { | |
| "epoch": 614.0, | |
| "learning_rate": 0.0035440000000000003, | |
| "loss": 1.7498, | |
| "step": 7982 | |
| }, | |
| { | |
| "epoch": 615.0, | |
| "learning_rate": 0.00354, | |
| "loss": 1.7536, | |
| "step": 7995 | |
| }, | |
| { | |
| "epoch": 616.0, | |
| "learning_rate": 0.003536, | |
| "loss": 1.7015, | |
| "step": 8008 | |
| }, | |
| { | |
| "epoch": 617.0, | |
| "learning_rate": 0.003532, | |
| "loss": 1.6556, | |
| "step": 8021 | |
| }, | |
| { | |
| "epoch": 618.0, | |
| "learning_rate": 0.003528, | |
| "loss": 1.7314, | |
| "step": 8034 | |
| }, | |
| { | |
| "epoch": 619.0, | |
| "learning_rate": 0.0035240000000000002, | |
| "loss": 1.6996, | |
| "step": 8047 | |
| }, | |
| { | |
| "epoch": 620.0, | |
| "learning_rate": 0.00352, | |
| "loss": 1.6819, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 621.0, | |
| "learning_rate": 0.003516, | |
| "loss": 1.6994, | |
| "step": 8073 | |
| }, | |
| { | |
| "epoch": 622.0, | |
| "learning_rate": 0.003512, | |
| "loss": 1.6657, | |
| "step": 8086 | |
| }, | |
| { | |
| "epoch": 623.0, | |
| "learning_rate": 0.0035080000000000003, | |
| "loss": 1.6558, | |
| "step": 8099 | |
| }, | |
| { | |
| "epoch": 624.0, | |
| "learning_rate": 0.003504, | |
| "loss": 1.6822, | |
| "step": 8112 | |
| }, | |
| { | |
| "epoch": 625.0, | |
| "learning_rate": 0.0035, | |
| "loss": 1.7245, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 626.0, | |
| "learning_rate": 0.003496, | |
| "loss": 1.8045, | |
| "step": 8138 | |
| }, | |
| { | |
| "epoch": 627.0, | |
| "learning_rate": 0.003492, | |
| "loss": 1.7307, | |
| "step": 8151 | |
| }, | |
| { | |
| "epoch": 628.0, | |
| "learning_rate": 0.003488, | |
| "loss": 1.7469, | |
| "step": 8164 | |
| }, | |
| { | |
| "epoch": 629.0, | |
| "learning_rate": 0.003484, | |
| "loss": 1.7047, | |
| "step": 8177 | |
| }, | |
| { | |
| "epoch": 630.0, | |
| "learning_rate": 0.00348, | |
| "loss": 1.6359, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 631.0, | |
| "learning_rate": 0.003476, | |
| "loss": 1.7324, | |
| "step": 8203 | |
| }, | |
| { | |
| "epoch": 632.0, | |
| "learning_rate": 0.0034720000000000003, | |
| "loss": 1.6107, | |
| "step": 8216 | |
| }, | |
| { | |
| "epoch": 633.0, | |
| "learning_rate": 0.003468, | |
| "loss": 1.5336, | |
| "step": 8229 | |
| }, | |
| { | |
| "epoch": 634.0, | |
| "learning_rate": 0.003464, | |
| "loss": 1.5587, | |
| "step": 8242 | |
| }, | |
| { | |
| "epoch": 635.0, | |
| "learning_rate": 0.00346, | |
| "loss": 1.581, | |
| "step": 8255 | |
| }, | |
| { | |
| "epoch": 636.0, | |
| "learning_rate": 0.003456, | |
| "loss": 1.5281, | |
| "step": 8268 | |
| }, | |
| { | |
| "epoch": 637.0, | |
| "learning_rate": 0.003452, | |
| "loss": 1.5198, | |
| "step": 8281 | |
| }, | |
| { | |
| "epoch": 638.0, | |
| "learning_rate": 0.003448, | |
| "loss": 1.5671, | |
| "step": 8294 | |
| }, | |
| { | |
| "epoch": 639.0, | |
| "learning_rate": 0.003444, | |
| "loss": 1.5257, | |
| "step": 8307 | |
| }, | |
| { | |
| "epoch": 640.0, | |
| "learning_rate": 0.00344, | |
| "loss": 1.5525, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 641.0, | |
| "learning_rate": 0.003436, | |
| "loss": 1.5005, | |
| "step": 8333 | |
| }, | |
| { | |
| "epoch": 642.0, | |
| "learning_rate": 0.003432, | |
| "loss": 1.4971, | |
| "step": 8346 | |
| }, | |
| { | |
| "epoch": 643.0, | |
| "learning_rate": 0.003428, | |
| "loss": 1.4738, | |
| "step": 8359 | |
| }, | |
| { | |
| "epoch": 644.0, | |
| "learning_rate": 0.003424, | |
| "loss": 1.5397, | |
| "step": 8372 | |
| }, | |
| { | |
| "epoch": 645.0, | |
| "learning_rate": 0.00342, | |
| "loss": 1.5092, | |
| "step": 8385 | |
| }, | |
| { | |
| "epoch": 646.0, | |
| "learning_rate": 0.003416, | |
| "loss": 1.5638, | |
| "step": 8398 | |
| }, | |
| { | |
| "epoch": 647.0, | |
| "learning_rate": 0.003412, | |
| "loss": 1.4813, | |
| "step": 8411 | |
| }, | |
| { | |
| "epoch": 648.0, | |
| "learning_rate": 0.003408, | |
| "loss": 1.4827, | |
| "step": 8424 | |
| }, | |
| { | |
| "epoch": 649.0, | |
| "learning_rate": 0.003404, | |
| "loss": 1.5285, | |
| "step": 8437 | |
| }, | |
| { | |
| "epoch": 650.0, | |
| "learning_rate": 0.0034, | |
| "loss": 1.5059, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 651.0, | |
| "learning_rate": 0.003396, | |
| "loss": 1.5452, | |
| "step": 8463 | |
| }, | |
| { | |
| "epoch": 652.0, | |
| "learning_rate": 0.003392, | |
| "loss": 1.6823, | |
| "step": 8476 | |
| }, | |
| { | |
| "epoch": 653.0, | |
| "learning_rate": 0.003388, | |
| "loss": 1.6004, | |
| "step": 8489 | |
| }, | |
| { | |
| "epoch": 654.0, | |
| "learning_rate": 0.003384, | |
| "loss": 1.648, | |
| "step": 8502 | |
| }, | |
| { | |
| "epoch": 655.0, | |
| "learning_rate": 0.0033799999999999998, | |
| "loss": 1.6614, | |
| "step": 8515 | |
| }, | |
| { | |
| "epoch": 656.0, | |
| "learning_rate": 0.003376, | |
| "loss": 1.6969, | |
| "step": 8528 | |
| }, | |
| { | |
| "epoch": 657.0, | |
| "learning_rate": 0.003372, | |
| "loss": 1.5948, | |
| "step": 8541 | |
| }, | |
| { | |
| "epoch": 658.0, | |
| "learning_rate": 0.003368, | |
| "loss": 1.5743, | |
| "step": 8554 | |
| }, | |
| { | |
| "epoch": 659.0, | |
| "learning_rate": 0.003364, | |
| "loss": 1.5521, | |
| "step": 8567 | |
| }, | |
| { | |
| "epoch": 660.0, | |
| "learning_rate": 0.00336, | |
| "loss": 1.5639, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 661.0, | |
| "learning_rate": 0.003356, | |
| "loss": 1.5155, | |
| "step": 8593 | |
| }, | |
| { | |
| "epoch": 662.0, | |
| "learning_rate": 0.003352, | |
| "loss": 1.4494, | |
| "step": 8606 | |
| }, | |
| { | |
| "epoch": 663.0, | |
| "learning_rate": 0.003348, | |
| "loss": 1.5803, | |
| "step": 8619 | |
| }, | |
| { | |
| "epoch": 664.0, | |
| "learning_rate": 0.0033439999999999998, | |
| "loss": 1.5896, | |
| "step": 8632 | |
| }, | |
| { | |
| "epoch": 665.0, | |
| "learning_rate": 0.00334, | |
| "loss": 1.6208, | |
| "step": 8645 | |
| }, | |
| { | |
| "epoch": 666.0, | |
| "learning_rate": 0.003336, | |
| "loss": 1.6345, | |
| "step": 8658 | |
| }, | |
| { | |
| "epoch": 667.0, | |
| "learning_rate": 0.003332, | |
| "loss": 1.6087, | |
| "step": 8671 | |
| }, | |
| { | |
| "epoch": 668.0, | |
| "learning_rate": 0.003328, | |
| "loss": 1.5938, | |
| "step": 8684 | |
| }, | |
| { | |
| "epoch": 669.0, | |
| "learning_rate": 0.0033239999999999997, | |
| "loss": 1.5391, | |
| "step": 8697 | |
| }, | |
| { | |
| "epoch": 670.0, | |
| "learning_rate": 0.00332, | |
| "loss": 1.5938, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 671.0, | |
| "learning_rate": 0.003316, | |
| "loss": 1.6235, | |
| "step": 8723 | |
| }, | |
| { | |
| "epoch": 672.0, | |
| "learning_rate": 0.003312, | |
| "loss": 1.6882, | |
| "step": 8736 | |
| }, | |
| { | |
| "epoch": 673.0, | |
| "learning_rate": 0.0033079999999999997, | |
| "loss": 1.5822, | |
| "step": 8749 | |
| }, | |
| { | |
| "epoch": 674.0, | |
| "learning_rate": 0.003304, | |
| "loss": 1.622, | |
| "step": 8762 | |
| }, | |
| { | |
| "epoch": 675.0, | |
| "learning_rate": 0.0033, | |
| "loss": 1.587, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 676.0, | |
| "learning_rate": 0.003296, | |
| "loss": 1.5119, | |
| "step": 8788 | |
| }, | |
| { | |
| "epoch": 677.0, | |
| "learning_rate": 0.003292, | |
| "loss": 1.467, | |
| "step": 8801 | |
| }, | |
| { | |
| "epoch": 678.0, | |
| "learning_rate": 0.0032879999999999997, | |
| "loss": 1.438, | |
| "step": 8814 | |
| }, | |
| { | |
| "epoch": 679.0, | |
| "learning_rate": 0.003284, | |
| "loss": 1.4497, | |
| "step": 8827 | |
| }, | |
| { | |
| "epoch": 680.0, | |
| "learning_rate": 0.00328, | |
| "loss": 1.4349, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 681.0, | |
| "learning_rate": 0.003276, | |
| "loss": 1.378, | |
| "step": 8853 | |
| }, | |
| { | |
| "epoch": 682.0, | |
| "learning_rate": 0.0032719999999999997, | |
| "loss": 1.3847, | |
| "step": 8866 | |
| }, | |
| { | |
| "epoch": 683.0, | |
| "learning_rate": 0.003268, | |
| "loss": 1.4556, | |
| "step": 8879 | |
| }, | |
| { | |
| "epoch": 684.0, | |
| "learning_rate": 0.003264, | |
| "loss": 1.4435, | |
| "step": 8892 | |
| }, | |
| { | |
| "epoch": 685.0, | |
| "learning_rate": 0.00326, | |
| "loss": 1.4494, | |
| "step": 8905 | |
| }, | |
| { | |
| "epoch": 686.0, | |
| "learning_rate": 0.0032559999999999998, | |
| "loss": 1.4065, | |
| "step": 8918 | |
| }, | |
| { | |
| "epoch": 687.0, | |
| "learning_rate": 0.0032519999999999997, | |
| "loss": 1.4323, | |
| "step": 8931 | |
| }, | |
| { | |
| "epoch": 688.0, | |
| "learning_rate": 0.0032480000000000005, | |
| "loss": 1.4698, | |
| "step": 8944 | |
| }, | |
| { | |
| "epoch": 689.0, | |
| "learning_rate": 0.0032440000000000004, | |
| "loss": 1.4293, | |
| "step": 8957 | |
| }, | |
| { | |
| "epoch": 690.0, | |
| "learning_rate": 0.0032400000000000003, | |
| "loss": 1.5205, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 691.0, | |
| "learning_rate": 0.003236, | |
| "loss": 1.5039, | |
| "step": 8983 | |
| }, | |
| { | |
| "epoch": 692.0, | |
| "learning_rate": 0.003232, | |
| "loss": 1.4715, | |
| "step": 8996 | |
| }, | |
| { | |
| "epoch": 693.0, | |
| "learning_rate": 0.0032280000000000004, | |
| "loss": 1.4448, | |
| "step": 9009 | |
| }, | |
| { | |
| "epoch": 694.0, | |
| "learning_rate": 0.0032240000000000003, | |
| "loss": 1.5031, | |
| "step": 9022 | |
| }, | |
| { | |
| "epoch": 695.0, | |
| "learning_rate": 0.00322, | |
| "loss": 1.5261, | |
| "step": 9035 | |
| }, | |
| { | |
| "epoch": 696.0, | |
| "learning_rate": 0.003216, | |
| "loss": 1.4389, | |
| "step": 9048 | |
| }, | |
| { | |
| "epoch": 697.0, | |
| "learning_rate": 0.0032120000000000004, | |
| "loss": 1.4792, | |
| "step": 9061 | |
| }, | |
| { | |
| "epoch": 698.0, | |
| "learning_rate": 0.0032080000000000003, | |
| "loss": 1.4317, | |
| "step": 9074 | |
| }, | |
| { | |
| "epoch": 699.0, | |
| "learning_rate": 0.0032040000000000003, | |
| "loss": 1.5776, | |
| "step": 9087 | |
| }, | |
| { | |
| "epoch": 700.0, | |
| "learning_rate": 0.0032, | |
| "loss": 1.5326, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 701.0, | |
| "learning_rate": 0.003196, | |
| "loss": 1.5345, | |
| "step": 9113 | |
| }, | |
| { | |
| "epoch": 702.0, | |
| "learning_rate": 0.0031920000000000004, | |
| "loss": 1.5269, | |
| "step": 9126 | |
| }, | |
| { | |
| "epoch": 703.0, | |
| "learning_rate": 0.0031880000000000003, | |
| "loss": 1.4819, | |
| "step": 9139 | |
| }, | |
| { | |
| "epoch": 704.0, | |
| "learning_rate": 0.003184, | |
| "loss": 1.5326, | |
| "step": 9152 | |
| }, | |
| { | |
| "epoch": 705.0, | |
| "learning_rate": 0.00318, | |
| "loss": 1.4257, | |
| "step": 9165 | |
| }, | |
| { | |
| "epoch": 706.0, | |
| "learning_rate": 0.0031760000000000004, | |
| "loss": 1.4306, | |
| "step": 9178 | |
| }, | |
| { | |
| "epoch": 707.0, | |
| "learning_rate": 0.0031720000000000003, | |
| "loss": 1.3884, | |
| "step": 9191 | |
| }, | |
| { | |
| "epoch": 708.0, | |
| "learning_rate": 0.0031680000000000002, | |
| "loss": 1.3421, | |
| "step": 9204 | |
| }, | |
| { | |
| "epoch": 709.0, | |
| "learning_rate": 0.003164, | |
| "loss": 1.394, | |
| "step": 9217 | |
| }, | |
| { | |
| "epoch": 710.0, | |
| "learning_rate": 0.00316, | |
| "loss": 1.3892, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 711.0, | |
| "learning_rate": 0.0031560000000000004, | |
| "loss": 1.4832, | |
| "step": 9243 | |
| }, | |
| { | |
| "epoch": 712.0, | |
| "learning_rate": 0.0031520000000000003, | |
| "loss": 1.4088, | |
| "step": 9256 | |
| }, | |
| { | |
| "epoch": 713.0, | |
| "learning_rate": 0.003148, | |
| "loss": 1.386, | |
| "step": 9269 | |
| }, | |
| { | |
| "epoch": 714.0, | |
| "learning_rate": 0.003144, | |
| "loss": 1.3992, | |
| "step": 9282 | |
| }, | |
| { | |
| "epoch": 715.0, | |
| "learning_rate": 0.00314, | |
| "loss": 1.381, | |
| "step": 9295 | |
| }, | |
| { | |
| "epoch": 716.0, | |
| "learning_rate": 0.0031360000000000003, | |
| "loss": 1.394, | |
| "step": 9308 | |
| }, | |
| { | |
| "epoch": 717.0, | |
| "learning_rate": 0.0031320000000000002, | |
| "loss": 1.4024, | |
| "step": 9321 | |
| }, | |
| { | |
| "epoch": 718.0, | |
| "learning_rate": 0.003128, | |
| "loss": 1.3334, | |
| "step": 9334 | |
| }, | |
| { | |
| "epoch": 719.0, | |
| "learning_rate": 0.003124, | |
| "loss": 1.3467, | |
| "step": 9347 | |
| }, | |
| { | |
| "epoch": 720.0, | |
| "learning_rate": 0.0031200000000000004, | |
| "loss": 1.284, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 721.0, | |
| "learning_rate": 0.0031160000000000003, | |
| "loss": 1.2705, | |
| "step": 9373 | |
| }, | |
| { | |
| "epoch": 722.0, | |
| "learning_rate": 0.003112, | |
| "loss": 1.2919, | |
| "step": 9386 | |
| }, | |
| { | |
| "epoch": 723.0, | |
| "learning_rate": 0.003108, | |
| "loss": 1.3071, | |
| "step": 9399 | |
| }, | |
| { | |
| "epoch": 724.0, | |
| "learning_rate": 0.003104, | |
| "loss": 1.3584, | |
| "step": 9412 | |
| }, | |
| { | |
| "epoch": 725.0, | |
| "learning_rate": 0.0031000000000000003, | |
| "loss": 1.4149, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 726.0, | |
| "learning_rate": 0.0030960000000000002, | |
| "loss": 1.3721, | |
| "step": 9438 | |
| }, | |
| { | |
| "epoch": 727.0, | |
| "learning_rate": 0.003092, | |
| "loss": 1.3719, | |
| "step": 9451 | |
| }, | |
| { | |
| "epoch": 728.0, | |
| "learning_rate": 0.003088, | |
| "loss": 1.3559, | |
| "step": 9464 | |
| }, | |
| { | |
| "epoch": 729.0, | |
| "learning_rate": 0.003084, | |
| "loss": 1.3108, | |
| "step": 9477 | |
| }, | |
| { | |
| "epoch": 730.0, | |
| "learning_rate": 0.0030800000000000003, | |
| "loss": 1.316, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 731.0, | |
| "learning_rate": 0.003076, | |
| "loss": 1.3154, | |
| "step": 9503 | |
| }, | |
| { | |
| "epoch": 732.0, | |
| "learning_rate": 0.003072, | |
| "loss": 1.327, | |
| "step": 9516 | |
| }, | |
| { | |
| "epoch": 733.0, | |
| "learning_rate": 0.003068, | |
| "loss": 1.2914, | |
| "step": 9529 | |
| }, | |
| { | |
| "epoch": 734.0, | |
| "learning_rate": 0.0030640000000000003, | |
| "loss": 1.2891, | |
| "step": 9542 | |
| }, | |
| { | |
| "epoch": 735.0, | |
| "learning_rate": 0.0030600000000000002, | |
| "loss": 1.2923, | |
| "step": 9555 | |
| }, | |
| { | |
| "epoch": 736.0, | |
| "learning_rate": 0.003056, | |
| "loss": 1.3608, | |
| "step": 9568 | |
| }, | |
| { | |
| "epoch": 737.0, | |
| "learning_rate": 0.003052, | |
| "loss": 1.3126, | |
| "step": 9581 | |
| }, | |
| { | |
| "epoch": 738.0, | |
| "learning_rate": 0.003048, | |
| "loss": 1.3673, | |
| "step": 9594 | |
| }, | |
| { | |
| "epoch": 739.0, | |
| "learning_rate": 0.0030440000000000003, | |
| "loss": 1.3951, | |
| "step": 9607 | |
| }, | |
| { | |
| "epoch": 740.0, | |
| "learning_rate": 0.00304, | |
| "loss": 1.3128, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 741.0, | |
| "learning_rate": 0.003036, | |
| "loss": 1.3117, | |
| "step": 9633 | |
| }, | |
| { | |
| "epoch": 742.0, | |
| "learning_rate": 0.003032, | |
| "loss": 1.2828, | |
| "step": 9646 | |
| }, | |
| { | |
| "epoch": 743.0, | |
| "learning_rate": 0.003028, | |
| "loss": 1.3054, | |
| "step": 9659 | |
| }, | |
| { | |
| "epoch": 744.0, | |
| "learning_rate": 0.003024, | |
| "loss": 1.289, | |
| "step": 9672 | |
| }, | |
| { | |
| "epoch": 745.0, | |
| "learning_rate": 0.00302, | |
| "loss": 1.3023, | |
| "step": 9685 | |
| }, | |
| { | |
| "epoch": 746.0, | |
| "learning_rate": 0.003016, | |
| "loss": 1.2972, | |
| "step": 9698 | |
| }, | |
| { | |
| "epoch": 747.0, | |
| "learning_rate": 0.003012, | |
| "loss": 1.281, | |
| "step": 9711 | |
| }, | |
| { | |
| "epoch": 748.0, | |
| "learning_rate": 0.0030080000000000003, | |
| "loss": 1.2475, | |
| "step": 9724 | |
| }, | |
| { | |
| "epoch": 749.0, | |
| "learning_rate": 0.003004, | |
| "loss": 1.2721, | |
| "step": 9737 | |
| }, | |
| { | |
| "epoch": 750.0, | |
| "learning_rate": 0.003, | |
| "loss": 1.3066, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 751.0, | |
| "learning_rate": 0.002996, | |
| "loss": 1.3229, | |
| "step": 9763 | |
| }, | |
| { | |
| "epoch": 752.0, | |
| "learning_rate": 0.002992, | |
| "loss": 1.2095, | |
| "step": 9776 | |
| }, | |
| { | |
| "epoch": 753.0, | |
| "learning_rate": 0.002988, | |
| "loss": 1.2389, | |
| "step": 9789 | |
| }, | |
| { | |
| "epoch": 754.0, | |
| "learning_rate": 0.002984, | |
| "loss": 1.2046, | |
| "step": 9802 | |
| }, | |
| { | |
| "epoch": 755.0, | |
| "learning_rate": 0.00298, | |
| "loss": 1.1953, | |
| "step": 9815 | |
| }, | |
| { | |
| "epoch": 756.0, | |
| "learning_rate": 0.002976, | |
| "loss": 1.1359, | |
| "step": 9828 | |
| }, | |
| { | |
| "epoch": 757.0, | |
| "learning_rate": 0.0029720000000000002, | |
| "loss": 1.13, | |
| "step": 9841 | |
| }, | |
| { | |
| "epoch": 758.0, | |
| "learning_rate": 0.002968, | |
| "loss": 1.1946, | |
| "step": 9854 | |
| }, | |
| { | |
| "epoch": 759.0, | |
| "learning_rate": 0.002964, | |
| "loss": 1.2325, | |
| "step": 9867 | |
| }, | |
| { | |
| "epoch": 760.0, | |
| "learning_rate": 0.00296, | |
| "loss": 1.2435, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 761.0, | |
| "learning_rate": 0.002956, | |
| "loss": 1.2878, | |
| "step": 9893 | |
| }, | |
| { | |
| "epoch": 762.0, | |
| "learning_rate": 0.002952, | |
| "loss": 1.2123, | |
| "step": 9906 | |
| }, | |
| { | |
| "epoch": 763.0, | |
| "learning_rate": 0.002948, | |
| "loss": 1.1953, | |
| "step": 9919 | |
| }, | |
| { | |
| "epoch": 764.0, | |
| "learning_rate": 0.002944, | |
| "loss": 1.2623, | |
| "step": 9932 | |
| }, | |
| { | |
| "epoch": 765.0, | |
| "learning_rate": 0.00294, | |
| "loss": 1.2676, | |
| "step": 9945 | |
| }, | |
| { | |
| "epoch": 766.0, | |
| "learning_rate": 0.002936, | |
| "loss": 1.1999, | |
| "step": 9958 | |
| }, | |
| { | |
| "epoch": 767.0, | |
| "learning_rate": 0.002932, | |
| "loss": 1.2521, | |
| "step": 9971 | |
| }, | |
| { | |
| "epoch": 768.0, | |
| "learning_rate": 0.002928, | |
| "loss": 1.4043, | |
| "step": 9984 | |
| }, | |
| { | |
| "epoch": 769.0, | |
| "learning_rate": 0.002924, | |
| "loss": 1.3043, | |
| "step": 9997 | |
| }, | |
| { | |
| "epoch": 770.0, | |
| "learning_rate": 0.00292, | |
| "loss": 1.1831, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 771.0, | |
| "learning_rate": 0.002916, | |
| "loss": 1.1813, | |
| "step": 10023 | |
| }, | |
| { | |
| "epoch": 772.0, | |
| "learning_rate": 0.002912, | |
| "loss": 1.1946, | |
| "step": 10036 | |
| }, | |
| { | |
| "epoch": 773.0, | |
| "learning_rate": 0.002908, | |
| "loss": 1.2182, | |
| "step": 10049 | |
| }, | |
| { | |
| "epoch": 774.0, | |
| "learning_rate": 0.002904, | |
| "loss": 1.2491, | |
| "step": 10062 | |
| }, | |
| { | |
| "epoch": 775.0, | |
| "learning_rate": 0.0029, | |
| "loss": 1.2422, | |
| "step": 10075 | |
| }, | |
| { | |
| "epoch": 776.0, | |
| "learning_rate": 0.002896, | |
| "loss": 1.2784, | |
| "step": 10088 | |
| }, | |
| { | |
| "epoch": 777.0, | |
| "learning_rate": 0.002892, | |
| "loss": 1.1924, | |
| "step": 10101 | |
| }, | |
| { | |
| "epoch": 778.0, | |
| "learning_rate": 0.002888, | |
| "loss": 1.1739, | |
| "step": 10114 | |
| }, | |
| { | |
| "epoch": 779.0, | |
| "learning_rate": 0.002884, | |
| "loss": 1.2357, | |
| "step": 10127 | |
| }, | |
| { | |
| "epoch": 780.0, | |
| "learning_rate": 0.0028799999999999997, | |
| "loss": 1.1845, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 781.0, | |
| "learning_rate": 0.002876, | |
| "loss": 1.1947, | |
| "step": 10153 | |
| }, | |
| { | |
| "epoch": 782.0, | |
| "learning_rate": 0.002872, | |
| "loss": 1.2135, | |
| "step": 10166 | |
| }, | |
| { | |
| "epoch": 783.0, | |
| "learning_rate": 0.002868, | |
| "loss": 1.1923, | |
| "step": 10179 | |
| }, | |
| { | |
| "epoch": 784.0, | |
| "learning_rate": 0.002864, | |
| "loss": 1.1954, | |
| "step": 10192 | |
| }, | |
| { | |
| "epoch": 785.0, | |
| "learning_rate": 0.00286, | |
| "loss": 1.2599, | |
| "step": 10205 | |
| }, | |
| { | |
| "epoch": 786.0, | |
| "learning_rate": 0.002856, | |
| "loss": 1.1905, | |
| "step": 10218 | |
| }, | |
| { | |
| "epoch": 787.0, | |
| "learning_rate": 0.002852, | |
| "loss": 1.1463, | |
| "step": 10231 | |
| }, | |
| { | |
| "epoch": 788.0, | |
| "learning_rate": 0.002848, | |
| "loss": 1.1417, | |
| "step": 10244 | |
| }, | |
| { | |
| "epoch": 789.0, | |
| "learning_rate": 0.0028439999999999997, | |
| "loss": 1.1696, | |
| "step": 10257 | |
| }, | |
| { | |
| "epoch": 790.0, | |
| "learning_rate": 0.00284, | |
| "loss": 1.1227, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 791.0, | |
| "learning_rate": 0.002836, | |
| "loss": 1.1858, | |
| "step": 10283 | |
| }, | |
| { | |
| "epoch": 792.0, | |
| "learning_rate": 0.002832, | |
| "loss": 1.1615, | |
| "step": 10296 | |
| }, | |
| { | |
| "epoch": 793.0, | |
| "learning_rate": 0.002828, | |
| "loss": 1.2113, | |
| "step": 10309 | |
| }, | |
| { | |
| "epoch": 794.0, | |
| "learning_rate": 0.0028239999999999997, | |
| "loss": 1.1995, | |
| "step": 10322 | |
| }, | |
| { | |
| "epoch": 795.0, | |
| "learning_rate": 0.00282, | |
| "loss": 1.2497, | |
| "step": 10335 | |
| }, | |
| { | |
| "epoch": 796.0, | |
| "learning_rate": 0.002816, | |
| "loss": 1.2255, | |
| "step": 10348 | |
| }, | |
| { | |
| "epoch": 797.0, | |
| "learning_rate": 0.002812, | |
| "loss": 1.2728, | |
| "step": 10361 | |
| }, | |
| { | |
| "epoch": 798.0, | |
| "learning_rate": 0.0028079999999999997, | |
| "loss": 1.2053, | |
| "step": 10374 | |
| }, | |
| { | |
| "epoch": 799.0, | |
| "learning_rate": 0.002804, | |
| "loss": 1.1941, | |
| "step": 10387 | |
| }, | |
| { | |
| "epoch": 800.0, | |
| "learning_rate": 0.0028, | |
| "loss": 1.184, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 801.0, | |
| "learning_rate": 0.002796, | |
| "loss": 1.1575, | |
| "step": 10413 | |
| }, | |
| { | |
| "epoch": 802.0, | |
| "learning_rate": 0.0027919999999999998, | |
| "loss": 1.1571, | |
| "step": 10426 | |
| }, | |
| { | |
| "epoch": 803.0, | |
| "learning_rate": 0.0027879999999999997, | |
| "loss": 1.1619, | |
| "step": 10439 | |
| }, | |
| { | |
| "epoch": 804.0, | |
| "learning_rate": 0.002784, | |
| "loss": 1.175, | |
| "step": 10452 | |
| }, | |
| { | |
| "epoch": 805.0, | |
| "learning_rate": 0.00278, | |
| "loss": 1.2, | |
| "step": 10465 | |
| }, | |
| { | |
| "epoch": 806.0, | |
| "learning_rate": 0.002776, | |
| "loss": 1.1761, | |
| "step": 10478 | |
| }, | |
| { | |
| "epoch": 807.0, | |
| "learning_rate": 0.0027719999999999997, | |
| "loss": 1.0651, | |
| "step": 10491 | |
| }, | |
| { | |
| "epoch": 808.0, | |
| "learning_rate": 0.002768, | |
| "loss": 1.0923, | |
| "step": 10504 | |
| }, | |
| { | |
| "epoch": 809.0, | |
| "learning_rate": 0.002764, | |
| "loss": 1.1537, | |
| "step": 10517 | |
| }, | |
| { | |
| "epoch": 810.0, | |
| "learning_rate": 0.00276, | |
| "loss": 1.1618, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 811.0, | |
| "learning_rate": 0.0027559999999999998, | |
| "loss": 1.2048, | |
| "step": 10543 | |
| }, | |
| { | |
| "epoch": 812.0, | |
| "learning_rate": 0.0027519999999999997, | |
| "loss": 1.1584, | |
| "step": 10556 | |
| }, | |
| { | |
| "epoch": 813.0, | |
| "learning_rate": 0.0027480000000000004, | |
| "loss": 1.1815, | |
| "step": 10569 | |
| }, | |
| { | |
| "epoch": 814.0, | |
| "learning_rate": 0.0027440000000000003, | |
| "loss": 1.1204, | |
| "step": 10582 | |
| }, | |
| { | |
| "epoch": 815.0, | |
| "learning_rate": 0.0027400000000000002, | |
| "loss": 1.1662, | |
| "step": 10595 | |
| }, | |
| { | |
| "epoch": 816.0, | |
| "learning_rate": 0.002736, | |
| "loss": 1.1275, | |
| "step": 10608 | |
| }, | |
| { | |
| "epoch": 817.0, | |
| "learning_rate": 0.002732, | |
| "loss": 1.1124, | |
| "step": 10621 | |
| }, | |
| { | |
| "epoch": 818.0, | |
| "learning_rate": 0.0027280000000000004, | |
| "loss": 1.0765, | |
| "step": 10634 | |
| }, | |
| { | |
| "epoch": 819.0, | |
| "learning_rate": 0.0027240000000000003, | |
| "loss": 1.1159, | |
| "step": 10647 | |
| }, | |
| { | |
| "epoch": 820.0, | |
| "learning_rate": 0.00272, | |
| "loss": 1.1124, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 821.0, | |
| "learning_rate": 0.002716, | |
| "loss": 1.1045, | |
| "step": 10673 | |
| }, | |
| { | |
| "epoch": 822.0, | |
| "learning_rate": 0.0027120000000000004, | |
| "loss": 1.1152, | |
| "step": 10686 | |
| }, | |
| { | |
| "epoch": 823.0, | |
| "learning_rate": 0.0027080000000000003, | |
| "loss": 1.0664, | |
| "step": 10699 | |
| }, | |
| { | |
| "epoch": 824.0, | |
| "learning_rate": 0.0027040000000000002, | |
| "loss": 1.0165, | |
| "step": 10712 | |
| }, | |
| { | |
| "epoch": 825.0, | |
| "learning_rate": 0.0027, | |
| "loss": 1.004, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 826.0, | |
| "learning_rate": 0.002696, | |
| "loss": 1.0194, | |
| "step": 10738 | |
| }, | |
| { | |
| "epoch": 827.0, | |
| "learning_rate": 0.0026920000000000004, | |
| "loss": 1.04, | |
| "step": 10751 | |
| }, | |
| { | |
| "epoch": 828.0, | |
| "learning_rate": 0.0026880000000000003, | |
| "loss": 1.0393, | |
| "step": 10764 | |
| }, | |
| { | |
| "epoch": 829.0, | |
| "learning_rate": 0.002684, | |
| "loss": 0.9552, | |
| "step": 10777 | |
| }, | |
| { | |
| "epoch": 830.0, | |
| "learning_rate": 0.00268, | |
| "loss": 0.9634, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 831.0, | |
| "learning_rate": 0.0026760000000000004, | |
| "loss": 0.9967, | |
| "step": 10803 | |
| }, | |
| { | |
| "epoch": 832.0, | |
| "learning_rate": 0.0026720000000000003, | |
| "loss": 0.994, | |
| "step": 10816 | |
| }, | |
| { | |
| "epoch": 833.0, | |
| "learning_rate": 0.0026680000000000002, | |
| "loss": 0.9906, | |
| "step": 10829 | |
| }, | |
| { | |
| "epoch": 834.0, | |
| "learning_rate": 0.002664, | |
| "loss": 1.0132, | |
| "step": 10842 | |
| }, | |
| { | |
| "epoch": 835.0, | |
| "learning_rate": 0.00266, | |
| "loss": 1.0088, | |
| "step": 10855 | |
| }, | |
| { | |
| "epoch": 836.0, | |
| "learning_rate": 0.0026560000000000004, | |
| "loss": 0.9914, | |
| "step": 10868 | |
| }, | |
| { | |
| "epoch": 837.0, | |
| "learning_rate": 0.0026520000000000003, | |
| "loss": 0.9947, | |
| "step": 10881 | |
| }, | |
| { | |
| "epoch": 838.0, | |
| "learning_rate": 0.002648, | |
| "loss": 1.0053, | |
| "step": 10894 | |
| }, | |
| { | |
| "epoch": 839.0, | |
| "learning_rate": 0.002644, | |
| "loss": 1.0138, | |
| "step": 10907 | |
| }, | |
| { | |
| "epoch": 840.0, | |
| "learning_rate": 0.00264, | |
| "loss": 0.9785, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 841.0, | |
| "learning_rate": 0.0026360000000000003, | |
| "loss": 1.0174, | |
| "step": 10933 | |
| }, | |
| { | |
| "epoch": 842.0, | |
| "learning_rate": 0.0026320000000000002, | |
| "loss": 0.9855, | |
| "step": 10946 | |
| }, | |
| { | |
| "epoch": 843.0, | |
| "learning_rate": 0.002628, | |
| "loss": 1.0005, | |
| "step": 10959 | |
| }, | |
| { | |
| "epoch": 844.0, | |
| "learning_rate": 0.002624, | |
| "loss": 0.9966, | |
| "step": 10972 | |
| }, | |
| { | |
| "epoch": 845.0, | |
| "learning_rate": 0.0026200000000000004, | |
| "loss": 0.9952, | |
| "step": 10985 | |
| }, | |
| { | |
| "epoch": 846.0, | |
| "learning_rate": 0.0026160000000000003, | |
| "loss": 1.001, | |
| "step": 10998 | |
| }, | |
| { | |
| "epoch": 847.0, | |
| "learning_rate": 0.002612, | |
| "loss": 1.0235, | |
| "step": 11011 | |
| }, | |
| { | |
| "epoch": 848.0, | |
| "learning_rate": 0.002608, | |
| "loss": 0.9835, | |
| "step": 11024 | |
| }, | |
| { | |
| "epoch": 849.0, | |
| "learning_rate": 0.002604, | |
| "loss": 0.9951, | |
| "step": 11037 | |
| }, | |
| { | |
| "epoch": 850.0, | |
| "learning_rate": 0.0026000000000000003, | |
| "loss": 1.0329, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 851.0, | |
| "learning_rate": 0.0025960000000000002, | |
| "loss": 1.0021, | |
| "step": 11063 | |
| }, | |
| { | |
| "epoch": 852.0, | |
| "learning_rate": 0.002592, | |
| "loss": 1.0391, | |
| "step": 11076 | |
| }, | |
| { | |
| "epoch": 853.0, | |
| "learning_rate": 0.002588, | |
| "loss": 1.0249, | |
| "step": 11089 | |
| }, | |
| { | |
| "epoch": 854.0, | |
| "learning_rate": 0.002584, | |
| "loss": 0.9974, | |
| "step": 11102 | |
| }, | |
| { | |
| "epoch": 855.0, | |
| "learning_rate": 0.0025800000000000003, | |
| "loss": 1.0149, | |
| "step": 11115 | |
| }, | |
| { | |
| "epoch": 856.0, | |
| "learning_rate": 0.002576, | |
| "loss": 1.0002, | |
| "step": 11128 | |
| }, | |
| { | |
| "epoch": 857.0, | |
| "learning_rate": 0.002572, | |
| "loss": 1.0379, | |
| "step": 11141 | |
| }, | |
| { | |
| "epoch": 858.0, | |
| "learning_rate": 0.002568, | |
| "loss": 1.0381, | |
| "step": 11154 | |
| }, | |
| { | |
| "epoch": 859.0, | |
| "learning_rate": 0.0025640000000000003, | |
| "loss": 0.9772, | |
| "step": 11167 | |
| }, | |
| { | |
| "epoch": 860.0, | |
| "learning_rate": 0.00256, | |
| "loss": 1.0263, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 861.0, | |
| "learning_rate": 0.002556, | |
| "loss": 0.982, | |
| "step": 11193 | |
| }, | |
| { | |
| "epoch": 862.0, | |
| "learning_rate": 0.002552, | |
| "loss": 0.9892, | |
| "step": 11206 | |
| }, | |
| { | |
| "epoch": 863.0, | |
| "learning_rate": 0.002548, | |
| "loss": 0.9708, | |
| "step": 11219 | |
| }, | |
| { | |
| "epoch": 864.0, | |
| "learning_rate": 0.0025440000000000003, | |
| "loss": 0.9883, | |
| "step": 11232 | |
| }, | |
| { | |
| "epoch": 865.0, | |
| "learning_rate": 0.00254, | |
| "loss": 0.9446, | |
| "step": 11245 | |
| }, | |
| { | |
| "epoch": 866.0, | |
| "learning_rate": 0.002536, | |
| "loss": 0.9686, | |
| "step": 11258 | |
| }, | |
| { | |
| "epoch": 867.0, | |
| "learning_rate": 0.002532, | |
| "loss": 1.0044, | |
| "step": 11271 | |
| }, | |
| { | |
| "epoch": 868.0, | |
| "learning_rate": 0.002528, | |
| "loss": 1.0128, | |
| "step": 11284 | |
| }, | |
| { | |
| "epoch": 869.0, | |
| "learning_rate": 0.002524, | |
| "loss": 0.9876, | |
| "step": 11297 | |
| }, | |
| { | |
| "epoch": 870.0, | |
| "learning_rate": 0.00252, | |
| "loss": 0.9992, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 871.0, | |
| "learning_rate": 0.002516, | |
| "loss": 1.1017, | |
| "step": 11323 | |
| }, | |
| { | |
| "epoch": 872.0, | |
| "learning_rate": 0.002512, | |
| "loss": 0.9853, | |
| "step": 11336 | |
| }, | |
| { | |
| "epoch": 873.0, | |
| "learning_rate": 0.0025080000000000002, | |
| "loss": 0.9495, | |
| "step": 11349 | |
| }, | |
| { | |
| "epoch": 874.0, | |
| "learning_rate": 0.002504, | |
| "loss": 0.9292, | |
| "step": 11362 | |
| }, | |
| { | |
| "epoch": 875.0, | |
| "learning_rate": 0.0025, | |
| "loss": 0.9339, | |
| "step": 11375 | |
| }, | |
| { | |
| "epoch": 876.0, | |
| "learning_rate": 0.002496, | |
| "loss": 0.9309, | |
| "step": 11388 | |
| }, | |
| { | |
| "epoch": 877.0, | |
| "learning_rate": 0.002492, | |
| "loss": 0.9303, | |
| "step": 11401 | |
| }, | |
| { | |
| "epoch": 878.0, | |
| "learning_rate": 0.002488, | |
| "loss": 0.8827, | |
| "step": 11414 | |
| }, | |
| { | |
| "epoch": 879.0, | |
| "learning_rate": 0.002484, | |
| "loss": 0.8898, | |
| "step": 11427 | |
| }, | |
| { | |
| "epoch": 880.0, | |
| "learning_rate": 0.00248, | |
| "loss": 0.8748, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 881.0, | |
| "learning_rate": 0.002476, | |
| "loss": 0.921, | |
| "step": 11453 | |
| }, | |
| { | |
| "epoch": 882.0, | |
| "learning_rate": 0.0024720000000000002, | |
| "loss": 0.912, | |
| "step": 11466 | |
| }, | |
| { | |
| "epoch": 883.0, | |
| "learning_rate": 0.002468, | |
| "loss": 0.9684, | |
| "step": 11479 | |
| }, | |
| { | |
| "epoch": 884.0, | |
| "learning_rate": 0.002464, | |
| "loss": 1.0113, | |
| "step": 11492 | |
| }, | |
| { | |
| "epoch": 885.0, | |
| "learning_rate": 0.00246, | |
| "loss": 1.0043, | |
| "step": 11505 | |
| }, | |
| { | |
| "epoch": 886.0, | |
| "learning_rate": 0.002456, | |
| "loss": 0.94, | |
| "step": 11518 | |
| }, | |
| { | |
| "epoch": 887.0, | |
| "learning_rate": 0.002452, | |
| "loss": 0.9166, | |
| "step": 11531 | |
| }, | |
| { | |
| "epoch": 888.0, | |
| "learning_rate": 0.002448, | |
| "loss": 0.9202, | |
| "step": 11544 | |
| }, | |
| { | |
| "epoch": 889.0, | |
| "learning_rate": 0.002444, | |
| "loss": 0.9179, | |
| "step": 11557 | |
| }, | |
| { | |
| "epoch": 890.0, | |
| "learning_rate": 0.00244, | |
| "loss": 0.8928, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 891.0, | |
| "learning_rate": 0.002436, | |
| "loss": 0.9021, | |
| "step": 11583 | |
| }, | |
| { | |
| "epoch": 892.0, | |
| "learning_rate": 0.002432, | |
| "loss": 0.9038, | |
| "step": 11596 | |
| }, | |
| { | |
| "epoch": 893.0, | |
| "learning_rate": 0.002428, | |
| "loss": 0.8446, | |
| "step": 11609 | |
| }, | |
| { | |
| "epoch": 894.0, | |
| "learning_rate": 0.002424, | |
| "loss": 0.9167, | |
| "step": 11622 | |
| }, | |
| { | |
| "epoch": 895.0, | |
| "learning_rate": 0.00242, | |
| "loss": 0.8897, | |
| "step": 11635 | |
| }, | |
| { | |
| "epoch": 896.0, | |
| "learning_rate": 0.002416, | |
| "loss": 0.9227, | |
| "step": 11648 | |
| }, | |
| { | |
| "epoch": 897.0, | |
| "learning_rate": 0.002412, | |
| "loss": 0.8956, | |
| "step": 11661 | |
| }, | |
| { | |
| "epoch": 898.0, | |
| "learning_rate": 0.002408, | |
| "loss": 0.8768, | |
| "step": 11674 | |
| }, | |
| { | |
| "epoch": 899.0, | |
| "learning_rate": 0.002404, | |
| "loss": 0.9134, | |
| "step": 11687 | |
| }, | |
| { | |
| "epoch": 900.0, | |
| "learning_rate": 0.0024, | |
| "loss": 0.8484, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 901.0, | |
| "learning_rate": 0.002396, | |
| "loss": 0.8616, | |
| "step": 11713 | |
| }, | |
| { | |
| "epoch": 902.0, | |
| "learning_rate": 0.002392, | |
| "loss": 0.8669, | |
| "step": 11726 | |
| }, | |
| { | |
| "epoch": 903.0, | |
| "learning_rate": 0.002388, | |
| "loss": 0.8529, | |
| "step": 11739 | |
| }, | |
| { | |
| "epoch": 904.0, | |
| "learning_rate": 0.002384, | |
| "loss": 0.8488, | |
| "step": 11752 | |
| }, | |
| { | |
| "epoch": 905.0, | |
| "learning_rate": 0.0023799999999999997, | |
| "loss": 0.8505, | |
| "step": 11765 | |
| }, | |
| { | |
| "epoch": 906.0, | |
| "learning_rate": 0.002376, | |
| "loss": 0.8264, | |
| "step": 11778 | |
| }, | |
| { | |
| "epoch": 907.0, | |
| "learning_rate": 0.002372, | |
| "loss": 0.8382, | |
| "step": 11791 | |
| }, | |
| { | |
| "epoch": 908.0, | |
| "learning_rate": 0.002368, | |
| "loss": 0.8176, | |
| "step": 11804 | |
| }, | |
| { | |
| "epoch": 909.0, | |
| "learning_rate": 0.0023639999999999998, | |
| "loss": 0.8122, | |
| "step": 11817 | |
| }, | |
| { | |
| "epoch": 910.0, | |
| "learning_rate": 0.00236, | |
| "loss": 0.8175, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 911.0, | |
| "learning_rate": 0.002356, | |
| "loss": 0.8345, | |
| "step": 11843 | |
| }, | |
| { | |
| "epoch": 912.0, | |
| "learning_rate": 0.002352, | |
| "loss": 0.8102, | |
| "step": 11856 | |
| }, | |
| { | |
| "epoch": 913.0, | |
| "learning_rate": 0.002348, | |
| "loss": 0.7818, | |
| "step": 11869 | |
| }, | |
| { | |
| "epoch": 914.0, | |
| "learning_rate": 0.0023439999999999997, | |
| "loss": 0.8027, | |
| "step": 11882 | |
| }, | |
| { | |
| "epoch": 915.0, | |
| "learning_rate": 0.00234, | |
| "loss": 0.7765, | |
| "step": 11895 | |
| }, | |
| { | |
| "epoch": 916.0, | |
| "learning_rate": 0.002336, | |
| "loss": 0.8225, | |
| "step": 11908 | |
| }, | |
| { | |
| "epoch": 917.0, | |
| "learning_rate": 0.002332, | |
| "loss": 0.7882, | |
| "step": 11921 | |
| }, | |
| { | |
| "epoch": 918.0, | |
| "learning_rate": 0.0023279999999999998, | |
| "loss": 0.7784, | |
| "step": 11934 | |
| }, | |
| { | |
| "epoch": 919.0, | |
| "learning_rate": 0.0023239999999999997, | |
| "loss": 0.7751, | |
| "step": 11947 | |
| }, | |
| { | |
| "epoch": 920.0, | |
| "learning_rate": 0.00232, | |
| "loss": 0.7837, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 921.0, | |
| "learning_rate": 0.002316, | |
| "loss": 0.7588, | |
| "step": 11973 | |
| }, | |
| { | |
| "epoch": 922.0, | |
| "learning_rate": 0.002312, | |
| "loss": 0.8106, | |
| "step": 11986 | |
| }, | |
| { | |
| "epoch": 923.0, | |
| "learning_rate": 0.0023079999999999997, | |
| "loss": 0.8359, | |
| "step": 11999 | |
| }, | |
| { | |
| "epoch": 924.0, | |
| "learning_rate": 0.002304, | |
| "loss": 0.7899, | |
| "step": 12012 | |
| }, | |
| { | |
| "epoch": 925.0, | |
| "learning_rate": 0.0023, | |
| "loss": 0.7766, | |
| "step": 12025 | |
| }, | |
| { | |
| "epoch": 926.0, | |
| "learning_rate": 0.002296, | |
| "loss": 0.7978, | |
| "step": 12038 | |
| }, | |
| { | |
| "epoch": 927.0, | |
| "learning_rate": 0.0022919999999999998, | |
| "loss": 0.8012, | |
| "step": 12051 | |
| }, | |
| { | |
| "epoch": 928.0, | |
| "learning_rate": 0.0022879999999999997, | |
| "loss": 0.8112, | |
| "step": 12064 | |
| }, | |
| { | |
| "epoch": 929.0, | |
| "learning_rate": 0.002284, | |
| "loss": 0.8725, | |
| "step": 12077 | |
| }, | |
| { | |
| "epoch": 930.0, | |
| "learning_rate": 0.00228, | |
| "loss": 0.8415, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 931.0, | |
| "learning_rate": 0.002276, | |
| "loss": 0.8444, | |
| "step": 12103 | |
| }, | |
| { | |
| "epoch": 932.0, | |
| "learning_rate": 0.0022719999999999997, | |
| "loss": 0.8459, | |
| "step": 12116 | |
| }, | |
| { | |
| "epoch": 933.0, | |
| "learning_rate": 0.002268, | |
| "loss": 0.7739, | |
| "step": 12129 | |
| }, | |
| { | |
| "epoch": 934.0, | |
| "learning_rate": 0.002264, | |
| "loss": 0.8236, | |
| "step": 12142 | |
| }, | |
| { | |
| "epoch": 935.0, | |
| "learning_rate": 0.00226, | |
| "loss": 0.7746, | |
| "step": 12155 | |
| }, | |
| { | |
| "epoch": 936.0, | |
| "learning_rate": 0.0022559999999999998, | |
| "loss": 0.807, | |
| "step": 12168 | |
| }, | |
| { | |
| "epoch": 937.0, | |
| "learning_rate": 0.0022519999999999997, | |
| "loss": 0.8016, | |
| "step": 12181 | |
| }, | |
| { | |
| "epoch": 938.0, | |
| "learning_rate": 0.0022480000000000004, | |
| "loss": 0.7812, | |
| "step": 12194 | |
| }, | |
| { | |
| "epoch": 939.0, | |
| "learning_rate": 0.0022440000000000003, | |
| "loss": 0.7796, | |
| "step": 12207 | |
| }, | |
| { | |
| "epoch": 940.0, | |
| "learning_rate": 0.0022400000000000002, | |
| "loss": 0.7743, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 941.0, | |
| "learning_rate": 0.002236, | |
| "loss": 0.8141, | |
| "step": 12233 | |
| }, | |
| { | |
| "epoch": 942.0, | |
| "learning_rate": 0.002232, | |
| "loss": 0.7666, | |
| "step": 12246 | |
| }, | |
| { | |
| "epoch": 943.0, | |
| "learning_rate": 0.0022280000000000004, | |
| "loss": 0.7668, | |
| "step": 12259 | |
| }, | |
| { | |
| "epoch": 944.0, | |
| "learning_rate": 0.0022240000000000003, | |
| "loss": 0.7469, | |
| "step": 12272 | |
| }, | |
| { | |
| "epoch": 945.0, | |
| "learning_rate": 0.00222, | |
| "loss": 0.8032, | |
| "step": 12285 | |
| }, | |
| { | |
| "epoch": 946.0, | |
| "learning_rate": 0.002216, | |
| "loss": 0.767, | |
| "step": 12298 | |
| }, | |
| { | |
| "epoch": 947.0, | |
| "learning_rate": 0.0022120000000000004, | |
| "loss": 0.7862, | |
| "step": 12311 | |
| }, | |
| { | |
| "epoch": 948.0, | |
| "learning_rate": 0.0022080000000000003, | |
| "loss": 0.762, | |
| "step": 12324 | |
| }, | |
| { | |
| "epoch": 949.0, | |
| "learning_rate": 0.0022040000000000002, | |
| "loss": 0.762, | |
| "step": 12337 | |
| }, | |
| { | |
| "epoch": 950.0, | |
| "learning_rate": 0.0022, | |
| "loss": 0.7546, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 951.0, | |
| "learning_rate": 0.002196, | |
| "loss": 0.721, | |
| "step": 12363 | |
| }, | |
| { | |
| "epoch": 952.0, | |
| "learning_rate": 0.0021920000000000004, | |
| "loss": 0.7442, | |
| "step": 12376 | |
| }, | |
| { | |
| "epoch": 953.0, | |
| "learning_rate": 0.0021880000000000003, | |
| "loss": 0.7331, | |
| "step": 12389 | |
| }, | |
| { | |
| "epoch": 954.0, | |
| "learning_rate": 0.002184, | |
| "loss": 0.7299, | |
| "step": 12402 | |
| }, | |
| { | |
| "epoch": 955.0, | |
| "learning_rate": 0.00218, | |
| "loss": 0.7114, | |
| "step": 12415 | |
| }, | |
| { | |
| "epoch": 956.0, | |
| "learning_rate": 0.0021760000000000004, | |
| "loss": 0.7443, | |
| "step": 12428 | |
| }, | |
| { | |
| "epoch": 957.0, | |
| "learning_rate": 0.0021720000000000003, | |
| "loss": 0.7247, | |
| "step": 12441 | |
| }, | |
| { | |
| "epoch": 958.0, | |
| "learning_rate": 0.0021680000000000002, | |
| "loss": 0.6941, | |
| "step": 12454 | |
| }, | |
| { | |
| "epoch": 959.0, | |
| "learning_rate": 0.002164, | |
| "loss": 0.6838, | |
| "step": 12467 | |
| }, | |
| { | |
| "epoch": 960.0, | |
| "learning_rate": 0.00216, | |
| "loss": 0.6838, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 961.0, | |
| "learning_rate": 0.0021560000000000004, | |
| "loss": 0.7048, | |
| "step": 12493 | |
| }, | |
| { | |
| "epoch": 962.0, | |
| "learning_rate": 0.0021520000000000003, | |
| "loss": 0.7083, | |
| "step": 12506 | |
| }, | |
| { | |
| "epoch": 963.0, | |
| "learning_rate": 0.002148, | |
| "loss": 0.7166, | |
| "step": 12519 | |
| }, | |
| { | |
| "epoch": 964.0, | |
| "learning_rate": 0.002144, | |
| "loss": 0.7128, | |
| "step": 12532 | |
| }, | |
| { | |
| "epoch": 965.0, | |
| "learning_rate": 0.00214, | |
| "loss": 0.7257, | |
| "step": 12545 | |
| }, | |
| { | |
| "epoch": 966.0, | |
| "learning_rate": 0.0021360000000000003, | |
| "loss": 0.7145, | |
| "step": 12558 | |
| }, | |
| { | |
| "epoch": 967.0, | |
| "learning_rate": 0.002132, | |
| "loss": 0.7173, | |
| "step": 12571 | |
| }, | |
| { | |
| "epoch": 968.0, | |
| "learning_rate": 0.002128, | |
| "loss": 0.7162, | |
| "step": 12584 | |
| }, | |
| { | |
| "epoch": 969.0, | |
| "learning_rate": 0.002124, | |
| "loss": 0.6849, | |
| "step": 12597 | |
| }, | |
| { | |
| "epoch": 970.0, | |
| "learning_rate": 0.0021200000000000004, | |
| "loss": 0.6859, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 971.0, | |
| "learning_rate": 0.0021160000000000003, | |
| "loss": 0.6802, | |
| "step": 12623 | |
| }, | |
| { | |
| "epoch": 972.0, | |
| "learning_rate": 0.002112, | |
| "loss": 0.6965, | |
| "step": 12636 | |
| }, | |
| { | |
| "epoch": 973.0, | |
| "learning_rate": 0.002108, | |
| "loss": 0.6941, | |
| "step": 12649 | |
| }, | |
| { | |
| "epoch": 974.0, | |
| "learning_rate": 0.002104, | |
| "loss": 0.6928, | |
| "step": 12662 | |
| }, | |
| { | |
| "epoch": 975.0, | |
| "learning_rate": 0.0021000000000000003, | |
| "loss": 0.6764, | |
| "step": 12675 | |
| }, | |
| { | |
| "epoch": 976.0, | |
| "learning_rate": 0.002096, | |
| "loss": 0.6569, | |
| "step": 12688 | |
| }, | |
| { | |
| "epoch": 977.0, | |
| "learning_rate": 0.002092, | |
| "loss": 0.6618, | |
| "step": 12701 | |
| }, | |
| { | |
| "epoch": 978.0, | |
| "learning_rate": 0.002088, | |
| "loss": 0.6719, | |
| "step": 12714 | |
| }, | |
| { | |
| "epoch": 979.0, | |
| "learning_rate": 0.002084, | |
| "loss": 0.6584, | |
| "step": 12727 | |
| }, | |
| { | |
| "epoch": 980.0, | |
| "learning_rate": 0.0020800000000000003, | |
| "loss": 0.6911, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 981.0, | |
| "learning_rate": 0.002076, | |
| "loss": 0.688, | |
| "step": 12753 | |
| }, | |
| { | |
| "epoch": 982.0, | |
| "learning_rate": 0.002072, | |
| "loss": 0.6741, | |
| "step": 12766 | |
| }, | |
| { | |
| "epoch": 983.0, | |
| "learning_rate": 0.002068, | |
| "loss": 0.6962, | |
| "step": 12779 | |
| }, | |
| { | |
| "epoch": 984.0, | |
| "learning_rate": 0.0020640000000000003, | |
| "loss": 0.6811, | |
| "step": 12792 | |
| }, | |
| { | |
| "epoch": 985.0, | |
| "learning_rate": 0.00206, | |
| "loss": 0.6717, | |
| "step": 12805 | |
| }, | |
| { | |
| "epoch": 986.0, | |
| "learning_rate": 0.002056, | |
| "loss": 0.6733, | |
| "step": 12818 | |
| }, | |
| { | |
| "epoch": 987.0, | |
| "learning_rate": 0.002052, | |
| "loss": 0.6813, | |
| "step": 12831 | |
| }, | |
| { | |
| "epoch": 988.0, | |
| "learning_rate": 0.002048, | |
| "loss": 0.6472, | |
| "step": 12844 | |
| }, | |
| { | |
| "epoch": 989.0, | |
| "learning_rate": 0.0020440000000000002, | |
| "loss": 0.6508, | |
| "step": 12857 | |
| }, | |
| { | |
| "epoch": 990.0, | |
| "learning_rate": 0.00204, | |
| "loss": 0.6576, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 991.0, | |
| "learning_rate": 0.002036, | |
| "loss": 0.6428, | |
| "step": 12883 | |
| }, | |
| { | |
| "epoch": 992.0, | |
| "learning_rate": 0.002032, | |
| "loss": 0.6505, | |
| "step": 12896 | |
| }, | |
| { | |
| "epoch": 993.0, | |
| "learning_rate": 0.002028, | |
| "loss": 0.6578, | |
| "step": 12909 | |
| }, | |
| { | |
| "epoch": 994.0, | |
| "learning_rate": 0.002024, | |
| "loss": 0.6689, | |
| "step": 12922 | |
| }, | |
| { | |
| "epoch": 995.0, | |
| "learning_rate": 0.00202, | |
| "loss": 0.6625, | |
| "step": 12935 | |
| }, | |
| { | |
| "epoch": 996.0, | |
| "learning_rate": 0.002016, | |
| "loss": 0.6894, | |
| "step": 12948 | |
| }, | |
| { | |
| "epoch": 997.0, | |
| "learning_rate": 0.002012, | |
| "loss": 0.6669, | |
| "step": 12961 | |
| }, | |
| { | |
| "epoch": 998.0, | |
| "learning_rate": 0.0020080000000000002, | |
| "loss": 0.6698, | |
| "step": 12974 | |
| }, | |
| { | |
| "epoch": 999.0, | |
| "learning_rate": 0.002004, | |
| "loss": 0.6861, | |
| "step": 12987 | |
| }, | |
| { | |
| "epoch": 1000.0, | |
| "learning_rate": 0.002, | |
| "loss": 0.7089, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1000.0, | |
| "step": 13000, | |
| "total_flos": 569573692465152.0, | |
| "train_loss": 0.655489089525663, | |
| "train_runtime": 67792.7328, | |
| "train_samples_per_second": 1.475, | |
| "train_steps_per_second": 0.192 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 13000, | |
| "num_train_epochs": 1000, | |
| "save_steps": 500, | |
| "total_flos": 569573692465152.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |