| { |
| "best_metric": 0.717534065246582, |
| "best_model_checkpoint": "./output/checkpoint-450", |
| "epoch": 156.52173913043478, |
| "eval_steps": 150, |
| "global_step": 3600, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.43478260869565216, |
| "grad_norm": 1.5021440982818604, |
| "learning_rate": 3e-06, |
| "loss": 0.9061, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 1.6870536804199219, |
| "learning_rate": 6e-06, |
| "loss": 0.9023, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.3043478260869565, |
| "grad_norm": 1.729659080505371, |
| "learning_rate": 9e-06, |
| "loss": 0.9004, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.7391304347826086, |
| "grad_norm": 1.453600525856018, |
| "learning_rate": 1.2e-05, |
| "loss": 0.9091, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.1739130434782608, |
| "grad_norm": 1.3518075942993164, |
| "learning_rate": 1.5e-05, |
| "loss": 0.8362, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.608695652173913, |
| "grad_norm": 2.031172513961792, |
| "learning_rate": 1.8e-05, |
| "loss": 0.8893, |
| "step": 60 |
| }, |
| { |
| "epoch": 3.0434782608695654, |
| "grad_norm": 1.484531283378601, |
| "learning_rate": 2.1e-05, |
| "loss": 0.8915, |
| "step": 70 |
| }, |
| { |
| "epoch": 3.4782608695652173, |
| "grad_norm": 1.7294986248016357, |
| "learning_rate": 2.4e-05, |
| "loss": 0.8233, |
| "step": 80 |
| }, |
| { |
| "epoch": 3.9130434782608696, |
| "grad_norm": 1.4242360591888428, |
| "learning_rate": 2.7000000000000002e-05, |
| "loss": 0.8527, |
| "step": 90 |
| }, |
| { |
| "epoch": 4.3478260869565215, |
| "grad_norm": 1.3656773567199707, |
| "learning_rate": 3e-05, |
| "loss": 0.8648, |
| "step": 100 |
| }, |
| { |
| "epoch": 4.782608695652174, |
| "grad_norm": 2.19753098487854, |
| "learning_rate": 2.999999702723963e-05, |
| "loss": 0.8225, |
| "step": 110 |
| }, |
| { |
| "epoch": 5.217391304347826, |
| "grad_norm": 1.0726382732391357, |
| "learning_rate": 2.9999988108959687e-05, |
| "loss": 0.7654, |
| "step": 120 |
| }, |
| { |
| "epoch": 5.6521739130434785, |
| "grad_norm": 1.5603922605514526, |
| "learning_rate": 2.9999973245163716e-05, |
| "loss": 0.7417, |
| "step": 130 |
| }, |
| { |
| "epoch": 6.086956521739131, |
| "grad_norm": 1.9068461656570435, |
| "learning_rate": 2.99999524358576e-05, |
| "loss": 0.7654, |
| "step": 140 |
| }, |
| { |
| "epoch": 6.521739130434782, |
| "grad_norm": 1.1220637559890747, |
| "learning_rate": 2.9999925681049593e-05, |
| "loss": 0.7857, |
| "step": 150 |
| }, |
| { |
| "epoch": 6.521739130434782, |
| "eval_loss": 0.7963114976882935, |
| "eval_runtime": 0.4908, |
| "eval_samples_per_second": 20.374, |
| "eval_steps_per_second": 20.374, |
| "step": 150 |
| }, |
| { |
| "epoch": 6.956521739130435, |
| "grad_norm": 1.5331261157989502, |
| "learning_rate": 2.9999892980750297e-05, |
| "loss": 0.6585, |
| "step": 160 |
| }, |
| { |
| "epoch": 7.391304347826087, |
| "grad_norm": 1.3447493314743042, |
| "learning_rate": 2.9999854334972675e-05, |
| "loss": 0.7388, |
| "step": 170 |
| }, |
| { |
| "epoch": 7.826086956521739, |
| "grad_norm": 1.7259607315063477, |
| "learning_rate": 2.999980974373204e-05, |
| "loss": 0.7293, |
| "step": 180 |
| }, |
| { |
| "epoch": 8.26086956521739, |
| "grad_norm": 1.5403547286987305, |
| "learning_rate": 2.9999759207046075e-05, |
| "loss": 0.6247, |
| "step": 190 |
| }, |
| { |
| "epoch": 8.695652173913043, |
| "grad_norm": 1.7431354522705078, |
| "learning_rate": 2.9999702724934804e-05, |
| "loss": 0.6765, |
| "step": 200 |
| }, |
| { |
| "epoch": 9.130434782608695, |
| "grad_norm": 1.0416122674942017, |
| "learning_rate": 2.999964029742062e-05, |
| "loss": 0.6523, |
| "step": 210 |
| }, |
| { |
| "epoch": 9.565217391304348, |
| "grad_norm": 1.2200145721435547, |
| "learning_rate": 2.9999571924528263e-05, |
| "loss": 0.5592, |
| "step": 220 |
| }, |
| { |
| "epoch": 10.0, |
| "grad_norm": 1.526785969734192, |
| "learning_rate": 2.9999497606284837e-05, |
| "loss": 0.756, |
| "step": 230 |
| }, |
| { |
| "epoch": 10.434782608695652, |
| "grad_norm": 1.4215515851974487, |
| "learning_rate": 2.9999417342719796e-05, |
| "loss": 0.7117, |
| "step": 240 |
| }, |
| { |
| "epoch": 10.869565217391305, |
| "grad_norm": 0.9789811372756958, |
| "learning_rate": 2.9999331133864956e-05, |
| "loss": 0.5896, |
| "step": 250 |
| }, |
| { |
| "epoch": 11.304347826086957, |
| "grad_norm": 1.1944794654846191, |
| "learning_rate": 2.9999238979754485e-05, |
| "loss": 0.6547, |
| "step": 260 |
| }, |
| { |
| "epoch": 11.73913043478261, |
| "grad_norm": 1.050191044807434, |
| "learning_rate": 2.999914088042492e-05, |
| "loss": 0.6475, |
| "step": 270 |
| }, |
| { |
| "epoch": 12.173913043478262, |
| "grad_norm": 1.3121248483657837, |
| "learning_rate": 2.9999036835915132e-05, |
| "loss": 0.594, |
| "step": 280 |
| }, |
| { |
| "epoch": 12.608695652173914, |
| "grad_norm": 1.082655906677246, |
| "learning_rate": 2.9998926846266365e-05, |
| "loss": 0.6326, |
| "step": 290 |
| }, |
| { |
| "epoch": 13.043478260869565, |
| "grad_norm": 1.3888633251190186, |
| "learning_rate": 2.9998810911522213e-05, |
| "loss": 0.5806, |
| "step": 300 |
| }, |
| { |
| "epoch": 13.043478260869565, |
| "eval_loss": 0.7309322357177734, |
| "eval_runtime": 0.5145, |
| "eval_samples_per_second": 19.436, |
| "eval_steps_per_second": 19.436, |
| "step": 300 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.554, |
| "Start_State_samples_per_second": 18.05, |
| "Start_State_steps_per_second": 18.05, |
| "epoch": 13.043478260869565, |
| "step": 300 |
| }, |
| { |
| "SWA_loss": 0.7309322357177734, |
| "SWA_runtime": 0.5629, |
| "SWA_samples_per_second": 17.765, |
| "SWA_steps_per_second": 17.765, |
| "epoch": 13.043478260869565, |
| "step": 300 |
| }, |
| { |
| "EMA_loss": 0.7309322357177734, |
| "EMA_runtime": 0.5474, |
| "EMA_samples_per_second": 18.268, |
| "EMA_steps_per_second": 18.268, |
| "epoch": 13.043478260869565, |
| "step": 300 |
| }, |
| { |
| "epoch": 13.478260869565217, |
| "grad_norm": 1.7805578708648682, |
| "learning_rate": 2.9998689031728636e-05, |
| "loss": 0.5145, |
| "step": 310 |
| }, |
| { |
| "epoch": 13.91304347826087, |
| "grad_norm": 1.533318042755127, |
| "learning_rate": 2.9998561206933938e-05, |
| "loss": 0.6497, |
| "step": 320 |
| }, |
| { |
| "epoch": 14.347826086956522, |
| "grad_norm": 1.4810696840286255, |
| "learning_rate": 2.9998427437188786e-05, |
| "loss": 0.5741, |
| "step": 330 |
| }, |
| { |
| "epoch": 14.782608695652174, |
| "grad_norm": 1.3101780414581299, |
| "learning_rate": 2.99982877225462e-05, |
| "loss": 0.6013, |
| "step": 340 |
| }, |
| { |
| "epoch": 15.217391304347826, |
| "grad_norm": 0.9747373461723328, |
| "learning_rate": 2.9998142063061564e-05, |
| "loss": 0.4991, |
| "step": 350 |
| }, |
| { |
| "epoch": 15.652173913043478, |
| "grad_norm": 1.6347649097442627, |
| "learning_rate": 2.9997990458792603e-05, |
| "loss": 0.5624, |
| "step": 360 |
| }, |
| { |
| "epoch": 16.08695652173913, |
| "grad_norm": 1.6364760398864746, |
| "learning_rate": 2.9997832909799417e-05, |
| "loss": 0.667, |
| "step": 370 |
| }, |
| { |
| "epoch": 16.52173913043478, |
| "grad_norm": 0.9518026113510132, |
| "learning_rate": 2.9997669416144452e-05, |
| "loss": 0.513, |
| "step": 380 |
| }, |
| { |
| "epoch": 16.956521739130434, |
| "grad_norm": 0.9366481304168701, |
| "learning_rate": 2.999749997789251e-05, |
| "loss": 0.5798, |
| "step": 390 |
| }, |
| { |
| "epoch": 17.391304347826086, |
| "grad_norm": 1.1163969039916992, |
| "learning_rate": 2.9997324595110743e-05, |
| "loss": 0.518, |
| "step": 400 |
| }, |
| { |
| "epoch": 17.82608695652174, |
| "grad_norm": 1.2849133014678955, |
| "learning_rate": 2.9997143267868683e-05, |
| "loss": 0.5877, |
| "step": 410 |
| }, |
| { |
| "epoch": 18.26086956521739, |
| "grad_norm": 1.1642106771469116, |
| "learning_rate": 2.9996955996238192e-05, |
| "loss": 0.506, |
| "step": 420 |
| }, |
| { |
| "epoch": 18.695652173913043, |
| "grad_norm": 1.1996164321899414, |
| "learning_rate": 2.9996762780293503e-05, |
| "loss": 0.5315, |
| "step": 430 |
| }, |
| { |
| "epoch": 19.130434782608695, |
| "grad_norm": 1.214064121246338, |
| "learning_rate": 2.9996563620111197e-05, |
| "loss": 0.5334, |
| "step": 440 |
| }, |
| { |
| "epoch": 19.565217391304348, |
| "grad_norm": 1.4286197423934937, |
| "learning_rate": 2.9996358515770218e-05, |
| "loss": 0.5677, |
| "step": 450 |
| }, |
| { |
| "epoch": 19.565217391304348, |
| "eval_loss": 0.717534065246582, |
| "eval_runtime": 0.5321, |
| "eval_samples_per_second": 18.792, |
| "eval_steps_per_second": 18.792, |
| "step": 450 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4001, |
| "Start_State_samples_per_second": 24.995, |
| "Start_State_steps_per_second": 24.995, |
| "epoch": 19.565217391304348, |
| "step": 450 |
| }, |
| { |
| "Raw_Model_loss": 0.717534065246582, |
| "Raw_Model_runtime": 0.4009, |
| "Raw_Model_samples_per_second": 24.944, |
| "Raw_Model_steps_per_second": 24.944, |
| "epoch": 19.565217391304348, |
| "step": 450 |
| }, |
| { |
| "SWA_loss": 0.7233762741088867, |
| "SWA_runtime": 0.3946, |
| "SWA_samples_per_second": 25.341, |
| "SWA_steps_per_second": 25.341, |
| "epoch": 19.565217391304348, |
| "step": 450 |
| }, |
| { |
| "EMA_loss": 0.7309414744377136, |
| "EMA_runtime": 0.4041, |
| "EMA_samples_per_second": 24.748, |
| "EMA_steps_per_second": 24.748, |
| "epoch": 19.565217391304348, |
| "step": 450 |
| }, |
| { |
| "epoch": 20.0, |
| "grad_norm": 2.118807792663574, |
| "learning_rate": 2.9996147467351856e-05, |
| "loss": 0.5147, |
| "step": 460 |
| }, |
| { |
| "epoch": 20.434782608695652, |
| "grad_norm": 1.2774548530578613, |
| "learning_rate": 2.9995930474939773e-05, |
| "loss": 0.4785, |
| "step": 470 |
| }, |
| { |
| "epoch": 20.869565217391305, |
| "grad_norm": 1.4731013774871826, |
| "learning_rate": 2.9995707538619975e-05, |
| "loss": 0.5703, |
| "step": 480 |
| }, |
| { |
| "epoch": 21.304347826086957, |
| "grad_norm": 1.3251285552978516, |
| "learning_rate": 2.9995478658480822e-05, |
| "loss": 0.5164, |
| "step": 490 |
| }, |
| { |
| "epoch": 21.73913043478261, |
| "grad_norm": 1.2412965297698975, |
| "learning_rate": 2.9995243834613043e-05, |
| "loss": 0.5204, |
| "step": 500 |
| }, |
| { |
| "epoch": 22.17391304347826, |
| "grad_norm": 1.7840219736099243, |
| "learning_rate": 2.9995003067109707e-05, |
| "loss": 0.4838, |
| "step": 510 |
| }, |
| { |
| "epoch": 22.608695652173914, |
| "grad_norm": 1.5308188199996948, |
| "learning_rate": 2.9994756356066246e-05, |
| "loss": 0.5616, |
| "step": 520 |
| }, |
| { |
| "epoch": 23.043478260869566, |
| "grad_norm": 1.7345212697982788, |
| "learning_rate": 2.999450370158046e-05, |
| "loss": 0.4929, |
| "step": 530 |
| }, |
| { |
| "epoch": 23.47826086956522, |
| "grad_norm": 1.3111943006515503, |
| "learning_rate": 2.9994245103752478e-05, |
| "loss": 0.4384, |
| "step": 540 |
| }, |
| { |
| "epoch": 23.91304347826087, |
| "grad_norm": 1.234527349472046, |
| "learning_rate": 2.999398056268481e-05, |
| "loss": 0.5266, |
| "step": 550 |
| }, |
| { |
| "epoch": 24.347826086956523, |
| "grad_norm": 1.4057211875915527, |
| "learning_rate": 2.9993710078482306e-05, |
| "loss": 0.5204, |
| "step": 560 |
| }, |
| { |
| "epoch": 24.782608695652176, |
| "grad_norm": 0.9548116326332092, |
| "learning_rate": 2.9993433651252185e-05, |
| "loss": 0.4428, |
| "step": 570 |
| }, |
| { |
| "epoch": 25.217391304347824, |
| "grad_norm": 1.7164983749389648, |
| "learning_rate": 2.9993151281104006e-05, |
| "loss": 0.5329, |
| "step": 580 |
| }, |
| { |
| "epoch": 25.652173913043477, |
| "grad_norm": 1.1313426494598389, |
| "learning_rate": 2.9992862968149695e-05, |
| "loss": 0.4733, |
| "step": 590 |
| }, |
| { |
| "epoch": 26.08695652173913, |
| "grad_norm": 1.1755690574645996, |
| "learning_rate": 2.9992568712503533e-05, |
| "loss": 0.4607, |
| "step": 600 |
| }, |
| { |
| "epoch": 26.08695652173913, |
| "eval_loss": 0.7199033498764038, |
| "eval_runtime": 0.3999, |
| "eval_samples_per_second": 25.009, |
| "eval_steps_per_second": 25.009, |
| "step": 600 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.3949, |
| "Start_State_samples_per_second": 25.321, |
| "Start_State_steps_per_second": 25.321, |
| "epoch": 26.08695652173913, |
| "step": 600 |
| }, |
| { |
| "Raw_Model_loss": 0.7199033498764038, |
| "Raw_Model_runtime": 0.3918, |
| "Raw_Model_samples_per_second": 25.525, |
| "Raw_Model_steps_per_second": 25.525, |
| "epoch": 26.08695652173913, |
| "step": 600 |
| }, |
| { |
| "SWA_loss": 0.7180979251861572, |
| "SWA_runtime": 0.3964, |
| "SWA_samples_per_second": 25.23, |
| "SWA_steps_per_second": 25.23, |
| "epoch": 26.08695652173913, |
| "step": 600 |
| }, |
| { |
| "EMA_loss": 0.7309598326683044, |
| "EMA_runtime": 0.3932, |
| "EMA_samples_per_second": 25.43, |
| "EMA_steps_per_second": 25.43, |
| "epoch": 26.08695652173913, |
| "step": 600 |
| }, |
| { |
| "epoch": 26.52173913043478, |
| "grad_norm": 1.095847249031067, |
| "learning_rate": 2.9992268514282142e-05, |
| "loss": 0.5118, |
| "step": 610 |
| }, |
| { |
| "epoch": 26.956521739130434, |
| "grad_norm": 1.3382961750030518, |
| "learning_rate": 2.999196237360452e-05, |
| "loss": 0.4316, |
| "step": 620 |
| }, |
| { |
| "epoch": 27.391304347826086, |
| "grad_norm": 1.2023630142211914, |
| "learning_rate": 2.9991650290592016e-05, |
| "loss": 0.4756, |
| "step": 630 |
| }, |
| { |
| "epoch": 27.82608695652174, |
| "grad_norm": 1.3882129192352295, |
| "learning_rate": 2.999133226536832e-05, |
| "loss": 0.5011, |
| "step": 640 |
| }, |
| { |
| "epoch": 28.26086956521739, |
| "grad_norm": 1.4160760641098022, |
| "learning_rate": 2.9991008298059493e-05, |
| "loss": 0.4106, |
| "step": 650 |
| }, |
| { |
| "epoch": 28.695652173913043, |
| "grad_norm": 1.5552334785461426, |
| "learning_rate": 2.9990678388793944e-05, |
| "loss": 0.5064, |
| "step": 660 |
| }, |
| { |
| "epoch": 29.130434782608695, |
| "grad_norm": 1.3141825199127197, |
| "learning_rate": 2.999034253770244e-05, |
| "loss": 0.4349, |
| "step": 670 |
| }, |
| { |
| "epoch": 29.565217391304348, |
| "grad_norm": 1.0743430852890015, |
| "learning_rate": 2.9990000744918097e-05, |
| "loss": 0.4704, |
| "step": 680 |
| }, |
| { |
| "epoch": 30.0, |
| "grad_norm": 2.4982922077178955, |
| "learning_rate": 2.9989653010576392e-05, |
| "loss": 0.4144, |
| "step": 690 |
| }, |
| { |
| "epoch": 30.434782608695652, |
| "grad_norm": 1.3296608924865723, |
| "learning_rate": 2.9989299334815158e-05, |
| "loss": 0.4766, |
| "step": 700 |
| }, |
| { |
| "epoch": 30.869565217391305, |
| "grad_norm": 1.62749445438385, |
| "learning_rate": 2.9988939717774578e-05, |
| "loss": 0.412, |
| "step": 710 |
| }, |
| { |
| "epoch": 31.304347826086957, |
| "grad_norm": 0.9021294116973877, |
| "learning_rate": 2.9988574159597194e-05, |
| "loss": 0.4246, |
| "step": 720 |
| }, |
| { |
| "epoch": 31.73913043478261, |
| "grad_norm": 1.641708254814148, |
| "learning_rate": 2.9988202660427907e-05, |
| "loss": 0.4827, |
| "step": 730 |
| }, |
| { |
| "epoch": 32.17391304347826, |
| "grad_norm": 1.1983932256698608, |
| "learning_rate": 2.9987825220413958e-05, |
| "loss": 0.4382, |
| "step": 740 |
| }, |
| { |
| "epoch": 32.608695652173914, |
| "grad_norm": 1.765030026435852, |
| "learning_rate": 2.998744183970496e-05, |
| "loss": 0.4731, |
| "step": 750 |
| }, |
| { |
| "epoch": 32.608695652173914, |
| "eval_loss": 0.7314910888671875, |
| "eval_runtime": 0.4917, |
| "eval_samples_per_second": 20.337, |
| "eval_steps_per_second": 20.337, |
| "step": 750 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4719, |
| "Start_State_samples_per_second": 21.19, |
| "Start_State_steps_per_second": 21.19, |
| "epoch": 32.608695652173914, |
| "step": 750 |
| }, |
| { |
| "Raw_Model_loss": 0.7314910888671875, |
| "Raw_Model_runtime": 0.4163, |
| "Raw_Model_samples_per_second": 24.018, |
| "Raw_Model_steps_per_second": 24.018, |
| "epoch": 32.608695652173914, |
| "step": 750 |
| }, |
| { |
| "SWA_loss": 0.7177615761756897, |
| "SWA_runtime": 0.3941, |
| "SWA_samples_per_second": 25.374, |
| "SWA_steps_per_second": 25.374, |
| "epoch": 32.608695652173914, |
| "step": 750 |
| }, |
| { |
| "EMA_loss": 0.7303470969200134, |
| "EMA_runtime": 0.407, |
| "EMA_samples_per_second": 24.567, |
| "EMA_steps_per_second": 24.567, |
| "epoch": 32.608695652173914, |
| "step": 750 |
| }, |
| { |
| "epoch": 33.04347826086956, |
| "grad_norm": 1.495551347732544, |
| "learning_rate": 2.998705251845287e-05, |
| "loss": 0.4299, |
| "step": 760 |
| }, |
| { |
| "epoch": 33.47826086956522, |
| "grad_norm": 1.644679069519043, |
| "learning_rate": 2.9986657256812e-05, |
| "loss": 0.4302, |
| "step": 770 |
| }, |
| { |
| "epoch": 33.91304347826087, |
| "grad_norm": 1.3021020889282227, |
| "learning_rate": 2.9986256054939022e-05, |
| "loss": 0.4078, |
| "step": 780 |
| }, |
| { |
| "epoch": 34.34782608695652, |
| "grad_norm": 1.483847975730896, |
| "learning_rate": 2.9985848912992956e-05, |
| "loss": 0.4026, |
| "step": 790 |
| }, |
| { |
| "epoch": 34.78260869565217, |
| "grad_norm": 1.5579402446746826, |
| "learning_rate": 2.9985435831135184e-05, |
| "loss": 0.3833, |
| "step": 800 |
| }, |
| { |
| "epoch": 35.21739130434783, |
| "grad_norm": 1.3132578134536743, |
| "learning_rate": 2.9985016809529437e-05, |
| "loss": 0.4742, |
| "step": 810 |
| }, |
| { |
| "epoch": 35.65217391304348, |
| "grad_norm": 1.332205891609192, |
| "learning_rate": 2.9984591848341806e-05, |
| "loss": 0.4028, |
| "step": 820 |
| }, |
| { |
| "epoch": 36.08695652173913, |
| "grad_norm": 1.0762503147125244, |
| "learning_rate": 2.9984160947740723e-05, |
| "loss": 0.4181, |
| "step": 830 |
| }, |
| { |
| "epoch": 36.52173913043478, |
| "grad_norm": 1.1693116426467896, |
| "learning_rate": 2.9983724107896993e-05, |
| "loss": 0.3803, |
| "step": 840 |
| }, |
| { |
| "epoch": 36.95652173913044, |
| "grad_norm": 1.4850109815597534, |
| "learning_rate": 2.9983281328983757e-05, |
| "loss": 0.4498, |
| "step": 850 |
| }, |
| { |
| "epoch": 37.391304347826086, |
| "grad_norm": 1.8984379768371582, |
| "learning_rate": 2.9982832611176523e-05, |
| "loss": 0.4182, |
| "step": 860 |
| }, |
| { |
| "epoch": 37.82608695652174, |
| "grad_norm": 1.2748432159423828, |
| "learning_rate": 2.998237795465315e-05, |
| "loss": 0.3716, |
| "step": 870 |
| }, |
| { |
| "epoch": 38.26086956521739, |
| "grad_norm": 1.268835186958313, |
| "learning_rate": 2.9981917359593843e-05, |
| "loss": 0.4011, |
| "step": 880 |
| }, |
| { |
| "epoch": 38.69565217391305, |
| "grad_norm": 1.446075439453125, |
| "learning_rate": 2.9981450826181172e-05, |
| "loss": 0.3551, |
| "step": 890 |
| }, |
| { |
| "epoch": 39.130434782608695, |
| "grad_norm": 1.9400171041488647, |
| "learning_rate": 2.9980978354600057e-05, |
| "loss": 0.4631, |
| "step": 900 |
| }, |
| { |
| "epoch": 39.130434782608695, |
| "eval_loss": 0.7510843276977539, |
| "eval_runtime": 0.4747, |
| "eval_samples_per_second": 21.065, |
| "eval_steps_per_second": 21.065, |
| "step": 900 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4104, |
| "Start_State_samples_per_second": 24.364, |
| "Start_State_steps_per_second": 24.364, |
| "epoch": 39.130434782608695, |
| "step": 900 |
| }, |
| { |
| "Raw_Model_loss": 0.7510843276977539, |
| "Raw_Model_runtime": 0.4105, |
| "Raw_Model_samples_per_second": 24.36, |
| "Raw_Model_steps_per_second": 24.36, |
| "epoch": 39.130434782608695, |
| "step": 900 |
| }, |
| { |
| "SWA_loss": 0.7209179997444153, |
| "SWA_runtime": 0.4135, |
| "SWA_samples_per_second": 24.182, |
| "SWA_steps_per_second": 24.182, |
| "epoch": 39.130434782608695, |
| "step": 900 |
| }, |
| { |
| "EMA_loss": 0.7315307259559631, |
| "EMA_runtime": 0.4077, |
| "EMA_samples_per_second": 24.527, |
| "EMA_steps_per_second": 24.527, |
| "epoch": 39.130434782608695, |
| "step": 900 |
| }, |
| { |
| "epoch": 39.56521739130435, |
| "grad_norm": 1.5488810539245605, |
| "learning_rate": 2.9980499945037765e-05, |
| "loss": 0.3835, |
| "step": 910 |
| }, |
| { |
| "epoch": 40.0, |
| "grad_norm": 3.038804769515991, |
| "learning_rate": 2.998001559768393e-05, |
| "loss": 0.3862, |
| "step": 920 |
| }, |
| { |
| "epoch": 40.43478260869565, |
| "grad_norm": 1.543023705482483, |
| "learning_rate": 2.9979525312730525e-05, |
| "loss": 0.4491, |
| "step": 930 |
| }, |
| { |
| "epoch": 40.869565217391305, |
| "grad_norm": 1.6729778051376343, |
| "learning_rate": 2.9979029090371885e-05, |
| "loss": 0.3409, |
| "step": 940 |
| }, |
| { |
| "epoch": 41.30434782608695, |
| "grad_norm": 2.224083662033081, |
| "learning_rate": 2.99785269308047e-05, |
| "loss": 0.3417, |
| "step": 950 |
| }, |
| { |
| "epoch": 41.73913043478261, |
| "grad_norm": 1.5069278478622437, |
| "learning_rate": 2.9978018834228007e-05, |
| "loss": 0.3647, |
| "step": 960 |
| }, |
| { |
| "epoch": 42.17391304347826, |
| "grad_norm": 1.5148930549621582, |
| "learning_rate": 2.9977504800843197e-05, |
| "loss": 0.4348, |
| "step": 970 |
| }, |
| { |
| "epoch": 42.608695652173914, |
| "grad_norm": 1.5450372695922852, |
| "learning_rate": 2.9976984830854022e-05, |
| "loss": 0.3751, |
| "step": 980 |
| }, |
| { |
| "epoch": 43.04347826086956, |
| "grad_norm": 1.6492244005203247, |
| "learning_rate": 2.997645892446658e-05, |
| "loss": 0.3672, |
| "step": 990 |
| }, |
| { |
| "epoch": 43.47826086956522, |
| "grad_norm": 1.4176095724105835, |
| "learning_rate": 2.9975927081889322e-05, |
| "loss": 0.3908, |
| "step": 1000 |
| }, |
| { |
| "epoch": 43.91304347826087, |
| "grad_norm": 1.192176342010498, |
| "learning_rate": 2.9975389303333047e-05, |
| "loss": 0.3461, |
| "step": 1010 |
| }, |
| { |
| "epoch": 44.34782608695652, |
| "grad_norm": 2.0878190994262695, |
| "learning_rate": 2.997484558901093e-05, |
| "loss": 0.3918, |
| "step": 1020 |
| }, |
| { |
| "epoch": 44.78260869565217, |
| "grad_norm": 1.6980842351913452, |
| "learning_rate": 2.9974295939138465e-05, |
| "loss": 0.3809, |
| "step": 1030 |
| }, |
| { |
| "epoch": 45.21739130434783, |
| "grad_norm": 1.2522655725479126, |
| "learning_rate": 2.9973740353933523e-05, |
| "loss": 0.2649, |
| "step": 1040 |
| }, |
| { |
| "epoch": 45.65217391304348, |
| "grad_norm": 1.678786039352417, |
| "learning_rate": 2.997317883361632e-05, |
| "loss": 0.3613, |
| "step": 1050 |
| }, |
| { |
| "epoch": 45.65217391304348, |
| "eval_loss": 0.777652382850647, |
| "eval_runtime": 0.507, |
| "eval_samples_per_second": 19.723, |
| "eval_steps_per_second": 19.723, |
| "step": 1050 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.452, |
| "Start_State_samples_per_second": 22.123, |
| "Start_State_steps_per_second": 22.123, |
| "epoch": 45.65217391304348, |
| "step": 1050 |
| }, |
| { |
| "Raw_Model_loss": 0.777652382850647, |
| "Raw_Model_runtime": 0.4426, |
| "Raw_Model_samples_per_second": 22.594, |
| "Raw_Model_steps_per_second": 22.594, |
| "epoch": 45.65217391304348, |
| "step": 1050 |
| }, |
| { |
| "SWA_loss": 0.7228068709373474, |
| "SWA_runtime": 0.4408, |
| "SWA_samples_per_second": 22.684, |
| "SWA_steps_per_second": 22.684, |
| "epoch": 45.65217391304348, |
| "step": 1050 |
| }, |
| { |
| "EMA_loss": 0.7306644320487976, |
| "EMA_runtime": 0.3993, |
| "EMA_samples_per_second": 25.044, |
| "EMA_steps_per_second": 25.044, |
| "epoch": 45.65217391304348, |
| "step": 1050 |
| }, |
| { |
| "epoch": 46.08695652173913, |
| "grad_norm": 1.7957395315170288, |
| "learning_rate": 2.997261137840943e-05, |
| "loss": 0.4103, |
| "step": 1060 |
| }, |
| { |
| "epoch": 46.52173913043478, |
| "grad_norm": 2.156790256500244, |
| "learning_rate": 2.9972037988537758e-05, |
| "loss": 0.3785, |
| "step": 1070 |
| }, |
| { |
| "epoch": 46.95652173913044, |
| "grad_norm": 1.9486017227172852, |
| "learning_rate": 2.9971458664228595e-05, |
| "loss": 0.3324, |
| "step": 1080 |
| }, |
| { |
| "epoch": 47.391304347826086, |
| "grad_norm": 2.1510581970214844, |
| "learning_rate": 2.997087340571156e-05, |
| "loss": 0.3368, |
| "step": 1090 |
| }, |
| { |
| "epoch": 47.82608695652174, |
| "grad_norm": 1.5172206163406372, |
| "learning_rate": 2.997028221321863e-05, |
| "loss": 0.3563, |
| "step": 1100 |
| }, |
| { |
| "epoch": 48.26086956521739, |
| "grad_norm": 2.3161354064941406, |
| "learning_rate": 2.9969685086984132e-05, |
| "loss": 0.3734, |
| "step": 1110 |
| }, |
| { |
| "epoch": 48.69565217391305, |
| "grad_norm": 1.6685658693313599, |
| "learning_rate": 2.9969082027244755e-05, |
| "loss": 0.3001, |
| "step": 1120 |
| }, |
| { |
| "epoch": 49.130434782608695, |
| "grad_norm": 1.843396782875061, |
| "learning_rate": 2.996847303423953e-05, |
| "loss": 0.4154, |
| "step": 1130 |
| }, |
| { |
| "epoch": 49.56521739130435, |
| "grad_norm": 1.3093624114990234, |
| "learning_rate": 2.9967858108209838e-05, |
| "loss": 0.3713, |
| "step": 1140 |
| }, |
| { |
| "epoch": 50.0, |
| "grad_norm": 2.944302797317505, |
| "learning_rate": 2.9967237249399417e-05, |
| "loss": 0.292, |
| "step": 1150 |
| }, |
| { |
| "epoch": 50.43478260869565, |
| "grad_norm": 1.5263242721557617, |
| "learning_rate": 2.996661045805436e-05, |
| "loss": 0.2961, |
| "step": 1160 |
| }, |
| { |
| "epoch": 50.869565217391305, |
| "grad_norm": 1.8892343044281006, |
| "learning_rate": 2.9965977734423106e-05, |
| "loss": 0.3417, |
| "step": 1170 |
| }, |
| { |
| "epoch": 51.30434782608695, |
| "grad_norm": 1.9361391067504883, |
| "learning_rate": 2.9965339078756445e-05, |
| "loss": 0.3541, |
| "step": 1180 |
| }, |
| { |
| "epoch": 51.73913043478261, |
| "grad_norm": 1.1996322870254517, |
| "learning_rate": 2.9964694491307514e-05, |
| "loss": 0.2802, |
| "step": 1190 |
| }, |
| { |
| "epoch": 52.17391304347826, |
| "grad_norm": 2.5254251956939697, |
| "learning_rate": 2.996404397233182e-05, |
| "loss": 0.4085, |
| "step": 1200 |
| }, |
| { |
| "epoch": 52.17391304347826, |
| "eval_loss": 0.8014206886291504, |
| "eval_runtime": 0.508, |
| "eval_samples_per_second": 19.686, |
| "eval_steps_per_second": 19.686, |
| "step": 1200 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4, |
| "Start_State_samples_per_second": 25.001, |
| "Start_State_steps_per_second": 25.001, |
| "epoch": 52.17391304347826, |
| "step": 1200 |
| }, |
| { |
| "Raw_Model_loss": 0.8014206886291504, |
| "Raw_Model_runtime": 0.4013, |
| "Raw_Model_samples_per_second": 24.922, |
| "Raw_Model_steps_per_second": 24.922, |
| "epoch": 52.17391304347826, |
| "step": 1200 |
| }, |
| { |
| "SWA_loss": 0.7281149625778198, |
| "SWA_runtime": 0.4076, |
| "SWA_samples_per_second": 24.535, |
| "SWA_steps_per_second": 24.535, |
| "epoch": 52.17391304347826, |
| "step": 1200 |
| }, |
| { |
| "EMA_loss": 0.7309959530830383, |
| "EMA_runtime": 0.415, |
| "EMA_samples_per_second": 24.094, |
| "EMA_steps_per_second": 24.094, |
| "epoch": 52.17391304347826, |
| "step": 1200 |
| }, |
| { |
| "epoch": 52.608695652173914, |
| "grad_norm": 1.616297721862793, |
| "learning_rate": 1.4982021986165911e-06, |
| "loss": 0.2865, |
| "step": 1210 |
| }, |
| { |
| "epoch": 53.04347826086956, |
| "grad_norm": 1.7724196910858154, |
| "learning_rate": 2.9964043972331822e-06, |
| "loss": 0.3649, |
| "step": 1220 |
| }, |
| { |
| "epoch": 53.47826086956522, |
| "grad_norm": 1.3953560590744019, |
| "learning_rate": 4.494606595849773e-06, |
| "loss": 0.2719, |
| "step": 1230 |
| }, |
| { |
| "epoch": 53.91304347826087, |
| "grad_norm": 1.9502956867218018, |
| "learning_rate": 5.9928087944663644e-06, |
| "loss": 0.334, |
| "step": 1240 |
| }, |
| { |
| "epoch": 54.34782608695652, |
| "grad_norm": 1.9493101835250854, |
| "learning_rate": 7.491010993082955e-06, |
| "loss": 0.385, |
| "step": 1250 |
| }, |
| { |
| "epoch": 54.78260869565217, |
| "grad_norm": 1.1656595468521118, |
| "learning_rate": 8.989213191699545e-06, |
| "loss": 0.284, |
| "step": 1260 |
| }, |
| { |
| "epoch": 55.21739130434783, |
| "grad_norm": 1.5772318840026855, |
| "learning_rate": 1.0487415390316136e-05, |
| "loss": 0.3105, |
| "step": 1270 |
| }, |
| { |
| "epoch": 55.65217391304348, |
| "grad_norm": 1.708022117614746, |
| "learning_rate": 1.1985617588932729e-05, |
| "loss": 0.3553, |
| "step": 1280 |
| }, |
| { |
| "epoch": 56.08695652173913, |
| "grad_norm": 2.282125473022461, |
| "learning_rate": 1.348381978754932e-05, |
| "loss": 0.2844, |
| "step": 1290 |
| }, |
| { |
| "epoch": 56.52173913043478, |
| "grad_norm": 1.458001971244812, |
| "learning_rate": 1.498202198616591e-05, |
| "loss": 0.3387, |
| "step": 1300 |
| }, |
| { |
| "epoch": 56.95652173913044, |
| "grad_norm": 1.9748072624206543, |
| "learning_rate": 1.4982020501567203e-05, |
| "loss": 0.3318, |
| "step": 1310 |
| }, |
| { |
| "epoch": 57.391304347826086, |
| "grad_norm": 1.4179987907409668, |
| "learning_rate": 1.4982016047771664e-05, |
| "loss": 0.3109, |
| "step": 1320 |
| }, |
| { |
| "epoch": 57.82608695652174, |
| "grad_norm": 2.815448522567749, |
| "learning_rate": 1.4982008624781062e-05, |
| "loss": 0.3369, |
| "step": 1330 |
| }, |
| { |
| "epoch": 58.26086956521739, |
| "grad_norm": 1.4394376277923584, |
| "learning_rate": 1.4981998232598337e-05, |
| "loss": 0.3303, |
| "step": 1340 |
| }, |
| { |
| "epoch": 58.69565217391305, |
| "grad_norm": 1.8707002401351929, |
| "learning_rate": 1.4981984871227611e-05, |
| "loss": 0.3077, |
| "step": 1350 |
| }, |
| { |
| "epoch": 58.69565217391305, |
| "eval_loss": 0.8195747137069702, |
| "eval_runtime": 0.4109, |
| "eval_samples_per_second": 24.335, |
| "eval_steps_per_second": 24.335, |
| "step": 1350 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.3897, |
| "Start_State_samples_per_second": 25.658, |
| "Start_State_steps_per_second": 25.658, |
| "epoch": 58.69565217391305, |
| "step": 1350 |
| }, |
| { |
| "Raw_Model_loss": 0.8195747137069702, |
| "Raw_Model_runtime": 0.3979, |
| "Raw_Model_samples_per_second": 25.132, |
| "Raw_Model_steps_per_second": 25.132, |
| "epoch": 58.69565217391305, |
| "step": 1350 |
| }, |
| { |
| "SWA_loss": 0.7320815324783325, |
| "SWA_runtime": 0.395, |
| "SWA_samples_per_second": 25.319, |
| "SWA_steps_per_second": 25.319, |
| "epoch": 58.69565217391305, |
| "step": 1350 |
| }, |
| { |
| "EMA_loss": 0.7313606142997742, |
| "EMA_runtime": 0.3882, |
| "EMA_samples_per_second": 25.759, |
| "EMA_steps_per_second": 25.759, |
| "epoch": 58.69565217391305, |
| "step": 1350 |
| }, |
| { |
| "epoch": 59.130434782608695, |
| "grad_norm": 1.5965052843093872, |
| "learning_rate": 1.4981968540674177e-05, |
| "loss": 0.3206, |
| "step": 1360 |
| }, |
| { |
| "epoch": 59.56521739130435, |
| "grad_norm": 1.3822482824325562, |
| "learning_rate": 1.4981949240944509e-05, |
| "loss": 0.3011, |
| "step": 1370 |
| }, |
| { |
| "epoch": 60.0, |
| "grad_norm": 1.6288312673568726, |
| "learning_rate": 1.4981926972046258e-05, |
| "loss": 0.3095, |
| "step": 1380 |
| }, |
| { |
| "epoch": 60.43478260869565, |
| "grad_norm": 1.9036870002746582, |
| "learning_rate": 1.498190173398825e-05, |
| "loss": 0.3173, |
| "step": 1390 |
| }, |
| { |
| "epoch": 60.869565217391305, |
| "grad_norm": 1.5387356281280518, |
| "learning_rate": 1.4981873526780487e-05, |
| "loss": 0.3054, |
| "step": 1400 |
| }, |
| { |
| "epoch": 61.30434782608695, |
| "grad_norm": 1.4343056678771973, |
| "learning_rate": 1.4981842350434152e-05, |
| "loss": 0.3046, |
| "step": 1410 |
| }, |
| { |
| "epoch": 61.73913043478261, |
| "grad_norm": 1.4938664436340332, |
| "learning_rate": 1.49818082049616e-05, |
| "loss": 0.3205, |
| "step": 1420 |
| }, |
| { |
| "epoch": 62.17391304347826, |
| "grad_norm": 2.177480459213257, |
| "learning_rate": 1.4981771090376367e-05, |
| "loss": 0.2865, |
| "step": 1430 |
| }, |
| { |
| "epoch": 62.608695652173914, |
| "grad_norm": 1.8865878582000732, |
| "learning_rate": 1.4981731006693164e-05, |
| "loss": 0.3213, |
| "step": 1440 |
| }, |
| { |
| "epoch": 63.04347826086956, |
| "grad_norm": 1.3152176141738892, |
| "learning_rate": 1.4981687953927875e-05, |
| "loss": 0.3125, |
| "step": 1450 |
| }, |
| { |
| "epoch": 63.47826086956522, |
| "grad_norm": 1.9965901374816895, |
| "learning_rate": 1.498164193209757e-05, |
| "loss": 0.345, |
| "step": 1460 |
| }, |
| { |
| "epoch": 63.91304347826087, |
| "grad_norm": 1.6480698585510254, |
| "learning_rate": 1.498159294122049e-05, |
| "loss": 0.2924, |
| "step": 1470 |
| }, |
| { |
| "epoch": 64.34782608695652, |
| "grad_norm": 1.8093769550323486, |
| "learning_rate": 1.4981540981316052e-05, |
| "loss": 0.2688, |
| "step": 1480 |
| }, |
| { |
| "epoch": 64.78260869565217, |
| "grad_norm": 1.529961347579956, |
| "learning_rate": 1.4981486052404848e-05, |
| "loss": 0.3585, |
| "step": 1490 |
| }, |
| { |
| "epoch": 65.21739130434783, |
| "grad_norm": 1.4079116582870483, |
| "learning_rate": 1.4981428154508652e-05, |
| "loss": 0.269, |
| "step": 1500 |
| }, |
| { |
| "epoch": 65.21739130434783, |
| "eval_loss": 0.8343552350997925, |
| "eval_runtime": 0.4105, |
| "eval_samples_per_second": 24.363, |
| "eval_steps_per_second": 24.363, |
| "step": 1500 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4152, |
| "Start_State_samples_per_second": 24.085, |
| "Start_State_steps_per_second": 24.085, |
| "epoch": 65.21739130434783, |
| "step": 1500 |
| }, |
| { |
| "Raw_Model_loss": 0.8343552350997925, |
| "Raw_Model_runtime": 0.399, |
| "Raw_Model_samples_per_second": 25.06, |
| "Raw_Model_steps_per_second": 25.06, |
| "epoch": 65.21739130434783, |
| "step": 1500 |
| }, |
| { |
| "SWA_loss": 0.7395024299621582, |
| "SWA_runtime": 0.4083, |
| "SWA_samples_per_second": 24.489, |
| "SWA_steps_per_second": 24.489, |
| "epoch": 65.21739130434783, |
| "step": 1500 |
| }, |
| { |
| "EMA_loss": 0.7315851449966431, |
| "EMA_runtime": 0.4003, |
| "EMA_samples_per_second": 24.979, |
| "EMA_steps_per_second": 24.979, |
| "epoch": 65.21739130434783, |
| "step": 1500 |
| }, |
| { |
| "epoch": 65.65217391304348, |
| "grad_norm": 2.2492856979370117, |
| "learning_rate": 1.4981367287650419e-05, |
| "loss": 0.3161, |
| "step": 1510 |
| }, |
| { |
| "epoch": 66.08695652173913, |
| "grad_norm": 1.7571766376495361, |
| "learning_rate": 1.4981303451854267e-05, |
| "loss": 0.2947, |
| "step": 1520 |
| }, |
| { |
| "epoch": 66.52173913043478, |
| "grad_norm": 1.7509160041809082, |
| "learning_rate": 1.4981236647145501e-05, |
| "loss": 0.3107, |
| "step": 1530 |
| }, |
| { |
| "epoch": 66.95652173913044, |
| "grad_norm": 2.094277858734131, |
| "learning_rate": 1.4981166873550601e-05, |
| "loss": 0.3051, |
| "step": 1540 |
| }, |
| { |
| "epoch": 67.3913043478261, |
| "grad_norm": 1.7601019144058228, |
| "learning_rate": 1.4981094131097224e-05, |
| "loss": 0.2711, |
| "step": 1550 |
| }, |
| { |
| "epoch": 67.82608695652173, |
| "grad_norm": 2.0073230266571045, |
| "learning_rate": 1.49810184198142e-05, |
| "loss": 0.3434, |
| "step": 1560 |
| }, |
| { |
| "epoch": 68.26086956521739, |
| "grad_norm": 2.084998846054077, |
| "learning_rate": 1.498093973973154e-05, |
| "loss": 0.2506, |
| "step": 1570 |
| }, |
| { |
| "epoch": 68.69565217391305, |
| "grad_norm": 1.8126795291900635, |
| "learning_rate": 1.4980858090880429e-05, |
| "loss": 0.286, |
| "step": 1580 |
| }, |
| { |
| "epoch": 69.1304347826087, |
| "grad_norm": 1.9416148662567139, |
| "learning_rate": 1.4980773473293232e-05, |
| "loss": 0.3681, |
| "step": 1590 |
| }, |
| { |
| "epoch": 69.56521739130434, |
| "grad_norm": 1.978805422782898, |
| "learning_rate": 1.4980685887003486e-05, |
| "loss": 0.3073, |
| "step": 1600 |
| }, |
| { |
| "epoch": 70.0, |
| "grad_norm": 1.6534956693649292, |
| "learning_rate": 1.498059533204591e-05, |
| "loss": 0.2691, |
| "step": 1610 |
| }, |
| { |
| "epoch": 70.43478260869566, |
| "grad_norm": 2.2284836769104004, |
| "learning_rate": 1.4980501808456398e-05, |
| "loss": 0.3139, |
| "step": 1620 |
| }, |
| { |
| "epoch": 70.8695652173913, |
| "grad_norm": 1.9585868120193481, |
| "learning_rate": 1.4980405316272018e-05, |
| "loss": 0.2997, |
| "step": 1630 |
| }, |
| { |
| "epoch": 71.30434782608695, |
| "grad_norm": 2.346238851547241, |
| "learning_rate": 1.4980305855531015e-05, |
| "loss": 0.2891, |
| "step": 1640 |
| }, |
| { |
| "epoch": 71.73913043478261, |
| "grad_norm": 1.851641058921814, |
| "learning_rate": 1.4980203426272815e-05, |
| "loss": 0.2627, |
| "step": 1650 |
| }, |
| { |
| "epoch": 71.73913043478261, |
| "eval_loss": 0.8489276766777039, |
| "eval_runtime": 0.4811, |
| "eval_samples_per_second": 20.784, |
| "eval_steps_per_second": 20.784, |
| "step": 1650 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4395, |
| "Start_State_samples_per_second": 22.752, |
| "Start_State_steps_per_second": 22.752, |
| "epoch": 71.73913043478261, |
| "step": 1650 |
| }, |
| { |
| "Raw_Model_loss": 0.8489276766777039, |
| "Raw_Model_runtime": 0.4362, |
| "Raw_Model_samples_per_second": 22.928, |
| "Raw_Model_steps_per_second": 22.928, |
| "epoch": 71.73913043478261, |
| "step": 1650 |
| }, |
| { |
| "SWA_loss": 0.7444645166397095, |
| "SWA_runtime": 0.4365, |
| "SWA_samples_per_second": 22.911, |
| "SWA_steps_per_second": 22.911, |
| "epoch": 71.73913043478261, |
| "step": 1650 |
| }, |
| { |
| "EMA_loss": 0.7310957312583923, |
| "EMA_runtime": 0.4342, |
| "EMA_samples_per_second": 23.032, |
| "EMA_steps_per_second": 23.032, |
| "epoch": 71.73913043478261, |
| "step": 1650 |
| }, |
| { |
| "epoch": 72.17391304347827, |
| "grad_norm": 1.65473210811615, |
| "learning_rate": 1.4980098028538014e-05, |
| "loss": 0.328, |
| "step": 1660 |
| }, |
| { |
| "epoch": 72.6086956521739, |
| "grad_norm": 2.0884604454040527, |
| "learning_rate": 1.4979989662368391e-05, |
| "loss": 0.2959, |
| "step": 1670 |
| }, |
| { |
| "epoch": 73.04347826086956, |
| "grad_norm": 1.906488299369812, |
| "learning_rate": 1.4979878327806899e-05, |
| "loss": 0.3098, |
| "step": 1680 |
| }, |
| { |
| "epoch": 73.47826086956522, |
| "grad_norm": 2.01023530960083, |
| "learning_rate": 1.4979764024897668e-05, |
| "loss": 0.2878, |
| "step": 1690 |
| }, |
| { |
| "epoch": 73.91304347826087, |
| "grad_norm": 1.8358246088027954, |
| "learning_rate": 1.4979646753686002e-05, |
| "loss": 0.2796, |
| "step": 1700 |
| }, |
| { |
| "epoch": 74.34782608695652, |
| "grad_norm": 1.3833634853363037, |
| "learning_rate": 1.4979526514218385e-05, |
| "loss": 0.2769, |
| "step": 1710 |
| }, |
| { |
| "epoch": 74.78260869565217, |
| "grad_norm": 1.5111050605773926, |
| "learning_rate": 1.4979403306542473e-05, |
| "loss": 0.3278, |
| "step": 1720 |
| }, |
| { |
| "epoch": 75.21739130434783, |
| "grad_norm": 1.5712664127349854, |
| "learning_rate": 1.4979277130707107e-05, |
| "loss": 0.2338, |
| "step": 1730 |
| }, |
| { |
| "epoch": 75.65217391304348, |
| "grad_norm": 1.660670280456543, |
| "learning_rate": 1.4979147986762295e-05, |
| "loss": 0.3144, |
| "step": 1740 |
| }, |
| { |
| "epoch": 76.08695652173913, |
| "grad_norm": 1.8221240043640137, |
| "learning_rate": 1.4979015874759227e-05, |
| "loss": 0.2694, |
| "step": 1750 |
| }, |
| { |
| "epoch": 76.52173913043478, |
| "grad_norm": 1.8922370672225952, |
| "learning_rate": 1.4978880794750266e-05, |
| "loss": 0.2665, |
| "step": 1760 |
| }, |
| { |
| "epoch": 76.95652173913044, |
| "grad_norm": 1.296356201171875, |
| "learning_rate": 1.4978742746788957e-05, |
| "loss": 0.3007, |
| "step": 1770 |
| }, |
| { |
| "epoch": 77.3913043478261, |
| "grad_norm": 1.8244571685791016, |
| "learning_rate": 1.4978601730930014e-05, |
| "loss": 0.2842, |
| "step": 1780 |
| }, |
| { |
| "epoch": 77.82608695652173, |
| "grad_norm": 1.8345180749893188, |
| "learning_rate": 1.4978457747229335e-05, |
| "loss": 0.2714, |
| "step": 1790 |
| }, |
| { |
| "epoch": 78.26086956521739, |
| "grad_norm": 1.850252389907837, |
| "learning_rate": 1.497831079574399e-05, |
| "loss": 0.3055, |
| "step": 1800 |
| }, |
| { |
| "epoch": 78.26086956521739, |
| "eval_loss": 0.8643280267715454, |
| "eval_runtime": 0.4359, |
| "eval_samples_per_second": 22.94, |
| "eval_steps_per_second": 22.94, |
| "step": 1800 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4203, |
| "Start_State_samples_per_second": 23.79, |
| "Start_State_steps_per_second": 23.79, |
| "epoch": 78.26086956521739, |
| "step": 1800 |
| }, |
| { |
| "Raw_Model_loss": 0.8643280267715454, |
| "Raw_Model_runtime": 0.4033, |
| "Raw_Model_samples_per_second": 24.798, |
| "Raw_Model_steps_per_second": 24.798, |
| "epoch": 78.26086956521739, |
| "step": 1800 |
| }, |
| { |
| "SWA_loss": 0.7512942552566528, |
| "SWA_runtime": 0.3891, |
| "SWA_samples_per_second": 25.698, |
| "SWA_steps_per_second": 25.698, |
| "epoch": 78.26086956521739, |
| "step": 1800 |
| }, |
| { |
| "EMA_loss": 0.7310723066329956, |
| "EMA_runtime": 0.3904, |
| "EMA_samples_per_second": 25.613, |
| "EMA_steps_per_second": 25.613, |
| "epoch": 78.26086956521739, |
| "step": 1800 |
| }, |
| { |
| "epoch": 78.69565217391305, |
| "grad_norm": 1.2641412019729614, |
| "learning_rate": 1.4978160876532222e-05, |
| "loss": 0.2824, |
| "step": 1810 |
| }, |
| { |
| "epoch": 79.1304347826087, |
| "grad_norm": 1.9765238761901855, |
| "learning_rate": 1.4978007989653455e-05, |
| "loss": 0.2406, |
| "step": 1820 |
| }, |
| { |
| "epoch": 79.56521739130434, |
| "grad_norm": 1.5835498571395874, |
| "learning_rate": 1.4977852135168293e-05, |
| "loss": 0.2607, |
| "step": 1830 |
| }, |
| { |
| "epoch": 80.0, |
| "grad_norm": 1.8932580947875977, |
| "learning_rate": 1.4977693313138507e-05, |
| "loss": 0.3036, |
| "step": 1840 |
| }, |
| { |
| "epoch": 80.43478260869566, |
| "grad_norm": 2.1030030250549316, |
| "learning_rate": 1.4977531523627054e-05, |
| "loss": 0.2799, |
| "step": 1850 |
| }, |
| { |
| "epoch": 80.8695652173913, |
| "grad_norm": 1.2366570234298706, |
| "learning_rate": 1.4977366766698058e-05, |
| "loss": 0.2792, |
| "step": 1860 |
| }, |
| { |
| "epoch": 81.30434782608695, |
| "grad_norm": 1.5485888719558716, |
| "learning_rate": 1.4977199042416822e-05, |
| "loss": 0.2311, |
| "step": 1870 |
| }, |
| { |
| "epoch": 81.73913043478261, |
| "grad_norm": 1.5375139713287354, |
| "learning_rate": 1.4977028350849831e-05, |
| "loss": 0.3059, |
| "step": 1880 |
| }, |
| { |
| "epoch": 82.17391304347827, |
| "grad_norm": 1.6247549057006836, |
| "learning_rate": 1.4976854692064739e-05, |
| "loss": 0.2147, |
| "step": 1890 |
| }, |
| { |
| "epoch": 82.6086956521739, |
| "grad_norm": 1.8154581785202026, |
| "learning_rate": 1.497667806613038e-05, |
| "loss": 0.2594, |
| "step": 1900 |
| }, |
| { |
| "epoch": 83.04347826086956, |
| "grad_norm": 1.579021692276001, |
| "learning_rate": 1.497649847311676e-05, |
| "loss": 0.3002, |
| "step": 1910 |
| }, |
| { |
| "epoch": 83.47826086956522, |
| "grad_norm": 1.4831469058990479, |
| "learning_rate": 1.4976315913095068e-05, |
| "loss": 0.265, |
| "step": 1920 |
| }, |
| { |
| "epoch": 83.91304347826087, |
| "grad_norm": 2.305431842803955, |
| "learning_rate": 1.4976130386137666e-05, |
| "loss": 0.3039, |
| "step": 1930 |
| }, |
| { |
| "epoch": 84.34782608695652, |
| "grad_norm": 1.720330834388733, |
| "learning_rate": 1.4975941892318084e-05, |
| "loss": 0.2642, |
| "step": 1940 |
| }, |
| { |
| "epoch": 84.78260869565217, |
| "grad_norm": 2.2541563510894775, |
| "learning_rate": 1.497575043171104e-05, |
| "loss": 0.2798, |
| "step": 1950 |
| }, |
| { |
| "epoch": 84.78260869565217, |
| "eval_loss": 0.8888376355171204, |
| "eval_runtime": 0.4413, |
| "eval_samples_per_second": 22.658, |
| "eval_steps_per_second": 22.658, |
| "step": 1950 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4289, |
| "Start_State_samples_per_second": 23.318, |
| "Start_State_steps_per_second": 23.318, |
| "epoch": 84.78260869565217, |
| "step": 1950 |
| }, |
| { |
| "Raw_Model_loss": 0.8888376355171204, |
| "Raw_Model_runtime": 0.427, |
| "Raw_Model_samples_per_second": 23.417, |
| "Raw_Model_steps_per_second": 23.417, |
| "epoch": 84.78260869565217, |
| "step": 1950 |
| }, |
| { |
| "SWA_loss": 0.7555862665176392, |
| "SWA_runtime": 0.402, |
| "SWA_samples_per_second": 24.875, |
| "SWA_steps_per_second": 24.875, |
| "epoch": 84.78260869565217, |
| "step": 1950 |
| }, |
| { |
| "EMA_loss": 0.7317630052566528, |
| "EMA_runtime": 0.3962, |
| "EMA_samples_per_second": 25.24, |
| "EMA_steps_per_second": 25.24, |
| "epoch": 84.78260869565217, |
| "step": 1950 |
| }, |
| { |
| "epoch": 85.21739130434783, |
| "grad_norm": 2.295459270477295, |
| "learning_rate": 7.487875215855521e-07, |
| "loss": 0.2648, |
| "step": 1960 |
| }, |
| { |
| "epoch": 85.65217391304348, |
| "grad_norm": 1.9124552011489868, |
| "learning_rate": 1.4975750431711041e-06, |
| "loss": 0.2705, |
| "step": 1970 |
| }, |
| { |
| "epoch": 86.08695652173913, |
| "grad_norm": 2.192692756652832, |
| "learning_rate": 2.2463625647566557e-06, |
| "loss": 0.2532, |
| "step": 1980 |
| }, |
| { |
| "epoch": 86.52173913043478, |
| "grad_norm": 1.792695164680481, |
| "learning_rate": 2.9951500863422082e-06, |
| "loss": 0.2765, |
| "step": 1990 |
| }, |
| { |
| "epoch": 86.95652173913044, |
| "grad_norm": 2.0215790271759033, |
| "learning_rate": 3.74393760792776e-06, |
| "loss": 0.2769, |
| "step": 2000 |
| }, |
| { |
| "epoch": 87.3913043478261, |
| "grad_norm": 1.4278439283370972, |
| "learning_rate": 4.4927251295133115e-06, |
| "loss": 0.278, |
| "step": 2010 |
| }, |
| { |
| "epoch": 87.82608695652173, |
| "grad_norm": 1.9748132228851318, |
| "learning_rate": 5.241512651098863e-06, |
| "loss": 0.2587, |
| "step": 2020 |
| }, |
| { |
| "epoch": 88.26086956521739, |
| "grad_norm": 2.0187323093414307, |
| "learning_rate": 5.9903001726844164e-06, |
| "loss": 0.2613, |
| "step": 2030 |
| }, |
| { |
| "epoch": 88.69565217391305, |
| "grad_norm": 1.7434452772140503, |
| "learning_rate": 6.739087694269968e-06, |
| "loss": 0.2851, |
| "step": 2040 |
| }, |
| { |
| "epoch": 89.1304347826087, |
| "grad_norm": 1.828153371810913, |
| "learning_rate": 7.48787521585552e-06, |
| "loss": 0.2918, |
| "step": 2050 |
| }, |
| { |
| "epoch": 89.56521739130434, |
| "grad_norm": 1.5711168050765991, |
| "learning_rate": 7.487874473866896e-06, |
| "loss": 0.247, |
| "step": 2060 |
| }, |
| { |
| "epoch": 90.0, |
| "grad_norm": 1.6228244304656982, |
| "learning_rate": 7.487872247901318e-06, |
| "loss": 0.2522, |
| "step": 2070 |
| }, |
| { |
| "epoch": 90.43478260869566, |
| "grad_norm": 1.863221526145935, |
| "learning_rate": 7.4878685379596685e-06, |
| "loss": 0.2577, |
| "step": 2080 |
| }, |
| { |
| "epoch": 90.8695652173913, |
| "grad_norm": 1.7543621063232422, |
| "learning_rate": 7.487863344043418e-06, |
| "loss": 0.283, |
| "step": 2090 |
| }, |
| { |
| "epoch": 91.30434782608695, |
| "grad_norm": 1.765681266784668, |
| "learning_rate": 7.487856666154626e-06, |
| "loss": 0.2727, |
| "step": 2100 |
| }, |
| { |
| "epoch": 91.30434782608695, |
| "eval_loss": 0.8941524624824524, |
| "eval_runtime": 0.5508, |
| "eval_samples_per_second": 18.155, |
| "eval_steps_per_second": 18.155, |
| "step": 2100 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4064, |
| "Start_State_samples_per_second": 24.603, |
| "Start_State_steps_per_second": 24.603, |
| "epoch": 91.30434782608695, |
| "step": 2100 |
| }, |
| { |
| "Raw_Model_loss": 0.8941524624824524, |
| "Raw_Model_runtime": 0.4788, |
| "Raw_Model_samples_per_second": 20.886, |
| "Raw_Model_steps_per_second": 20.886, |
| "epoch": 91.30434782608695, |
| "step": 2100 |
| }, |
| { |
| "SWA_loss": 0.7625434994697571, |
| "SWA_runtime": 0.5432, |
| "SWA_samples_per_second": 18.408, |
| "SWA_steps_per_second": 18.408, |
| "epoch": 91.30434782608695, |
| "step": 2100 |
| }, |
| { |
| "EMA_loss": 0.7306564450263977, |
| "EMA_runtime": 0.4703, |
| "EMA_samples_per_second": 21.264, |
| "EMA_steps_per_second": 21.264, |
| "epoch": 91.30434782608695, |
| "step": 2100 |
| }, |
| { |
| "epoch": 91.73913043478261, |
| "grad_norm": 2.2282097339630127, |
| "learning_rate": 7.487848504295937e-06, |
| "loss": 0.2597, |
| "step": 2110 |
| }, |
| { |
| "epoch": 92.17391304347827, |
| "grad_norm": 2.146618127822876, |
| "learning_rate": 7.4878388584705885e-06, |
| "loss": 0.2901, |
| "step": 2120 |
| }, |
| { |
| "epoch": 92.6086956521739, |
| "grad_norm": 1.9365864992141724, |
| "learning_rate": 7.487827728682402e-06, |
| "loss": 0.2796, |
| "step": 2130 |
| }, |
| { |
| "epoch": 93.04347826086956, |
| "grad_norm": 1.677370309829712, |
| "learning_rate": 7.487815114935791e-06, |
| "loss": 0.2375, |
| "step": 2140 |
| }, |
| { |
| "epoch": 93.47826086956522, |
| "grad_norm": 1.871509075164795, |
| "learning_rate": 7.487801017235753e-06, |
| "loss": 0.289, |
| "step": 2150 |
| }, |
| { |
| "epoch": 93.91304347826087, |
| "grad_norm": 2.1130902767181396, |
| "learning_rate": 7.4877854355878785e-06, |
| "loss": 0.2698, |
| "step": 2160 |
| }, |
| { |
| "epoch": 94.34782608695652, |
| "grad_norm": 1.9688533544540405, |
| "learning_rate": 7.487768369998342e-06, |
| "loss": 0.2168, |
| "step": 2170 |
| }, |
| { |
| "epoch": 94.78260869565217, |
| "grad_norm": 2.1728529930114746, |
| "learning_rate": 7.4877498204739075e-06, |
| "loss": 0.2961, |
| "step": 2180 |
| }, |
| { |
| "epoch": 95.21739130434783, |
| "grad_norm": 2.192168712615967, |
| "learning_rate": 7.487729787021927e-06, |
| "loss": 0.2599, |
| "step": 2190 |
| }, |
| { |
| "epoch": 95.65217391304348, |
| "grad_norm": 2.4115936756134033, |
| "learning_rate": 7.487708269650342e-06, |
| "loss": 0.2587, |
| "step": 2200 |
| }, |
| { |
| "epoch": 96.08695652173913, |
| "grad_norm": 2.353425979614258, |
| "learning_rate": 7.487685268367682e-06, |
| "loss": 0.259, |
| "step": 2210 |
| }, |
| { |
| "epoch": 96.52173913043478, |
| "grad_norm": 1.855171799659729, |
| "learning_rate": 7.487660783183063e-06, |
| "loss": 0.2681, |
| "step": 2220 |
| }, |
| { |
| "epoch": 96.95652173913044, |
| "grad_norm": 2.1836190223693848, |
| "learning_rate": 7.48763481410619e-06, |
| "loss": 0.2607, |
| "step": 2230 |
| }, |
| { |
| "epoch": 97.3913043478261, |
| "grad_norm": 1.6038516759872437, |
| "learning_rate": 7.487607361147356e-06, |
| "loss": 0.2881, |
| "step": 2240 |
| }, |
| { |
| "epoch": 97.82608695652173, |
| "grad_norm": 1.3469552993774414, |
| "learning_rate": 7.487578424317443e-06, |
| "loss": 0.2524, |
| "step": 2250 |
| }, |
| { |
| "epoch": 97.82608695652173, |
| "eval_loss": 0.9057046175003052, |
| "eval_runtime": 0.4015, |
| "eval_samples_per_second": 24.909, |
| "eval_steps_per_second": 24.909, |
| "step": 2250 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.3966, |
| "Start_State_samples_per_second": 25.217, |
| "Start_State_steps_per_second": 25.217, |
| "epoch": 97.82608695652173, |
| "step": 2250 |
| }, |
| { |
| "Raw_Model_loss": 0.9057046175003052, |
| "Raw_Model_runtime": 0.3945, |
| "Raw_Model_samples_per_second": 25.347, |
| "Raw_Model_steps_per_second": 25.347, |
| "epoch": 97.82608695652173, |
| "step": 2250 |
| }, |
| { |
| "SWA_loss": 0.7665841579437256, |
| "SWA_runtime": 0.3965, |
| "SWA_samples_per_second": 25.221, |
| "SWA_steps_per_second": 25.221, |
| "epoch": 97.82608695652173, |
| "step": 2250 |
| }, |
| { |
| "EMA_loss": 0.7307609915733337, |
| "EMA_runtime": 0.402, |
| "EMA_samples_per_second": 24.875, |
| "EMA_steps_per_second": 24.875, |
| "epoch": 97.82608695652173, |
| "step": 2250 |
| }, |
| { |
| "epoch": 98.26086956521739, |
| "grad_norm": 1.9246830940246582, |
| "learning_rate": 7.487548003627922e-06, |
| "loss": 0.2415, |
| "step": 2260 |
| }, |
| { |
| "epoch": 98.69565217391305, |
| "grad_norm": 1.7473000288009644, |
| "learning_rate": 7.487516099090849e-06, |
| "loss": 0.278, |
| "step": 2270 |
| }, |
| { |
| "epoch": 99.1304347826087, |
| "grad_norm": 2.0333516597747803, |
| "learning_rate": 7.48748271071887e-06, |
| "loss": 0.2488, |
| "step": 2280 |
| }, |
| { |
| "epoch": 99.56521739130434, |
| "grad_norm": 2.3631269931793213, |
| "learning_rate": 7.48744783852522e-06, |
| "loss": 0.2882, |
| "step": 2290 |
| }, |
| { |
| "epoch": 100.0, |
| "grad_norm": 2.6425907611846924, |
| "learning_rate": 7.487411482523721e-06, |
| "loss": 0.2322, |
| "step": 2300 |
| }, |
| { |
| "epoch": 100.43478260869566, |
| "grad_norm": 2.703728437423706, |
| "learning_rate": 7.4873736427287825e-06, |
| "loss": 0.2371, |
| "step": 2310 |
| }, |
| { |
| "epoch": 100.8695652173913, |
| "grad_norm": 1.7555862665176392, |
| "learning_rate": 7.487334319155404e-06, |
| "loss": 0.2697, |
| "step": 2320 |
| }, |
| { |
| "epoch": 101.30434782608695, |
| "grad_norm": 2.5154976844787598, |
| "learning_rate": 7.487293511819172e-06, |
| "loss": 0.2417, |
| "step": 2330 |
| }, |
| { |
| "epoch": 101.73913043478261, |
| "grad_norm": 1.7718055248260498, |
| "learning_rate": 7.4872512207362605e-06, |
| "loss": 0.2446, |
| "step": 2340 |
| }, |
| { |
| "epoch": 102.17391304347827, |
| "grad_norm": 1.7671442031860352, |
| "learning_rate": 7.487207445923432e-06, |
| "loss": 0.2936, |
| "step": 2350 |
| }, |
| { |
| "epoch": 102.6086956521739, |
| "grad_norm": 2.0610148906707764, |
| "learning_rate": 7.487162187398039e-06, |
| "loss": 0.2845, |
| "step": 2360 |
| }, |
| { |
| "epoch": 103.04347826086956, |
| "grad_norm": 1.9395049810409546, |
| "learning_rate": 7.487115445178019e-06, |
| "loss": 0.2163, |
| "step": 2370 |
| }, |
| { |
| "epoch": 103.47826086956522, |
| "grad_norm": 2.1225855350494385, |
| "learning_rate": 7.487067219281901e-06, |
| "loss": 0.2913, |
| "step": 2380 |
| }, |
| { |
| "epoch": 103.91304347826087, |
| "grad_norm": 2.034578561782837, |
| "learning_rate": 7.4870175097287985e-06, |
| "loss": 0.2417, |
| "step": 2390 |
| }, |
| { |
| "epoch": 104.34782608695652, |
| "grad_norm": 1.9769914150238037, |
| "learning_rate": 7.486966316538416e-06, |
| "loss": 0.2563, |
| "step": 2400 |
| }, |
| { |
| "epoch": 104.34782608695652, |
| "eval_loss": 0.9094018936157227, |
| "eval_runtime": 0.5284, |
| "eval_samples_per_second": 18.926, |
| "eval_steps_per_second": 18.926, |
| "step": 2400 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.491, |
| "Start_State_samples_per_second": 20.366, |
| "Start_State_steps_per_second": 20.366, |
| "epoch": 104.34782608695652, |
| "step": 2400 |
| }, |
| { |
| "Raw_Model_loss": 0.9094018936157227, |
| "Raw_Model_runtime": 0.5034, |
| "Raw_Model_samples_per_second": 19.866, |
| "Raw_Model_steps_per_second": 19.866, |
| "epoch": 104.34782608695652, |
| "step": 2400 |
| }, |
| { |
| "SWA_loss": 0.7745841145515442, |
| "SWA_runtime": 0.5911, |
| "SWA_samples_per_second": 16.917, |
| "SWA_steps_per_second": 16.917, |
| "epoch": 104.34782608695652, |
| "step": 2400 |
| }, |
| { |
| "EMA_loss": 0.7307760119438171, |
| "EMA_runtime": 0.4346, |
| "EMA_samples_per_second": 23.011, |
| "EMA_steps_per_second": 23.011, |
| "epoch": 104.34782608695652, |
| "step": 2400 |
| }, |
| { |
| "epoch": 104.78260869565217, |
| "grad_norm": 2.087158679962158, |
| "learning_rate": 7.486913639731043e-06, |
| "loss": 0.2497, |
| "step": 2410 |
| }, |
| { |
| "epoch": 105.21739130434783, |
| "grad_norm": 1.996799349784851, |
| "learning_rate": 7.48685947932756e-06, |
| "loss": 0.2635, |
| "step": 2420 |
| }, |
| { |
| "epoch": 105.65217391304348, |
| "grad_norm": 1.9105130434036255, |
| "learning_rate": 7.4868038353494355e-06, |
| "loss": 0.2602, |
| "step": 2430 |
| }, |
| { |
| "epoch": 106.08695652173913, |
| "grad_norm": 2.1657402515411377, |
| "learning_rate": 7.486746707818724e-06, |
| "loss": 0.214, |
| "step": 2440 |
| }, |
| { |
| "epoch": 106.52173913043478, |
| "grad_norm": 1.444199800491333, |
| "learning_rate": 7.486688096758069e-06, |
| "loss": 0.2819, |
| "step": 2450 |
| }, |
| { |
| "epoch": 106.95652173913044, |
| "grad_norm": 1.8629169464111328, |
| "learning_rate": 7.486628002190702e-06, |
| "loss": 0.2444, |
| "step": 2460 |
| }, |
| { |
| "epoch": 107.3913043478261, |
| "grad_norm": 2.290212631225586, |
| "learning_rate": 7.486566424140442e-06, |
| "loss": 0.304, |
| "step": 2470 |
| }, |
| { |
| "epoch": 107.82608695652173, |
| "grad_norm": 2.3259527683258057, |
| "learning_rate": 7.486503362631699e-06, |
| "loss": 0.219, |
| "step": 2480 |
| }, |
| { |
| "epoch": 108.26086956521739, |
| "grad_norm": 2.0435678958892822, |
| "learning_rate": 7.486438817689465e-06, |
| "loss": 0.2709, |
| "step": 2490 |
| }, |
| { |
| "epoch": 108.69565217391305, |
| "grad_norm": 1.6399531364440918, |
| "learning_rate": 7.486372789339326e-06, |
| "loss": 0.2456, |
| "step": 2500 |
| }, |
| { |
| "epoch": 109.1304347826087, |
| "grad_norm": 1.6286495923995972, |
| "learning_rate": 7.486305277607452e-06, |
| "loss": 0.2435, |
| "step": 2510 |
| }, |
| { |
| "epoch": 109.56521739130434, |
| "grad_norm": 1.3312675952911377, |
| "learning_rate": 7.486236282520606e-06, |
| "loss": 0.2313, |
| "step": 2520 |
| }, |
| { |
| "epoch": 110.0, |
| "grad_norm": 3.1992104053497314, |
| "learning_rate": 7.48616580410613e-06, |
| "loss": 0.2876, |
| "step": 2530 |
| }, |
| { |
| "epoch": 110.43478260869566, |
| "grad_norm": 1.7260243892669678, |
| "learning_rate": 7.486093842391963e-06, |
| "loss": 0.2455, |
| "step": 2540 |
| }, |
| { |
| "epoch": 110.8695652173913, |
| "grad_norm": 1.857021450996399, |
| "learning_rate": 7.486020397406629e-06, |
| "loss": 0.2697, |
| "step": 2550 |
| }, |
| { |
| "epoch": 110.8695652173913, |
| "eval_loss": 0.9266101121902466, |
| "eval_runtime": 0.4485, |
| "eval_samples_per_second": 22.298, |
| "eval_steps_per_second": 22.298, |
| "step": 2550 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4407, |
| "Start_State_samples_per_second": 22.69, |
| "Start_State_steps_per_second": 22.69, |
| "epoch": 110.8695652173913, |
| "step": 2550 |
| }, |
| { |
| "Raw_Model_loss": 0.9266101121902466, |
| "Raw_Model_runtime": 0.4403, |
| "Raw_Model_samples_per_second": 22.71, |
| "Raw_Model_steps_per_second": 22.71, |
| "epoch": 110.8695652173913, |
| "step": 2550 |
| }, |
| { |
| "SWA_loss": 0.7769848108291626, |
| "SWA_runtime": 0.4455, |
| "SWA_samples_per_second": 22.448, |
| "SWA_steps_per_second": 22.448, |
| "epoch": 110.8695652173913, |
| "step": 2550 |
| }, |
| { |
| "EMA_loss": 0.7314194440841675, |
| "EMA_runtime": 0.4451, |
| "EMA_samples_per_second": 22.468, |
| "EMA_steps_per_second": 22.468, |
| "epoch": 110.8695652173913, |
| "step": 2550 |
| }, |
| { |
| "epoch": 111.30434782608695, |
| "grad_norm": 2.4148638248443604, |
| "learning_rate": 7.485945469179237e-06, |
| "loss": 0.282, |
| "step": 2560 |
| }, |
| { |
| "epoch": 111.73913043478261, |
| "grad_norm": 2.007262945175171, |
| "learning_rate": 7.485869057739486e-06, |
| "loss": 0.228, |
| "step": 2570 |
| }, |
| { |
| "epoch": 112.17391304347827, |
| "grad_norm": 2.0865132808685303, |
| "learning_rate": 7.485791163117665e-06, |
| "loss": 0.2463, |
| "step": 2580 |
| }, |
| { |
| "epoch": 112.6086956521739, |
| "grad_norm": 1.6724177598953247, |
| "learning_rate": 7.485711785344648e-06, |
| "loss": 0.2463, |
| "step": 2590 |
| }, |
| { |
| "epoch": 113.04347826086956, |
| "grad_norm": 2.1320908069610596, |
| "learning_rate": 7.485630924451897e-06, |
| "loss": 0.2661, |
| "step": 2600 |
| }, |
| { |
| "epoch": 113.47826086956522, |
| "grad_norm": 1.8488856554031372, |
| "learning_rate": 7.485548580471464e-06, |
| "loss": 0.2261, |
| "step": 2610 |
| }, |
| { |
| "epoch": 113.91304347826087, |
| "grad_norm": 2.1878151893615723, |
| "learning_rate": 7.485464753435987e-06, |
| "loss": 0.2756, |
| "step": 2620 |
| }, |
| { |
| "epoch": 114.34782608695652, |
| "grad_norm": 1.984470009803772, |
| "learning_rate": 7.485379443378693e-06, |
| "loss": 0.2451, |
| "step": 2630 |
| }, |
| { |
| "epoch": 114.78260869565217, |
| "grad_norm": 2.4623303413391113, |
| "learning_rate": 7.485292650333394e-06, |
| "loss": 0.2287, |
| "step": 2640 |
| }, |
| { |
| "epoch": 115.21739130434783, |
| "grad_norm": 1.7331453561782837, |
| "learning_rate": 7.485204374334494e-06, |
| "loss": 0.2553, |
| "step": 2650 |
| }, |
| { |
| "epoch": 115.65217391304348, |
| "grad_norm": 1.9090930223464966, |
| "learning_rate": 7.485114615416982e-06, |
| "loss": 0.2721, |
| "step": 2660 |
| }, |
| { |
| "epoch": 116.08695652173913, |
| "grad_norm": 2.4040467739105225, |
| "learning_rate": 7.485023373616437e-06, |
| "loss": 0.2153, |
| "step": 2670 |
| }, |
| { |
| "epoch": 116.52173913043478, |
| "grad_norm": 2.5749056339263916, |
| "learning_rate": 7.484930648969023e-06, |
| "loss": 0.245, |
| "step": 2680 |
| }, |
| { |
| "epoch": 116.95652173913044, |
| "grad_norm": 1.6020243167877197, |
| "learning_rate": 7.484836441511492e-06, |
| "loss": 0.2443, |
| "step": 2690 |
| }, |
| { |
| "epoch": 117.3913043478261, |
| "grad_norm": 1.6441881656646729, |
| "learning_rate": 7.484740751281187e-06, |
| "loss": 0.2361, |
| "step": 2700 |
| }, |
| { |
| "epoch": 117.3913043478261, |
| "eval_loss": 0.9320739507675171, |
| "eval_runtime": 0.408, |
| "eval_samples_per_second": 24.509, |
| "eval_steps_per_second": 24.509, |
| "step": 2700 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.418, |
| "Start_State_samples_per_second": 23.925, |
| "Start_State_steps_per_second": 23.925, |
| "epoch": 117.3913043478261, |
| "step": 2700 |
| }, |
| { |
| "Raw_Model_loss": 0.9320739507675171, |
| "Raw_Model_runtime": 0.402, |
| "Raw_Model_samples_per_second": 24.874, |
| "Raw_Model_steps_per_second": 24.874, |
| "epoch": 117.3913043478261, |
| "step": 2700 |
| }, |
| { |
| "SWA_loss": 0.7840545177459717, |
| "SWA_runtime": 0.401, |
| "SWA_samples_per_second": 24.935, |
| "SWA_steps_per_second": 24.935, |
| "epoch": 117.3913043478261, |
| "step": 2700 |
| }, |
| { |
| "EMA_loss": 0.730692982673645, |
| "EMA_runtime": 0.3918, |
| "EMA_samples_per_second": 25.525, |
| "EMA_steps_per_second": 25.525, |
| "epoch": 117.3913043478261, |
| "step": 2700 |
| }, |
| { |
| "epoch": 117.82608695652173, |
| "grad_norm": 3.2310845851898193, |
| "learning_rate": 3.7931296624941244e-07, |
| "loss": 0.2406, |
| "step": 2710 |
| }, |
| { |
| "epoch": 118.26086956521739, |
| "grad_norm": 1.8407368659973145, |
| "learning_rate": 7.586259324988249e-07, |
| "loss": 0.2518, |
| "step": 2720 |
| }, |
| { |
| "epoch": 118.69565217391305, |
| "grad_norm": 2.2956159114837646, |
| "learning_rate": 1.1379388987482372e-06, |
| "loss": 0.2412, |
| "step": 2730 |
| }, |
| { |
| "epoch": 119.1304347826087, |
| "grad_norm": 2.297415256500244, |
| "learning_rate": 1.5172518649976497e-06, |
| "loss": 0.2602, |
| "step": 2740 |
| }, |
| { |
| "epoch": 119.56521739130434, |
| "grad_norm": 2.2018797397613525, |
| "learning_rate": 1.8965648312470621e-06, |
| "loss": 0.2596, |
| "step": 2750 |
| }, |
| { |
| "epoch": 120.0, |
| "grad_norm": 3.6682052612304688, |
| "learning_rate": 2.2758777974964743e-06, |
| "loss": 0.219, |
| "step": 2760 |
| }, |
| { |
| "epoch": 120.43478260869566, |
| "grad_norm": 1.9333362579345703, |
| "learning_rate": 2.6551907637458867e-06, |
| "loss": 0.2545, |
| "step": 2770 |
| }, |
| { |
| "epoch": 120.8695652173913, |
| "grad_norm": 1.7708905935287476, |
| "learning_rate": 3.0345037299952995e-06, |
| "loss": 0.2189, |
| "step": 2780 |
| }, |
| { |
| "epoch": 121.30434782608695, |
| "grad_norm": 1.4095892906188965, |
| "learning_rate": 3.413816696244712e-06, |
| "loss": 0.2728, |
| "step": 2790 |
| }, |
| { |
| "epoch": 121.73913043478261, |
| "grad_norm": 1.991544246673584, |
| "learning_rate": 3.7931296624941243e-06, |
| "loss": 0.2699, |
| "step": 2800 |
| }, |
| { |
| "epoch": 122.17391304347827, |
| "grad_norm": 2.028014898300171, |
| "learning_rate": 3.793129286625273e-06, |
| "loss": 0.2196, |
| "step": 2810 |
| }, |
| { |
| "epoch": 122.6086956521739, |
| "grad_norm": 1.7729160785675049, |
| "learning_rate": 3.7931281590188667e-06, |
| "loss": 0.2634, |
| "step": 2820 |
| }, |
| { |
| "epoch": 123.04347826086956, |
| "grad_norm": 1.902854323387146, |
| "learning_rate": 3.7931262796753532e-06, |
| "loss": 0.251, |
| "step": 2830 |
| }, |
| { |
| "epoch": 123.47826086956522, |
| "grad_norm": 2.2296345233917236, |
| "learning_rate": 3.7931236485954773e-06, |
| "loss": 0.2424, |
| "step": 2840 |
| }, |
| { |
| "epoch": 123.91304347826087, |
| "grad_norm": 2.3609299659729004, |
| "learning_rate": 3.793120265780282e-06, |
| "loss": 0.2388, |
| "step": 2850 |
| }, |
| { |
| "epoch": 123.91304347826087, |
| "eval_loss": 0.9395554661750793, |
| "eval_runtime": 0.3971, |
| "eval_samples_per_second": 25.184, |
| "eval_steps_per_second": 25.184, |
| "step": 2850 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4171, |
| "Start_State_samples_per_second": 23.977, |
| "Start_State_steps_per_second": 23.977, |
| "epoch": 123.91304347826087, |
| "step": 2850 |
| }, |
| { |
| "Raw_Model_loss": 0.9395554661750793, |
| "Raw_Model_runtime": 0.3896, |
| "Raw_Model_samples_per_second": 25.669, |
| "Raw_Model_steps_per_second": 25.669, |
| "epoch": 123.91304347826087, |
| "step": 2850 |
| }, |
| { |
| "SWA_loss": 0.7867599725723267, |
| "SWA_runtime": 0.4887, |
| "SWA_samples_per_second": 20.462, |
| "SWA_steps_per_second": 20.462, |
| "epoch": 123.91304347826087, |
| "step": 2850 |
| }, |
| { |
| "EMA_loss": 0.7314862608909607, |
| "EMA_runtime": 0.4923, |
| "EMA_samples_per_second": 20.313, |
| "EMA_steps_per_second": 20.313, |
| "epoch": 123.91304347826087, |
| "step": 2850 |
| }, |
| { |
| "epoch": 124.34782608695652, |
| "grad_norm": 1.6401152610778809, |
| "learning_rate": 3.793116131231107e-06, |
| "loss": 0.2257, |
| "step": 2860 |
| }, |
| { |
| "epoch": 124.78260869565217, |
| "grad_norm": 1.6049269437789917, |
| "learning_rate": 3.793111244949593e-06, |
| "loss": 0.2303, |
| "step": 2870 |
| }, |
| { |
| "epoch": 125.21739130434783, |
| "grad_norm": 2.0744292736053467, |
| "learning_rate": 3.793105606937675e-06, |
| "loss": 0.2692, |
| "step": 2880 |
| }, |
| { |
| "epoch": 125.65217391304348, |
| "grad_norm": 2.102421998977661, |
| "learning_rate": 3.7930992171975892e-06, |
| "loss": 0.2458, |
| "step": 2890 |
| }, |
| { |
| "epoch": 126.08695652173913, |
| "grad_norm": 2.300477981567383, |
| "learning_rate": 3.793092075731867e-06, |
| "loss": 0.2518, |
| "step": 2900 |
| }, |
| { |
| "epoch": 126.52173913043478, |
| "grad_norm": 1.6764642000198364, |
| "learning_rate": 3.79308418254334e-06, |
| "loss": 0.2022, |
| "step": 2910 |
| }, |
| { |
| "epoch": 126.95652173913044, |
| "grad_norm": 1.5686938762664795, |
| "learning_rate": 3.7930755376351365e-06, |
| "loss": 0.2903, |
| "step": 2920 |
| }, |
| { |
| "epoch": 127.3913043478261, |
| "grad_norm": 2.0804359912872314, |
| "learning_rate": 3.7930661410106833e-06, |
| "loss": 0.2556, |
| "step": 2930 |
| }, |
| { |
| "epoch": 127.82608695652173, |
| "grad_norm": 2.6569416522979736, |
| "learning_rate": 3.793055992673704e-06, |
| "loss": 0.2196, |
| "step": 2940 |
| }, |
| { |
| "epoch": 128.2608695652174, |
| "grad_norm": 2.325507164001465, |
| "learning_rate": 3.7930450926282215e-06, |
| "loss": 0.2961, |
| "step": 2950 |
| }, |
| { |
| "epoch": 128.69565217391303, |
| "grad_norm": 1.6577781438827515, |
| "learning_rate": 3.793033440878557e-06, |
| "loss": 0.2414, |
| "step": 2960 |
| }, |
| { |
| "epoch": 129.1304347826087, |
| "grad_norm": 1.6468480825424194, |
| "learning_rate": 3.7930210374293287e-06, |
| "loss": 0.2031, |
| "step": 2970 |
| }, |
| { |
| "epoch": 129.56521739130434, |
| "grad_norm": 1.8844521045684814, |
| "learning_rate": 3.793007882285452e-06, |
| "loss": 0.2411, |
| "step": 2980 |
| }, |
| { |
| "epoch": 130.0, |
| "grad_norm": 5.029874801635742, |
| "learning_rate": 3.7929939754521417e-06, |
| "loss": 0.2465, |
| "step": 2990 |
| }, |
| { |
| "epoch": 130.43478260869566, |
| "grad_norm": 2.3793535232543945, |
| "learning_rate": 3.79297931693491e-06, |
| "loss": 0.2374, |
| "step": 3000 |
| }, |
| { |
| "epoch": 130.43478260869566, |
| "eval_loss": 0.9428585171699524, |
| "eval_runtime": 0.4348, |
| "eval_samples_per_second": 22.999, |
| "eval_steps_per_second": 22.999, |
| "step": 3000 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4391, |
| "Start_State_samples_per_second": 22.776, |
| "Start_State_steps_per_second": 22.776, |
| "epoch": 130.43478260869566, |
| "step": 3000 |
| }, |
| { |
| "Raw_Model_loss": 0.9428585171699524, |
| "Raw_Model_runtime": 0.4353, |
| "Raw_Model_samples_per_second": 22.971, |
| "Raw_Model_steps_per_second": 22.971, |
| "epoch": 130.43478260869566, |
| "step": 3000 |
| }, |
| { |
| "SWA_loss": 0.7945135831832886, |
| "SWA_runtime": 0.4368, |
| "SWA_samples_per_second": 22.892, |
| "SWA_steps_per_second": 22.892, |
| "epoch": 130.43478260869566, |
| "step": 3000 |
| }, |
| { |
| "EMA_loss": 0.7300070524215698, |
| "EMA_runtime": 0.4353, |
| "EMA_samples_per_second": 22.971, |
| "EMA_steps_per_second": 22.971, |
| "epoch": 130.43478260869566, |
| "step": 3000 |
| }, |
| { |
| "epoch": 130.8695652173913, |
| "grad_norm": 2.055912733078003, |
| "learning_rate": 3.7929639067395674e-06, |
| "loss": 0.2305, |
| "step": 3010 |
| }, |
| { |
| "epoch": 131.30434782608697, |
| "grad_norm": 1.8568785190582275, |
| "learning_rate": 3.7929477448722217e-06, |
| "loss": 0.2706, |
| "step": 3020 |
| }, |
| { |
| "epoch": 131.7391304347826, |
| "grad_norm": 1.9422987699508667, |
| "learning_rate": 3.792930831339279e-06, |
| "loss": 0.2616, |
| "step": 3030 |
| }, |
| { |
| "epoch": 132.17391304347825, |
| "grad_norm": 1.81191885471344, |
| "learning_rate": 3.7929131661474433e-06, |
| "loss": 0.2272, |
| "step": 3040 |
| }, |
| { |
| "epoch": 132.6086956521739, |
| "grad_norm": 2.1437313556671143, |
| "learning_rate": 3.7928947493037164e-06, |
| "loss": 0.253, |
| "step": 3050 |
| }, |
| { |
| "epoch": 133.04347826086956, |
| "grad_norm": 2.685347318649292, |
| "learning_rate": 3.792875580815398e-06, |
| "loss": 0.2152, |
| "step": 3060 |
| }, |
| { |
| "epoch": 133.47826086956522, |
| "grad_norm": 1.2992076873779297, |
| "learning_rate": 3.7928556606900864e-06, |
| "loss": 0.2486, |
| "step": 3070 |
| }, |
| { |
| "epoch": 133.91304347826087, |
| "grad_norm": 2.3356173038482666, |
| "learning_rate": 3.7928349889356773e-06, |
| "loss": 0.2736, |
| "step": 3080 |
| }, |
| { |
| "epoch": 134.34782608695653, |
| "grad_norm": 1.9858746528625488, |
| "learning_rate": 3.7928135655603634e-06, |
| "loss": 0.254, |
| "step": 3090 |
| }, |
| { |
| "epoch": 134.7826086956522, |
| "grad_norm": 1.929052710533142, |
| "learning_rate": 3.792791390572637e-06, |
| "loss": 0.2063, |
| "step": 3100 |
| }, |
| { |
| "epoch": 135.2173913043478, |
| "grad_norm": 2.71032977104187, |
| "learning_rate": 3.7927684639812876e-06, |
| "loss": 0.2441, |
| "step": 3110 |
| }, |
| { |
| "epoch": 135.65217391304347, |
| "grad_norm": 1.8756812810897827, |
| "learning_rate": 3.7927447857954023e-06, |
| "loss": 0.2854, |
| "step": 3120 |
| }, |
| { |
| "epoch": 136.08695652173913, |
| "grad_norm": 2.36094069480896, |
| "learning_rate": 3.792720356024367e-06, |
| "loss": 0.2128, |
| "step": 3130 |
| }, |
| { |
| "epoch": 136.52173913043478, |
| "grad_norm": 2.351156711578369, |
| "learning_rate": 3.7926951746778637e-06, |
| "loss": 0.2385, |
| "step": 3140 |
| }, |
| { |
| "epoch": 136.95652173913044, |
| "grad_norm": 2.7988734245300293, |
| "learning_rate": 3.7926692417658747e-06, |
| "loss": 0.2336, |
| "step": 3150 |
| }, |
| { |
| "epoch": 136.95652173913044, |
| "eval_loss": 0.9436905980110168, |
| "eval_runtime": 0.4896, |
| "eval_samples_per_second": 20.427, |
| "eval_steps_per_second": 20.427, |
| "step": 3150 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.578, |
| "Start_State_samples_per_second": 17.301, |
| "Start_State_steps_per_second": 17.301, |
| "epoch": 136.95652173913044, |
| "step": 3150 |
| }, |
| { |
| "Raw_Model_loss": 0.9436905980110168, |
| "Raw_Model_runtime": 0.5409, |
| "Raw_Model_samples_per_second": 18.487, |
| "Raw_Model_steps_per_second": 18.487, |
| "epoch": 136.95652173913044, |
| "step": 3150 |
| }, |
| { |
| "SWA_loss": 0.7969022393226624, |
| "SWA_runtime": 0.616, |
| "SWA_samples_per_second": 16.235, |
| "SWA_steps_per_second": 16.235, |
| "epoch": 136.95652173913044, |
| "step": 3150 |
| }, |
| { |
| "EMA_loss": 0.7306665182113647, |
| "EMA_runtime": 0.5117, |
| "EMA_samples_per_second": 19.544, |
| "EMA_steps_per_second": 19.544, |
| "epoch": 136.95652173913044, |
| "step": 3150 |
| }, |
| { |
| "epoch": 137.3913043478261, |
| "grad_norm": 1.775488257408142, |
| "learning_rate": 3.792642557298678e-06, |
| "loss": 0.2772, |
| "step": 3160 |
| }, |
| { |
| "epoch": 137.82608695652175, |
| "grad_norm": 1.9294140338897705, |
| "learning_rate": 3.7926151212868503e-06, |
| "loss": 0.2351, |
| "step": 3170 |
| }, |
| { |
| "epoch": 138.2608695652174, |
| "grad_norm": 1.642681360244751, |
| "learning_rate": 3.792586933741268e-06, |
| "loss": 0.2272, |
| "step": 3180 |
| }, |
| { |
| "epoch": 138.69565217391303, |
| "grad_norm": 2.080634593963623, |
| "learning_rate": 3.792557994673102e-06, |
| "loss": 0.2754, |
| "step": 3190 |
| }, |
| { |
| "epoch": 139.1304347826087, |
| "grad_norm": 1.3820661306381226, |
| "learning_rate": 3.792528304093824e-06, |
| "loss": 0.2258, |
| "step": 3200 |
| }, |
| { |
| "epoch": 139.56521739130434, |
| "grad_norm": 2.019350051879883, |
| "learning_rate": 3.7924978620152023e-06, |
| "loss": 0.2705, |
| "step": 3210 |
| }, |
| { |
| "epoch": 140.0, |
| "grad_norm": 2.975282907485962, |
| "learning_rate": 3.7924666684493018e-06, |
| "loss": 0.2302, |
| "step": 3220 |
| }, |
| { |
| "epoch": 140.43478260869566, |
| "grad_norm": 2.264106273651123, |
| "learning_rate": 3.792434723408488e-06, |
| "loss": 0.2315, |
| "step": 3230 |
| }, |
| { |
| "epoch": 140.8695652173913, |
| "grad_norm": 1.7037856578826904, |
| "learning_rate": 3.7924020269054226e-06, |
| "loss": 0.2381, |
| "step": 3240 |
| }, |
| { |
| "epoch": 141.30434782608697, |
| "grad_norm": 1.9553606510162354, |
| "learning_rate": 3.7923685789530654e-06, |
| "loss": 0.2367, |
| "step": 3250 |
| }, |
| { |
| "epoch": 141.7391304347826, |
| "grad_norm": 1.9915337562561035, |
| "learning_rate": 3.7923343795646736e-06, |
| "loss": 0.2491, |
| "step": 3260 |
| }, |
| { |
| "epoch": 142.17391304347825, |
| "grad_norm": 1.7067251205444336, |
| "learning_rate": 3.7922994287538036e-06, |
| "loss": 0.2579, |
| "step": 3270 |
| }, |
| { |
| "epoch": 142.6086956521739, |
| "grad_norm": 2.5622429847717285, |
| "learning_rate": 3.792263726534308e-06, |
| "loss": 0.2607, |
| "step": 3280 |
| }, |
| { |
| "epoch": 143.04347826086956, |
| "grad_norm": 1.2580666542053223, |
| "learning_rate": 3.7922272729203387e-06, |
| "loss": 0.2155, |
| "step": 3290 |
| }, |
| { |
| "epoch": 143.47826086956522, |
| "grad_norm": 1.8073185682296753, |
| "learning_rate": 3.792190067926345e-06, |
| "loss": 0.2478, |
| "step": 3300 |
| }, |
| { |
| "epoch": 143.47826086956522, |
| "eval_loss": 0.9493485689163208, |
| "eval_runtime": 0.41, |
| "eval_samples_per_second": 24.39, |
| "eval_steps_per_second": 24.39, |
| "step": 3300 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.3933, |
| "Start_State_samples_per_second": 25.423, |
| "Start_State_steps_per_second": 25.423, |
| "epoch": 143.47826086956522, |
| "step": 3300 |
| }, |
| { |
| "Raw_Model_loss": 0.9493485689163208, |
| "Raw_Model_runtime": 0.3998, |
| "Raw_Model_samples_per_second": 25.013, |
| "Raw_Model_steps_per_second": 25.013, |
| "epoch": 143.47826086956522, |
| "step": 3300 |
| }, |
| { |
| "SWA_loss": 0.8028978109359741, |
| "SWA_runtime": 0.4094, |
| "SWA_samples_per_second": 24.424, |
| "SWA_steps_per_second": 24.424, |
| "epoch": 143.47826086956522, |
| "step": 3300 |
| }, |
| { |
| "EMA_loss": 0.7308684587478638, |
| "EMA_runtime": 0.3971, |
| "EMA_samples_per_second": 25.181, |
| "EMA_steps_per_second": 25.181, |
| "epoch": 143.47826086956522, |
| "step": 3300 |
| }, |
| { |
| "epoch": 143.91304347826087, |
| "grad_norm": 1.8851526975631714, |
| "learning_rate": 3.7921521115670724e-06, |
| "loss": 0.2538, |
| "step": 3310 |
| }, |
| { |
| "epoch": 144.34782608695653, |
| "grad_norm": 1.569898247718811, |
| "learning_rate": 3.7921134038575663e-06, |
| "loss": 0.2145, |
| "step": 3320 |
| }, |
| { |
| "epoch": 144.7826086956522, |
| "grad_norm": 1.718190312385559, |
| "learning_rate": 3.79207394481317e-06, |
| "loss": 0.2708, |
| "step": 3330 |
| }, |
| { |
| "epoch": 145.2173913043478, |
| "grad_norm": 2.9095687866210938, |
| "learning_rate": 3.7920337344495226e-06, |
| "loss": 0.2084, |
| "step": 3340 |
| }, |
| { |
| "epoch": 145.65217391304347, |
| "grad_norm": 1.8533018827438354, |
| "learning_rate": 3.791992772782563e-06, |
| "loss": 0.2381, |
| "step": 3350 |
| }, |
| { |
| "epoch": 146.08695652173913, |
| "grad_norm": 1.9780678749084473, |
| "learning_rate": 3.791951059828527e-06, |
| "loss": 0.2651, |
| "step": 3360 |
| }, |
| { |
| "epoch": 146.52173913043478, |
| "grad_norm": 1.834191083908081, |
| "learning_rate": 3.791908595603947e-06, |
| "loss": 0.2269, |
| "step": 3370 |
| }, |
| { |
| "epoch": 146.95652173913044, |
| "grad_norm": 1.6292699575424194, |
| "learning_rate": 3.7918653801256568e-06, |
| "loss": 0.2159, |
| "step": 3380 |
| }, |
| { |
| "epoch": 147.3913043478261, |
| "grad_norm": 1.5715214014053345, |
| "learning_rate": 3.791821413410784e-06, |
| "loss": 0.2288, |
| "step": 3390 |
| }, |
| { |
| "epoch": 147.82608695652175, |
| "grad_norm": 1.5430243015289307, |
| "learning_rate": 3.791776695476756e-06, |
| "loss": 0.2538, |
| "step": 3400 |
| }, |
| { |
| "epoch": 148.2608695652174, |
| "grad_norm": 1.466277837753296, |
| "learning_rate": 3.791731226341297e-06, |
| "loss": 0.2156, |
| "step": 3410 |
| }, |
| { |
| "epoch": 148.69565217391303, |
| "grad_norm": 1.8279281854629517, |
| "learning_rate": 3.7916850060224308e-06, |
| "loss": 0.2498, |
| "step": 3420 |
| }, |
| { |
| "epoch": 149.1304347826087, |
| "grad_norm": 1.7966867685317993, |
| "learning_rate": 3.791638034538477e-06, |
| "loss": 0.2716, |
| "step": 3430 |
| }, |
| { |
| "epoch": 149.56521739130434, |
| "grad_norm": 2.2440056800842285, |
| "learning_rate": 3.7915903119080527e-06, |
| "loss": 0.265, |
| "step": 3440 |
| }, |
| { |
| "epoch": 150.0, |
| "grad_norm": 3.2762231826782227, |
| "learning_rate": 3.7915418381500747e-06, |
| "loss": 0.2208, |
| "step": 3450 |
| }, |
| { |
| "epoch": 150.0, |
| "eval_loss": 0.9505823850631714, |
| "eval_runtime": 0.4422, |
| "eval_samples_per_second": 22.615, |
| "eval_steps_per_second": 22.615, |
| "step": 3450 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4072, |
| "Start_State_samples_per_second": 24.558, |
| "Start_State_steps_per_second": 24.558, |
| "epoch": 150.0, |
| "step": 3450 |
| }, |
| { |
| "Raw_Model_loss": 0.9505823850631714, |
| "Raw_Model_runtime": 0.4153, |
| "Raw_Model_samples_per_second": 24.076, |
| "Raw_Model_steps_per_second": 24.076, |
| "epoch": 150.0, |
| "step": 3450 |
| }, |
| { |
| "SWA_loss": 0.8045159578323364, |
| "SWA_runtime": 0.402, |
| "SWA_samples_per_second": 24.876, |
| "SWA_steps_per_second": 24.876, |
| "epoch": 150.0, |
| "step": 3450 |
| }, |
| { |
| "EMA_loss": 0.7316843271255493, |
| "EMA_runtime": 0.4, |
| "EMA_samples_per_second": 25.002, |
| "EMA_steps_per_second": 25.002, |
| "epoch": 150.0, |
| "step": 3450 |
| }, |
| { |
| "epoch": 150.43478260869566, |
| "grad_norm": 1.7541120052337646, |
| "learning_rate": 2.4672082280509036e-07, |
| "loss": 0.214, |
| "step": 3460 |
| }, |
| { |
| "epoch": 150.8695652173913, |
| "grad_norm": 2.0008656978607178, |
| "learning_rate": 4.934416456101807e-07, |
| "loss": 0.2627, |
| "step": 3470 |
| }, |
| { |
| "epoch": 151.30434782608697, |
| "grad_norm": 1.6539170742034912, |
| "learning_rate": 7.40162468415271e-07, |
| "loss": 0.2, |
| "step": 3480 |
| }, |
| { |
| "epoch": 151.7391304347826, |
| "grad_norm": 2.369926691055298, |
| "learning_rate": 9.868832912203614e-07, |
| "loss": 0.2478, |
| "step": 3490 |
| }, |
| { |
| "epoch": 152.17391304347825, |
| "grad_norm": 2.07112979888916, |
| "learning_rate": 1.2336041140254517e-06, |
| "loss": 0.2427, |
| "step": 3500 |
| }, |
| { |
| "epoch": 152.6086956521739, |
| "grad_norm": 1.6030749082565308, |
| "learning_rate": 1.480324936830542e-06, |
| "loss": 0.2402, |
| "step": 3510 |
| }, |
| { |
| "epoch": 153.04347826086956, |
| "grad_norm": 1.5949645042419434, |
| "learning_rate": 1.7270457596356322e-06, |
| "loss": 0.2072, |
| "step": 3520 |
| }, |
| { |
| "epoch": 153.47826086956522, |
| "grad_norm": 2.338641881942749, |
| "learning_rate": 1.973766582440723e-06, |
| "loss": 0.2506, |
| "step": 3530 |
| }, |
| { |
| "epoch": 153.91304347826087, |
| "grad_norm": 2.719093084335327, |
| "learning_rate": 2.220487405245813e-06, |
| "loss": 0.2321, |
| "step": 3540 |
| }, |
| { |
| "epoch": 154.34782608695653, |
| "grad_norm": 2.292358636856079, |
| "learning_rate": 2.4672082280509034e-06, |
| "loss": 0.2404, |
| "step": 3550 |
| }, |
| { |
| "epoch": 154.7826086956522, |
| "grad_norm": 2.0019381046295166, |
| "learning_rate": 2.4672079835702752e-06, |
| "loss": 0.2343, |
| "step": 3560 |
| }, |
| { |
| "epoch": 155.2173913043478, |
| "grad_norm": 1.6779125928878784, |
| "learning_rate": 2.4672072501284865e-06, |
| "loss": 0.1963, |
| "step": 3570 |
| }, |
| { |
| "epoch": 155.65217391304347, |
| "grad_norm": 2.0632243156433105, |
| "learning_rate": 2.467206027725829e-06, |
| "loss": 0.267, |
| "step": 3580 |
| }, |
| { |
| "epoch": 156.08695652173913, |
| "grad_norm": 1.6089539527893066, |
| "learning_rate": 2.467204316362787e-06, |
| "loss": 0.2034, |
| "step": 3590 |
| }, |
| { |
| "epoch": 156.52173913043478, |
| "grad_norm": 2.475633382797241, |
| "learning_rate": 2.4672021160400387e-06, |
| "loss": 0.2685, |
| "step": 3600 |
| }, |
| { |
| "epoch": 156.52173913043478, |
| "eval_loss": 0.9592596292495728, |
| "eval_runtime": 0.4813, |
| "eval_samples_per_second": 20.778, |
| "eval_steps_per_second": 20.778, |
| "step": 3600 |
| }, |
| { |
| "Start_State_loss": 0.7309322357177734, |
| "Start_State_runtime": 0.4223, |
| "Start_State_samples_per_second": 23.679, |
| "Start_State_steps_per_second": 23.679, |
| "epoch": 156.52173913043478, |
| "step": 3600 |
| }, |
| { |
| "Raw_Model_loss": 0.9592596292495728, |
| "Raw_Model_runtime": 0.3944, |
| "Raw_Model_samples_per_second": 25.356, |
| "Raw_Model_steps_per_second": 25.356, |
| "epoch": 156.52173913043478, |
| "step": 3600 |
| }, |
| { |
| "SWA_loss": 0.8119293451309204, |
| "SWA_runtime": 0.3904, |
| "SWA_samples_per_second": 25.615, |
| "SWA_steps_per_second": 25.615, |
| "epoch": 156.52173913043478, |
| "step": 3600 |
| }, |
| { |
| "EMA_loss": 0.7311049103736877, |
| "EMA_runtime": 0.4017, |
| "EMA_samples_per_second": 24.896, |
| "EMA_steps_per_second": 24.896, |
| "epoch": 156.52173913043478, |
| "step": 3600 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 50000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2174, |
| "save_steps": 150, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.28760054861906e+16, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|