{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "global_step": 1150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 9.91304347826087e-05,
      "loss": 0.1114,
      "step": 10
    },
    {
      "epoch": 0.17,
      "learning_rate": 9.82608695652174e-05,
      "loss": 0.0247,
      "step": 20
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.739130434782609e-05,
      "loss": 0.0153,
      "step": 30
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.652173913043479e-05,
      "loss": 0.0139,
      "step": 40
    },
    {
      "epoch": 0.43,
      "learning_rate": 9.565217391304348e-05,
      "loss": 0.0117,
      "step": 50
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.478260869565218e-05,
      "loss": 0.0029,
      "step": 60
    },
    {
      "epoch": 0.61,
      "learning_rate": 9.391304347826087e-05,
      "loss": 0.0031,
      "step": 70
    },
    {
      "epoch": 0.7,
      "learning_rate": 9.304347826086957e-05,
      "loss": 0.0175,
      "step": 80
    },
    {
      "epoch": 0.78,
      "learning_rate": 9.217391304347827e-05,
      "loss": 0.0141,
      "step": 90
    },
    {
      "epoch": 0.87,
      "learning_rate": 9.130434782608696e-05,
      "loss": 0.0073,
      "step": 100
    },
    {
      "epoch": 0.96,
      "learning_rate": 9.043478260869566e-05,
      "loss": 0.0067,
      "step": 110
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.005401695612818003,
      "eval_mse": 0.005401696544140577,
      "eval_runtime": 184.1257,
      "eval_samples_per_second": 1.249,
      "eval_steps_per_second": 0.158,
      "step": 115
    },
    {
      "epoch": 1.04,
      "learning_rate": 8.956521739130435e-05,
      "loss": 0.0041,
      "step": 120
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.869565217391305e-05,
      "loss": 0.009,
      "step": 130
    },
    {
      "epoch": 1.22,
      "learning_rate": 8.782608695652174e-05,
      "loss": 0.0031,
      "step": 140
    },
    {
      "epoch": 1.3,
      "learning_rate": 8.695652173913044e-05,
      "loss": 0.0133,
      "step": 150
    },
    {
      "epoch": 1.39,
      "learning_rate": 8.608695652173914e-05,
      "loss": 0.0022,
      "step": 160
    },
    {
      "epoch": 1.48,
      "learning_rate": 8.521739130434783e-05,
      "loss": 0.0019,
      "step": 170
    },
    {
      "epoch": 1.57,
      "learning_rate": 8.434782608695653e-05,
      "loss": 0.0023,
      "step": 180
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.347826086956521e-05,
      "loss": 0.0018,
      "step": 190
    },
    {
      "epoch": 1.74,
      "learning_rate": 8.260869565217392e-05,
      "loss": 0.0018,
      "step": 200
    },
    {
      "epoch": 1.83,
      "learning_rate": 8.173913043478262e-05,
      "loss": 0.0036,
      "step": 210
    },
    {
      "epoch": 1.91,
      "learning_rate": 8.086956521739131e-05,
      "loss": 0.0067,
      "step": 220
    },
    {
      "epoch": 2.0,
      "learning_rate": 8e-05,
      "loss": 0.0079,
      "step": 230
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.006907076574862003,
      "eval_mse": 0.006907076574862003,
      "eval_runtime": 2268.8659,
      "eval_samples_per_second": 0.101,
      "eval_steps_per_second": 0.013,
      "step": 230
    },
    {
      "epoch": 2.09,
      "learning_rate": 7.91304347826087e-05,
      "loss": 0.0031,
      "step": 240
    },
    {
      "epoch": 2.17,
      "learning_rate": 7.82608695652174e-05,
      "loss": 0.0066,
      "step": 250
    },
    {
      "epoch": 2.26,
      "learning_rate": 7.73913043478261e-05,
      "loss": 0.0032,
      "step": 260
    },
    {
      "epoch": 2.35,
      "learning_rate": 7.652173913043479e-05,
      "loss": 0.0028,
      "step": 270
    },
    {
      "epoch": 2.43,
      "learning_rate": 7.565217391304347e-05,
      "loss": 0.0067,
      "step": 280
    },
    {
      "epoch": 2.52,
      "learning_rate": 7.478260869565218e-05,
      "loss": 0.0029,
      "step": 290
    },
    {
      "epoch": 2.61,
      "learning_rate": 7.391304347826086e-05,
      "loss": 0.0017,
      "step": 300
    },
    {
      "epoch": 2.7,
      "learning_rate": 7.304347826086957e-05,
      "loss": 0.0019,
      "step": 310
    },
    {
      "epoch": 2.78,
      "learning_rate": 7.217391304347827e-05,
      "loss": 0.013,
      "step": 320
    },
    {
      "epoch": 2.87,
      "learning_rate": 7.130434782608696e-05,
      "loss": 0.0048,
      "step": 330
    },
    {
      "epoch": 2.96,
      "learning_rate": 7.043478260869566e-05,
      "loss": 0.0033,
      "step": 340
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.005840361583977938,
      "eval_mse": 0.005840362515300512,
      "eval_runtime": 179.4443,
      "eval_samples_per_second": 1.282,
      "eval_steps_per_second": 0.162,
      "step": 345
    },
    {
      "epoch": 3.04,
      "learning_rate": 6.956521739130436e-05,
      "loss": 0.0062,
      "step": 350
    },
    {
      "epoch": 3.13,
      "learning_rate": 6.869565217391305e-05,
      "loss": 0.0019,
      "step": 360
    },
    {
      "epoch": 3.22,
      "learning_rate": 6.782608695652173e-05,
      "loss": 0.008,
      "step": 370
    },
    {
      "epoch": 3.3,
      "learning_rate": 6.695652173913044e-05,
      "loss": 0.0033,
      "step": 380
    },
    {
      "epoch": 3.39,
      "learning_rate": 6.608695652173912e-05,
      "loss": 0.0034,
      "step": 390
    },
    {
      "epoch": 3.48,
      "learning_rate": 6.521739130434783e-05,
      "loss": 0.002,
      "step": 400
    },
    {
      "epoch": 3.57,
      "learning_rate": 6.434782608695652e-05,
      "loss": 0.0012,
      "step": 410
    },
    {
      "epoch": 3.65,
      "learning_rate": 6.347826086956523e-05,
      "loss": 0.0166,
      "step": 420
    },
    {
      "epoch": 3.74,
      "learning_rate": 6.260869565217392e-05,
      "loss": 0.0039,
      "step": 430
    },
    {
      "epoch": 3.83,
      "learning_rate": 6.173913043478262e-05,
      "loss": 0.0016,
      "step": 440
    },
    {
      "epoch": 3.91,
      "learning_rate": 6.086956521739131e-05,
      "loss": 0.0016,
      "step": 450
    },
    {
      "epoch": 4.0,
      "learning_rate": 6e-05,
      "loss": 0.0011,
      "step": 460
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.005455708596855402,
      "eval_mse": 0.005455708596855402,
      "eval_runtime": 69.7545,
      "eval_samples_per_second": 3.297,
      "eval_steps_per_second": 0.416,
      "step": 460
    },
    {
      "epoch": 4.09,
      "learning_rate": 5.9130434782608704e-05,
      "loss": 0.003,
      "step": 470
    },
    {
      "epoch": 4.17,
      "learning_rate": 5.826086956521739e-05,
      "loss": 0.001,
      "step": 480
    },
    {
      "epoch": 4.26,
      "learning_rate": 5.739130434782609e-05,
      "loss": 0.0047,
      "step": 490
    },
    {
      "epoch": 4.35,
      "learning_rate": 5.652173913043478e-05,
      "loss": 0.0019,
      "step": 500
    },
    {
      "epoch": 4.43,
      "learning_rate": 5.565217391304348e-05,
      "loss": 0.0019,
      "step": 510
    },
    {
      "epoch": 4.52,
      "learning_rate": 5.478260869565217e-05,
      "loss": 0.0116,
      "step": 520
    },
    {
      "epoch": 4.61,
      "learning_rate": 5.391304347826087e-05,
      "loss": 0.0073,
      "step": 530
    },
    {
      "epoch": 4.7,
      "learning_rate": 5.3043478260869574e-05,
      "loss": 0.0022,
      "step": 540
    },
    {
      "epoch": 4.78,
      "learning_rate": 5.217391304347826e-05,
      "loss": 0.0012,
      "step": 550
    },
    {
      "epoch": 4.87,
      "learning_rate": 5.1304347826086966e-05,
      "loss": 0.0082,
      "step": 560
    },
    {
      "epoch": 4.96,
      "learning_rate": 5.0434782608695655e-05,
      "loss": 0.003,
      "step": 570
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.008183675818145275,
      "eval_mse": 0.008183675818145275,
      "eval_runtime": 76.479,
      "eval_samples_per_second": 3.007,
      "eval_steps_per_second": 0.379,
      "step": 575
    },
    {
      "epoch": 5.04,
      "learning_rate": 4.956521739130435e-05,
      "loss": 0.0178,
      "step": 580
    },
    {
      "epoch": 5.13,
      "learning_rate": 4.8695652173913046e-05,
      "loss": 0.0035,
      "step": 590
    },
    {
      "epoch": 5.22,
      "learning_rate": 4.782608695652174e-05,
      "loss": 0.0048,
      "step": 600
    },
    {
      "epoch": 5.3,
      "learning_rate": 4.695652173913044e-05,
      "loss": 0.0013,
      "step": 610
    },
    {
      "epoch": 5.39,
      "learning_rate": 4.608695652173913e-05,
      "loss": 0.0058,
      "step": 620
    },
    {
      "epoch": 5.48,
      "learning_rate": 4.521739130434783e-05,
      "loss": 0.006,
      "step": 630
    },
    {
      "epoch": 5.57,
      "learning_rate": 4.4347826086956525e-05,
      "loss": 0.0053,
      "step": 640
    },
    {
      "epoch": 5.65,
      "learning_rate": 4.347826086956522e-05,
      "loss": 0.0011,
      "step": 650
    },
    {
      "epoch": 5.74,
      "learning_rate": 4.2608695652173916e-05,
      "loss": 0.0012,
      "step": 660
    },
    {
      "epoch": 5.83,
      "learning_rate": 4.1739130434782605e-05,
      "loss": 0.0011,
      "step": 670
    },
    {
      "epoch": 5.91,
      "learning_rate": 4.086956521739131e-05,
      "loss": 0.0017,
      "step": 680
    },
    {
      "epoch": 6.0,
      "learning_rate": 4e-05,
      "loss": 0.0012,
      "step": 690
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.00548972561955452,
      "eval_mse": 0.00548972561955452,
      "eval_runtime": 68.4873,
      "eval_samples_per_second": 3.358,
      "eval_steps_per_second": 0.423,
      "step": 690
    },
    {
      "epoch": 6.09,
      "learning_rate": 3.91304347826087e-05,
      "loss": 0.0025,
      "step": 700
    },
    {
      "epoch": 6.17,
      "learning_rate": 3.8260869565217395e-05,
      "loss": 0.0024,
      "step": 710
    },
    {
      "epoch": 6.26,
      "learning_rate": 3.739130434782609e-05,
      "loss": 0.0016,
      "step": 720
    },
    {
      "epoch": 6.35,
      "learning_rate": 3.6521739130434786e-05,
      "loss": 0.0051,
      "step": 730
    },
    {
      "epoch": 6.43,
      "learning_rate": 3.565217391304348e-05,
      "loss": 0.0046,
      "step": 740
    },
    {
      "epoch": 6.52,
      "learning_rate": 3.478260869565218e-05,
      "loss": 0.0031,
      "step": 750
    },
    {
      "epoch": 6.61,
      "learning_rate": 3.3913043478260867e-05,
      "loss": 0.0012,
      "step": 760
    },
    {
      "epoch": 6.7,
      "learning_rate": 3.304347826086956e-05,
      "loss": 0.0078,
      "step": 770
    },
    {
      "epoch": 6.78,
      "learning_rate": 3.217391304347826e-05,
      "loss": 0.0045,
      "step": 780
    },
    {
      "epoch": 6.87,
      "learning_rate": 3.130434782608696e-05,
      "loss": 0.0014,
      "step": 790
    },
    {
      "epoch": 6.96,
      "learning_rate": 3.0434782608695656e-05,
      "loss": 0.0015,
      "step": 800
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.005614197812974453,
      "eval_mse": 0.005614197812974453,
      "eval_runtime": 59.7135,
      "eval_samples_per_second": 3.852,
      "eval_steps_per_second": 0.486,
      "step": 805
    },
    {
      "epoch": 7.04,
      "learning_rate": 2.9565217391304352e-05,
      "loss": 0.0055,
      "step": 810
    },
    {
      "epoch": 7.13,
      "learning_rate": 2.8695652173913044e-05,
      "loss": 0.0027,
      "step": 820
    },
    {
      "epoch": 7.22,
      "learning_rate": 2.782608695652174e-05,
      "loss": 0.001,
      "step": 830
    },
    {
      "epoch": 7.3,
      "learning_rate": 2.6956521739130436e-05,
      "loss": 0.0033,
      "step": 840
    },
    {
      "epoch": 7.39,
      "learning_rate": 2.608695652173913e-05,
      "loss": 0.0018,
      "step": 850
    },
    {
      "epoch": 7.48,
      "learning_rate": 2.5217391304347827e-05,
      "loss": 0.0013,
      "step": 860
    },
    {
      "epoch": 7.57,
      "learning_rate": 2.4347826086956523e-05,
      "loss": 0.001,
      "step": 870
    },
    {
      "epoch": 7.65,
      "learning_rate": 2.347826086956522e-05,
      "loss": 0.0032,
      "step": 880
    },
    {
      "epoch": 7.74,
      "learning_rate": 2.2608695652173914e-05,
      "loss": 0.0011,
      "step": 890
    },
    {
      "epoch": 7.83,
      "learning_rate": 2.173913043478261e-05,
      "loss": 0.0007,
      "step": 900
    },
    {
      "epoch": 7.91,
      "learning_rate": 2.0869565217391303e-05,
      "loss": 0.0012,
      "step": 910
    },
    {
      "epoch": 8.0,
      "learning_rate": 2e-05,
      "loss": 0.0008,
      "step": 920
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.005982376169413328,
      "eval_mse": 0.005982376169413328,
      "eval_runtime": 60.492,
      "eval_samples_per_second": 3.802,
      "eval_steps_per_second": 0.479,
      "step": 920
    },
    {
      "epoch": 8.09,
      "learning_rate": 1.9130434782608697e-05,
      "loss": 0.0011,
      "step": 930
    },
    {
      "epoch": 8.17,
      "learning_rate": 1.8260869565217393e-05,
      "loss": 0.0009,
      "step": 940
    },
    {
      "epoch": 8.26,
      "learning_rate": 1.739130434782609e-05,
      "loss": 0.0027,
      "step": 950
    },
    {
      "epoch": 8.35,
      "learning_rate": 1.652173913043478e-05,
      "loss": 0.0028,
      "step": 960
    },
    {
      "epoch": 8.43,
      "learning_rate": 1.565217391304348e-05,
      "loss": 0.0012,
      "step": 970
    },
    {
      "epoch": 8.52,
      "learning_rate": 1.4782608695652176e-05,
      "loss": 0.0008,
      "step": 980
    },
    {
      "epoch": 8.61,
      "learning_rate": 1.391304347826087e-05,
      "loss": 0.0047,
      "step": 990
    },
    {
      "epoch": 8.7,
      "learning_rate": 1.3043478260869566e-05,
      "loss": 0.0013,
      "step": 1000
    },
    {
      "epoch": 8.78,
      "learning_rate": 1.2173913043478261e-05,
      "loss": 0.0009,
      "step": 1010
    },
    {
      "epoch": 8.87,
      "learning_rate": 1.1304347826086957e-05,
      "loss": 0.0009,
      "step": 1020
    },
    {
      "epoch": 8.96,
      "learning_rate": 1.0434782608695651e-05,
      "loss": 0.0092,
      "step": 1030
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.005765838548541069,
      "eval_mse": 0.005765838548541069,
      "eval_runtime": 61.2576,
      "eval_samples_per_second": 3.755,
      "eval_steps_per_second": 0.473,
      "step": 1035
    },
    {
      "epoch": 9.04,
      "learning_rate": 9.565217391304349e-06,
      "loss": 0.0008,
      "step": 1040
    },
    {
      "epoch": 9.13,
      "learning_rate": 8.695652173913044e-06,
      "loss": 0.0023,
      "step": 1050
    },
    {
      "epoch": 9.22,
      "learning_rate": 7.82608695652174e-06,
      "loss": 0.0011,
      "step": 1060
    },
    {
      "epoch": 9.3,
      "learning_rate": 6.956521739130435e-06,
      "loss": 0.0011,
      "step": 1070
    },
    {
      "epoch": 9.39,
      "learning_rate": 6.086956521739131e-06,
      "loss": 0.0066,
      "step": 1080
    },
    {
      "epoch": 9.48,
      "learning_rate": 5.217391304347826e-06,
      "loss": 0.0006,
      "step": 1090
    },
    {
      "epoch": 9.57,
      "learning_rate": 4.347826086956522e-06,
      "loss": 0.001,
      "step": 1100
    },
    {
      "epoch": 9.65,
      "learning_rate": 3.4782608695652175e-06,
      "loss": 0.0026,
      "step": 1110
    },
    {
      "epoch": 9.74,
      "learning_rate": 2.608695652173913e-06,
      "loss": 0.0048,
      "step": 1120
    },
    {
      "epoch": 9.83,
      "learning_rate": 1.7391304347826088e-06,
      "loss": 0.0009,
      "step": 1130
    },
    {
      "epoch": 9.91,
      "learning_rate": 8.695652173913044e-07,
      "loss": 0.0011,
      "step": 1140
    },
    {
      "epoch": 10.0,
      "learning_rate": 0.0,
      "loss": 0.0012,
      "step": 1150
    }
  ],
  "max_steps": 1150,
  "num_train_epochs": 10,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}