| { | |
| "best_metric": 0.24075740575790405, | |
| "best_model_checkpoint": "./results/checkpoint-6750", | |
| "epoch": 3.0, | |
| "global_step": 6750, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9992592592592596e-05, | |
| "loss": 6.945, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9925925925925926e-05, | |
| "loss": 1.4118, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9851851851851855e-05, | |
| "loss": 0.7118, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 0.6554884910583496, | |
| "eval_runtime": 6.9152, | |
| "eval_samples_per_second": 144.754, | |
| "eval_steps_per_second": 36.297, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.977777777777778e-05, | |
| "loss": 0.8383, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.970370370370371e-05, | |
| "loss": 0.6623, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.962962962962963e-05, | |
| "loss": 0.6282, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 0.5736271739006042, | |
| "eval_runtime": 6.8557, | |
| "eval_samples_per_second": 146.01, | |
| "eval_steps_per_second": 36.612, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.955555555555556e-05, | |
| "loss": 0.5446, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9481481481481485e-05, | |
| "loss": 0.5983, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 0.5498507022857666, | |
| "eval_runtime": 6.8866, | |
| "eval_samples_per_second": 145.356, | |
| "eval_steps_per_second": 36.448, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.940740740740741e-05, | |
| "loss": 0.5406, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.933333333333334e-05, | |
| "loss": 0.618, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.925925925925926e-05, | |
| "loss": 0.6346, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 0.5268338918685913, | |
| "eval_runtime": 6.9999, | |
| "eval_samples_per_second": 143.001, | |
| "eval_steps_per_second": 35.857, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.918518518518519e-05, | |
| "loss": 0.53, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9111111111111114e-05, | |
| "loss": 0.4268, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 0.5140772461891174, | |
| "eval_runtime": 7.002, | |
| "eval_samples_per_second": 142.959, | |
| "eval_steps_per_second": 35.847, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.903703703703704e-05, | |
| "loss": 0.5474, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.896296296296297e-05, | |
| "loss": 0.5773, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.888888888888889e-05, | |
| "loss": 0.5611, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 0.5012452006340027, | |
| "eval_runtime": 7.2224, | |
| "eval_samples_per_second": 138.597, | |
| "eval_steps_per_second": 34.753, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.881481481481482e-05, | |
| "loss": 0.5421, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.874074074074074e-05, | |
| "loss": 0.5056, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 0.492546945810318, | |
| "eval_runtime": 7.468, | |
| "eval_samples_per_second": 134.038, | |
| "eval_steps_per_second": 33.61, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.866666666666667e-05, | |
| "loss": 0.5086, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.8592592592592596e-05, | |
| "loss": 0.4423, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.851851851851852e-05, | |
| "loss": 0.5391, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 0.4847224950790405, | |
| "eval_runtime": 7.3642, | |
| "eval_samples_per_second": 135.928, | |
| "eval_steps_per_second": 34.084, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.844444444444445e-05, | |
| "loss": 0.4785, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.837037037037037e-05, | |
| "loss": 0.4303, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 0.4781314730644226, | |
| "eval_runtime": 7.4029, | |
| "eval_samples_per_second": 135.217, | |
| "eval_steps_per_second": 33.906, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.82962962962963e-05, | |
| "loss": 0.5914, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.8222222222222225e-05, | |
| "loss": 0.4892, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.814814814814815e-05, | |
| "loss": 0.5442, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 0.4688411355018616, | |
| "eval_runtime": 7.5871, | |
| "eval_samples_per_second": 131.935, | |
| "eval_steps_per_second": 33.083, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.807407407407408e-05, | |
| "loss": 0.4823, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.4739, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 0.462319940328598, | |
| "eval_runtime": 7.2477, | |
| "eval_samples_per_second": 138.112, | |
| "eval_steps_per_second": 34.632, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.792592592592593e-05, | |
| "loss": 0.5542, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.7851851851851854e-05, | |
| "loss": 0.4593, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.7777777777777784e-05, | |
| "loss": 0.4388, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 0.4547964036464691, | |
| "eval_runtime": 7.2941, | |
| "eval_samples_per_second": 137.235, | |
| "eval_steps_per_second": 34.412, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.770370370370371e-05, | |
| "loss": 0.4762, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.762962962962963e-05, | |
| "loss": 0.428, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 0.45131900906562805, | |
| "eval_runtime": 7.2669, | |
| "eval_samples_per_second": 137.748, | |
| "eval_steps_per_second": 34.54, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.755555555555556e-05, | |
| "loss": 0.4719, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.7481481481481483e-05, | |
| "loss": 0.4293, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.740740740740741e-05, | |
| "loss": 0.4112, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 0.4421839714050293, | |
| "eval_runtime": 7.3019, | |
| "eval_samples_per_second": 137.087, | |
| "eval_steps_per_second": 34.374, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.7333333333333336e-05, | |
| "loss": 0.3981, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.7259259259259266e-05, | |
| "loss": 0.501, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 0.43612053990364075, | |
| "eval_runtime": 7.1757, | |
| "eval_samples_per_second": 139.499, | |
| "eval_steps_per_second": 34.979, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.718518518518519e-05, | |
| "loss": 0.4066, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.711111111111111e-05, | |
| "loss": 0.4892, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.703703703703704e-05, | |
| "loss": 0.4584, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 0.4388324022293091, | |
| "eval_runtime": 7.2341, | |
| "eval_samples_per_second": 138.372, | |
| "eval_steps_per_second": 34.697, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.6962962962962966e-05, | |
| "loss": 0.432, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.6888888888888895e-05, | |
| "loss": 0.4717, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 0.42829033732414246, | |
| "eval_runtime": 7.261, | |
| "eval_samples_per_second": 137.86, | |
| "eval_steps_per_second": 34.568, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.681481481481482e-05, | |
| "loss": 0.399, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.674074074074074e-05, | |
| "loss": 0.4021, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 0.4122, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 0.4233491122722626, | |
| "eval_runtime": 7.2994, | |
| "eval_samples_per_second": 137.134, | |
| "eval_steps_per_second": 34.386, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.6592592592592595e-05, | |
| "loss": 0.4539, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.6518518518518525e-05, | |
| "loss": 0.5041, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 0.4170687794685364, | |
| "eval_runtime": 7.1967, | |
| "eval_samples_per_second": 139.092, | |
| "eval_steps_per_second": 34.877, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.644444444444445e-05, | |
| "loss": 0.5674, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.637037037037038e-05, | |
| "loss": 0.4226, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.62962962962963e-05, | |
| "loss": 0.4153, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 0.41686439514160156, | |
| "eval_runtime": 7.2526, | |
| "eval_samples_per_second": 138.019, | |
| "eval_steps_per_second": 34.608, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.6222222222222224e-05, | |
| "loss": 0.3956, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.6148148148148154e-05, | |
| "loss": 0.4524, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 0.41372087597846985, | |
| "eval_runtime": 7.2599, | |
| "eval_samples_per_second": 137.88, | |
| "eval_steps_per_second": 34.573, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.607407407407408e-05, | |
| "loss": 0.4319, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.4098, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.592592592592593e-05, | |
| "loss": 0.511, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 0.406745046377182, | |
| "eval_runtime": 7.4218, | |
| "eval_samples_per_second": 134.872, | |
| "eval_steps_per_second": 33.819, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.585185185185185e-05, | |
| "loss": 0.5086, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.577777777777778e-05, | |
| "loss": 0.4096, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 0.404786080121994, | |
| "eval_runtime": 7.39, | |
| "eval_samples_per_second": 135.453, | |
| "eval_steps_per_second": 33.965, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.5703703703703706e-05, | |
| "loss": 0.4281, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.5629629629629636e-05, | |
| "loss": 0.4779, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.555555555555556e-05, | |
| "loss": 0.405, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_loss": 0.40296873450279236, | |
| "eval_runtime": 7.4985, | |
| "eval_samples_per_second": 133.493, | |
| "eval_steps_per_second": 33.473, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.548148148148149e-05, | |
| "loss": 0.4144, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.540740740740741e-05, | |
| "loss": 0.3645, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 0.39786386489868164, | |
| "eval_runtime": 7.5248, | |
| "eval_samples_per_second": 133.027, | |
| "eval_steps_per_second": 33.356, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5333333333333335e-05, | |
| "loss": 0.3679, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5259259259259265e-05, | |
| "loss": 0.3724, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.518518518518519e-05, | |
| "loss": 0.4452, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 0.39415648579597473, | |
| "eval_runtime": 7.2333, | |
| "eval_samples_per_second": 138.388, | |
| "eval_steps_per_second": 34.701, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.511111111111112e-05, | |
| "loss": 0.325, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.503703703703704e-05, | |
| "loss": 0.4001, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_loss": 0.38874566555023193, | |
| "eval_runtime": 7.2318, | |
| "eval_samples_per_second": 138.417, | |
| "eval_steps_per_second": 34.708, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.496296296296297e-05, | |
| "loss": 0.3631, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.4888888888888894e-05, | |
| "loss": 0.4206, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.481481481481482e-05, | |
| "loss": 0.4616, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 0.38908329606056213, | |
| "eval_runtime": 7.2894, | |
| "eval_samples_per_second": 137.323, | |
| "eval_steps_per_second": 34.434, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.474074074074075e-05, | |
| "loss": 0.4723, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.466666666666667e-05, | |
| "loss": 0.3344, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 0.38453349471092224, | |
| "eval_runtime": 7.2247, | |
| "eval_samples_per_second": 138.552, | |
| "eval_steps_per_second": 34.742, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.4592592592592594e-05, | |
| "loss": 0.4402, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.4518518518518523e-05, | |
| "loss": 0.389, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.3899, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_loss": 0.3842224180698395, | |
| "eval_runtime": 7.3669, | |
| "eval_samples_per_second": 135.879, | |
| "eval_steps_per_second": 34.071, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.4370370370370376e-05, | |
| "loss": 0.3759, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.42962962962963e-05, | |
| "loss": 0.3662, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 0.3829396665096283, | |
| "eval_runtime": 7.3321, | |
| "eval_samples_per_second": 136.523, | |
| "eval_steps_per_second": 34.233, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.422222222222222e-05, | |
| "loss": 0.4342, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.414814814814815e-05, | |
| "loss": 0.4007, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.4074074074074076e-05, | |
| "loss": 0.3931, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 0.37909042835235596, | |
| "eval_runtime": 7.3121, | |
| "eval_samples_per_second": 136.896, | |
| "eval_steps_per_second": 34.327, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.3937, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.392592592592593e-05, | |
| "loss": 0.3655, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 0.37680765986442566, | |
| "eval_runtime": 7.2101, | |
| "eval_samples_per_second": 138.832, | |
| "eval_steps_per_second": 34.812, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.385185185185185e-05, | |
| "loss": 0.3454, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.377777777777778e-05, | |
| "loss": 0.3279, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.3703703703703705e-05, | |
| "loss": 0.4125, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 0.3775666058063507, | |
| "eval_runtime": 7.2081, | |
| "eval_samples_per_second": 138.872, | |
| "eval_steps_per_second": 34.822, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.3629629629629635e-05, | |
| "loss": 0.3324, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.355555555555556e-05, | |
| "loss": 0.3848, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 0.373475044965744, | |
| "eval_runtime": 7.1966, | |
| "eval_samples_per_second": 139.093, | |
| "eval_steps_per_second": 34.877, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.348148148148148e-05, | |
| "loss": 0.5328, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.340740740740741e-05, | |
| "loss": 0.3791, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "loss": 0.3907, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 0.3697744607925415, | |
| "eval_runtime": 7.2557, | |
| "eval_samples_per_second": 137.961, | |
| "eval_steps_per_second": 34.594, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.325925925925926e-05, | |
| "loss": 0.3595, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.318518518518519e-05, | |
| "loss": 0.4298, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_loss": 0.3690047562122345, | |
| "eval_runtime": 7.1998, | |
| "eval_samples_per_second": 139.032, | |
| "eval_steps_per_second": 34.862, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 4.311111111111111e-05, | |
| "loss": 0.368, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.303703703703704e-05, | |
| "loss": 0.384, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.296296296296296e-05, | |
| "loss": 0.4369, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_loss": 0.3619600534439087, | |
| "eval_runtime": 7.2818, | |
| "eval_samples_per_second": 137.467, | |
| "eval_steps_per_second": 34.47, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.2888888888888886e-05, | |
| "loss": 0.3569, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.2814814814814816e-05, | |
| "loss": 0.3332, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_loss": 0.3631249666213989, | |
| "eval_runtime": 7.2547, | |
| "eval_samples_per_second": 137.98, | |
| "eval_steps_per_second": 34.598, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.274074074074074e-05, | |
| "loss": 0.3758, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.266666666666667e-05, | |
| "loss": 0.385, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.259259259259259e-05, | |
| "loss": 0.4328, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_loss": 0.35931944847106934, | |
| "eval_runtime": 7.3342, | |
| "eval_samples_per_second": 136.484, | |
| "eval_steps_per_second": 34.223, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.2518518518518515e-05, | |
| "loss": 0.4093, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.2444444444444445e-05, | |
| "loss": 0.3752, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_loss": 0.3549325466156006, | |
| "eval_runtime": 7.1796, | |
| "eval_samples_per_second": 139.424, | |
| "eval_steps_per_second": 34.96, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.237037037037037e-05, | |
| "loss": 0.3473, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 4.22962962962963e-05, | |
| "loss": 0.3823, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.222222222222222e-05, | |
| "loss": 0.4004, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_loss": 0.3550175428390503, | |
| "eval_runtime": 7.3, | |
| "eval_samples_per_second": 137.122, | |
| "eval_steps_per_second": 34.383, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.2148148148148145e-05, | |
| "loss": 0.3484, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.2074074074074075e-05, | |
| "loss": 0.3552, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_loss": 0.3549079895019531, | |
| "eval_runtime": 7.6065, | |
| "eval_samples_per_second": 131.599, | |
| "eval_steps_per_second": 32.998, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.3847, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.192592592592593e-05, | |
| "loss": 0.3861, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.185185185185185e-05, | |
| "loss": 0.3719, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_loss": 0.3503241539001465, | |
| "eval_runtime": 7.7017, | |
| "eval_samples_per_second": 129.971, | |
| "eval_steps_per_second": 32.59, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.177777777777778e-05, | |
| "loss": 0.3661, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.1703703703703704e-05, | |
| "loss": 0.3762, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_loss": 0.3482719659805298, | |
| "eval_runtime": 7.5987, | |
| "eval_samples_per_second": 131.732, | |
| "eval_steps_per_second": 33.032, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 4.162962962962963e-05, | |
| "loss": 0.3803, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.155555555555556e-05, | |
| "loss": 0.2975, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.148148148148148e-05, | |
| "loss": 0.3397, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 0.34838753938674927, | |
| "eval_runtime": 7.5913, | |
| "eval_samples_per_second": 131.861, | |
| "eval_steps_per_second": 33.064, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.140740740740741e-05, | |
| "loss": 0.3218, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.133333333333333e-05, | |
| "loss": 0.345, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_loss": 0.3447699248790741, | |
| "eval_runtime": 7.6124, | |
| "eval_samples_per_second": 131.496, | |
| "eval_steps_per_second": 32.973, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.1259259259259256e-05, | |
| "loss": 0.3635, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.1185185185185186e-05, | |
| "loss": 0.3337, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.111111111111111e-05, | |
| "loss": 0.3892, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_loss": 0.3436849117279053, | |
| "eval_runtime": 7.6409, | |
| "eval_samples_per_second": 131.006, | |
| "eval_steps_per_second": 32.85, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.103703703703704e-05, | |
| "loss": 0.3792, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.096296296296296e-05, | |
| "loss": 0.3062, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_loss": 0.34092003107070923, | |
| "eval_runtime": 7.6331, | |
| "eval_samples_per_second": 131.14, | |
| "eval_steps_per_second": 32.883, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.088888888888889e-05, | |
| "loss": 0.3764, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.0814814814814815e-05, | |
| "loss": 0.3541, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.074074074074074e-05, | |
| "loss": 0.3728, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_loss": 0.3405691683292389, | |
| "eval_runtime": 7.5692, | |
| "eval_samples_per_second": 132.247, | |
| "eval_steps_per_second": 33.161, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.066666666666667e-05, | |
| "loss": 0.3013, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.059259259259259e-05, | |
| "loss": 0.321, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_loss": 0.3399699926376343, | |
| "eval_runtime": 7.588, | |
| "eval_samples_per_second": 131.919, | |
| "eval_steps_per_second": 33.079, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.051851851851852e-05, | |
| "loss": 0.3404, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.0444444444444444e-05, | |
| "loss": 0.381, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.0370370370370374e-05, | |
| "loss": 0.314, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_loss": 0.3377434313297272, | |
| "eval_runtime": 7.5595, | |
| "eval_samples_per_second": 132.417, | |
| "eval_steps_per_second": 33.203, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.02962962962963e-05, | |
| "loss": 0.3016, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.022222222222222e-05, | |
| "loss": 0.4416, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_loss": 0.33633822202682495, | |
| "eval_runtime": 7.5093, | |
| "eval_samples_per_second": 133.302, | |
| "eval_steps_per_second": 33.425, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.014814814814815e-05, | |
| "loss": 0.348, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.007407407407407e-05, | |
| "loss": 0.3685, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4e-05, | |
| "loss": 0.362, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_loss": 0.33278197050094604, | |
| "eval_runtime": 7.5483, | |
| "eval_samples_per_second": 132.612, | |
| "eval_steps_per_second": 33.252, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.9925925925925926e-05, | |
| "loss": 0.3961, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.985185185185185e-05, | |
| "loss": 0.2903, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_loss": 0.33177217841148376, | |
| "eval_runtime": 7.574, | |
| "eval_samples_per_second": 132.162, | |
| "eval_steps_per_second": 33.14, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.977777777777778e-05, | |
| "loss": 0.3527, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.97037037037037e-05, | |
| "loss": 0.3297, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.962962962962963e-05, | |
| "loss": 0.3362, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_loss": 0.3336848020553589, | |
| "eval_runtime": 7.5409, | |
| "eval_samples_per_second": 132.743, | |
| "eval_steps_per_second": 33.285, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.9555555555555556e-05, | |
| "loss": 0.3714, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.9481481481481485e-05, | |
| "loss": 0.3278, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_loss": 0.32720068097114563, | |
| "eval_runtime": 7.5879, | |
| "eval_samples_per_second": 131.921, | |
| "eval_steps_per_second": 33.079, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.940740740740741e-05, | |
| "loss": 0.3328, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.933333333333333e-05, | |
| "loss": 0.2908, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.925925925925926e-05, | |
| "loss": 0.358, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_loss": 0.3258882761001587, | |
| "eval_runtime": 7.6673, | |
| "eval_samples_per_second": 130.554, | |
| "eval_steps_per_second": 32.736, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.9185185185185185e-05, | |
| "loss": 0.4164, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.9111111111111115e-05, | |
| "loss": 0.3103, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_loss": 0.3261792063713074, | |
| "eval_runtime": 7.6042, | |
| "eval_samples_per_second": 131.637, | |
| "eval_steps_per_second": 33.008, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.903703703703704e-05, | |
| "loss": 0.4174, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.896296296296296e-05, | |
| "loss": 0.3545, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.3297, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_loss": 0.32423877716064453, | |
| "eval_runtime": 7.6588, | |
| "eval_samples_per_second": 130.7, | |
| "eval_steps_per_second": 32.773, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.8814814814814814e-05, | |
| "loss": 0.3856, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.8740740740740744e-05, | |
| "loss": 0.3598, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_loss": 0.3228550851345062, | |
| "eval_runtime": 7.6093, | |
| "eval_samples_per_second": 131.55, | |
| "eval_steps_per_second": 32.986, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.866666666666667e-05, | |
| "loss": 0.3218, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 3.85925925925926e-05, | |
| "loss": 0.3577, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.851851851851852e-05, | |
| "loss": 0.3343, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_loss": 0.32196611166000366, | |
| "eval_runtime": 7.6896, | |
| "eval_samples_per_second": 130.175, | |
| "eval_steps_per_second": 32.641, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.844444444444444e-05, | |
| "loss": 0.2857, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.837037037037037e-05, | |
| "loss": 0.3324, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_loss": 0.32020455598831177, | |
| "eval_runtime": 7.512, | |
| "eval_samples_per_second": 133.254, | |
| "eval_steps_per_second": 33.413, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 3.8296296296296296e-05, | |
| "loss": 0.3791, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.8222222222222226e-05, | |
| "loss": 0.3563, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.814814814814815e-05, | |
| "loss": 0.3744, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_loss": 0.31763964891433716, | |
| "eval_runtime": 7.5475, | |
| "eval_samples_per_second": 132.626, | |
| "eval_steps_per_second": 33.256, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.807407407407408e-05, | |
| "loss": 0.3689, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.8e-05, | |
| "loss": 0.3385, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_loss": 0.3172718584537506, | |
| "eval_runtime": 7.4165, | |
| "eval_samples_per_second": 134.969, | |
| "eval_steps_per_second": 33.843, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.7925925925925925e-05, | |
| "loss": 0.3142, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.7851851851851855e-05, | |
| "loss": 0.369, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.777777777777778e-05, | |
| "loss": 0.3264, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_loss": 0.31627902388572693, | |
| "eval_runtime": 7.4047, | |
| "eval_samples_per_second": 135.184, | |
| "eval_steps_per_second": 33.897, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.770370370370371e-05, | |
| "loss": 0.3481, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.762962962962963e-05, | |
| "loss": 0.3162, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_loss": 0.31443050503730774, | |
| "eval_runtime": 7.2703, | |
| "eval_samples_per_second": 137.683, | |
| "eval_steps_per_second": 34.524, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.7555555555555554e-05, | |
| "loss": 0.3458, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.7481481481481484e-05, | |
| "loss": 0.3641, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.740740740740741e-05, | |
| "loss": 0.3399, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_loss": 0.3139602839946747, | |
| "eval_runtime": 7.5549, | |
| "eval_samples_per_second": 132.496, | |
| "eval_steps_per_second": 33.223, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.733333333333334e-05, | |
| "loss": 0.3082, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.725925925925926e-05, | |
| "loss": 0.3544, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 0.3116389811038971, | |
| "eval_runtime": 7.3628, | |
| "eval_samples_per_second": 135.955, | |
| "eval_steps_per_second": 34.091, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.718518518518519e-05, | |
| "loss": 0.3356, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.7111111111111113e-05, | |
| "loss": 0.383, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 0.3839, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_loss": 0.3124999403953552, | |
| "eval_runtime": 7.3086, | |
| "eval_samples_per_second": 136.962, | |
| "eval_steps_per_second": 34.343, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 3.6962962962962966e-05, | |
| "loss": 0.3378, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.688888888888889e-05, | |
| "loss": 0.3034, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_loss": 0.31177276372909546, | |
| "eval_runtime": 7.2443, | |
| "eval_samples_per_second": 138.177, | |
| "eval_steps_per_second": 34.648, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.681481481481482e-05, | |
| "loss": 0.3384, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.674074074074074e-05, | |
| "loss": 0.3401, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 0.2989, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 0.3089355528354645, | |
| "eval_runtime": 7.1728, | |
| "eval_samples_per_second": 139.556, | |
| "eval_steps_per_second": 34.993, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 3.6592592592592596e-05, | |
| "loss": 0.3398, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.651851851851852e-05, | |
| "loss": 0.3055, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_loss": 0.30923065543174744, | |
| "eval_runtime": 7.208, | |
| "eval_samples_per_second": 138.874, | |
| "eval_steps_per_second": 34.822, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.644444444444445e-05, | |
| "loss": 0.3088, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.637037037037037e-05, | |
| "loss": 0.3016, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 3.62962962962963e-05, | |
| "loss": 0.3143, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_loss": 0.3072822690010071, | |
| "eval_runtime": 7.2053, | |
| "eval_samples_per_second": 138.926, | |
| "eval_steps_per_second": 34.836, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.6222222222222225e-05, | |
| "loss": 0.2985, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 3.614814814814815e-05, | |
| "loss": 0.3805, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_loss": 0.3062894940376282, | |
| "eval_runtime": 7.2009, | |
| "eval_samples_per_second": 139.01, | |
| "eval_steps_per_second": 34.857, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.607407407407408e-05, | |
| "loss": 0.2831, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.3258, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.592592592592593e-05, | |
| "loss": 0.2625, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_loss": 0.30678972601890564, | |
| "eval_runtime": 7.1985, | |
| "eval_samples_per_second": 139.057, | |
| "eval_steps_per_second": 34.869, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.5851851851851854e-05, | |
| "loss": 0.2923, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.577777777777778e-05, | |
| "loss": 0.3276, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_loss": 0.30508390069007874, | |
| "eval_runtime": 7.2926, | |
| "eval_samples_per_second": 137.262, | |
| "eval_steps_per_second": 34.418, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.570370370370371e-05, | |
| "loss": 0.3521, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.562962962962963e-05, | |
| "loss": 0.283, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.555555555555556e-05, | |
| "loss": 0.3364, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_loss": 0.3016437888145447, | |
| "eval_runtime": 7.3704, | |
| "eval_samples_per_second": 135.814, | |
| "eval_steps_per_second": 34.055, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.548148148148148e-05, | |
| "loss": 0.2882, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.540740740740741e-05, | |
| "loss": 0.3353, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_loss": 0.30082637071609497, | |
| "eval_runtime": 7.6161, | |
| "eval_samples_per_second": 131.432, | |
| "eval_steps_per_second": 32.957, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.5333333333333336e-05, | |
| "loss": 0.2675, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.525925925925926e-05, | |
| "loss": 0.3182, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.518518518518519e-05, | |
| "loss": 0.2932, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_loss": 0.2999679744243622, | |
| "eval_runtime": 7.5872, | |
| "eval_samples_per_second": 131.933, | |
| "eval_steps_per_second": 33.082, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.511111111111111e-05, | |
| "loss": 0.3029, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.503703703703704e-05, | |
| "loss": 0.2434, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 0.2994518280029297, | |
| "eval_runtime": 7.5416, | |
| "eval_samples_per_second": 132.731, | |
| "eval_steps_per_second": 33.282, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.4962962962962965e-05, | |
| "loss": 0.3236, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.4888888888888895e-05, | |
| "loss": 0.2967, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.481481481481482e-05, | |
| "loss": 0.3572, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_loss": 0.29705023765563965, | |
| "eval_runtime": 7.5563, | |
| "eval_samples_per_second": 132.473, | |
| "eval_steps_per_second": 33.218, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.474074074074074e-05, | |
| "loss": 0.3384, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.466666666666667e-05, | |
| "loss": 0.2816, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_loss": 0.29650408029556274, | |
| "eval_runtime": 7.3924, | |
| "eval_samples_per_second": 135.409, | |
| "eval_steps_per_second": 33.954, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.4592592592592594e-05, | |
| "loss": 0.3361, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.4518518518518524e-05, | |
| "loss": 0.3125, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.444444444444445e-05, | |
| "loss": 0.3801, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_loss": 0.2950206696987152, | |
| "eval_runtime": 7.3773, | |
| "eval_samples_per_second": 135.687, | |
| "eval_steps_per_second": 34.023, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.437037037037037e-05, | |
| "loss": 0.2684, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.42962962962963e-05, | |
| "loss": 0.3387, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_loss": 0.29483914375305176, | |
| "eval_runtime": 7.5911, | |
| "eval_samples_per_second": 131.865, | |
| "eval_steps_per_second": 33.065, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.4222222222222224e-05, | |
| "loss": 0.3394, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.4148148148148153e-05, | |
| "loss": 0.2852, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.4074074074074077e-05, | |
| "loss": 0.2903, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 0.2979108989238739, | |
| "eval_runtime": 7.666, | |
| "eval_samples_per_second": 130.576, | |
| "eval_steps_per_second": 32.742, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.4000000000000007e-05, | |
| "loss": 0.3144, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.392592592592593e-05, | |
| "loss": 0.3238, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_loss": 0.2956538200378418, | |
| "eval_runtime": 7.6479, | |
| "eval_samples_per_second": 130.886, | |
| "eval_steps_per_second": 32.82, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.385185185185185e-05, | |
| "loss": 0.2848, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.377777777777778e-05, | |
| "loss": 0.2742, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.3703703703703706e-05, | |
| "loss": 0.3392, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_loss": 0.29324308037757874, | |
| "eval_runtime": 7.6322, | |
| "eval_samples_per_second": 131.155, | |
| "eval_steps_per_second": 32.887, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.3629629629629636e-05, | |
| "loss": 0.2332, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.355555555555556e-05, | |
| "loss": 0.2754, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_loss": 0.29224133491516113, | |
| "eval_runtime": 7.5764, | |
| "eval_samples_per_second": 132.12, | |
| "eval_steps_per_second": 33.129, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.348148148148148e-05, | |
| "loss": 0.2778, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.340740740740741e-05, | |
| "loss": 0.2646, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.3542, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.2917466461658478, | |
| "eval_runtime": 7.5812, | |
| "eval_samples_per_second": 132.038, | |
| "eval_steps_per_second": 33.108, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.3259259259259265e-05, | |
| "loss": 0.3091, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.318518518518519e-05, | |
| "loss": 0.314, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "eval_loss": 0.29272007942199707, | |
| "eval_runtime": 7.5219, | |
| "eval_samples_per_second": 133.079, | |
| "eval_steps_per_second": 33.369, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.311111111111112e-05, | |
| "loss": 0.269, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.303703703703704e-05, | |
| "loss": 0.2504, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 3.2962962962962964e-05, | |
| "loss": 0.3201, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 0.29086679220199585, | |
| "eval_runtime": 7.5125, | |
| "eval_samples_per_second": 133.245, | |
| "eval_steps_per_second": 33.411, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.2888888888888894e-05, | |
| "loss": 0.2967, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.281481481481482e-05, | |
| "loss": 0.2799, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_loss": 0.28955137729644775, | |
| "eval_runtime": 7.3165, | |
| "eval_samples_per_second": 136.814, | |
| "eval_steps_per_second": 34.306, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.274074074074075e-05, | |
| "loss": 0.2535, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.266666666666667e-05, | |
| "loss": 0.2763, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 3.25925925925926e-05, | |
| "loss": 0.2356, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_loss": 0.2900914251804352, | |
| "eval_runtime": 7.3036, | |
| "eval_samples_per_second": 137.055, | |
| "eval_steps_per_second": 34.366, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.251851851851852e-05, | |
| "loss": 0.3099, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 3.2444444444444446e-05, | |
| "loss": 0.271, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_loss": 0.28874123096466064, | |
| "eval_runtime": 7.2862, | |
| "eval_samples_per_second": 137.382, | |
| "eval_steps_per_second": 34.448, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.2370370370370376e-05, | |
| "loss": 0.32, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.22962962962963e-05, | |
| "loss": 0.28, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.222222222222223e-05, | |
| "loss": 0.2246, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_loss": 0.2897484004497528, | |
| "eval_runtime": 7.2105, | |
| "eval_samples_per_second": 138.825, | |
| "eval_steps_per_second": 34.81, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 3.214814814814815e-05, | |
| "loss": 0.3076, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.2074074074074075e-05, | |
| "loss": 0.2824, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_loss": 0.2881883680820465, | |
| "eval_runtime": 7.5403, | |
| "eval_samples_per_second": 132.752, | |
| "eval_steps_per_second": 33.288, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.3322, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.192592592592593e-05, | |
| "loss": 0.2299, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.185185185185185e-05, | |
| "loss": 0.2516, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_loss": 0.287548303604126, | |
| "eval_runtime": 7.3579, | |
| "eval_samples_per_second": 136.045, | |
| "eval_steps_per_second": 34.113, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 3.177777777777778e-05, | |
| "loss": 0.2652, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.1703703703703705e-05, | |
| "loss": 0.2465, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_loss": 0.2856321632862091, | |
| "eval_runtime": 7.7527, | |
| "eval_samples_per_second": 129.116, | |
| "eval_steps_per_second": 32.376, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 3.1629629629629634e-05, | |
| "loss": 0.301, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.155555555555556e-05, | |
| "loss": 0.1987, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.148148148148148e-05, | |
| "loss": 0.3417, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_loss": 0.2860187292098999, | |
| "eval_runtime": 7.5819, | |
| "eval_samples_per_second": 132.024, | |
| "eval_steps_per_second": 33.105, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.140740740740741e-05, | |
| "loss": 0.2464, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.1333333333333334e-05, | |
| "loss": 0.2418, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_loss": 0.28458064794540405, | |
| "eval_runtime": 7.684, | |
| "eval_samples_per_second": 130.271, | |
| "eval_steps_per_second": 32.665, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 3.1259259259259264e-05, | |
| "loss": 0.2884, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.118518518518519e-05, | |
| "loss": 0.2396, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.111111111111111e-05, | |
| "loss": 0.2625, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_loss": 0.28442564606666565, | |
| "eval_runtime": 7.6251, | |
| "eval_samples_per_second": 131.277, | |
| "eval_steps_per_second": 32.918, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.103703703703704e-05, | |
| "loss": 0.2286, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 3.096296296296296e-05, | |
| "loss": 0.3023, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_loss": 0.28357136249542236, | |
| "eval_runtime": 7.6709, | |
| "eval_samples_per_second": 130.493, | |
| "eval_steps_per_second": 32.721, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.088888888888889e-05, | |
| "loss": 0.2631, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 3.0814814814814816e-05, | |
| "loss": 0.2656, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.074074074074074e-05, | |
| "loss": 0.2301, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_loss": 0.2838016450405121, | |
| "eval_runtime": 7.4972, | |
| "eval_samples_per_second": 133.516, | |
| "eval_steps_per_second": 33.479, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.066666666666667e-05, | |
| "loss": 0.2708, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.059259259259259e-05, | |
| "loss": 0.2638, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_loss": 0.2819526791572571, | |
| "eval_runtime": 7.5414, | |
| "eval_samples_per_second": 132.734, | |
| "eval_steps_per_second": 33.283, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.0518518518518515e-05, | |
| "loss": 0.2786, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 3.044444444444445e-05, | |
| "loss": 0.3328, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.037037037037037e-05, | |
| "loss": 0.2835, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_loss": 0.2829004228115082, | |
| "eval_runtime": 7.3311, | |
| "eval_samples_per_second": 136.541, | |
| "eval_steps_per_second": 34.238, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.02962962962963e-05, | |
| "loss": 0.3523, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.0222222222222225e-05, | |
| "loss": 0.2512, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_loss": 0.28273966908454895, | |
| "eval_runtime": 7.3473, | |
| "eval_samples_per_second": 136.24, | |
| "eval_steps_per_second": 34.162, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 3.0148148148148148e-05, | |
| "loss": 0.2382, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3.0074074074074078e-05, | |
| "loss": 0.2668, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 3e-05, | |
| "loss": 0.2472, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 0.28208473324775696, | |
| "eval_runtime": 7.2579, | |
| "eval_samples_per_second": 137.92, | |
| "eval_steps_per_second": 34.583, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.992592592592593e-05, | |
| "loss": 0.2332, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.9851851851851854e-05, | |
| "loss": 0.266, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_loss": 0.281426340341568, | |
| "eval_runtime": 7.2593, | |
| "eval_samples_per_second": 137.892, | |
| "eval_steps_per_second": 34.576, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.9777777777777777e-05, | |
| "loss": 0.2908, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.9703703703703707e-05, | |
| "loss": 0.2299, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 0.2284, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_loss": 0.2816203832626343, | |
| "eval_runtime": 7.2174, | |
| "eval_samples_per_second": 138.694, | |
| "eval_steps_per_second": 34.777, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.955555555555556e-05, | |
| "loss": 0.2821, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.9481481481481483e-05, | |
| "loss": 0.3079, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_loss": 0.2785053551197052, | |
| "eval_runtime": 7.3, | |
| "eval_samples_per_second": 137.124, | |
| "eval_steps_per_second": 34.384, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.9407407407407413e-05, | |
| "loss": 0.1826, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.9333333333333336e-05, | |
| "loss": 0.2438, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 2.925925925925926e-05, | |
| "loss": 0.2014, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_loss": 0.2803582549095154, | |
| "eval_runtime": 7.196, | |
| "eval_samples_per_second": 139.106, | |
| "eval_steps_per_second": 34.881, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.918518518518519e-05, | |
| "loss": 0.2979, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.9111111111111112e-05, | |
| "loss": 0.2223, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_loss": 0.27921053767204285, | |
| "eval_runtime": 7.2996, | |
| "eval_samples_per_second": 137.132, | |
| "eval_steps_per_second": 34.386, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.9037037037037042e-05, | |
| "loss": 0.2515, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 2.8962962962962965e-05, | |
| "loss": 0.2771, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.8888888888888888e-05, | |
| "loss": 0.2611, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_loss": 0.2792360186576843, | |
| "eval_runtime": 7.1956, | |
| "eval_samples_per_second": 139.112, | |
| "eval_steps_per_second": 34.882, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 2.8814814814814818e-05, | |
| "loss": 0.2867, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.874074074074074e-05, | |
| "loss": 0.2492, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_loss": 0.2788338363170624, | |
| "eval_runtime": 7.2868, | |
| "eval_samples_per_second": 137.371, | |
| "eval_steps_per_second": 34.446, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.8666666666666668e-05, | |
| "loss": 0.26, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.8592592592592594e-05, | |
| "loss": 0.3693, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.851851851851852e-05, | |
| "loss": 0.2686, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_loss": 0.27813464403152466, | |
| "eval_runtime": 7.1901, | |
| "eval_samples_per_second": 139.219, | |
| "eval_steps_per_second": 34.909, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 2.8444444444444447e-05, | |
| "loss": 0.2091, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.837037037037037e-05, | |
| "loss": 0.2701, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_loss": 0.27804136276245117, | |
| "eval_runtime": 7.2533, | |
| "eval_samples_per_second": 138.006, | |
| "eval_steps_per_second": 34.605, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.8296296296296297e-05, | |
| "loss": 0.2795, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.8222222222222223e-05, | |
| "loss": 0.3346, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 2.814814814814815e-05, | |
| "loss": 0.2254, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_loss": 0.27507713437080383, | |
| "eval_runtime": 7.2159, | |
| "eval_samples_per_second": 138.721, | |
| "eval_steps_per_second": 34.784, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.8074074074074076e-05, | |
| "loss": 0.2507, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 0.2942, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_loss": 0.2757761776447296, | |
| "eval_runtime": 7.2423, | |
| "eval_samples_per_second": 138.216, | |
| "eval_steps_per_second": 34.658, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.7925925925925926e-05, | |
| "loss": 0.2678, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.7851851851851853e-05, | |
| "loss": 0.2459, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.2446, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_loss": 0.2748652994632721, | |
| "eval_runtime": 7.2844, | |
| "eval_samples_per_second": 137.416, | |
| "eval_steps_per_second": 34.457, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.7703703703703706e-05, | |
| "loss": 0.2278, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.7629629629629632e-05, | |
| "loss": 0.2337, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_loss": 0.2740112841129303, | |
| "eval_runtime": 7.3832, | |
| "eval_samples_per_second": 135.578, | |
| "eval_steps_per_second": 33.996, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.7555555555555555e-05, | |
| "loss": 0.2335, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.7481481481481482e-05, | |
| "loss": 0.2023, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.7407407407407408e-05, | |
| "loss": 0.2166, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_loss": 0.27378755807876587, | |
| "eval_runtime": 7.4184, | |
| "eval_samples_per_second": 134.934, | |
| "eval_steps_per_second": 33.835, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.733333333333333e-05, | |
| "loss": 0.2161, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 2.725925925925926e-05, | |
| "loss": 0.3003, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_loss": 0.2731979191303253, | |
| "eval_runtime": 13.3786, | |
| "eval_samples_per_second": 74.821, | |
| "eval_steps_per_second": 18.761, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.7185185185185184e-05, | |
| "loss": 0.2498, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 2.7111111111111114e-05, | |
| "loss": 0.2749, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.7037037037037037e-05, | |
| "loss": 0.2422, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_loss": 0.27196845412254333, | |
| "eval_runtime": 7.5816, | |
| "eval_samples_per_second": 132.03, | |
| "eval_steps_per_second": 33.107, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.696296296296296e-05, | |
| "loss": 0.2468, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.688888888888889e-05, | |
| "loss": 0.263, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_loss": 0.2727610766887665, | |
| "eval_runtime": 7.6161, | |
| "eval_samples_per_second": 131.432, | |
| "eval_steps_per_second": 32.956, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 2.6814814814814814e-05, | |
| "loss": 0.2436, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.6740740740740743e-05, | |
| "loss": 0.27, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.2462, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_loss": 0.2711770832538605, | |
| "eval_runtime": 7.4596, | |
| "eval_samples_per_second": 134.19, | |
| "eval_steps_per_second": 33.648, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.659259259259259e-05, | |
| "loss": 0.3066, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.651851851851852e-05, | |
| "loss": 0.2262, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 0.27057451009750366, | |
| "eval_runtime": 7.4675, | |
| "eval_samples_per_second": 134.047, | |
| "eval_steps_per_second": 33.612, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 2.6444444444444443e-05, | |
| "loss": 0.2187, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.6370370370370373e-05, | |
| "loss": 0.2992, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 2.6296296296296296e-05, | |
| "loss": 0.3181, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_loss": 0.27069294452667236, | |
| "eval_runtime": 7.2871, | |
| "eval_samples_per_second": 137.367, | |
| "eval_steps_per_second": 34.445, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.6222222222222226e-05, | |
| "loss": 0.275, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.614814814814815e-05, | |
| "loss": 0.2337, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "eval_loss": 0.2696068286895752, | |
| "eval_runtime": 7.3083, | |
| "eval_samples_per_second": 136.967, | |
| "eval_steps_per_second": 34.344, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.6074074074074072e-05, | |
| "loss": 0.2596, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 0.2511, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 2.5925925925925925e-05, | |
| "loss": 0.2223, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_loss": 0.26948779821395874, | |
| "eval_runtime": 7.2594, | |
| "eval_samples_per_second": 137.89, | |
| "eval_steps_per_second": 34.576, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.5851851851851855e-05, | |
| "loss": 0.266, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.5777777777777778e-05, | |
| "loss": 0.2594, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_loss": 0.2680164873600006, | |
| "eval_runtime": 7.3879, | |
| "eval_samples_per_second": 135.492, | |
| "eval_steps_per_second": 33.975, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.5703703703703708e-05, | |
| "loss": 0.2623, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.562962962962963e-05, | |
| "loss": 0.2028, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.5555555555555554e-05, | |
| "loss": 0.2226, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_loss": 0.27027028799057007, | |
| "eval_runtime": 7.2794, | |
| "eval_samples_per_second": 137.512, | |
| "eval_steps_per_second": 34.481, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.5481481481481484e-05, | |
| "loss": 0.211, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.5407407407407407e-05, | |
| "loss": 0.2037, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_loss": 0.2683195471763611, | |
| "eval_runtime": 7.3164, | |
| "eval_samples_per_second": 136.815, | |
| "eval_steps_per_second": 34.306, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.5333333333333337e-05, | |
| "loss": 0.1988, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.525925925925926e-05, | |
| "loss": 0.2262, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5185185185185183e-05, | |
| "loss": 0.2538, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_loss": 0.26811686158180237, | |
| "eval_runtime": 7.2097, | |
| "eval_samples_per_second": 138.841, | |
| "eval_steps_per_second": 34.814, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 2.5111111111111113e-05, | |
| "loss": 0.2029, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.5037037037037036e-05, | |
| "loss": 0.2094, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_loss": 0.269042432308197, | |
| "eval_runtime": 7.2591, | |
| "eval_samples_per_second": 137.896, | |
| "eval_steps_per_second": 34.577, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.4962962962962963e-05, | |
| "loss": 0.195, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.488888888888889e-05, | |
| "loss": 0.2639, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 2.4814814814814816e-05, | |
| "loss": 0.2591, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "eval_loss": 0.26725488901138306, | |
| "eval_runtime": 7.2092, | |
| "eval_samples_per_second": 138.851, | |
| "eval_steps_per_second": 34.817, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.4740740740740742e-05, | |
| "loss": 0.2043, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.466666666666667e-05, | |
| "loss": 0.2274, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_loss": 0.2678578794002533, | |
| "eval_runtime": 7.2753, | |
| "eval_samples_per_second": 137.588, | |
| "eval_steps_per_second": 34.5, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.4592592592592595e-05, | |
| "loss": 0.2352, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.451851851851852e-05, | |
| "loss": 0.2365, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 2.4444444444444445e-05, | |
| "loss": 0.2526, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_loss": 0.266437292098999, | |
| "eval_runtime": 7.2076, | |
| "eval_samples_per_second": 138.88, | |
| "eval_steps_per_second": 34.824, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.437037037037037e-05, | |
| "loss": 0.1861, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 2.4296296296296298e-05, | |
| "loss": 0.2659, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_loss": 0.265876829624176, | |
| "eval_runtime": 7.5122, | |
| "eval_samples_per_second": 133.25, | |
| "eval_steps_per_second": 33.412, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.4222222222222224e-05, | |
| "loss": 0.2159, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.414814814814815e-05, | |
| "loss": 0.2809, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.4074074074074074e-05, | |
| "loss": 0.2144, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_loss": 0.2642614245414734, | |
| "eval_runtime": 7.468, | |
| "eval_samples_per_second": 134.039, | |
| "eval_steps_per_second": 33.61, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.2122, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.3925925925925927e-05, | |
| "loss": 0.2432, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_loss": 0.2640276551246643, | |
| "eval_runtime": 7.5832, | |
| "eval_samples_per_second": 132.003, | |
| "eval_steps_per_second": 33.1, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.3851851851851854e-05, | |
| "loss": 0.2441, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.377777777777778e-05, | |
| "loss": 0.2647, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.3703703703703707e-05, | |
| "loss": 0.2852, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_loss": 0.26369205117225647, | |
| "eval_runtime": 7.3907, | |
| "eval_samples_per_second": 135.44, | |
| "eval_steps_per_second": 33.962, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.3629629629629633e-05, | |
| "loss": 0.2321, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.3555555555555556e-05, | |
| "loss": 0.3375, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "eval_loss": 0.2633427381515503, | |
| "eval_runtime": 7.3881, | |
| "eval_samples_per_second": 135.489, | |
| "eval_steps_per_second": 33.974, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 2.3481481481481483e-05, | |
| "loss": 0.2219, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.340740740740741e-05, | |
| "loss": 0.2004, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 0.272, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 0.26376578211784363, | |
| "eval_runtime": 7.2236, | |
| "eval_samples_per_second": 138.574, | |
| "eval_steps_per_second": 34.747, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.3259259259259262e-05, | |
| "loss": 0.2887, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.318518518518519e-05, | |
| "loss": 0.2502, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_loss": 0.2631487250328064, | |
| "eval_runtime": 7.2851, | |
| "eval_samples_per_second": 137.403, | |
| "eval_steps_per_second": 34.454, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 2.3111111111111112e-05, | |
| "loss": 0.206, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.303703703703704e-05, | |
| "loss": 0.2368, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.2962962962962965e-05, | |
| "loss": 0.2131, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_loss": 0.2649364173412323, | |
| "eval_runtime": 7.1872, | |
| "eval_samples_per_second": 139.275, | |
| "eval_steps_per_second": 34.923, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.288888888888889e-05, | |
| "loss": 0.2137, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 2.2814814814814818e-05, | |
| "loss": 0.2313, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_loss": 0.2636246979236603, | |
| "eval_runtime": 7.2744, | |
| "eval_samples_per_second": 137.606, | |
| "eval_steps_per_second": 34.505, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.2740740740740744e-05, | |
| "loss": 0.239, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.2666666666666668e-05, | |
| "loss": 0.2354, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 2.2592592592592594e-05, | |
| "loss": 0.1919, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_loss": 0.262579083442688, | |
| "eval_runtime": 7.2981, | |
| "eval_samples_per_second": 137.159, | |
| "eval_steps_per_second": 34.392, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.251851851851852e-05, | |
| "loss": 0.2836, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.2444444444444447e-05, | |
| "loss": 0.2761, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_loss": 0.26134076714515686, | |
| "eval_runtime": 7.3981, | |
| "eval_samples_per_second": 135.305, | |
| "eval_steps_per_second": 33.928, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.2370370370370374e-05, | |
| "loss": 0.1932, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 2.2296296296296297e-05, | |
| "loss": 0.2248, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.2151, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "eval_loss": 0.26156380772590637, | |
| "eval_runtime": 7.2684, | |
| "eval_samples_per_second": 137.72, | |
| "eval_steps_per_second": 34.533, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.214814814814815e-05, | |
| "loss": 0.2614, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.2074074074074076e-05, | |
| "loss": 0.2725, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_loss": 0.26058655977249146, | |
| "eval_runtime": 7.2639, | |
| "eval_samples_per_second": 137.804, | |
| "eval_steps_per_second": 34.554, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 0.2402, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 2.1925925925925926e-05, | |
| "loss": 0.2283, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.1851851851851852e-05, | |
| "loss": 0.2552, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_loss": 0.26024872064590454, | |
| "eval_runtime": 7.191, | |
| "eval_samples_per_second": 139.201, | |
| "eval_steps_per_second": 34.905, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 2.177777777777778e-05, | |
| "loss": 0.2615, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.1703703703703705e-05, | |
| "loss": 0.2094, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 0.2611374258995056, | |
| "eval_runtime": 7.1864, | |
| "eval_samples_per_second": 139.291, | |
| "eval_steps_per_second": 34.927, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.162962962962963e-05, | |
| "loss": 0.2267, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.1555555555555555e-05, | |
| "loss": 0.2807, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 2.148148148148148e-05, | |
| "loss": 0.2948, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_loss": 0.25876420736312866, | |
| "eval_runtime": 7.2079, | |
| "eval_samples_per_second": 138.875, | |
| "eval_steps_per_second": 34.823, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1407407407407408e-05, | |
| "loss": 0.261, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1333333333333335e-05, | |
| "loss": 0.3131, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_loss": 0.25907066464424133, | |
| "eval_runtime": 7.2187, | |
| "eval_samples_per_second": 138.668, | |
| "eval_steps_per_second": 34.771, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1259259259259258e-05, | |
| "loss": 0.2667, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.1185185185185184e-05, | |
| "loss": 0.216, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.111111111111111e-05, | |
| "loss": 0.237, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 0.2582587003707886, | |
| "eval_runtime": 8.5503, | |
| "eval_samples_per_second": 117.072, | |
| "eval_steps_per_second": 29.356, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.1037037037037037e-05, | |
| "loss": 0.3102, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 2.0962962962962964e-05, | |
| "loss": 0.2385, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_loss": 0.2583344876766205, | |
| "eval_runtime": 7.2821, | |
| "eval_samples_per_second": 137.461, | |
| "eval_steps_per_second": 34.468, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.088888888888889e-05, | |
| "loss": 0.3235, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.0814814814814813e-05, | |
| "loss": 0.2154, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.074074074074074e-05, | |
| "loss": 0.1842, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_loss": 0.2580818831920624, | |
| "eval_runtime": 7.213, | |
| "eval_samples_per_second": 138.777, | |
| "eval_steps_per_second": 34.798, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.0666666666666666e-05, | |
| "loss": 0.2051, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.0592592592592593e-05, | |
| "loss": 0.2291, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_loss": 0.25752562284469604, | |
| "eval_runtime": 7.2125, | |
| "eval_samples_per_second": 138.787, | |
| "eval_steps_per_second": 34.801, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.051851851851852e-05, | |
| "loss": 0.1973, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.0444444444444446e-05, | |
| "loss": 0.2864, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.037037037037037e-05, | |
| "loss": 0.2124, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_loss": 0.2576400637626648, | |
| "eval_runtime": 7.2072, | |
| "eval_samples_per_second": 138.89, | |
| "eval_steps_per_second": 34.827, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.0296296296296296e-05, | |
| "loss": 0.3026, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.0222222222222222e-05, | |
| "loss": 0.2713, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 0.256246954202652, | |
| "eval_runtime": 7.2163, | |
| "eval_samples_per_second": 138.714, | |
| "eval_steps_per_second": 34.782, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.014814814814815e-05, | |
| "loss": 0.2531, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2.0074074074074075e-05, | |
| "loss": 0.2241, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 2e-05, | |
| "loss": 0.2111, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_loss": 0.25542324781417847, | |
| "eval_runtime": 7.2106, | |
| "eval_samples_per_second": 138.823, | |
| "eval_steps_per_second": 34.81, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.9925925925925925e-05, | |
| "loss": 0.2535, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.985185185185185e-05, | |
| "loss": 0.2385, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "eval_loss": 0.2562381625175476, | |
| "eval_runtime": 7.1846, | |
| "eval_samples_per_second": 139.325, | |
| "eval_steps_per_second": 34.936, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9777777777777778e-05, | |
| "loss": 0.212, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.9703703703703704e-05, | |
| "loss": 0.2094, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.962962962962963e-05, | |
| "loss": 0.2499, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_loss": 0.2557750344276428, | |
| "eval_runtime": 7.2707, | |
| "eval_samples_per_second": 137.676, | |
| "eval_steps_per_second": 34.522, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.9555555555555557e-05, | |
| "loss": 0.2507, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.948148148148148e-05, | |
| "loss": 0.233, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_loss": 0.25592681765556335, | |
| "eval_runtime": 7.3242, | |
| "eval_samples_per_second": 136.671, | |
| "eval_steps_per_second": 34.27, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9407407407407407e-05, | |
| "loss": 0.2772, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.9333333333333333e-05, | |
| "loss": 0.2487, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.925925925925926e-05, | |
| "loss": 0.2538, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "eval_loss": 0.25490960478782654, | |
| "eval_runtime": 7.3569, | |
| "eval_samples_per_second": 136.062, | |
| "eval_steps_per_second": 34.117, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.9185185185185186e-05, | |
| "loss": 0.2681, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.9111111111111113e-05, | |
| "loss": 0.3042, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_loss": 0.2556719183921814, | |
| "eval_runtime": 7.4839, | |
| "eval_samples_per_second": 133.753, | |
| "eval_steps_per_second": 33.539, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.903703703703704e-05, | |
| "loss": 0.2017, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 1.8962962962962963e-05, | |
| "loss": 0.2162, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.888888888888889e-05, | |
| "loss": 0.1989, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_loss": 0.25471413135528564, | |
| "eval_runtime": 7.4955, | |
| "eval_samples_per_second": 133.547, | |
| "eval_steps_per_second": 33.487, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.8814814814814816e-05, | |
| "loss": 0.2227, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8740740740740742e-05, | |
| "loss": 0.198, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_loss": 0.2542085349559784, | |
| "eval_runtime": 7.5713, | |
| "eval_samples_per_second": 132.211, | |
| "eval_steps_per_second": 33.152, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.866666666666667e-05, | |
| "loss": 0.2839, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.8592592592592595e-05, | |
| "loss": 0.2969, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 0.2634, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_loss": 0.25297510623931885, | |
| "eval_runtime": 7.6201, | |
| "eval_samples_per_second": 131.363, | |
| "eval_steps_per_second": 32.939, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8444444444444445e-05, | |
| "loss": 0.2182, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.837037037037037e-05, | |
| "loss": 0.2684, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_loss": 0.2529478371143341, | |
| "eval_runtime": 7.6013, | |
| "eval_samples_per_second": 131.689, | |
| "eval_steps_per_second": 33.021, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.8296296296296298e-05, | |
| "loss": 0.302, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.8222222222222224e-05, | |
| "loss": 0.195, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.814814814814815e-05, | |
| "loss": 0.1844, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "eval_loss": 0.2522367835044861, | |
| "eval_runtime": 7.3231, | |
| "eval_samples_per_second": 136.692, | |
| "eval_steps_per_second": 34.275, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8074074074074074e-05, | |
| "loss": 0.2901, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.2303, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 0.252410352230072, | |
| "eval_runtime": 7.3587, | |
| "eval_samples_per_second": 136.029, | |
| "eval_steps_per_second": 34.109, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.7925925925925927e-05, | |
| "loss": 0.3308, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.7851851851851853e-05, | |
| "loss": 0.2441, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.777777777777778e-05, | |
| "loss": 0.2126, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "eval_loss": 0.25146955251693726, | |
| "eval_runtime": 7.3139, | |
| "eval_samples_per_second": 136.863, | |
| "eval_steps_per_second": 34.318, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.7703703703703706e-05, | |
| "loss": 0.2338, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.762962962962963e-05, | |
| "loss": 0.2278, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_loss": 0.2519494593143463, | |
| "eval_runtime": 7.2536, | |
| "eval_samples_per_second": 138.0, | |
| "eval_steps_per_second": 34.604, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7555555555555556e-05, | |
| "loss": 0.2033, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.7481481481481483e-05, | |
| "loss": 0.2524, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.740740740740741e-05, | |
| "loss": 0.2692, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_loss": 0.25108450651168823, | |
| "eval_runtime": 7.1825, | |
| "eval_samples_per_second": 139.366, | |
| "eval_steps_per_second": 34.946, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.7333333333333336e-05, | |
| "loss": 0.2222, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.7259259259259262e-05, | |
| "loss": 0.2398, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_loss": 0.25103849172592163, | |
| "eval_runtime": 7.1987, | |
| "eval_samples_per_second": 139.053, | |
| "eval_steps_per_second": 34.867, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7185185185185185e-05, | |
| "loss": 0.2295, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.7111111111111112e-05, | |
| "loss": 0.2153, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.7037037037037038e-05, | |
| "loss": 0.2799, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_loss": 0.25079530477523804, | |
| "eval_runtime": 7.4048, | |
| "eval_samples_per_second": 135.183, | |
| "eval_steps_per_second": 33.897, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.6962962962962965e-05, | |
| "loss": 0.2143, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.688888888888889e-05, | |
| "loss": 0.2492, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_loss": 0.25007081031799316, | |
| "eval_runtime": 7.3873, | |
| "eval_samples_per_second": 135.504, | |
| "eval_steps_per_second": 33.977, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.6814814814814818e-05, | |
| "loss": 0.2657, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.674074074074074e-05, | |
| "loss": 0.2207, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.2202, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.24966831505298615, | |
| "eval_runtime": 7.3428, | |
| "eval_samples_per_second": 136.324, | |
| "eval_steps_per_second": 34.183, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.6592592592592594e-05, | |
| "loss": 0.2467, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.651851851851852e-05, | |
| "loss": 0.1736, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_loss": 0.25051918625831604, | |
| "eval_runtime": 7.2975, | |
| "eval_samples_per_second": 137.171, | |
| "eval_steps_per_second": 34.395, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.6444444444444447e-05, | |
| "loss": 0.1836, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.6370370370370374e-05, | |
| "loss": 0.1811, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.62962962962963e-05, | |
| "loss": 0.204, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_loss": 0.25177687406539917, | |
| "eval_runtime": 7.1922, | |
| "eval_samples_per_second": 139.179, | |
| "eval_steps_per_second": 34.899, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.6222222222222223e-05, | |
| "loss": 0.22, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.614814814814815e-05, | |
| "loss": 0.1853, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "eval_loss": 0.25192761421203613, | |
| "eval_runtime": 7.1908, | |
| "eval_samples_per_second": 139.205, | |
| "eval_steps_per_second": 34.906, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.6074074074074076e-05, | |
| "loss": 0.1916, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.2385, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.5925925925925926e-05, | |
| "loss": 0.1834, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 0.25096383690834045, | |
| "eval_runtime": 7.214, | |
| "eval_samples_per_second": 138.757, | |
| "eval_steps_per_second": 34.793, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.5851851851851852e-05, | |
| "loss": 0.3046, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.577777777777778e-05, | |
| "loss": 0.2285, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_loss": 0.251299113035202, | |
| "eval_runtime": 7.2235, | |
| "eval_samples_per_second": 138.575, | |
| "eval_steps_per_second": 34.748, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5703703703703705e-05, | |
| "loss": 0.213, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.5629629629629632e-05, | |
| "loss": 0.1222, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.5555555555555555e-05, | |
| "loss": 0.179, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_loss": 0.25189557671546936, | |
| "eval_runtime": 7.2744, | |
| "eval_samples_per_second": 137.607, | |
| "eval_steps_per_second": 34.505, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.548148148148148e-05, | |
| "loss": 0.1749, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.5407407407407408e-05, | |
| "loss": 0.2163, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "eval_loss": 0.25133654475212097, | |
| "eval_runtime": 7.2066, | |
| "eval_samples_per_second": 138.901, | |
| "eval_steps_per_second": 34.829, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.5333333333333334e-05, | |
| "loss": 0.1791, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.5259259259259258e-05, | |
| "loss": 0.1868, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5185185185185186e-05, | |
| "loss": 0.2034, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_loss": 0.251429945230484, | |
| "eval_runtime": 7.2815, | |
| "eval_samples_per_second": 137.471, | |
| "eval_steps_per_second": 34.471, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5111111111111112e-05, | |
| "loss": 0.1877, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.5037037037037039e-05, | |
| "loss": 0.2275, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "eval_loss": 0.25081896781921387, | |
| "eval_runtime": 7.1988, | |
| "eval_samples_per_second": 139.051, | |
| "eval_steps_per_second": 34.867, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.4962962962962965e-05, | |
| "loss": 0.2512, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4888888888888888e-05, | |
| "loss": 0.1883, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 0.2328, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_loss": 0.2509821653366089, | |
| "eval_runtime": 9.4138, | |
| "eval_samples_per_second": 106.334, | |
| "eval_steps_per_second": 26.663, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4740740740740741e-05, | |
| "loss": 0.2146, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4666666666666668e-05, | |
| "loss": 0.1935, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "eval_loss": 0.2518753111362457, | |
| "eval_runtime": 7.4318, | |
| "eval_samples_per_second": 134.691, | |
| "eval_steps_per_second": 33.774, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4592592592592594e-05, | |
| "loss": 0.2174, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.4518518518518521e-05, | |
| "loss": 0.1887, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.4444444444444444e-05, | |
| "loss": 0.1613, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_loss": 0.251521497964859, | |
| "eval_runtime": 7.26, | |
| "eval_samples_per_second": 137.88, | |
| "eval_steps_per_second": 34.573, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.437037037037037e-05, | |
| "loss": 0.2027, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4296296296296297e-05, | |
| "loss": 0.2451, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_loss": 0.2503267526626587, | |
| "eval_runtime": 7.228, | |
| "eval_samples_per_second": 138.488, | |
| "eval_steps_per_second": 34.726, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.4222222222222224e-05, | |
| "loss": 0.199, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.4148148148148148e-05, | |
| "loss": 0.1797, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.4074074074074075e-05, | |
| "loss": 0.199, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "eval_loss": 0.2507803440093994, | |
| "eval_runtime": 7.2959, | |
| "eval_samples_per_second": 137.2, | |
| "eval_steps_per_second": 34.403, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 0.2138, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.3925925925925926e-05, | |
| "loss": 0.2486, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_loss": 0.25041449069976807, | |
| "eval_runtime": 7.2127, | |
| "eval_samples_per_second": 138.783, | |
| "eval_steps_per_second": 34.8, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3851851851851853e-05, | |
| "loss": 0.1906, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3777777777777778e-05, | |
| "loss": 0.2375, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.3703703703703704e-05, | |
| "loss": 0.2124, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_loss": 0.2496342658996582, | |
| "eval_runtime": 7.2877, | |
| "eval_samples_per_second": 137.354, | |
| "eval_steps_per_second": 34.441, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.362962962962963e-05, | |
| "loss": 0.1827, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.3555555555555557e-05, | |
| "loss": 0.2032, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_loss": 0.2495114803314209, | |
| "eval_runtime": 7.1954, | |
| "eval_samples_per_second": 139.117, | |
| "eval_steps_per_second": 34.883, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.348148148148148e-05, | |
| "loss": 0.133, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3407407407407407e-05, | |
| "loss": 0.2068, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.1772, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_loss": 0.25019142031669617, | |
| "eval_runtime": 7.291, | |
| "eval_samples_per_second": 137.293, | |
| "eval_steps_per_second": 34.426, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.325925925925926e-05, | |
| "loss": 0.213, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.3185185185185186e-05, | |
| "loss": 0.1879, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "eval_loss": 0.24890127778053284, | |
| "eval_runtime": 7.2295, | |
| "eval_samples_per_second": 138.46, | |
| "eval_steps_per_second": 34.719, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.3111111111111113e-05, | |
| "loss": 0.1829, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.3037037037037036e-05, | |
| "loss": 0.211, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.2962962962962962e-05, | |
| "loss": 0.1554, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_loss": 0.24963602423667908, | |
| "eval_runtime": 7.2892, | |
| "eval_samples_per_second": 137.326, | |
| "eval_steps_per_second": 34.434, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2888888888888889e-05, | |
| "loss": 0.1922, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2814814814814815e-05, | |
| "loss": 0.2178, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_loss": 0.24953505396842957, | |
| "eval_runtime": 7.1973, | |
| "eval_samples_per_second": 139.08, | |
| "eval_steps_per_second": 34.874, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2740740740740742e-05, | |
| "loss": 0.1909, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2666666666666668e-05, | |
| "loss": 0.2111, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2592592592592592e-05, | |
| "loss": 0.1936, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_loss": 0.249254047870636, | |
| "eval_runtime": 7.2689, | |
| "eval_samples_per_second": 137.711, | |
| "eval_steps_per_second": 34.531, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2518518518518518e-05, | |
| "loss": 0.1913, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2444444444444445e-05, | |
| "loss": 0.2, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_loss": 0.2488778978586197, | |
| "eval_runtime": 7.2184, | |
| "eval_samples_per_second": 138.674, | |
| "eval_steps_per_second": 34.772, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2370370370370371e-05, | |
| "loss": 0.1723, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2296296296296298e-05, | |
| "loss": 0.1875, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2222222222222222e-05, | |
| "loss": 0.185, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_loss": 0.24892283976078033, | |
| "eval_runtime": 7.3812, | |
| "eval_samples_per_second": 135.615, | |
| "eval_steps_per_second": 34.005, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2148148148148149e-05, | |
| "loss": 0.2168, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.2074074074074075e-05, | |
| "loss": 0.1783, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_loss": 0.2482217699289322, | |
| "eval_runtime": 7.3518, | |
| "eval_samples_per_second": 136.157, | |
| "eval_steps_per_second": 34.141, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.1643, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.1925925925925927e-05, | |
| "loss": 0.2211, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.1851851851851853e-05, | |
| "loss": 0.2276, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_loss": 0.2477390617132187, | |
| "eval_runtime": 7.5029, | |
| "eval_samples_per_second": 133.414, | |
| "eval_steps_per_second": 33.454, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.1777777777777778e-05, | |
| "loss": 0.1951, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1703703703703705e-05, | |
| "loss": 0.2134, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_loss": 0.24737687408924103, | |
| "eval_runtime": 7.4753, | |
| "eval_samples_per_second": 133.907, | |
| "eval_steps_per_second": 33.577, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1629629629629631e-05, | |
| "loss": 0.2208, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.1555555555555556e-05, | |
| "loss": 0.1714, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.1481481481481482e-05, | |
| "loss": 0.1747, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_loss": 0.2470199465751648, | |
| "eval_runtime": 7.6413, | |
| "eval_samples_per_second": 130.999, | |
| "eval_steps_per_second": 32.848, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1407407407407409e-05, | |
| "loss": 0.209, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1333333333333334e-05, | |
| "loss": 0.2121, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_loss": 0.2458607703447342, | |
| "eval_runtime": 7.515, | |
| "eval_samples_per_second": 133.2, | |
| "eval_steps_per_second": 33.4, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.125925925925926e-05, | |
| "loss": 0.2266, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1185185185185187e-05, | |
| "loss": 0.1754, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.2199, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_loss": 0.2461249679327011, | |
| "eval_runtime": 7.5406, | |
| "eval_samples_per_second": 132.748, | |
| "eval_steps_per_second": 33.286, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.1037037037037038e-05, | |
| "loss": 0.2609, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.0962962962962963e-05, | |
| "loss": 0.2708, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "eval_loss": 0.24557095766067505, | |
| "eval_runtime": 7.3992, | |
| "eval_samples_per_second": 135.286, | |
| "eval_steps_per_second": 33.923, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.088888888888889e-05, | |
| "loss": 0.1986, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.0814814814814814e-05, | |
| "loss": 0.1937, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.074074074074074e-05, | |
| "loss": 0.2252, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_loss": 0.24577473104000092, | |
| "eval_runtime": 7.388, | |
| "eval_samples_per_second": 135.49, | |
| "eval_steps_per_second": 33.974, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0666666666666667e-05, | |
| "loss": 0.1642, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0592592592592592e-05, | |
| "loss": 0.1921, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_loss": 0.24629971385002136, | |
| "eval_runtime": 7.2504, | |
| "eval_samples_per_second": 138.061, | |
| "eval_steps_per_second": 34.619, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.0518518518518519e-05, | |
| "loss": 0.1795, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.0444444444444445e-05, | |
| "loss": 0.2051, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.037037037037037e-05, | |
| "loss": 0.1627, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_loss": 0.24658331274986267, | |
| "eval_runtime": 7.6185, | |
| "eval_samples_per_second": 131.39, | |
| "eval_steps_per_second": 32.946, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0296296296296296e-05, | |
| "loss": 0.1746, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0222222222222223e-05, | |
| "loss": 0.1988, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "eval_loss": 0.24601979553699493, | |
| "eval_runtime": 7.5137, | |
| "eval_samples_per_second": 133.223, | |
| "eval_steps_per_second": 33.406, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0148148148148148e-05, | |
| "loss": 0.1994, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1.0074074074074074e-05, | |
| "loss": 0.191, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2308, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_loss": 0.24538284540176392, | |
| "eval_runtime": 7.6411, | |
| "eval_samples_per_second": 131.002, | |
| "eval_steps_per_second": 32.849, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 9.925925925925926e-06, | |
| "loss": 0.2331, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.851851851851852e-06, | |
| "loss": 0.138, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_loss": 0.24550172686576843, | |
| "eval_runtime": 7.6397, | |
| "eval_samples_per_second": 131.026, | |
| "eval_steps_per_second": 32.855, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.777777777777779e-06, | |
| "loss": 0.2266, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.703703703703703e-06, | |
| "loss": 0.1693, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.62962962962963e-06, | |
| "loss": 0.1745, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_loss": 0.2455550730228424, | |
| "eval_runtime": 7.6397, | |
| "eval_samples_per_second": 131.025, | |
| "eval_steps_per_second": 32.855, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.555555555555556e-06, | |
| "loss": 0.1472, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.481481481481481e-06, | |
| "loss": 0.1712, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_loss": 0.24605458974838257, | |
| "eval_runtime": 8.1344, | |
| "eval_samples_per_second": 123.058, | |
| "eval_steps_per_second": 30.857, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.407407407407408e-06, | |
| "loss": 0.1755, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.333333333333334e-06, | |
| "loss": 0.1984, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.259259259259259e-06, | |
| "loss": 0.1763, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "eval_loss": 0.24583406746387482, | |
| "eval_runtime": 10.3194, | |
| "eval_samples_per_second": 97.002, | |
| "eval_steps_per_second": 24.323, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.185185185185186e-06, | |
| "loss": 0.2261, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.111111111111112e-06, | |
| "loss": 0.2068, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_loss": 0.24605007469654083, | |
| "eval_runtime": 7.6128, | |
| "eval_samples_per_second": 131.489, | |
| "eval_steps_per_second": 32.971, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.037037037037037e-06, | |
| "loss": 0.2097, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 8.962962962962963e-06, | |
| "loss": 0.1733, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.88888888888889e-06, | |
| "loss": 0.2097, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "eval_loss": 0.24533824622631073, | |
| "eval_runtime": 7.6087, | |
| "eval_samples_per_second": 131.559, | |
| "eval_steps_per_second": 32.988, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.814814814814815e-06, | |
| "loss": 0.1896, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.740740740740741e-06, | |
| "loss": 0.1945, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_loss": 0.24509920179843903, | |
| "eval_runtime": 7.6048, | |
| "eval_samples_per_second": 131.627, | |
| "eval_steps_per_second": 33.005, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.666666666666668e-06, | |
| "loss": 0.2036, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.592592592592593e-06, | |
| "loss": 0.1933, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.518518518518519e-06, | |
| "loss": 0.1906, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "eval_loss": 0.24449166655540466, | |
| "eval_runtime": 7.5807, | |
| "eval_samples_per_second": 132.047, | |
| "eval_steps_per_second": 33.111, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.444444444444446e-06, | |
| "loss": 0.1655, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.37037037037037e-06, | |
| "loss": 0.1681, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_loss": 0.24523867666721344, | |
| "eval_runtime": 7.5355, | |
| "eval_samples_per_second": 132.839, | |
| "eval_steps_per_second": 33.309, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.296296296296297e-06, | |
| "loss": 0.202, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.222222222222223e-06, | |
| "loss": 0.1795, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.14814814814815e-06, | |
| "loss": 0.1732, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_loss": 0.24490933120250702, | |
| "eval_runtime": 7.4917, | |
| "eval_samples_per_second": 133.614, | |
| "eval_steps_per_second": 33.504, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.074074074074075e-06, | |
| "loss": 0.1765, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.2129, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_loss": 0.2450021654367447, | |
| "eval_runtime": 7.5776, | |
| "eval_samples_per_second": 132.099, | |
| "eval_steps_per_second": 33.124, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 7.925925925925926e-06, | |
| "loss": 0.239, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.851851851851853e-06, | |
| "loss": 0.153, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.777777777777777e-06, | |
| "loss": 0.1749, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_loss": 0.244710773229599, | |
| "eval_runtime": 7.2355, | |
| "eval_samples_per_second": 138.346, | |
| "eval_steps_per_second": 34.69, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.703703703703704e-06, | |
| "loss": 0.2436, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.629629629629629e-06, | |
| "loss": 0.217, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "eval_loss": 0.24449826776981354, | |
| "eval_runtime": 7.2691, | |
| "eval_samples_per_second": 137.706, | |
| "eval_steps_per_second": 34.53, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.555555555555556e-06, | |
| "loss": 0.1999, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.481481481481483e-06, | |
| "loss": 0.1981, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.2359, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_loss": 0.24463185667991638, | |
| "eval_runtime": 7.1779, | |
| "eval_samples_per_second": 139.456, | |
| "eval_steps_per_second": 34.968, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.333333333333334e-06, | |
| "loss": 0.1824, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.2592592592592605e-06, | |
| "loss": 0.1813, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_loss": 0.24394886195659637, | |
| "eval_runtime": 7.21, | |
| "eval_samples_per_second": 138.835, | |
| "eval_steps_per_second": 34.813, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.185185185185185e-06, | |
| "loss": 0.2108, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.111111111111112e-06, | |
| "loss": 0.2403, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.0370370370370375e-06, | |
| "loss": 0.1771, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_loss": 0.2437148541212082, | |
| "eval_runtime": 7.3132, | |
| "eval_samples_per_second": 136.876, | |
| "eval_steps_per_second": 34.322, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 6.962962962962963e-06, | |
| "loss": 0.2451, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.888888888888889e-06, | |
| "loss": 0.2009, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "eval_loss": 0.2434249371290207, | |
| "eval_runtime": 7.2848, | |
| "eval_samples_per_second": 137.41, | |
| "eval_steps_per_second": 34.455, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.814814814814815e-06, | |
| "loss": 0.1734, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.74074074074074e-06, | |
| "loss": 0.2106, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.2255, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_loss": 0.24379944801330566, | |
| "eval_runtime": 7.3588, | |
| "eval_samples_per_second": 136.028, | |
| "eval_steps_per_second": 34.109, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.592592592592593e-06, | |
| "loss": 0.2042, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.518518518518518e-06, | |
| "loss": 0.1645, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_loss": 0.24349051713943481, | |
| "eval_runtime": 7.3319, | |
| "eval_samples_per_second": 136.527, | |
| "eval_steps_per_second": 34.234, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.4444444444444445e-06, | |
| "loss": 0.1971, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.370370370370371e-06, | |
| "loss": 0.1939, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.296296296296296e-06, | |
| "loss": 0.2087, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_loss": 0.24342414736747742, | |
| "eval_runtime": 7.3018, | |
| "eval_samples_per_second": 137.09, | |
| "eval_steps_per_second": 34.375, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.222222222222222e-06, | |
| "loss": 0.2373, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.148148148148149e-06, | |
| "loss": 0.2125, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "eval_loss": 0.24332143366336823, | |
| "eval_runtime": 7.221, | |
| "eval_samples_per_second": 138.623, | |
| "eval_steps_per_second": 34.76, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.0740740740740745e-06, | |
| "loss": 0.2188, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6e-06, | |
| "loss": 0.1756, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 5.925925925925927e-06, | |
| "loss": 0.2484, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_loss": 0.24283772706985474, | |
| "eval_runtime": 7.2081, | |
| "eval_samples_per_second": 138.871, | |
| "eval_steps_per_second": 34.822, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.851851851851852e-06, | |
| "loss": 0.201, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.777777777777778e-06, | |
| "loss": 0.1724, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_loss": 0.24253526329994202, | |
| "eval_runtime": 7.2139, | |
| "eval_samples_per_second": 138.76, | |
| "eval_steps_per_second": 34.794, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.7037037037037045e-06, | |
| "loss": 0.2071, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.62962962962963e-06, | |
| "loss": 0.2026, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.18, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_loss": 0.2427457869052887, | |
| "eval_runtime": 7.24, | |
| "eval_samples_per_second": 138.259, | |
| "eval_steps_per_second": 34.668, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.4814814814814815e-06, | |
| "loss": 0.2491, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.407407407407407e-06, | |
| "loss": 0.1878, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "eval_loss": 0.2427491545677185, | |
| "eval_runtime": 7.2356, | |
| "eval_samples_per_second": 138.343, | |
| "eval_steps_per_second": 34.689, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.333333333333334e-06, | |
| "loss": 0.196, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.259259259259259e-06, | |
| "loss": 0.1861, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.185185185185185e-06, | |
| "loss": 0.2131, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_loss": 0.24262972176074982, | |
| "eval_runtime": 7.2135, | |
| "eval_samples_per_second": 138.768, | |
| "eval_steps_per_second": 34.796, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.1111111111111115e-06, | |
| "loss": 0.1904, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 5.037037037037037e-06, | |
| "loss": 0.1677, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "eval_loss": 0.24258430302143097, | |
| "eval_runtime": 7.38, | |
| "eval_samples_per_second": 135.638, | |
| "eval_steps_per_second": 34.011, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 4.962962962962963e-06, | |
| "loss": 0.1728, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.888888888888889e-06, | |
| "loss": 0.2153, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.814814814814815e-06, | |
| "loss": 0.2097, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_loss": 0.2426908016204834, | |
| "eval_runtime": 7.3472, | |
| "eval_samples_per_second": 136.243, | |
| "eval_steps_per_second": 34.163, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.740740740740741e-06, | |
| "loss": 0.1584, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.666666666666667e-06, | |
| "loss": 0.2014, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_loss": 0.24265140295028687, | |
| "eval_runtime": 7.3411, | |
| "eval_samples_per_second": 136.355, | |
| "eval_steps_per_second": 34.191, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.592592592592593e-06, | |
| "loss": 0.2504, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.5185185185185185e-06, | |
| "loss": 0.207, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.444444444444445e-06, | |
| "loss": 0.1777, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "eval_loss": 0.24221929907798767, | |
| "eval_runtime": 7.2165, | |
| "eval_samples_per_second": 138.709, | |
| "eval_steps_per_second": 34.781, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.370370370370371e-06, | |
| "loss": 0.1982, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.296296296296296e-06, | |
| "loss": 0.189, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "eval_loss": 0.24199625849723816, | |
| "eval_runtime": 7.2328, | |
| "eval_samples_per_second": 138.397, | |
| "eval_steps_per_second": 34.703, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.222222222222223e-06, | |
| "loss": 0.1895, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.1481481481481485e-06, | |
| "loss": 0.2303, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.074074074074075e-06, | |
| "loss": 0.2499, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_loss": 0.24183855950832367, | |
| "eval_runtime": 7.2222, | |
| "eval_samples_per_second": 138.6, | |
| "eval_steps_per_second": 34.754, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.2105, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.925925925925926e-06, | |
| "loss": 0.1827, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_loss": 0.24157124757766724, | |
| "eval_runtime": 7.2228, | |
| "eval_samples_per_second": 138.589, | |
| "eval_steps_per_second": 34.751, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.851851851851852e-06, | |
| "loss": 0.1625, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.777777777777778e-06, | |
| "loss": 0.2046, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 0.1872, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_loss": 0.24152511358261108, | |
| "eval_runtime": 7.2189, | |
| "eval_samples_per_second": 138.665, | |
| "eval_steps_per_second": 34.77, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.6296296296296302e-06, | |
| "loss": 0.1476, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.555555555555556e-06, | |
| "loss": 0.1948, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "eval_loss": 0.24153129756450653, | |
| "eval_runtime": 7.2338, | |
| "eval_samples_per_second": 138.379, | |
| "eval_steps_per_second": 34.698, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.4814814814814816e-06, | |
| "loss": 0.184, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.4074074074074077e-06, | |
| "loss": 0.2911, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.2382, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_loss": 0.24131891131401062, | |
| "eval_runtime": 7.204, | |
| "eval_samples_per_second": 138.95, | |
| "eval_steps_per_second": 34.842, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.259259259259259e-06, | |
| "loss": 0.2274, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.1851851851851855e-06, | |
| "loss": 0.2495, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "eval_loss": 0.24129566550254822, | |
| "eval_runtime": 7.2128, | |
| "eval_samples_per_second": 138.78, | |
| "eval_steps_per_second": 34.799, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.111111111111111e-06, | |
| "loss": 0.1895, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.0370370370370372e-06, | |
| "loss": 0.2067, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.9629629629629633e-06, | |
| "loss": 0.2258, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_loss": 0.24125269055366516, | |
| "eval_runtime": 7.2125, | |
| "eval_samples_per_second": 138.787, | |
| "eval_steps_per_second": 34.801, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.888888888888889e-06, | |
| "loss": 0.1759, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.814814814814815e-06, | |
| "loss": 0.1618, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "eval_loss": 0.24155820906162262, | |
| "eval_runtime": 7.2268, | |
| "eval_samples_per_second": 138.511, | |
| "eval_steps_per_second": 34.732, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.7407407407407407e-06, | |
| "loss": 0.1782, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 0.1813, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.5925925925925925e-06, | |
| "loss": 0.1605, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_loss": 0.24137118458747864, | |
| "eval_runtime": 7.2587, | |
| "eval_samples_per_second": 137.903, | |
| "eval_steps_per_second": 34.579, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.5185185185185186e-06, | |
| "loss": 0.1751, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.4444444444444447e-06, | |
| "loss": 0.1712, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "eval_loss": 0.24129529297351837, | |
| "eval_runtime": 7.385, | |
| "eval_samples_per_second": 135.545, | |
| "eval_steps_per_second": 33.988, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.3703703703703703e-06, | |
| "loss": 0.2067, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.2962962962962964e-06, | |
| "loss": 0.177, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.2222222222222225e-06, | |
| "loss": 0.2021, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_loss": 0.24113218486309052, | |
| "eval_runtime": 7.1916, | |
| "eval_samples_per_second": 139.191, | |
| "eval_steps_per_second": 34.902, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.148148148148148e-06, | |
| "loss": 0.1895, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.0740740740740742e-06, | |
| "loss": 0.1985, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_loss": 0.24104492366313934, | |
| "eval_runtime": 7.2162, | |
| "eval_samples_per_second": 138.715, | |
| "eval_steps_per_second": 34.783, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.1742, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.925925925925926e-06, | |
| "loss": 0.2035, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.8518518518518519e-06, | |
| "loss": 0.1773, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "eval_loss": 0.24099375307559967, | |
| "eval_runtime": 7.2337, | |
| "eval_samples_per_second": 138.38, | |
| "eval_steps_per_second": 34.699, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.777777777777778e-06, | |
| "loss": 0.1872, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.7037037037037038e-06, | |
| "loss": 0.2152, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "eval_loss": 0.24097038805484772, | |
| "eval_runtime": 7.2148, | |
| "eval_samples_per_second": 138.743, | |
| "eval_steps_per_second": 34.79, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.6296296296296295e-06, | |
| "loss": 0.2588, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.5555555555555556e-06, | |
| "loss": 0.2228, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.4814814814814817e-06, | |
| "loss": 0.2225, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_loss": 0.24102109670639038, | |
| "eval_runtime": 7.2724, | |
| "eval_samples_per_second": 137.644, | |
| "eval_steps_per_second": 34.514, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.4074074074074075e-06, | |
| "loss": 0.2303, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": 0.1393, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_loss": 0.24098366498947144, | |
| "eval_runtime": 7.2615, | |
| "eval_samples_per_second": 137.849, | |
| "eval_steps_per_second": 34.566, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.2592592592592593e-06, | |
| "loss": 0.1581, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.1851851851851852e-06, | |
| "loss": 0.2093, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.1111111111111112e-06, | |
| "loss": 0.1631, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "eval_loss": 0.24084864556789398, | |
| "eval_runtime": 7.2231, | |
| "eval_samples_per_second": 138.583, | |
| "eval_steps_per_second": 34.75, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.0370370370370371e-06, | |
| "loss": 0.2028, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 9.62962962962963e-07, | |
| "loss": 0.2158, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_loss": 0.24080170691013336, | |
| "eval_runtime": 7.2315, | |
| "eval_samples_per_second": 138.423, | |
| "eval_steps_per_second": 34.709, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.88888888888889e-07, | |
| "loss": 0.1775, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.148148148148147e-07, | |
| "loss": 0.1827, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.407407407407408e-07, | |
| "loss": 0.218, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_loss": 0.24078905582427979, | |
| "eval_runtime": 7.2318, | |
| "eval_samples_per_second": 138.417, | |
| "eval_steps_per_second": 34.708, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.666666666666667e-07, | |
| "loss": 0.1736, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 5.925925925925926e-07, | |
| "loss": 0.1654, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_loss": 0.24077662825584412, | |
| "eval_runtime": 7.2025, | |
| "eval_samples_per_second": 138.98, | |
| "eval_steps_per_second": 34.849, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.185185185185186e-07, | |
| "loss": 0.1993, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.444444444444445e-07, | |
| "loss": 0.1852, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.703703703703704e-07, | |
| "loss": 0.1955, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_loss": 0.24076078832149506, | |
| "eval_runtime": 7.1929, | |
| "eval_samples_per_second": 139.165, | |
| "eval_steps_per_second": 34.895, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.962962962962963e-07, | |
| "loss": 0.1923, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 2.2222222222222224e-07, | |
| "loss": 0.2142, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_loss": 0.24076122045516968, | |
| "eval_runtime": 7.1938, | |
| "eval_samples_per_second": 139.147, | |
| "eval_steps_per_second": 34.891, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.4814814814814815e-07, | |
| "loss": 0.2464, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 7.407407407407407e-08, | |
| "loss": 0.2322, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.1781, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.24075740575790405, | |
| "eval_runtime": 7.2037, | |
| "eval_samples_per_second": 138.956, | |
| "eval_steps_per_second": 34.843, | |
| "step": 6750 | |
| } | |
| ], | |
| "max_steps": 6750, | |
| "num_train_epochs": 3, | |
| "total_flos": 915255853056000.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |