| { |
| "best_metric": 0.24075740575790405, |
| "best_model_checkpoint": "./results/checkpoint-6750", |
| "epoch": 3.0, |
| "global_step": 6750, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9992592592592596e-05, |
| "loss": 6.945, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9925925925925926e-05, |
| "loss": 1.4118, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9851851851851855e-05, |
| "loss": 0.7118, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.01, |
| "eval_loss": 0.6554884910583496, |
| "eval_runtime": 6.9152, |
| "eval_samples_per_second": 144.754, |
| "eval_steps_per_second": 36.297, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.977777777777778e-05, |
| "loss": 0.8383, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.970370370370371e-05, |
| "loss": 0.6623, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.962962962962963e-05, |
| "loss": 0.6282, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_loss": 0.5736271739006042, |
| "eval_runtime": 6.8557, |
| "eval_samples_per_second": 146.01, |
| "eval_steps_per_second": 36.612, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.955555555555556e-05, |
| "loss": 0.5446, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.9481481481481485e-05, |
| "loss": 0.5983, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 0.5498507022857666, |
| "eval_runtime": 6.8866, |
| "eval_samples_per_second": 145.356, |
| "eval_steps_per_second": 36.448, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.940740740740741e-05, |
| "loss": 0.5406, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.933333333333334e-05, |
| "loss": 0.618, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.925925925925926e-05, |
| "loss": 0.6346, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04, |
| "eval_loss": 0.5268338918685913, |
| "eval_runtime": 6.9999, |
| "eval_samples_per_second": 143.001, |
| "eval_steps_per_second": 35.857, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.918518518518519e-05, |
| "loss": 0.53, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9111111111111114e-05, |
| "loss": 0.4268, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06, |
| "eval_loss": 0.5140772461891174, |
| "eval_runtime": 7.002, |
| "eval_samples_per_second": 142.959, |
| "eval_steps_per_second": 35.847, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.903703703703704e-05, |
| "loss": 0.5474, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 4.896296296296297e-05, |
| "loss": 0.5773, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.888888888888889e-05, |
| "loss": 0.5611, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_loss": 0.5012452006340027, |
| "eval_runtime": 7.2224, |
| "eval_samples_per_second": 138.597, |
| "eval_steps_per_second": 34.753, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.881481481481482e-05, |
| "loss": 0.5421, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.874074074074074e-05, |
| "loss": 0.5056, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08, |
| "eval_loss": 0.492546945810318, |
| "eval_runtime": 7.468, |
| "eval_samples_per_second": 134.038, |
| "eval_steps_per_second": 33.61, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.866666666666667e-05, |
| "loss": 0.5086, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.8592592592592596e-05, |
| "loss": 0.4423, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.851851851851852e-05, |
| "loss": 0.5391, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_loss": 0.4847224950790405, |
| "eval_runtime": 7.3642, |
| "eval_samples_per_second": 135.928, |
| "eval_steps_per_second": 34.084, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.844444444444445e-05, |
| "loss": 0.4785, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.837037037037037e-05, |
| "loss": 0.4303, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_loss": 0.4781314730644226, |
| "eval_runtime": 7.4029, |
| "eval_samples_per_second": 135.217, |
| "eval_steps_per_second": 33.906, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.82962962962963e-05, |
| "loss": 0.5914, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.8222222222222225e-05, |
| "loss": 0.4892, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.814814814814815e-05, |
| "loss": 0.5442, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11, |
| "eval_loss": 0.4688411355018616, |
| "eval_runtime": 7.5871, |
| "eval_samples_per_second": 131.935, |
| "eval_steps_per_second": 33.083, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.807407407407408e-05, |
| "loss": 0.4823, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.8e-05, |
| "loss": 0.4739, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.12, |
| "eval_loss": 0.462319940328598, |
| "eval_runtime": 7.2477, |
| "eval_samples_per_second": 138.112, |
| "eval_steps_per_second": 34.632, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.792592592592593e-05, |
| "loss": 0.5542, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.7851851851851854e-05, |
| "loss": 0.4593, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.7777777777777784e-05, |
| "loss": 0.4388, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13, |
| "eval_loss": 0.4547964036464691, |
| "eval_runtime": 7.2941, |
| "eval_samples_per_second": 137.235, |
| "eval_steps_per_second": 34.412, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.770370370370371e-05, |
| "loss": 0.4762, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.762962962962963e-05, |
| "loss": 0.428, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_loss": 0.45131900906562805, |
| "eval_runtime": 7.2669, |
| "eval_samples_per_second": 137.748, |
| "eval_steps_per_second": 34.54, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.755555555555556e-05, |
| "loss": 0.4719, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.7481481481481483e-05, |
| "loss": 0.4293, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.740740740740741e-05, |
| "loss": 0.4112, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_loss": 0.4421839714050293, |
| "eval_runtime": 7.3019, |
| "eval_samples_per_second": 137.087, |
| "eval_steps_per_second": 34.374, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.7333333333333336e-05, |
| "loss": 0.3981, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.7259259259259266e-05, |
| "loss": 0.501, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.17, |
| "eval_loss": 0.43612053990364075, |
| "eval_runtime": 7.1757, |
| "eval_samples_per_second": 139.499, |
| "eval_steps_per_second": 34.979, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.718518518518519e-05, |
| "loss": 0.4066, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.711111111111111e-05, |
| "loss": 0.4892, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.703703703703704e-05, |
| "loss": 0.4584, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.18, |
| "eval_loss": 0.4388324022293091, |
| "eval_runtime": 7.2341, |
| "eval_samples_per_second": 138.372, |
| "eval_steps_per_second": 34.697, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.6962962962962966e-05, |
| "loss": 0.432, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.6888888888888895e-05, |
| "loss": 0.4717, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_loss": 0.42829033732414246, |
| "eval_runtime": 7.261, |
| "eval_samples_per_second": 137.86, |
| "eval_steps_per_second": 34.568, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.681481481481482e-05, |
| "loss": 0.399, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.674074074074074e-05, |
| "loss": 0.4021, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.666666666666667e-05, |
| "loss": 0.4122, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2, |
| "eval_loss": 0.4233491122722626, |
| "eval_runtime": 7.2994, |
| "eval_samples_per_second": 137.134, |
| "eval_steps_per_second": 34.386, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.6592592592592595e-05, |
| "loss": 0.4539, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.6518518518518525e-05, |
| "loss": 0.5041, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_loss": 0.4170687794685364, |
| "eval_runtime": 7.1967, |
| "eval_samples_per_second": 139.092, |
| "eval_steps_per_second": 34.877, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 4.644444444444445e-05, |
| "loss": 0.5674, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.637037037037038e-05, |
| "loss": 0.4226, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.62962962962963e-05, |
| "loss": 0.4153, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.22, |
| "eval_loss": 0.41686439514160156, |
| "eval_runtime": 7.2526, |
| "eval_samples_per_second": 138.019, |
| "eval_steps_per_second": 34.608, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.6222222222222224e-05, |
| "loss": 0.3956, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.6148148148148154e-05, |
| "loss": 0.4524, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.23, |
| "eval_loss": 0.41372087597846985, |
| "eval_runtime": 7.2599, |
| "eval_samples_per_second": 137.88, |
| "eval_steps_per_second": 34.573, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.607407407407408e-05, |
| "loss": 0.4319, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.4098, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.592592592592593e-05, |
| "loss": 0.511, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_loss": 0.406745046377182, |
| "eval_runtime": 7.4218, |
| "eval_samples_per_second": 134.872, |
| "eval_steps_per_second": 33.819, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.585185185185185e-05, |
| "loss": 0.5086, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.577777777777778e-05, |
| "loss": 0.4096, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_loss": 0.404786080121994, |
| "eval_runtime": 7.39, |
| "eval_samples_per_second": 135.453, |
| "eval_steps_per_second": 33.965, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.5703703703703706e-05, |
| "loss": 0.4281, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.5629629629629636e-05, |
| "loss": 0.4779, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.555555555555556e-05, |
| "loss": 0.405, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.27, |
| "eval_loss": 0.40296873450279236, |
| "eval_runtime": 7.4985, |
| "eval_samples_per_second": 133.493, |
| "eval_steps_per_second": 33.473, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.548148148148149e-05, |
| "loss": 0.4144, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.540740740740741e-05, |
| "loss": 0.3645, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.28, |
| "eval_loss": 0.39786386489868164, |
| "eval_runtime": 7.5248, |
| "eval_samples_per_second": 133.027, |
| "eval_steps_per_second": 33.356, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.5333333333333335e-05, |
| "loss": 0.3679, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.5259259259259265e-05, |
| "loss": 0.3724, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.518518518518519e-05, |
| "loss": 0.4452, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.29, |
| "eval_loss": 0.39415648579597473, |
| "eval_runtime": 7.2333, |
| "eval_samples_per_second": 138.388, |
| "eval_steps_per_second": 34.701, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.511111111111112e-05, |
| "loss": 0.325, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.503703703703704e-05, |
| "loss": 0.4001, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.3, |
| "eval_loss": 0.38874566555023193, |
| "eval_runtime": 7.2318, |
| "eval_samples_per_second": 138.417, |
| "eval_steps_per_second": 34.708, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.496296296296297e-05, |
| "loss": 0.3631, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.4888888888888894e-05, |
| "loss": 0.4206, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.481481481481482e-05, |
| "loss": 0.4616, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.31, |
| "eval_loss": 0.38908329606056213, |
| "eval_runtime": 7.2894, |
| "eval_samples_per_second": 137.323, |
| "eval_steps_per_second": 34.434, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.474074074074075e-05, |
| "loss": 0.4723, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.466666666666667e-05, |
| "loss": 0.3344, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.32, |
| "eval_loss": 0.38453349471092224, |
| "eval_runtime": 7.2247, |
| "eval_samples_per_second": 138.552, |
| "eval_steps_per_second": 34.742, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.4592592592592594e-05, |
| "loss": 0.4402, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.4518518518518523e-05, |
| "loss": 0.389, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 0.3899, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.33, |
| "eval_loss": 0.3842224180698395, |
| "eval_runtime": 7.3669, |
| "eval_samples_per_second": 135.879, |
| "eval_steps_per_second": 34.071, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.4370370370370376e-05, |
| "loss": 0.3759, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.42962962962963e-05, |
| "loss": 0.3662, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.34, |
| "eval_loss": 0.3829396665096283, |
| "eval_runtime": 7.3321, |
| "eval_samples_per_second": 136.523, |
| "eval_steps_per_second": 34.233, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.422222222222222e-05, |
| "loss": 0.4342, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.414814814814815e-05, |
| "loss": 0.4007, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.4074074074074076e-05, |
| "loss": 0.3931, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.36, |
| "eval_loss": 0.37909042835235596, |
| "eval_runtime": 7.3121, |
| "eval_samples_per_second": 136.896, |
| "eval_steps_per_second": 34.327, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.3937, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.392592592592593e-05, |
| "loss": 0.3655, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.37, |
| "eval_loss": 0.37680765986442566, |
| "eval_runtime": 7.2101, |
| "eval_samples_per_second": 138.832, |
| "eval_steps_per_second": 34.812, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.385185185185185e-05, |
| "loss": 0.3454, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.377777777777778e-05, |
| "loss": 0.3279, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.3703703703703705e-05, |
| "loss": 0.4125, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.38, |
| "eval_loss": 0.3775666058063507, |
| "eval_runtime": 7.2081, |
| "eval_samples_per_second": 138.872, |
| "eval_steps_per_second": 34.822, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.3629629629629635e-05, |
| "loss": 0.3324, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.355555555555556e-05, |
| "loss": 0.3848, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.39, |
| "eval_loss": 0.373475044965744, |
| "eval_runtime": 7.1966, |
| "eval_samples_per_second": 139.093, |
| "eval_steps_per_second": 34.877, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.348148148148148e-05, |
| "loss": 0.5328, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.340740740740741e-05, |
| "loss": 0.3791, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.3333333333333334e-05, |
| "loss": 0.3907, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4, |
| "eval_loss": 0.3697744607925415, |
| "eval_runtime": 7.2557, |
| "eval_samples_per_second": 137.961, |
| "eval_steps_per_second": 34.594, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.325925925925926e-05, |
| "loss": 0.3595, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.318518518518519e-05, |
| "loss": 0.4298, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.41, |
| "eval_loss": 0.3690047562122345, |
| "eval_runtime": 7.1998, |
| "eval_samples_per_second": 139.032, |
| "eval_steps_per_second": 34.862, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.311111111111111e-05, |
| "loss": 0.368, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.303703703703704e-05, |
| "loss": 0.384, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.296296296296296e-05, |
| "loss": 0.4369, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_loss": 0.3619600534439087, |
| "eval_runtime": 7.2818, |
| "eval_samples_per_second": 137.467, |
| "eval_steps_per_second": 34.47, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.2888888888888886e-05, |
| "loss": 0.3569, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.2814814814814816e-05, |
| "loss": 0.3332, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.43, |
| "eval_loss": 0.3631249666213989, |
| "eval_runtime": 7.2547, |
| "eval_samples_per_second": 137.98, |
| "eval_steps_per_second": 34.598, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.274074074074074e-05, |
| "loss": 0.3758, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.266666666666667e-05, |
| "loss": 0.385, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.259259259259259e-05, |
| "loss": 0.4328, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.44, |
| "eval_loss": 0.35931944847106934, |
| "eval_runtime": 7.3342, |
| "eval_samples_per_second": 136.484, |
| "eval_steps_per_second": 34.223, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.2518518518518515e-05, |
| "loss": 0.4093, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.2444444444444445e-05, |
| "loss": 0.3752, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.46, |
| "eval_loss": 0.3549325466156006, |
| "eval_runtime": 7.1796, |
| "eval_samples_per_second": 139.424, |
| "eval_steps_per_second": 34.96, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.237037037037037e-05, |
| "loss": 0.3473, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.22962962962963e-05, |
| "loss": 0.3823, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.222222222222222e-05, |
| "loss": 0.4004, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.47, |
| "eval_loss": 0.3550175428390503, |
| "eval_runtime": 7.3, |
| "eval_samples_per_second": 137.122, |
| "eval_steps_per_second": 34.383, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.2148148148148145e-05, |
| "loss": 0.3484, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.2074074074074075e-05, |
| "loss": 0.3552, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.48, |
| "eval_loss": 0.3549079895019531, |
| "eval_runtime": 7.6065, |
| "eval_samples_per_second": 131.599, |
| "eval_steps_per_second": 32.998, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.2e-05, |
| "loss": 0.3847, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.192592592592593e-05, |
| "loss": 0.3861, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.185185185185185e-05, |
| "loss": 0.3719, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.49, |
| "eval_loss": 0.3503241539001465, |
| "eval_runtime": 7.7017, |
| "eval_samples_per_second": 129.971, |
| "eval_steps_per_second": 32.59, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.177777777777778e-05, |
| "loss": 0.3661, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.1703703703703704e-05, |
| "loss": 0.3762, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_loss": 0.3482719659805298, |
| "eval_runtime": 7.5987, |
| "eval_samples_per_second": 131.732, |
| "eval_steps_per_second": 33.032, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.162962962962963e-05, |
| "loss": 0.3803, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.155555555555556e-05, |
| "loss": 0.2975, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.148148148148148e-05, |
| "loss": 0.3397, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.51, |
| "eval_loss": 0.34838753938674927, |
| "eval_runtime": 7.5913, |
| "eval_samples_per_second": 131.861, |
| "eval_steps_per_second": 33.064, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.140740740740741e-05, |
| "loss": 0.3218, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.133333333333333e-05, |
| "loss": 0.345, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.52, |
| "eval_loss": 0.3447699248790741, |
| "eval_runtime": 7.6124, |
| "eval_samples_per_second": 131.496, |
| "eval_steps_per_second": 32.973, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.1259259259259256e-05, |
| "loss": 0.3635, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.1185185185185186e-05, |
| "loss": 0.3337, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.111111111111111e-05, |
| "loss": 0.3892, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.53, |
| "eval_loss": 0.3436849117279053, |
| "eval_runtime": 7.6409, |
| "eval_samples_per_second": 131.006, |
| "eval_steps_per_second": 32.85, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.103703703703704e-05, |
| "loss": 0.3792, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.096296296296296e-05, |
| "loss": 0.3062, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.54, |
| "eval_loss": 0.34092003107070923, |
| "eval_runtime": 7.6331, |
| "eval_samples_per_second": 131.14, |
| "eval_steps_per_second": 32.883, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.088888888888889e-05, |
| "loss": 0.3764, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.0814814814814815e-05, |
| "loss": 0.3541, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.074074074074074e-05, |
| "loss": 0.3728, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.56, |
| "eval_loss": 0.3405691683292389, |
| "eval_runtime": 7.5692, |
| "eval_samples_per_second": 132.247, |
| "eval_steps_per_second": 33.161, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.066666666666667e-05, |
| "loss": 0.3013, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.059259259259259e-05, |
| "loss": 0.321, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_loss": 0.3399699926376343, |
| "eval_runtime": 7.588, |
| "eval_samples_per_second": 131.919, |
| "eval_steps_per_second": 33.079, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.051851851851852e-05, |
| "loss": 0.3404, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.0444444444444444e-05, |
| "loss": 0.381, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.0370370370370374e-05, |
| "loss": 0.314, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.58, |
| "eval_loss": 0.3377434313297272, |
| "eval_runtime": 7.5595, |
| "eval_samples_per_second": 132.417, |
| "eval_steps_per_second": 33.203, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.02962962962963e-05, |
| "loss": 0.3016, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.022222222222222e-05, |
| "loss": 0.4416, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.59, |
| "eval_loss": 0.33633822202682495, |
| "eval_runtime": 7.5093, |
| "eval_samples_per_second": 133.302, |
| "eval_steps_per_second": 33.425, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.014814814814815e-05, |
| "loss": 0.348, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.007407407407407e-05, |
| "loss": 0.3685, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4e-05, |
| "loss": 0.362, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.6, |
| "eval_loss": 0.33278197050094604, |
| "eval_runtime": 7.5483, |
| "eval_samples_per_second": 132.612, |
| "eval_steps_per_second": 33.252, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 3.9925925925925926e-05, |
| "loss": 0.3961, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.985185185185185e-05, |
| "loss": 0.2903, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.61, |
| "eval_loss": 0.33177217841148376, |
| "eval_runtime": 7.574, |
| "eval_samples_per_second": 132.162, |
| "eval_steps_per_second": 33.14, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.977777777777778e-05, |
| "loss": 0.3527, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.97037037037037e-05, |
| "loss": 0.3297, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 3.962962962962963e-05, |
| "loss": 0.3362, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_loss": 0.3336848020553589, |
| "eval_runtime": 7.5409, |
| "eval_samples_per_second": 132.743, |
| "eval_steps_per_second": 33.285, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.9555555555555556e-05, |
| "loss": 0.3714, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 3.9481481481481485e-05, |
| "loss": 0.3278, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.63, |
| "eval_loss": 0.32720068097114563, |
| "eval_runtime": 7.5879, |
| "eval_samples_per_second": 131.921, |
| "eval_steps_per_second": 33.079, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.940740740740741e-05, |
| "loss": 0.3328, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.933333333333333e-05, |
| "loss": 0.2908, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.925925925925926e-05, |
| "loss": 0.358, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_loss": 0.3258882761001587, |
| "eval_runtime": 7.6673, |
| "eval_samples_per_second": 130.554, |
| "eval_steps_per_second": 32.736, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.9185185185185185e-05, |
| "loss": 0.4164, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.9111111111111115e-05, |
| "loss": 0.3103, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.66, |
| "eval_loss": 0.3261792063713074, |
| "eval_runtime": 7.6042, |
| "eval_samples_per_second": 131.637, |
| "eval_steps_per_second": 33.008, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.903703703703704e-05, |
| "loss": 0.4174, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.896296296296296e-05, |
| "loss": 0.3545, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 0.3297, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_loss": 0.32423877716064453, |
| "eval_runtime": 7.6588, |
| "eval_samples_per_second": 130.7, |
| "eval_steps_per_second": 32.773, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 3.8814814814814814e-05, |
| "loss": 0.3856, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 3.8740740740740744e-05, |
| "loss": 0.3598, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.68, |
| "eval_loss": 0.3228550851345062, |
| "eval_runtime": 7.6093, |
| "eval_samples_per_second": 131.55, |
| "eval_steps_per_second": 32.986, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 3.866666666666667e-05, |
| "loss": 0.3218, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 3.85925925925926e-05, |
| "loss": 0.3577, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 3.851851851851852e-05, |
| "loss": 0.3343, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.69, |
| "eval_loss": 0.32196611166000366, |
| "eval_runtime": 7.6896, |
| "eval_samples_per_second": 130.175, |
| "eval_steps_per_second": 32.641, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 3.844444444444444e-05, |
| "loss": 0.2857, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 3.837037037037037e-05, |
| "loss": 0.3324, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.7, |
| "eval_loss": 0.32020455598831177, |
| "eval_runtime": 7.512, |
| "eval_samples_per_second": 133.254, |
| "eval_steps_per_second": 33.413, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 3.8296296296296296e-05, |
| "loss": 0.3791, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 3.8222222222222226e-05, |
| "loss": 0.3563, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 3.814814814814815e-05, |
| "loss": 0.3744, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.71, |
| "eval_loss": 0.31763964891433716, |
| "eval_runtime": 7.5475, |
| "eval_samples_per_second": 132.626, |
| "eval_steps_per_second": 33.256, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.807407407407408e-05, |
| "loss": 0.3689, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.8e-05, |
| "loss": 0.3385, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_loss": 0.3172718584537506, |
| "eval_runtime": 7.4165, |
| "eval_samples_per_second": 134.969, |
| "eval_steps_per_second": 33.843, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.7925925925925925e-05, |
| "loss": 0.3142, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.7851851851851855e-05, |
| "loss": 0.369, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.777777777777778e-05, |
| "loss": 0.3264, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.73, |
| "eval_loss": 0.31627902388572693, |
| "eval_runtime": 7.4047, |
| "eval_samples_per_second": 135.184, |
| "eval_steps_per_second": 33.897, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.770370370370371e-05, |
| "loss": 0.3481, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.762962962962963e-05, |
| "loss": 0.3162, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.74, |
| "eval_loss": 0.31443050503730774, |
| "eval_runtime": 7.2703, |
| "eval_samples_per_second": 137.683, |
| "eval_steps_per_second": 34.524, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.7555555555555554e-05, |
| "loss": 0.3458, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.7481481481481484e-05, |
| "loss": 0.3641, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 3.740740740740741e-05, |
| "loss": 0.3399, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.76, |
| "eval_loss": 0.3139602839946747, |
| "eval_runtime": 7.5549, |
| "eval_samples_per_second": 132.496, |
| "eval_steps_per_second": 33.223, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 3.733333333333334e-05, |
| "loss": 0.3082, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 3.725925925925926e-05, |
| "loss": 0.3544, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_loss": 0.3116389811038971, |
| "eval_runtime": 7.3628, |
| "eval_samples_per_second": 135.955, |
| "eval_steps_per_second": 34.091, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.718518518518519e-05, |
| "loss": 0.3356, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 3.7111111111111113e-05, |
| "loss": 0.383, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.7037037037037037e-05, |
| "loss": 0.3839, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.78, |
| "eval_loss": 0.3124999403953552, |
| "eval_runtime": 7.3086, |
| "eval_samples_per_second": 136.962, |
| "eval_steps_per_second": 34.343, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.6962962962962966e-05, |
| "loss": 0.3378, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.688888888888889e-05, |
| "loss": 0.3034, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.79, |
| "eval_loss": 0.31177276372909546, |
| "eval_runtime": 7.2443, |
| "eval_samples_per_second": 138.177, |
| "eval_steps_per_second": 34.648, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.681481481481482e-05, |
| "loss": 0.3384, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.674074074074074e-05, |
| "loss": 0.3401, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.6666666666666666e-05, |
| "loss": 0.2989, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8, |
| "eval_loss": 0.3089355528354645, |
| "eval_runtime": 7.1728, |
| "eval_samples_per_second": 139.556, |
| "eval_steps_per_second": 34.993, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 3.6592592592592596e-05, |
| "loss": 0.3398, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 3.651851851851852e-05, |
| "loss": 0.3055, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.81, |
| "eval_loss": 0.30923065543174744, |
| "eval_runtime": 7.208, |
| "eval_samples_per_second": 138.874, |
| "eval_steps_per_second": 34.822, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 3.644444444444445e-05, |
| "loss": 0.3088, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 3.637037037037037e-05, |
| "loss": 0.3016, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 3.62962962962963e-05, |
| "loss": 0.3143, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.82, |
| "eval_loss": 0.3072822690010071, |
| "eval_runtime": 7.2053, |
| "eval_samples_per_second": 138.926, |
| "eval_steps_per_second": 34.836, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 3.6222222222222225e-05, |
| "loss": 0.2985, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 3.614814814814815e-05, |
| "loss": 0.3805, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_loss": 0.3062894940376282, |
| "eval_runtime": 7.2009, |
| "eval_samples_per_second": 139.01, |
| "eval_steps_per_second": 34.857, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.607407407407408e-05, |
| "loss": 0.2831, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.6e-05, |
| "loss": 0.3258, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.592592592592593e-05, |
| "loss": 0.2625, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.84, |
| "eval_loss": 0.30678972601890564, |
| "eval_runtime": 7.1985, |
| "eval_samples_per_second": 139.057, |
| "eval_steps_per_second": 34.869, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.5851851851851854e-05, |
| "loss": 0.2923, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.577777777777778e-05, |
| "loss": 0.3276, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.86, |
| "eval_loss": 0.30508390069007874, |
| "eval_runtime": 7.2926, |
| "eval_samples_per_second": 137.262, |
| "eval_steps_per_second": 34.418, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.570370370370371e-05, |
| "loss": 0.3521, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.562962962962963e-05, |
| "loss": 0.283, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 3.555555555555556e-05, |
| "loss": 0.3364, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.87, |
| "eval_loss": 0.3016437888145447, |
| "eval_runtime": 7.3704, |
| "eval_samples_per_second": 135.814, |
| "eval_steps_per_second": 34.055, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 3.548148148148148e-05, |
| "loss": 0.2882, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.540740740740741e-05, |
| "loss": 0.3353, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_loss": 0.30082637071609497, |
| "eval_runtime": 7.6161, |
| "eval_samples_per_second": 131.432, |
| "eval_steps_per_second": 32.957, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.5333333333333336e-05, |
| "loss": 0.2675, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.525925925925926e-05, |
| "loss": 0.3182, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.518518518518519e-05, |
| "loss": 0.2932, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.89, |
| "eval_loss": 0.2999679744243622, |
| "eval_runtime": 7.5872, |
| "eval_samples_per_second": 131.933, |
| "eval_steps_per_second": 33.082, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.511111111111111e-05, |
| "loss": 0.3029, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 3.503703703703704e-05, |
| "loss": 0.2434, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.9, |
| "eval_loss": 0.2994518280029297, |
| "eval_runtime": 7.5416, |
| "eval_samples_per_second": 132.731, |
| "eval_steps_per_second": 33.282, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 3.4962962962962965e-05, |
| "loss": 0.3236, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.4888888888888895e-05, |
| "loss": 0.2967, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.481481481481482e-05, |
| "loss": 0.3572, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.91, |
| "eval_loss": 0.29705023765563965, |
| "eval_runtime": 7.5563, |
| "eval_samples_per_second": 132.473, |
| "eval_steps_per_second": 33.218, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.474074074074074e-05, |
| "loss": 0.3384, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.466666666666667e-05, |
| "loss": 0.2816, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.92, |
| "eval_loss": 0.29650408029556274, |
| "eval_runtime": 7.3924, |
| "eval_samples_per_second": 135.409, |
| "eval_steps_per_second": 33.954, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.4592592592592594e-05, |
| "loss": 0.3361, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 3.4518518518518524e-05, |
| "loss": 0.3125, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 3.444444444444445e-05, |
| "loss": 0.3801, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_loss": 0.2950206696987152, |
| "eval_runtime": 7.3773, |
| "eval_samples_per_second": 135.687, |
| "eval_steps_per_second": 34.023, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.437037037037037e-05, |
| "loss": 0.2684, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.42962962962963e-05, |
| "loss": 0.3387, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.94, |
| "eval_loss": 0.29483914375305176, |
| "eval_runtime": 7.5911, |
| "eval_samples_per_second": 131.865, |
| "eval_steps_per_second": 33.065, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 3.4222222222222224e-05, |
| "loss": 0.3394, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 3.4148148148148153e-05, |
| "loss": 0.2852, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.4074074074074077e-05, |
| "loss": 0.2903, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.96, |
| "eval_loss": 0.2979108989238739, |
| "eval_runtime": 7.666, |
| "eval_samples_per_second": 130.576, |
| "eval_steps_per_second": 32.742, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.3144, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.392592592592593e-05, |
| "loss": 0.3238, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.97, |
| "eval_loss": 0.2956538200378418, |
| "eval_runtime": 7.6479, |
| "eval_samples_per_second": 130.886, |
| "eval_steps_per_second": 32.82, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.385185185185185e-05, |
| "loss": 0.2848, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.377777777777778e-05, |
| "loss": 0.2742, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.3703703703703706e-05, |
| "loss": 0.3392, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.98, |
| "eval_loss": 0.29324308037757874, |
| "eval_runtime": 7.6322, |
| "eval_samples_per_second": 131.155, |
| "eval_steps_per_second": 32.887, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.3629629629629636e-05, |
| "loss": 0.2332, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.355555555555556e-05, |
| "loss": 0.2754, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_loss": 0.29224133491516113, |
| "eval_runtime": 7.5764, |
| "eval_samples_per_second": 132.12, |
| "eval_steps_per_second": 33.129, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.348148148148148e-05, |
| "loss": 0.2778, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.340740740740741e-05, |
| "loss": 0.2646, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.3542, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.2917466461658478, |
| "eval_runtime": 7.5812, |
| "eval_samples_per_second": 132.038, |
| "eval_steps_per_second": 33.108, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.3259259259259265e-05, |
| "loss": 0.3091, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 3.318518518518519e-05, |
| "loss": 0.314, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.01, |
| "eval_loss": 0.29272007942199707, |
| "eval_runtime": 7.5219, |
| "eval_samples_per_second": 133.079, |
| "eval_steps_per_second": 33.369, |
| "step": 2275 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 3.311111111111112e-05, |
| "loss": 0.269, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 3.303703703703704e-05, |
| "loss": 0.2504, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 3.2962962962962964e-05, |
| "loss": 0.3201, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.02, |
| "eval_loss": 0.29086679220199585, |
| "eval_runtime": 7.5125, |
| "eval_samples_per_second": 133.245, |
| "eval_steps_per_second": 33.411, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 3.2888888888888894e-05, |
| "loss": 0.2967, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 3.281481481481482e-05, |
| "loss": 0.2799, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_loss": 0.28955137729644775, |
| "eval_runtime": 7.3165, |
| "eval_samples_per_second": 136.814, |
| "eval_steps_per_second": 34.306, |
| "step": 2325 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.274074074074075e-05, |
| "loss": 0.2535, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.266666666666667e-05, |
| "loss": 0.2763, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.25925925925926e-05, |
| "loss": 0.2356, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.04, |
| "eval_loss": 0.2900914251804352, |
| "eval_runtime": 7.3036, |
| "eval_samples_per_second": 137.055, |
| "eval_steps_per_second": 34.366, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 3.251851851851852e-05, |
| "loss": 0.3099, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 3.2444444444444446e-05, |
| "loss": 0.271, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.06, |
| "eval_loss": 0.28874123096466064, |
| "eval_runtime": 7.2862, |
| "eval_samples_per_second": 137.382, |
| "eval_steps_per_second": 34.448, |
| "step": 2375 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 3.2370370370370376e-05, |
| "loss": 0.32, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 3.22962962962963e-05, |
| "loss": 0.28, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 3.222222222222223e-05, |
| "loss": 0.2246, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.07, |
| "eval_loss": 0.2897484004497528, |
| "eval_runtime": 7.2105, |
| "eval_samples_per_second": 138.825, |
| "eval_steps_per_second": 34.81, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 3.214814814814815e-05, |
| "loss": 0.3076, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.2074074074074075e-05, |
| "loss": 0.2824, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.08, |
| "eval_loss": 0.2881883680820465, |
| "eval_runtime": 7.5403, |
| "eval_samples_per_second": 132.752, |
| "eval_steps_per_second": 33.288, |
| "step": 2425 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.3322, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.192592592592593e-05, |
| "loss": 0.2299, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 3.185185185185185e-05, |
| "loss": 0.2516, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.09, |
| "eval_loss": 0.287548303604126, |
| "eval_runtime": 7.3579, |
| "eval_samples_per_second": 136.045, |
| "eval_steps_per_second": 34.113, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 3.177777777777778e-05, |
| "loss": 0.2652, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 3.1703703703703705e-05, |
| "loss": 0.2465, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.1, |
| "eval_loss": 0.2856321632862091, |
| "eval_runtime": 7.7527, |
| "eval_samples_per_second": 129.116, |
| "eval_steps_per_second": 32.376, |
| "step": 2475 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 3.1629629629629634e-05, |
| "loss": 0.301, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 3.155555555555556e-05, |
| "loss": 0.1987, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 3.148148148148148e-05, |
| "loss": 0.3417, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.11, |
| "eval_loss": 0.2860187292098999, |
| "eval_runtime": 7.5819, |
| "eval_samples_per_second": 132.024, |
| "eval_steps_per_second": 33.105, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 3.140740740740741e-05, |
| "loss": 0.2464, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 3.1333333333333334e-05, |
| "loss": 0.2418, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.12, |
| "eval_loss": 0.28458064794540405, |
| "eval_runtime": 7.684, |
| "eval_samples_per_second": 130.271, |
| "eval_steps_per_second": 32.665, |
| "step": 2525 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 3.1259259259259264e-05, |
| "loss": 0.2884, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.118518518518519e-05, |
| "loss": 0.2396, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.111111111111111e-05, |
| "loss": 0.2625, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.13, |
| "eval_loss": 0.28442564606666565, |
| "eval_runtime": 7.6251, |
| "eval_samples_per_second": 131.277, |
| "eval_steps_per_second": 32.918, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 3.103703703703704e-05, |
| "loss": 0.2286, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 3.096296296296296e-05, |
| "loss": 0.3023, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.14, |
| "eval_loss": 0.28357136249542236, |
| "eval_runtime": 7.6709, |
| "eval_samples_per_second": 130.493, |
| "eval_steps_per_second": 32.721, |
| "step": 2575 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 3.088888888888889e-05, |
| "loss": 0.2631, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 3.0814814814814816e-05, |
| "loss": 0.2656, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.074074074074074e-05, |
| "loss": 0.2301, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.16, |
| "eval_loss": 0.2838016450405121, |
| "eval_runtime": 7.4972, |
| "eval_samples_per_second": 133.516, |
| "eval_steps_per_second": 33.479, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.066666666666667e-05, |
| "loss": 0.2708, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.059259259259259e-05, |
| "loss": 0.2638, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.17, |
| "eval_loss": 0.2819526791572571, |
| "eval_runtime": 7.5414, |
| "eval_samples_per_second": 132.734, |
| "eval_steps_per_second": 33.283, |
| "step": 2625 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.0518518518518515e-05, |
| "loss": 0.2786, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.044444444444445e-05, |
| "loss": 0.3328, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 3.037037037037037e-05, |
| "loss": 0.2835, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.18, |
| "eval_loss": 0.2829004228115082, |
| "eval_runtime": 7.3311, |
| "eval_samples_per_second": 136.541, |
| "eval_steps_per_second": 34.238, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 3.02962962962963e-05, |
| "loss": 0.3523, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.0222222222222225e-05, |
| "loss": 0.2512, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_loss": 0.28273966908454895, |
| "eval_runtime": 7.3473, |
| "eval_samples_per_second": 136.24, |
| "eval_steps_per_second": 34.162, |
| "step": 2675 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.0148148148148148e-05, |
| "loss": 0.2382, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 3.0074074074074078e-05, |
| "loss": 0.2668, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 3e-05, |
| "loss": 0.2472, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.2, |
| "eval_loss": 0.28208473324775696, |
| "eval_runtime": 7.2579, |
| "eval_samples_per_second": 137.92, |
| "eval_steps_per_second": 34.583, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 2.992592592592593e-05, |
| "loss": 0.2332, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 2.9851851851851854e-05, |
| "loss": 0.266, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.21, |
| "eval_loss": 0.281426340341568, |
| "eval_runtime": 7.2593, |
| "eval_samples_per_second": 137.892, |
| "eval_steps_per_second": 34.576, |
| "step": 2725 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 2.9777777777777777e-05, |
| "loss": 0.2908, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 2.9703703703703707e-05, |
| "loss": 0.2299, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 2.962962962962963e-05, |
| "loss": 0.2284, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.22, |
| "eval_loss": 0.2816203832626343, |
| "eval_runtime": 7.2174, |
| "eval_samples_per_second": 138.694, |
| "eval_steps_per_second": 34.777, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 2.955555555555556e-05, |
| "loss": 0.2821, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 2.9481481481481483e-05, |
| "loss": 0.3079, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.23, |
| "eval_loss": 0.2785053551197052, |
| "eval_runtime": 7.3, |
| "eval_samples_per_second": 137.124, |
| "eval_steps_per_second": 34.384, |
| "step": 2775 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 2.9407407407407413e-05, |
| "loss": 0.1826, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 2.9333333333333336e-05, |
| "loss": 0.2438, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 2.925925925925926e-05, |
| "loss": 0.2014, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.24, |
| "eval_loss": 0.2803582549095154, |
| "eval_runtime": 7.196, |
| "eval_samples_per_second": 139.106, |
| "eval_steps_per_second": 34.881, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.918518518518519e-05, |
| "loss": 0.2979, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.9111111111111112e-05, |
| "loss": 0.2223, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.26, |
| "eval_loss": 0.27921053767204285, |
| "eval_runtime": 7.2996, |
| "eval_samples_per_second": 137.132, |
| "eval_steps_per_second": 34.386, |
| "step": 2825 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 2.9037037037037042e-05, |
| "loss": 0.2515, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 2.8962962962962965e-05, |
| "loss": 0.2771, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 2.8888888888888888e-05, |
| "loss": 0.2611, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.27, |
| "eval_loss": 0.2792360186576843, |
| "eval_runtime": 7.1956, |
| "eval_samples_per_second": 139.112, |
| "eval_steps_per_second": 34.882, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 2.8814814814814818e-05, |
| "loss": 0.2867, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2.874074074074074e-05, |
| "loss": 0.2492, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.28, |
| "eval_loss": 0.2788338363170624, |
| "eval_runtime": 7.2868, |
| "eval_samples_per_second": 137.371, |
| "eval_steps_per_second": 34.446, |
| "step": 2875 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2.8666666666666668e-05, |
| "loss": 0.26, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2.8592592592592594e-05, |
| "loss": 0.3693, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 2.851851851851852e-05, |
| "loss": 0.2686, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.29, |
| "eval_loss": 0.27813464403152466, |
| "eval_runtime": 7.1901, |
| "eval_samples_per_second": 139.219, |
| "eval_steps_per_second": 34.909, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 2.8444444444444447e-05, |
| "loss": 0.2091, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 2.837037037037037e-05, |
| "loss": 0.2701, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.3, |
| "eval_loss": 0.27804136276245117, |
| "eval_runtime": 7.2533, |
| "eval_samples_per_second": 138.006, |
| "eval_steps_per_second": 34.605, |
| "step": 2925 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 2.8296296296296297e-05, |
| "loss": 0.2795, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 2.8222222222222223e-05, |
| "loss": 0.3346, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 2.814814814814815e-05, |
| "loss": 0.2254, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.31, |
| "eval_loss": 0.27507713437080383, |
| "eval_runtime": 7.2159, |
| "eval_samples_per_second": 138.721, |
| "eval_steps_per_second": 34.784, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 2.8074074074074076e-05, |
| "loss": 0.2507, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.2942, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.32, |
| "eval_loss": 0.2757761776447296, |
| "eval_runtime": 7.2423, |
| "eval_samples_per_second": 138.216, |
| "eval_steps_per_second": 34.658, |
| "step": 2975 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 2.7925925925925926e-05, |
| "loss": 0.2678, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 2.7851851851851853e-05, |
| "loss": 0.2459, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 2.777777777777778e-05, |
| "loss": 0.2446, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.33, |
| "eval_loss": 0.2748652994632721, |
| "eval_runtime": 7.2844, |
| "eval_samples_per_second": 137.416, |
| "eval_steps_per_second": 34.457, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 2.7703703703703706e-05, |
| "loss": 0.2278, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 2.7629629629629632e-05, |
| "loss": 0.2337, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_loss": 0.2740112841129303, |
| "eval_runtime": 7.3832, |
| "eval_samples_per_second": 135.578, |
| "eval_steps_per_second": 33.996, |
| "step": 3025 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 2.7555555555555555e-05, |
| "loss": 0.2335, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 2.7481481481481482e-05, |
| "loss": 0.2023, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 2.7407407407407408e-05, |
| "loss": 0.2166, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.36, |
| "eval_loss": 0.27378755807876587, |
| "eval_runtime": 7.4184, |
| "eval_samples_per_second": 134.934, |
| "eval_steps_per_second": 33.835, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 2.733333333333333e-05, |
| "loss": 0.2161, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 2.725925925925926e-05, |
| "loss": 0.3003, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.37, |
| "eval_loss": 0.2731979191303253, |
| "eval_runtime": 13.3786, |
| "eval_samples_per_second": 74.821, |
| "eval_steps_per_second": 18.761, |
| "step": 3075 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 2.7185185185185184e-05, |
| "loss": 0.2498, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 2.7111111111111114e-05, |
| "loss": 0.2749, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 2.7037037037037037e-05, |
| "loss": 0.2422, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.38, |
| "eval_loss": 0.27196845412254333, |
| "eval_runtime": 7.5816, |
| "eval_samples_per_second": 132.03, |
| "eval_steps_per_second": 33.107, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 2.696296296296296e-05, |
| "loss": 0.2468, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 2.688888888888889e-05, |
| "loss": 0.263, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.39, |
| "eval_loss": 0.2727610766887665, |
| "eval_runtime": 7.6161, |
| "eval_samples_per_second": 131.432, |
| "eval_steps_per_second": 32.956, |
| "step": 3125 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 2.6814814814814814e-05, |
| "loss": 0.2436, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.6740740740740743e-05, |
| "loss": 0.27, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.6666666666666667e-05, |
| "loss": 0.2462, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.4, |
| "eval_loss": 0.2711770832538605, |
| "eval_runtime": 7.4596, |
| "eval_samples_per_second": 134.19, |
| "eval_steps_per_second": 33.648, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.659259259259259e-05, |
| "loss": 0.3066, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2.651851851851852e-05, |
| "loss": 0.2262, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.41, |
| "eval_loss": 0.27057451009750366, |
| "eval_runtime": 7.4675, |
| "eval_samples_per_second": 134.047, |
| "eval_steps_per_second": 33.612, |
| "step": 3175 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2.6444444444444443e-05, |
| "loss": 0.2187, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 2.6370370370370373e-05, |
| "loss": 0.2992, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 2.6296296296296296e-05, |
| "loss": 0.3181, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.42, |
| "eval_loss": 0.27069294452667236, |
| "eval_runtime": 7.2871, |
| "eval_samples_per_second": 137.367, |
| "eval_steps_per_second": 34.445, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 2.6222222222222226e-05, |
| "loss": 0.275, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 2.614814814814815e-05, |
| "loss": 0.2337, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.43, |
| "eval_loss": 0.2696068286895752, |
| "eval_runtime": 7.3083, |
| "eval_samples_per_second": 136.967, |
| "eval_steps_per_second": 34.344, |
| "step": 3225 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2.6074074074074072e-05, |
| "loss": 0.2596, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2.6000000000000002e-05, |
| "loss": 0.2511, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2.5925925925925925e-05, |
| "loss": 0.2223, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.44, |
| "eval_loss": 0.26948779821395874, |
| "eval_runtime": 7.2594, |
| "eval_samples_per_second": 137.89, |
| "eval_steps_per_second": 34.576, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 2.5851851851851855e-05, |
| "loss": 0.266, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 2.5777777777777778e-05, |
| "loss": 0.2594, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.46, |
| "eval_loss": 0.2680164873600006, |
| "eval_runtime": 7.3879, |
| "eval_samples_per_second": 135.492, |
| "eval_steps_per_second": 33.975, |
| "step": 3275 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.5703703703703708e-05, |
| "loss": 0.2623, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.562962962962963e-05, |
| "loss": 0.2028, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.5555555555555554e-05, |
| "loss": 0.2226, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.47, |
| "eval_loss": 0.27027028799057007, |
| "eval_runtime": 7.2794, |
| "eval_samples_per_second": 137.512, |
| "eval_steps_per_second": 34.481, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.5481481481481484e-05, |
| "loss": 0.211, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 2.5407407407407407e-05, |
| "loss": 0.2037, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.48, |
| "eval_loss": 0.2683195471763611, |
| "eval_runtime": 7.3164, |
| "eval_samples_per_second": 136.815, |
| "eval_steps_per_second": 34.306, |
| "step": 3325 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 2.5333333333333337e-05, |
| "loss": 0.1988, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 2.525925925925926e-05, |
| "loss": 0.2262, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2.5185185185185183e-05, |
| "loss": 0.2538, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.49, |
| "eval_loss": 0.26811686158180237, |
| "eval_runtime": 7.2097, |
| "eval_samples_per_second": 138.841, |
| "eval_steps_per_second": 34.814, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2.5111111111111113e-05, |
| "loss": 0.2029, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.5037037037037036e-05, |
| "loss": 0.2094, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.5, |
| "eval_loss": 0.269042432308197, |
| "eval_runtime": 7.2591, |
| "eval_samples_per_second": 137.896, |
| "eval_steps_per_second": 34.577, |
| "step": 3375 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.4962962962962963e-05, |
| "loss": 0.195, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 2.488888888888889e-05, |
| "loss": 0.2639, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 2.4814814814814816e-05, |
| "loss": 0.2591, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.51, |
| "eval_loss": 0.26725488901138306, |
| "eval_runtime": 7.2092, |
| "eval_samples_per_second": 138.851, |
| "eval_steps_per_second": 34.817, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.4740740740740742e-05, |
| "loss": 0.2043, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.466666666666667e-05, |
| "loss": 0.2274, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.52, |
| "eval_loss": 0.2678578794002533, |
| "eval_runtime": 7.2753, |
| "eval_samples_per_second": 137.588, |
| "eval_steps_per_second": 34.5, |
| "step": 3425 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.4592592592592595e-05, |
| "loss": 0.2352, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 2.451851851851852e-05, |
| "loss": 0.2365, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 2.4444444444444445e-05, |
| "loss": 0.2526, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_loss": 0.266437292098999, |
| "eval_runtime": 7.2076, |
| "eval_samples_per_second": 138.88, |
| "eval_steps_per_second": 34.824, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.437037037037037e-05, |
| "loss": 0.1861, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.4296296296296298e-05, |
| "loss": 0.2659, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.54, |
| "eval_loss": 0.265876829624176, |
| "eval_runtime": 7.5122, |
| "eval_samples_per_second": 133.25, |
| "eval_steps_per_second": 33.412, |
| "step": 3475 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 2.4222222222222224e-05, |
| "loss": 0.2159, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 2.414814814814815e-05, |
| "loss": 0.2809, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2.4074074074074074e-05, |
| "loss": 0.2144, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.56, |
| "eval_loss": 0.2642614245414734, |
| "eval_runtime": 7.468, |
| "eval_samples_per_second": 134.039, |
| "eval_steps_per_second": 33.61, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2.4e-05, |
| "loss": 0.2122, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2.3925925925925927e-05, |
| "loss": 0.2432, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.57, |
| "eval_loss": 0.2640276551246643, |
| "eval_runtime": 7.5832, |
| "eval_samples_per_second": 132.003, |
| "eval_steps_per_second": 33.1, |
| "step": 3525 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2.3851851851851854e-05, |
| "loss": 0.2441, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2.377777777777778e-05, |
| "loss": 0.2647, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 2.3703703703703707e-05, |
| "loss": 0.2852, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.58, |
| "eval_loss": 0.26369205117225647, |
| "eval_runtime": 7.3907, |
| "eval_samples_per_second": 135.44, |
| "eval_steps_per_second": 33.962, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 2.3629629629629633e-05, |
| "loss": 0.2321, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 2.3555555555555556e-05, |
| "loss": 0.3375, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.59, |
| "eval_loss": 0.2633427381515503, |
| "eval_runtime": 7.3881, |
| "eval_samples_per_second": 135.489, |
| "eval_steps_per_second": 33.974, |
| "step": 3575 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 2.3481481481481483e-05, |
| "loss": 0.2219, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.340740740740741e-05, |
| "loss": 0.2004, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.3333333333333336e-05, |
| "loss": 0.272, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_loss": 0.26376578211784363, |
| "eval_runtime": 7.2236, |
| "eval_samples_per_second": 138.574, |
| "eval_steps_per_second": 34.747, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.3259259259259262e-05, |
| "loss": 0.2887, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 2.318518518518519e-05, |
| "loss": 0.2502, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.61, |
| "eval_loss": 0.2631487250328064, |
| "eval_runtime": 7.2851, |
| "eval_samples_per_second": 137.403, |
| "eval_steps_per_second": 34.454, |
| "step": 3625 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 2.3111111111111112e-05, |
| "loss": 0.206, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2.303703703703704e-05, |
| "loss": 0.2368, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2.2962962962962965e-05, |
| "loss": 0.2131, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.62, |
| "eval_loss": 0.2649364173412323, |
| "eval_runtime": 7.1872, |
| "eval_samples_per_second": 139.275, |
| "eval_steps_per_second": 34.923, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 2.288888888888889e-05, |
| "loss": 0.2137, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 2.2814814814814818e-05, |
| "loss": 0.2313, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.63, |
| "eval_loss": 0.2636246979236603, |
| "eval_runtime": 7.2744, |
| "eval_samples_per_second": 137.606, |
| "eval_steps_per_second": 34.505, |
| "step": 3675 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 2.2740740740740744e-05, |
| "loss": 0.239, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 2.2666666666666668e-05, |
| "loss": 0.2354, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 2.2592592592592594e-05, |
| "loss": 0.1919, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.64, |
| "eval_loss": 0.262579083442688, |
| "eval_runtime": 7.2981, |
| "eval_samples_per_second": 137.159, |
| "eval_steps_per_second": 34.392, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 2.251851851851852e-05, |
| "loss": 0.2836, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 2.2444444444444447e-05, |
| "loss": 0.2761, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_loss": 0.26134076714515686, |
| "eval_runtime": 7.3981, |
| "eval_samples_per_second": 135.305, |
| "eval_steps_per_second": 33.928, |
| "step": 3725 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2.2370370370370374e-05, |
| "loss": 0.1932, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2.2296296296296297e-05, |
| "loss": 0.2248, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 0.2151, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.67, |
| "eval_loss": 0.26156380772590637, |
| "eval_runtime": 7.2684, |
| "eval_samples_per_second": 137.72, |
| "eval_steps_per_second": 34.533, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.214814814814815e-05, |
| "loss": 0.2614, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 2.2074074074074076e-05, |
| "loss": 0.2725, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.68, |
| "eval_loss": 0.26058655977249146, |
| "eval_runtime": 7.2639, |
| "eval_samples_per_second": 137.804, |
| "eval_steps_per_second": 34.554, |
| "step": 3775 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 2.2000000000000003e-05, |
| "loss": 0.2402, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 2.1925925925925926e-05, |
| "loss": 0.2283, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 2.1851851851851852e-05, |
| "loss": 0.2552, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.69, |
| "eval_loss": 0.26024872064590454, |
| "eval_runtime": 7.191, |
| "eval_samples_per_second": 139.201, |
| "eval_steps_per_second": 34.905, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 2.177777777777778e-05, |
| "loss": 0.2615, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.1703703703703705e-05, |
| "loss": 0.2094, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.7, |
| "eval_loss": 0.2611374258995056, |
| "eval_runtime": 7.1864, |
| "eval_samples_per_second": 139.291, |
| "eval_steps_per_second": 34.927, |
| "step": 3825 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.162962962962963e-05, |
| "loss": 0.2267, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.1555555555555555e-05, |
| "loss": 0.2807, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.148148148148148e-05, |
| "loss": 0.2948, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.71, |
| "eval_loss": 0.25876420736312866, |
| "eval_runtime": 7.2079, |
| "eval_samples_per_second": 138.875, |
| "eval_steps_per_second": 34.823, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.1407407407407408e-05, |
| "loss": 0.261, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.1333333333333335e-05, |
| "loss": 0.3131, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.72, |
| "eval_loss": 0.25907066464424133, |
| "eval_runtime": 7.2187, |
| "eval_samples_per_second": 138.668, |
| "eval_steps_per_second": 34.771, |
| "step": 3875 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.1259259259259258e-05, |
| "loss": 0.2667, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.1185185185185184e-05, |
| "loss": 0.216, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.111111111111111e-05, |
| "loss": 0.237, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.73, |
| "eval_loss": 0.2582587003707886, |
| "eval_runtime": 8.5503, |
| "eval_samples_per_second": 117.072, |
| "eval_steps_per_second": 29.356, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.1037037037037037e-05, |
| "loss": 0.3102, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.0962962962962964e-05, |
| "loss": 0.2385, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.74, |
| "eval_loss": 0.2583344876766205, |
| "eval_runtime": 7.2821, |
| "eval_samples_per_second": 137.461, |
| "eval_steps_per_second": 34.468, |
| "step": 3925 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.088888888888889e-05, |
| "loss": 0.3235, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.0814814814814813e-05, |
| "loss": 0.2154, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.074074074074074e-05, |
| "loss": 0.1842, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_loss": 0.2580818831920624, |
| "eval_runtime": 7.213, |
| "eval_samples_per_second": 138.777, |
| "eval_steps_per_second": 34.798, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.0666666666666666e-05, |
| "loss": 0.2051, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.0592592592592593e-05, |
| "loss": 0.2291, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.77, |
| "eval_loss": 0.25752562284469604, |
| "eval_runtime": 7.2125, |
| "eval_samples_per_second": 138.787, |
| "eval_steps_per_second": 34.801, |
| "step": 3975 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 2.051851851851852e-05, |
| "loss": 0.1973, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 2.0444444444444446e-05, |
| "loss": 0.2864, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2.037037037037037e-05, |
| "loss": 0.2124, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.78, |
| "eval_loss": 0.2576400637626648, |
| "eval_runtime": 7.2072, |
| "eval_samples_per_second": 138.89, |
| "eval_steps_per_second": 34.827, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2.0296296296296296e-05, |
| "loss": 0.3026, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.0222222222222222e-05, |
| "loss": 0.2713, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.79, |
| "eval_loss": 0.256246954202652, |
| "eval_runtime": 7.2163, |
| "eval_samples_per_second": 138.714, |
| "eval_steps_per_second": 34.782, |
| "step": 4025 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.014814814814815e-05, |
| "loss": 0.2531, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 2.0074074074074075e-05, |
| "loss": 0.2241, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 2e-05, |
| "loss": 0.2111, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.8, |
| "eval_loss": 0.25542324781417847, |
| "eval_runtime": 7.2106, |
| "eval_samples_per_second": 138.823, |
| "eval_steps_per_second": 34.81, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.9925925925925925e-05, |
| "loss": 0.2535, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.985185185185185e-05, |
| "loss": 0.2385, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.81, |
| "eval_loss": 0.2562381625175476, |
| "eval_runtime": 7.1846, |
| "eval_samples_per_second": 139.325, |
| "eval_steps_per_second": 34.936, |
| "step": 4075 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 1.9777777777777778e-05, |
| "loss": 0.212, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.9703703703703704e-05, |
| "loss": 0.2094, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.962962962962963e-05, |
| "loss": 0.2499, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.82, |
| "eval_loss": 0.2557750344276428, |
| "eval_runtime": 7.2707, |
| "eval_samples_per_second": 137.676, |
| "eval_steps_per_second": 34.522, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.9555555555555557e-05, |
| "loss": 0.2507, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.948148148148148e-05, |
| "loss": 0.233, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.83, |
| "eval_loss": 0.25592681765556335, |
| "eval_runtime": 7.3242, |
| "eval_samples_per_second": 136.671, |
| "eval_steps_per_second": 34.27, |
| "step": 4125 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.9407407407407407e-05, |
| "loss": 0.2772, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.9333333333333333e-05, |
| "loss": 0.2487, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.925925925925926e-05, |
| "loss": 0.2538, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.84, |
| "eval_loss": 0.25490960478782654, |
| "eval_runtime": 7.3569, |
| "eval_samples_per_second": 136.062, |
| "eval_steps_per_second": 34.117, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.9185185185185186e-05, |
| "loss": 0.2681, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.9111111111111113e-05, |
| "loss": 0.3042, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.86, |
| "eval_loss": 0.2556719183921814, |
| "eval_runtime": 7.4839, |
| "eval_samples_per_second": 133.753, |
| "eval_steps_per_second": 33.539, |
| "step": 4175 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 1.903703703703704e-05, |
| "loss": 0.2017, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 1.8962962962962963e-05, |
| "loss": 0.2162, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.888888888888889e-05, |
| "loss": 0.1989, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_loss": 0.25471413135528564, |
| "eval_runtime": 7.4955, |
| "eval_samples_per_second": 133.547, |
| "eval_steps_per_second": 33.487, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.8814814814814816e-05, |
| "loss": 0.2227, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.8740740740740742e-05, |
| "loss": 0.198, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.88, |
| "eval_loss": 0.2542085349559784, |
| "eval_runtime": 7.5713, |
| "eval_samples_per_second": 132.211, |
| "eval_steps_per_second": 33.152, |
| "step": 4225 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.866666666666667e-05, |
| "loss": 0.2839, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 1.8592592592592595e-05, |
| "loss": 0.2969, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.8518518518518518e-05, |
| "loss": 0.2634, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.89, |
| "eval_loss": 0.25297510623931885, |
| "eval_runtime": 7.6201, |
| "eval_samples_per_second": 131.363, |
| "eval_steps_per_second": 32.939, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.8444444444444445e-05, |
| "loss": 0.2182, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.837037037037037e-05, |
| "loss": 0.2684, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.9, |
| "eval_loss": 0.2529478371143341, |
| "eval_runtime": 7.6013, |
| "eval_samples_per_second": 131.689, |
| "eval_steps_per_second": 33.021, |
| "step": 4275 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.8296296296296298e-05, |
| "loss": 0.302, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 1.8222222222222224e-05, |
| "loss": 0.195, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 1.814814814814815e-05, |
| "loss": 0.1844, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.91, |
| "eval_loss": 0.2522367835044861, |
| "eval_runtime": 7.3231, |
| "eval_samples_per_second": 136.692, |
| "eval_steps_per_second": 34.275, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.8074074074074074e-05, |
| "loss": 0.2901, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.8e-05, |
| "loss": 0.2303, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_loss": 0.252410352230072, |
| "eval_runtime": 7.3587, |
| "eval_samples_per_second": 136.029, |
| "eval_steps_per_second": 34.109, |
| "step": 4325 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.7925925925925927e-05, |
| "loss": 0.3308, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.7851851851851853e-05, |
| "loss": 0.2441, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.777777777777778e-05, |
| "loss": 0.2126, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.93, |
| "eval_loss": 0.25146955251693726, |
| "eval_runtime": 7.3139, |
| "eval_samples_per_second": 136.863, |
| "eval_steps_per_second": 34.318, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.7703703703703706e-05, |
| "loss": 0.2338, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.762962962962963e-05, |
| "loss": 0.2278, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.94, |
| "eval_loss": 0.2519494593143463, |
| "eval_runtime": 7.2536, |
| "eval_samples_per_second": 138.0, |
| "eval_steps_per_second": 34.604, |
| "step": 4375 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.7555555555555556e-05, |
| "loss": 0.2033, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.7481481481481483e-05, |
| "loss": 0.2524, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.740740740740741e-05, |
| "loss": 0.2692, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_loss": 0.25108450651168823, |
| "eval_runtime": 7.1825, |
| "eval_samples_per_second": 139.366, |
| "eval_steps_per_second": 34.946, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.7333333333333336e-05, |
| "loss": 0.2222, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.7259259259259262e-05, |
| "loss": 0.2398, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_loss": 0.25103849172592163, |
| "eval_runtime": 7.1987, |
| "eval_samples_per_second": 139.053, |
| "eval_steps_per_second": 34.867, |
| "step": 4425 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7185185185185185e-05, |
| "loss": 0.2295, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.7111111111111112e-05, |
| "loss": 0.2153, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1.7037037037037038e-05, |
| "loss": 0.2799, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.98, |
| "eval_loss": 0.25079530477523804, |
| "eval_runtime": 7.4048, |
| "eval_samples_per_second": 135.183, |
| "eval_steps_per_second": 33.897, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1.6962962962962965e-05, |
| "loss": 0.2143, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.688888888888889e-05, |
| "loss": 0.2492, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.99, |
| "eval_loss": 0.25007081031799316, |
| "eval_runtime": 7.3873, |
| "eval_samples_per_second": 135.504, |
| "eval_steps_per_second": 33.977, |
| "step": 4475 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.6814814814814818e-05, |
| "loss": 0.2657, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.674074074074074e-05, |
| "loss": 0.2207, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.2202, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.24966831505298615, |
| "eval_runtime": 7.3428, |
| "eval_samples_per_second": 136.324, |
| "eval_steps_per_second": 34.183, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.6592592592592594e-05, |
| "loss": 0.2467, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.651851851851852e-05, |
| "loss": 0.1736, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.01, |
| "eval_loss": 0.25051918625831604, |
| "eval_runtime": 7.2975, |
| "eval_samples_per_second": 137.171, |
| "eval_steps_per_second": 34.395, |
| "step": 4525 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.6444444444444447e-05, |
| "loss": 0.1836, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.6370370370370374e-05, |
| "loss": 0.1811, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.62962962962963e-05, |
| "loss": 0.204, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.02, |
| "eval_loss": 0.25177687406539917, |
| "eval_runtime": 7.1922, |
| "eval_samples_per_second": 139.179, |
| "eval_steps_per_second": 34.899, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 1.6222222222222223e-05, |
| "loss": 0.22, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 1.614814814814815e-05, |
| "loss": 0.1853, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.03, |
| "eval_loss": 0.25192761421203613, |
| "eval_runtime": 7.1908, |
| "eval_samples_per_second": 139.205, |
| "eval_steps_per_second": 34.906, |
| "step": 4575 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.6074074074074076e-05, |
| "loss": 0.1916, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.2385, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.5925925925925926e-05, |
| "loss": 0.1834, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.04, |
| "eval_loss": 0.25096383690834045, |
| "eval_runtime": 7.214, |
| "eval_samples_per_second": 138.757, |
| "eval_steps_per_second": 34.793, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 1.5851851851851852e-05, |
| "loss": 0.3046, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 1.577777777777778e-05, |
| "loss": 0.2285, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.06, |
| "eval_loss": 0.251299113035202, |
| "eval_runtime": 7.2235, |
| "eval_samples_per_second": 138.575, |
| "eval_steps_per_second": 34.748, |
| "step": 4625 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.5703703703703705e-05, |
| "loss": 0.213, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.5629629629629632e-05, |
| "loss": 0.1222, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.5555555555555555e-05, |
| "loss": 0.179, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.07, |
| "eval_loss": 0.25189557671546936, |
| "eval_runtime": 7.2744, |
| "eval_samples_per_second": 137.607, |
| "eval_steps_per_second": 34.505, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.548148148148148e-05, |
| "loss": 0.1749, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.5407407407407408e-05, |
| "loss": 0.2163, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.08, |
| "eval_loss": 0.25133654475212097, |
| "eval_runtime": 7.2066, |
| "eval_samples_per_second": 138.901, |
| "eval_steps_per_second": 34.829, |
| "step": 4675 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.5333333333333334e-05, |
| "loss": 0.1791, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.5259259259259258e-05, |
| "loss": 0.1868, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.5185185185185186e-05, |
| "loss": 0.2034, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.09, |
| "eval_loss": 0.251429945230484, |
| "eval_runtime": 7.2815, |
| "eval_samples_per_second": 137.471, |
| "eval_steps_per_second": 34.471, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.5111111111111112e-05, |
| "loss": 0.1877, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.5037037037037039e-05, |
| "loss": 0.2275, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.1, |
| "eval_loss": 0.25081896781921387, |
| "eval_runtime": 7.1988, |
| "eval_samples_per_second": 139.051, |
| "eval_steps_per_second": 34.867, |
| "step": 4725 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.4962962962962965e-05, |
| "loss": 0.2512, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.4888888888888888e-05, |
| "loss": 0.1883, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.4814814814814815e-05, |
| "loss": 0.2328, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.11, |
| "eval_loss": 0.2509821653366089, |
| "eval_runtime": 9.4138, |
| "eval_samples_per_second": 106.334, |
| "eval_steps_per_second": 26.663, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 1.4740740740740741e-05, |
| "loss": 0.2146, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 1.4666666666666668e-05, |
| "loss": 0.1935, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.12, |
| "eval_loss": 0.2518753111362457, |
| "eval_runtime": 7.4318, |
| "eval_samples_per_second": 134.691, |
| "eval_steps_per_second": 33.774, |
| "step": 4775 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 1.4592592592592594e-05, |
| "loss": 0.2174, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.4518518518518521e-05, |
| "loss": 0.1887, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.4444444444444444e-05, |
| "loss": 0.1613, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.13, |
| "eval_loss": 0.251521497964859, |
| "eval_runtime": 7.26, |
| "eval_samples_per_second": 137.88, |
| "eval_steps_per_second": 34.573, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 1.437037037037037e-05, |
| "loss": 0.2027, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 1.4296296296296297e-05, |
| "loss": 0.2451, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.14, |
| "eval_loss": 0.2503267526626587, |
| "eval_runtime": 7.228, |
| "eval_samples_per_second": 138.488, |
| "eval_steps_per_second": 34.726, |
| "step": 4825 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.4222222222222224e-05, |
| "loss": 0.199, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.4148148148148148e-05, |
| "loss": 0.1797, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.4074074074074075e-05, |
| "loss": 0.199, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.16, |
| "eval_loss": 0.2507803440093994, |
| "eval_runtime": 7.2959, |
| "eval_samples_per_second": 137.2, |
| "eval_steps_per_second": 34.403, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.4000000000000001e-05, |
| "loss": 0.2138, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.3925925925925926e-05, |
| "loss": 0.2486, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.17, |
| "eval_loss": 0.25041449069976807, |
| "eval_runtime": 7.2127, |
| "eval_samples_per_second": 138.783, |
| "eval_steps_per_second": 34.8, |
| "step": 4875 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.3851851851851853e-05, |
| "loss": 0.1906, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.3777777777777778e-05, |
| "loss": 0.2375, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.3703703703703704e-05, |
| "loss": 0.2124, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.18, |
| "eval_loss": 0.2496342658996582, |
| "eval_runtime": 7.2877, |
| "eval_samples_per_second": 137.354, |
| "eval_steps_per_second": 34.441, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.362962962962963e-05, |
| "loss": 0.1827, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.3555555555555557e-05, |
| "loss": 0.2032, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.19, |
| "eval_loss": 0.2495114803314209, |
| "eval_runtime": 7.1954, |
| "eval_samples_per_second": 139.117, |
| "eval_steps_per_second": 34.883, |
| "step": 4925 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.348148148148148e-05, |
| "loss": 0.133, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.3407407407407407e-05, |
| "loss": 0.2068, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.3333333333333333e-05, |
| "loss": 0.1772, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.2, |
| "eval_loss": 0.25019142031669617, |
| "eval_runtime": 7.291, |
| "eval_samples_per_second": 137.293, |
| "eval_steps_per_second": 34.426, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.325925925925926e-05, |
| "loss": 0.213, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.3185185185185186e-05, |
| "loss": 0.1879, |
| "step": 4970 |
| }, |
| { |
| "epoch": 2.21, |
| "eval_loss": 0.24890127778053284, |
| "eval_runtime": 7.2295, |
| "eval_samples_per_second": 138.46, |
| "eval_steps_per_second": 34.719, |
| "step": 4975 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.3111111111111113e-05, |
| "loss": 0.1829, |
| "step": 4980 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.3037037037037036e-05, |
| "loss": 0.211, |
| "step": 4990 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.2962962962962962e-05, |
| "loss": 0.1554, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.22, |
| "eval_loss": 0.24963602423667908, |
| "eval_runtime": 7.2892, |
| "eval_samples_per_second": 137.326, |
| "eval_steps_per_second": 34.434, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1.2888888888888889e-05, |
| "loss": 0.1922, |
| "step": 5010 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1.2814814814814815e-05, |
| "loss": 0.2178, |
| "step": 5020 |
| }, |
| { |
| "epoch": 2.23, |
| "eval_loss": 0.24953505396842957, |
| "eval_runtime": 7.1973, |
| "eval_samples_per_second": 139.08, |
| "eval_steps_per_second": 34.874, |
| "step": 5025 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 1.2740740740740742e-05, |
| "loss": 0.1909, |
| "step": 5030 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 1.2666666666666668e-05, |
| "loss": 0.2111, |
| "step": 5040 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 1.2592592592592592e-05, |
| "loss": 0.1936, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.24, |
| "eval_loss": 0.249254047870636, |
| "eval_runtime": 7.2689, |
| "eval_samples_per_second": 137.711, |
| "eval_steps_per_second": 34.531, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.2518518518518518e-05, |
| "loss": 0.1913, |
| "step": 5060 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.2444444444444445e-05, |
| "loss": 0.2, |
| "step": 5070 |
| }, |
| { |
| "epoch": 2.26, |
| "eval_loss": 0.2488778978586197, |
| "eval_runtime": 7.2184, |
| "eval_samples_per_second": 138.674, |
| "eval_steps_per_second": 34.772, |
| "step": 5075 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 1.2370370370370371e-05, |
| "loss": 0.1723, |
| "step": 5080 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 1.2296296296296298e-05, |
| "loss": 0.1875, |
| "step": 5090 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.2222222222222222e-05, |
| "loss": 0.185, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.27, |
| "eval_loss": 0.24892283976078033, |
| "eval_runtime": 7.3812, |
| "eval_samples_per_second": 135.615, |
| "eval_steps_per_second": 34.005, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.2148148148148149e-05, |
| "loss": 0.2168, |
| "step": 5110 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1.2074074074074075e-05, |
| "loss": 0.1783, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.28, |
| "eval_loss": 0.2482217699289322, |
| "eval_runtime": 7.3518, |
| "eval_samples_per_second": 136.157, |
| "eval_steps_per_second": 34.141, |
| "step": 5125 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1.2e-05, |
| "loss": 0.1643, |
| "step": 5130 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1.1925925925925927e-05, |
| "loss": 0.2211, |
| "step": 5140 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.1851851851851853e-05, |
| "loss": 0.2276, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.29, |
| "eval_loss": 0.2477390617132187, |
| "eval_runtime": 7.5029, |
| "eval_samples_per_second": 133.414, |
| "eval_steps_per_second": 33.454, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.1777777777777778e-05, |
| "loss": 0.1951, |
| "step": 5160 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.1703703703703705e-05, |
| "loss": 0.2134, |
| "step": 5170 |
| }, |
| { |
| "epoch": 2.3, |
| "eval_loss": 0.24737687408924103, |
| "eval_runtime": 7.4753, |
| "eval_samples_per_second": 133.907, |
| "eval_steps_per_second": 33.577, |
| "step": 5175 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.1629629629629631e-05, |
| "loss": 0.2208, |
| "step": 5180 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 1.1555555555555556e-05, |
| "loss": 0.1714, |
| "step": 5190 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 1.1481481481481482e-05, |
| "loss": 0.1747, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.31, |
| "eval_loss": 0.2470199465751648, |
| "eval_runtime": 7.6413, |
| "eval_samples_per_second": 130.999, |
| "eval_steps_per_second": 32.848, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.1407407407407409e-05, |
| "loss": 0.209, |
| "step": 5210 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.1333333333333334e-05, |
| "loss": 0.2121, |
| "step": 5220 |
| }, |
| { |
| "epoch": 2.32, |
| "eval_loss": 0.2458607703447342, |
| "eval_runtime": 7.515, |
| "eval_samples_per_second": 133.2, |
| "eval_steps_per_second": 33.4, |
| "step": 5225 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.125925925925926e-05, |
| "loss": 0.2266, |
| "step": 5230 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.1185185185185187e-05, |
| "loss": 0.1754, |
| "step": 5240 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.1111111111111112e-05, |
| "loss": 0.2199, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.33, |
| "eval_loss": 0.2461249679327011, |
| "eval_runtime": 7.5406, |
| "eval_samples_per_second": 132.748, |
| "eval_steps_per_second": 33.286, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.1037037037037038e-05, |
| "loss": 0.2609, |
| "step": 5260 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.0962962962962963e-05, |
| "loss": 0.2708, |
| "step": 5270 |
| }, |
| { |
| "epoch": 2.34, |
| "eval_loss": 0.24557095766067505, |
| "eval_runtime": 7.3992, |
| "eval_samples_per_second": 135.286, |
| "eval_steps_per_second": 33.923, |
| "step": 5275 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 1.088888888888889e-05, |
| "loss": 0.1986, |
| "step": 5280 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 1.0814814814814814e-05, |
| "loss": 0.1937, |
| "step": 5290 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.074074074074074e-05, |
| "loss": 0.2252, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.36, |
| "eval_loss": 0.24577473104000092, |
| "eval_runtime": 7.388, |
| "eval_samples_per_second": 135.49, |
| "eval_steps_per_second": 33.974, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.0666666666666667e-05, |
| "loss": 0.1642, |
| "step": 5310 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.0592592592592592e-05, |
| "loss": 0.1921, |
| "step": 5320 |
| }, |
| { |
| "epoch": 2.37, |
| "eval_loss": 0.24629971385002136, |
| "eval_runtime": 7.2504, |
| "eval_samples_per_second": 138.061, |
| "eval_steps_per_second": 34.619, |
| "step": 5325 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.0518518518518519e-05, |
| "loss": 0.1795, |
| "step": 5330 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.0444444444444445e-05, |
| "loss": 0.2051, |
| "step": 5340 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 1.037037037037037e-05, |
| "loss": 0.1627, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.38, |
| "eval_loss": 0.24658331274986267, |
| "eval_runtime": 7.6185, |
| "eval_samples_per_second": 131.39, |
| "eval_steps_per_second": 32.946, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 1.0296296296296296e-05, |
| "loss": 0.1746, |
| "step": 5360 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.0222222222222223e-05, |
| "loss": 0.1988, |
| "step": 5370 |
| }, |
| { |
| "epoch": 2.39, |
| "eval_loss": 0.24601979553699493, |
| "eval_runtime": 7.5137, |
| "eval_samples_per_second": 133.223, |
| "eval_steps_per_second": 33.406, |
| "step": 5375 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.0148148148148148e-05, |
| "loss": 0.1994, |
| "step": 5380 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1.0074074074074074e-05, |
| "loss": 0.191, |
| "step": 5390 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1e-05, |
| "loss": 0.2308, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.4, |
| "eval_loss": 0.24538284540176392, |
| "eval_runtime": 7.6411, |
| "eval_samples_per_second": 131.002, |
| "eval_steps_per_second": 32.849, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 9.925925925925926e-06, |
| "loss": 0.2331, |
| "step": 5410 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 9.851851851851852e-06, |
| "loss": 0.138, |
| "step": 5420 |
| }, |
| { |
| "epoch": 2.41, |
| "eval_loss": 0.24550172686576843, |
| "eval_runtime": 7.6397, |
| "eval_samples_per_second": 131.026, |
| "eval_steps_per_second": 32.855, |
| "step": 5425 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 9.777777777777779e-06, |
| "loss": 0.2266, |
| "step": 5430 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 9.703703703703703e-06, |
| "loss": 0.1693, |
| "step": 5440 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 9.62962962962963e-06, |
| "loss": 0.1745, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.42, |
| "eval_loss": 0.2455550730228424, |
| "eval_runtime": 7.6397, |
| "eval_samples_per_second": 131.025, |
| "eval_steps_per_second": 32.855, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 9.555555555555556e-06, |
| "loss": 0.1472, |
| "step": 5460 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 9.481481481481481e-06, |
| "loss": 0.1712, |
| "step": 5470 |
| }, |
| { |
| "epoch": 2.43, |
| "eval_loss": 0.24605458974838257, |
| "eval_runtime": 8.1344, |
| "eval_samples_per_second": 123.058, |
| "eval_steps_per_second": 30.857, |
| "step": 5475 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 9.407407407407408e-06, |
| "loss": 0.1755, |
| "step": 5480 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 9.333333333333334e-06, |
| "loss": 0.1984, |
| "step": 5490 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 9.259259259259259e-06, |
| "loss": 0.1763, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.44, |
| "eval_loss": 0.24583406746387482, |
| "eval_runtime": 10.3194, |
| "eval_samples_per_second": 97.002, |
| "eval_steps_per_second": 24.323, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 9.185185185185186e-06, |
| "loss": 0.2261, |
| "step": 5510 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 9.111111111111112e-06, |
| "loss": 0.2068, |
| "step": 5520 |
| }, |
| { |
| "epoch": 2.46, |
| "eval_loss": 0.24605007469654083, |
| "eval_runtime": 7.6128, |
| "eval_samples_per_second": 131.489, |
| "eval_steps_per_second": 32.971, |
| "step": 5525 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 9.037037037037037e-06, |
| "loss": 0.2097, |
| "step": 5530 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 8.962962962962963e-06, |
| "loss": 0.1733, |
| "step": 5540 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 8.88888888888889e-06, |
| "loss": 0.2097, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.47, |
| "eval_loss": 0.24533824622631073, |
| "eval_runtime": 7.6087, |
| "eval_samples_per_second": 131.559, |
| "eval_steps_per_second": 32.988, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 8.814814814814815e-06, |
| "loss": 0.1896, |
| "step": 5560 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 8.740740740740741e-06, |
| "loss": 0.1945, |
| "step": 5570 |
| }, |
| { |
| "epoch": 2.48, |
| "eval_loss": 0.24509920179843903, |
| "eval_runtime": 7.6048, |
| "eval_samples_per_second": 131.627, |
| "eval_steps_per_second": 33.005, |
| "step": 5575 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 8.666666666666668e-06, |
| "loss": 0.2036, |
| "step": 5580 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 8.592592592592593e-06, |
| "loss": 0.1933, |
| "step": 5590 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 8.518518518518519e-06, |
| "loss": 0.1906, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.49, |
| "eval_loss": 0.24449166655540466, |
| "eval_runtime": 7.5807, |
| "eval_samples_per_second": 132.047, |
| "eval_steps_per_second": 33.111, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 8.444444444444446e-06, |
| "loss": 0.1655, |
| "step": 5610 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 8.37037037037037e-06, |
| "loss": 0.1681, |
| "step": 5620 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_loss": 0.24523867666721344, |
| "eval_runtime": 7.5355, |
| "eval_samples_per_second": 132.839, |
| "eval_steps_per_second": 33.309, |
| "step": 5625 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 8.296296296296297e-06, |
| "loss": 0.202, |
| "step": 5630 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 8.222222222222223e-06, |
| "loss": 0.1795, |
| "step": 5640 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 8.14814814814815e-06, |
| "loss": 0.1732, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.51, |
| "eval_loss": 0.24490933120250702, |
| "eval_runtime": 7.4917, |
| "eval_samples_per_second": 133.614, |
| "eval_steps_per_second": 33.504, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 8.074074074074075e-06, |
| "loss": 0.1765, |
| "step": 5660 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.2129, |
| "step": 5670 |
| }, |
| { |
| "epoch": 2.52, |
| "eval_loss": 0.2450021654367447, |
| "eval_runtime": 7.5776, |
| "eval_samples_per_second": 132.099, |
| "eval_steps_per_second": 33.124, |
| "step": 5675 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 7.925925925925926e-06, |
| "loss": 0.239, |
| "step": 5680 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 7.851851851851853e-06, |
| "loss": 0.153, |
| "step": 5690 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 7.777777777777777e-06, |
| "loss": 0.1749, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.53, |
| "eval_loss": 0.244710773229599, |
| "eval_runtime": 7.2355, |
| "eval_samples_per_second": 138.346, |
| "eval_steps_per_second": 34.69, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 7.703703703703704e-06, |
| "loss": 0.2436, |
| "step": 5710 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 7.629629629629629e-06, |
| "loss": 0.217, |
| "step": 5720 |
| }, |
| { |
| "epoch": 2.54, |
| "eval_loss": 0.24449826776981354, |
| "eval_runtime": 7.2691, |
| "eval_samples_per_second": 137.706, |
| "eval_steps_per_second": 34.53, |
| "step": 5725 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 7.555555555555556e-06, |
| "loss": 0.1999, |
| "step": 5730 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 7.481481481481483e-06, |
| "loss": 0.1981, |
| "step": 5740 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 7.4074074074074075e-06, |
| "loss": 0.2359, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.56, |
| "eval_loss": 0.24463185667991638, |
| "eval_runtime": 7.1779, |
| "eval_samples_per_second": 139.456, |
| "eval_steps_per_second": 34.968, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 7.333333333333334e-06, |
| "loss": 0.1824, |
| "step": 5760 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 7.2592592592592605e-06, |
| "loss": 0.1813, |
| "step": 5770 |
| }, |
| { |
| "epoch": 2.57, |
| "eval_loss": 0.24394886195659637, |
| "eval_runtime": 7.21, |
| "eval_samples_per_second": 138.835, |
| "eval_steps_per_second": 34.813, |
| "step": 5775 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 7.185185185185185e-06, |
| "loss": 0.2108, |
| "step": 5780 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 7.111111111111112e-06, |
| "loss": 0.2403, |
| "step": 5790 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 7.0370370370370375e-06, |
| "loss": 0.1771, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.58, |
| "eval_loss": 0.2437148541212082, |
| "eval_runtime": 7.3132, |
| "eval_samples_per_second": 136.876, |
| "eval_steps_per_second": 34.322, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 6.962962962962963e-06, |
| "loss": 0.2451, |
| "step": 5810 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 6.888888888888889e-06, |
| "loss": 0.2009, |
| "step": 5820 |
| }, |
| { |
| "epoch": 2.59, |
| "eval_loss": 0.2434249371290207, |
| "eval_runtime": 7.2848, |
| "eval_samples_per_second": 137.41, |
| "eval_steps_per_second": 34.455, |
| "step": 5825 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 6.814814814814815e-06, |
| "loss": 0.1734, |
| "step": 5830 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 6.74074074074074e-06, |
| "loss": 0.2106, |
| "step": 5840 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.2255, |
| "step": 5850 |
| }, |
| { |
| "epoch": 2.6, |
| "eval_loss": 0.24379944801330566, |
| "eval_runtime": 7.3588, |
| "eval_samples_per_second": 136.028, |
| "eval_steps_per_second": 34.109, |
| "step": 5850 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 6.592592592592593e-06, |
| "loss": 0.2042, |
| "step": 5860 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 6.518518518518518e-06, |
| "loss": 0.1645, |
| "step": 5870 |
| }, |
| { |
| "epoch": 2.61, |
| "eval_loss": 0.24349051713943481, |
| "eval_runtime": 7.3319, |
| "eval_samples_per_second": 136.527, |
| "eval_steps_per_second": 34.234, |
| "step": 5875 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 6.4444444444444445e-06, |
| "loss": 0.1971, |
| "step": 5880 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 6.370370370370371e-06, |
| "loss": 0.1939, |
| "step": 5890 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 6.296296296296296e-06, |
| "loss": 0.2087, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.62, |
| "eval_loss": 0.24342414736747742, |
| "eval_runtime": 7.3018, |
| "eval_samples_per_second": 137.09, |
| "eval_steps_per_second": 34.375, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.222222222222222e-06, |
| "loss": 0.2373, |
| "step": 5910 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.148148148148149e-06, |
| "loss": 0.2125, |
| "step": 5920 |
| }, |
| { |
| "epoch": 2.63, |
| "eval_loss": 0.24332143366336823, |
| "eval_runtime": 7.221, |
| "eval_samples_per_second": 138.623, |
| "eval_steps_per_second": 34.76, |
| "step": 5925 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 6.0740740740740745e-06, |
| "loss": 0.2188, |
| "step": 5930 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 6e-06, |
| "loss": 0.1756, |
| "step": 5940 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 5.925925925925927e-06, |
| "loss": 0.2484, |
| "step": 5950 |
| }, |
| { |
| "epoch": 2.64, |
| "eval_loss": 0.24283772706985474, |
| "eval_runtime": 7.2081, |
| "eval_samples_per_second": 138.871, |
| "eval_steps_per_second": 34.822, |
| "step": 5950 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 5.851851851851852e-06, |
| "loss": 0.201, |
| "step": 5960 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 5.777777777777778e-06, |
| "loss": 0.1724, |
| "step": 5970 |
| }, |
| { |
| "epoch": 2.66, |
| "eval_loss": 0.24253526329994202, |
| "eval_runtime": 7.2139, |
| "eval_samples_per_second": 138.76, |
| "eval_steps_per_second": 34.794, |
| "step": 5975 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 5.7037037037037045e-06, |
| "loss": 0.2071, |
| "step": 5980 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 5.62962962962963e-06, |
| "loss": 0.2026, |
| "step": 5990 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 5.555555555555556e-06, |
| "loss": 0.18, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.67, |
| "eval_loss": 0.2427457869052887, |
| "eval_runtime": 7.24, |
| "eval_samples_per_second": 138.259, |
| "eval_steps_per_second": 34.668, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 5.4814814814814815e-06, |
| "loss": 0.2491, |
| "step": 6010 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.407407407407407e-06, |
| "loss": 0.1878, |
| "step": 6020 |
| }, |
| { |
| "epoch": 2.68, |
| "eval_loss": 0.2427491545677185, |
| "eval_runtime": 7.2356, |
| "eval_samples_per_second": 138.343, |
| "eval_steps_per_second": 34.689, |
| "step": 6025 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.333333333333334e-06, |
| "loss": 0.196, |
| "step": 6030 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.259259259259259e-06, |
| "loss": 0.1861, |
| "step": 6040 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.185185185185185e-06, |
| "loss": 0.2131, |
| "step": 6050 |
| }, |
| { |
| "epoch": 2.69, |
| "eval_loss": 0.24262972176074982, |
| "eval_runtime": 7.2135, |
| "eval_samples_per_second": 138.768, |
| "eval_steps_per_second": 34.796, |
| "step": 6050 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.1111111111111115e-06, |
| "loss": 0.1904, |
| "step": 6060 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 5.037037037037037e-06, |
| "loss": 0.1677, |
| "step": 6070 |
| }, |
| { |
| "epoch": 2.7, |
| "eval_loss": 0.24258430302143097, |
| "eval_runtime": 7.38, |
| "eval_samples_per_second": 135.638, |
| "eval_steps_per_second": 34.011, |
| "step": 6075 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 4.962962962962963e-06, |
| "loss": 0.1728, |
| "step": 6080 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.888888888888889e-06, |
| "loss": 0.2153, |
| "step": 6090 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 4.814814814814815e-06, |
| "loss": 0.2097, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.71, |
| "eval_loss": 0.2426908016204834, |
| "eval_runtime": 7.3472, |
| "eval_samples_per_second": 136.243, |
| "eval_steps_per_second": 34.163, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.740740740740741e-06, |
| "loss": 0.1584, |
| "step": 6110 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.666666666666667e-06, |
| "loss": 0.2014, |
| "step": 6120 |
| }, |
| { |
| "epoch": 2.72, |
| "eval_loss": 0.24265140295028687, |
| "eval_runtime": 7.3411, |
| "eval_samples_per_second": 136.355, |
| "eval_steps_per_second": 34.191, |
| "step": 6125 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 4.592592592592593e-06, |
| "loss": 0.2504, |
| "step": 6130 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.5185185185185185e-06, |
| "loss": 0.207, |
| "step": 6140 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.444444444444445e-06, |
| "loss": 0.1777, |
| "step": 6150 |
| }, |
| { |
| "epoch": 2.73, |
| "eval_loss": 0.24221929907798767, |
| "eval_runtime": 7.2165, |
| "eval_samples_per_second": 138.709, |
| "eval_steps_per_second": 34.781, |
| "step": 6150 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 4.370370370370371e-06, |
| "loss": 0.1982, |
| "step": 6160 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 4.296296296296296e-06, |
| "loss": 0.189, |
| "step": 6170 |
| }, |
| { |
| "epoch": 2.74, |
| "eval_loss": 0.24199625849723816, |
| "eval_runtime": 7.2328, |
| "eval_samples_per_second": 138.397, |
| "eval_steps_per_second": 34.703, |
| "step": 6175 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.222222222222223e-06, |
| "loss": 0.1895, |
| "step": 6180 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.1481481481481485e-06, |
| "loss": 0.2303, |
| "step": 6190 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 4.074074074074075e-06, |
| "loss": 0.2499, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.76, |
| "eval_loss": 0.24183855950832367, |
| "eval_runtime": 7.2222, |
| "eval_samples_per_second": 138.6, |
| "eval_steps_per_second": 34.754, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.2105, |
| "step": 6210 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 3.925925925925926e-06, |
| "loss": 0.1827, |
| "step": 6220 |
| }, |
| { |
| "epoch": 2.77, |
| "eval_loss": 0.24157124757766724, |
| "eval_runtime": 7.2228, |
| "eval_samples_per_second": 138.589, |
| "eval_steps_per_second": 34.751, |
| "step": 6225 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.851851851851852e-06, |
| "loss": 0.1625, |
| "step": 6230 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 3.777777777777778e-06, |
| "loss": 0.2046, |
| "step": 6240 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 3.7037037037037037e-06, |
| "loss": 0.1872, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.78, |
| "eval_loss": 0.24152511358261108, |
| "eval_runtime": 7.2189, |
| "eval_samples_per_second": 138.665, |
| "eval_steps_per_second": 34.77, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 3.6296296296296302e-06, |
| "loss": 0.1476, |
| "step": 6260 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 3.555555555555556e-06, |
| "loss": 0.1948, |
| "step": 6270 |
| }, |
| { |
| "epoch": 2.79, |
| "eval_loss": 0.24153129756450653, |
| "eval_runtime": 7.2338, |
| "eval_samples_per_second": 138.379, |
| "eval_steps_per_second": 34.698, |
| "step": 6275 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 3.4814814814814816e-06, |
| "loss": 0.184, |
| "step": 6280 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.4074074074074077e-06, |
| "loss": 0.2911, |
| "step": 6290 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.3333333333333333e-06, |
| "loss": 0.2382, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.8, |
| "eval_loss": 0.24131891131401062, |
| "eval_runtime": 7.204, |
| "eval_samples_per_second": 138.95, |
| "eval_steps_per_second": 34.842, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.259259259259259e-06, |
| "loss": 0.2274, |
| "step": 6310 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 3.1851851851851855e-06, |
| "loss": 0.2495, |
| "step": 6320 |
| }, |
| { |
| "epoch": 2.81, |
| "eval_loss": 0.24129566550254822, |
| "eval_runtime": 7.2128, |
| "eval_samples_per_second": 138.78, |
| "eval_steps_per_second": 34.799, |
| "step": 6325 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 3.111111111111111e-06, |
| "loss": 0.1895, |
| "step": 6330 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 3.0370370370370372e-06, |
| "loss": 0.2067, |
| "step": 6340 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 2.9629629629629633e-06, |
| "loss": 0.2258, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.82, |
| "eval_loss": 0.24125269055366516, |
| "eval_runtime": 7.2125, |
| "eval_samples_per_second": 138.787, |
| "eval_steps_per_second": 34.801, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 2.888888888888889e-06, |
| "loss": 0.1759, |
| "step": 6360 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 2.814814814814815e-06, |
| "loss": 0.1618, |
| "step": 6370 |
| }, |
| { |
| "epoch": 2.83, |
| "eval_loss": 0.24155820906162262, |
| "eval_runtime": 7.2268, |
| "eval_samples_per_second": 138.511, |
| "eval_steps_per_second": 34.732, |
| "step": 6375 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 2.7407407407407407e-06, |
| "loss": 0.1782, |
| "step": 6380 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 2.666666666666667e-06, |
| "loss": 0.1813, |
| "step": 6390 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 2.5925925925925925e-06, |
| "loss": 0.1605, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.84, |
| "eval_loss": 0.24137118458747864, |
| "eval_runtime": 7.2587, |
| "eval_samples_per_second": 137.903, |
| "eval_steps_per_second": 34.579, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 2.5185185185185186e-06, |
| "loss": 0.1751, |
| "step": 6410 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 2.4444444444444447e-06, |
| "loss": 0.1712, |
| "step": 6420 |
| }, |
| { |
| "epoch": 2.86, |
| "eval_loss": 0.24129529297351837, |
| "eval_runtime": 7.385, |
| "eval_samples_per_second": 135.545, |
| "eval_steps_per_second": 33.988, |
| "step": 6425 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 2.3703703703703703e-06, |
| "loss": 0.2067, |
| "step": 6430 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 2.2962962962962964e-06, |
| "loss": 0.177, |
| "step": 6440 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 2.2222222222222225e-06, |
| "loss": 0.2021, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.87, |
| "eval_loss": 0.24113218486309052, |
| "eval_runtime": 7.1916, |
| "eval_samples_per_second": 139.191, |
| "eval_steps_per_second": 34.902, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 2.148148148148148e-06, |
| "loss": 0.1895, |
| "step": 6460 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 2.0740740740740742e-06, |
| "loss": 0.1985, |
| "step": 6470 |
| }, |
| { |
| "epoch": 2.88, |
| "eval_loss": 0.24104492366313934, |
| "eval_runtime": 7.2162, |
| "eval_samples_per_second": 138.715, |
| "eval_steps_per_second": 34.783, |
| "step": 6475 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.1742, |
| "step": 6480 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 1.925925925925926e-06, |
| "loss": 0.2035, |
| "step": 6490 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 1.8518518518518519e-06, |
| "loss": 0.1773, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.89, |
| "eval_loss": 0.24099375307559967, |
| "eval_runtime": 7.2337, |
| "eval_samples_per_second": 138.38, |
| "eval_steps_per_second": 34.699, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 1.777777777777778e-06, |
| "loss": 0.1872, |
| "step": 6510 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.7037037037037038e-06, |
| "loss": 0.2152, |
| "step": 6520 |
| }, |
| { |
| "epoch": 2.9, |
| "eval_loss": 0.24097038805484772, |
| "eval_runtime": 7.2148, |
| "eval_samples_per_second": 138.743, |
| "eval_steps_per_second": 34.79, |
| "step": 6525 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.6296296296296295e-06, |
| "loss": 0.2588, |
| "step": 6530 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.5555555555555556e-06, |
| "loss": 0.2228, |
| "step": 6540 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.4814814814814817e-06, |
| "loss": 0.2225, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.91, |
| "eval_loss": 0.24102109670639038, |
| "eval_runtime": 7.2724, |
| "eval_samples_per_second": 137.644, |
| "eval_steps_per_second": 34.514, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.4074074074074075e-06, |
| "loss": 0.2303, |
| "step": 6560 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.3333333333333334e-06, |
| "loss": 0.1393, |
| "step": 6570 |
| }, |
| { |
| "epoch": 2.92, |
| "eval_loss": 0.24098366498947144, |
| "eval_runtime": 7.2615, |
| "eval_samples_per_second": 137.849, |
| "eval_steps_per_second": 34.566, |
| "step": 6575 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.2592592592592593e-06, |
| "loss": 0.1581, |
| "step": 6580 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 1.1851851851851852e-06, |
| "loss": 0.2093, |
| "step": 6590 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 1.1111111111111112e-06, |
| "loss": 0.1631, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.93, |
| "eval_loss": 0.24084864556789398, |
| "eval_runtime": 7.2231, |
| "eval_samples_per_second": 138.583, |
| "eval_steps_per_second": 34.75, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.0370370370370371e-06, |
| "loss": 0.2028, |
| "step": 6610 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 9.62962962962963e-07, |
| "loss": 0.2158, |
| "step": 6620 |
| }, |
| { |
| "epoch": 2.94, |
| "eval_loss": 0.24080170691013336, |
| "eval_runtime": 7.2315, |
| "eval_samples_per_second": 138.423, |
| "eval_steps_per_second": 34.709, |
| "step": 6625 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.88888888888889e-07, |
| "loss": 0.1775, |
| "step": 6630 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.148148148148147e-07, |
| "loss": 0.1827, |
| "step": 6640 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 7.407407407407408e-07, |
| "loss": 0.218, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.96, |
| "eval_loss": 0.24078905582427979, |
| "eval_runtime": 7.2318, |
| "eval_samples_per_second": 138.417, |
| "eval_steps_per_second": 34.708, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 6.666666666666667e-07, |
| "loss": 0.1736, |
| "step": 6660 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 5.925925925925926e-07, |
| "loss": 0.1654, |
| "step": 6670 |
| }, |
| { |
| "epoch": 2.97, |
| "eval_loss": 0.24077662825584412, |
| "eval_runtime": 7.2025, |
| "eval_samples_per_second": 138.98, |
| "eval_steps_per_second": 34.849, |
| "step": 6675 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 5.185185185185186e-07, |
| "loss": 0.1993, |
| "step": 6680 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 4.444444444444445e-07, |
| "loss": 0.1852, |
| "step": 6690 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 3.703703703703704e-07, |
| "loss": 0.1955, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.98, |
| "eval_loss": 0.24076078832149506, |
| "eval_runtime": 7.1929, |
| "eval_samples_per_second": 139.165, |
| "eval_steps_per_second": 34.895, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 2.962962962962963e-07, |
| "loss": 0.1923, |
| "step": 6710 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 2.2222222222222224e-07, |
| "loss": 0.2142, |
| "step": 6720 |
| }, |
| { |
| "epoch": 2.99, |
| "eval_loss": 0.24076122045516968, |
| "eval_runtime": 7.1938, |
| "eval_samples_per_second": 139.147, |
| "eval_steps_per_second": 34.891, |
| "step": 6725 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.4814814814814815e-07, |
| "loss": 0.2464, |
| "step": 6730 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 7.407407407407407e-08, |
| "loss": 0.2322, |
| "step": 6740 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 0.0, |
| "loss": 0.1781, |
| "step": 6750 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.24075740575790405, |
| "eval_runtime": 7.2037, |
| "eval_samples_per_second": 138.956, |
| "eval_steps_per_second": 34.843, |
| "step": 6750 |
| } |
| ], |
| "max_steps": 6750, |
| "num_train_epochs": 3, |
| "total_flos": 915255853056000.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|