| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 29.746727859935408, | |
| "eval_steps": 5000, | |
| "global_step": 175000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.987500000000001e-06, | |
| "loss": 1.4907, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.975000000000001e-06, | |
| "loss": 1.3231, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.9625e-06, | |
| "loss": 1.3028, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.95e-06, | |
| "loss": 1.2531, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.937500000000001e-06, | |
| "loss": 1.219, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.925e-06, | |
| "loss": 1.177, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.912500000000001e-06, | |
| "loss": 1.1509, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.9000000000000005e-06, | |
| "loss": 1.1144, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 4.8875e-06, | |
| "loss": 1.0829, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.875e-06, | |
| "loss": 1.0648, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_loss": 1.3415521383285522, | |
| "eval_runtime": 31.2796, | |
| "eval_samples_per_second": 375.261, | |
| "eval_steps_per_second": 46.932, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 4.8625000000000005e-06, | |
| "loss": 1.0567, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.85e-06, | |
| "loss": 1.0334, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.837500000000001e-06, | |
| "loss": 0.9995, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 4.825e-06, | |
| "loss": 0.9909, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 4.8125e-06, | |
| "loss": 0.9687, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 4.800000000000001e-06, | |
| "loss": 0.9444, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 4.7875e-06, | |
| "loss": 0.9547, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 4.775e-06, | |
| "loss": 0.9217, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 4.7625000000000006e-06, | |
| "loss": 0.9121, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 4.75e-06, | |
| "loss": 0.9045, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 1.1334267854690552, | |
| "eval_runtime": 33.2532, | |
| "eval_samples_per_second": 352.989, | |
| "eval_steps_per_second": 44.146, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 4.737500000000001e-06, | |
| "loss": 0.8987, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 4.7250000000000005e-06, | |
| "loss": 0.8923, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 4.7125e-06, | |
| "loss": 0.8711, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 4.7e-06, | |
| "loss": 0.8655, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 4.6875000000000004e-06, | |
| "loss": 0.8458, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 4.675000000000001e-06, | |
| "loss": 0.8454, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 4.662500000000001e-06, | |
| "loss": 0.822, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 4.65e-06, | |
| "loss": 0.8252, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 4.6375e-06, | |
| "loss": 0.8105, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 4.625000000000001e-06, | |
| "loss": 0.8092, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_loss": 1.0175799131393433, | |
| "eval_runtime": 30.8472, | |
| "eval_samples_per_second": 380.521, | |
| "eval_steps_per_second": 47.589, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 4.6125e-06, | |
| "loss": 0.8106, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.600000000000001e-06, | |
| "loss": 0.8025, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 4.5875000000000005e-06, | |
| "loss": 0.7931, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 4.575e-06, | |
| "loss": 0.8081, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.5625e-06, | |
| "loss": 0.7895, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 4.5500000000000005e-06, | |
| "loss": 0.7828, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 4.5375e-06, | |
| "loss": 0.7617, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 4.525000000000001e-06, | |
| "loss": 0.756, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.31, | |
| "learning_rate": 4.5125e-06, | |
| "loss": 0.7465, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 4.5e-06, | |
| "loss": 0.7432, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "eval_loss": 0.9422996044158936, | |
| "eval_runtime": 28.729, | |
| "eval_samples_per_second": 408.577, | |
| "eval_steps_per_second": 51.098, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 4.4875e-06, | |
| "loss": 0.7522, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 4.475e-06, | |
| "loss": 0.7471, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 4.4625e-06, | |
| "loss": 0.7368, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 4.450000000000001e-06, | |
| "loss": 0.7467, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 3.82, | |
| "learning_rate": 4.4375e-06, | |
| "loss": 0.733, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 4.425e-06, | |
| "loss": 0.7281, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 4.4125000000000005e-06, | |
| "loss": 0.7269, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 4.4e-06, | |
| "loss": 0.7234, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 4.3875e-06, | |
| "loss": 0.7101, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 4.3750000000000005e-06, | |
| "loss": 0.7029, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "eval_loss": 0.8877292275428772, | |
| "eval_runtime": 28.5804, | |
| "eval_samples_per_second": 410.7, | |
| "eval_steps_per_second": 51.364, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 4.362500000000001e-06, | |
| "loss": 0.6897, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 4.350000000000001e-06, | |
| "loss": 0.7004, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 4.3375e-06, | |
| "loss": 0.6925, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 4.325e-06, | |
| "loss": 0.6967, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 4.312500000000001e-06, | |
| "loss": 0.6872, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 4.3e-06, | |
| "loss": 0.6927, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 4.287500000000001e-06, | |
| "loss": 0.6745, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 4.2750000000000006e-06, | |
| "loss": 0.686, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 5.01, | |
| "learning_rate": 4.2625e-06, | |
| "loss": 0.6753, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 4.25e-06, | |
| "loss": 0.6609, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "eval_loss": 0.8455274105072021, | |
| "eval_runtime": 31.1298, | |
| "eval_samples_per_second": 377.066, | |
| "eval_steps_per_second": 47.157, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 4.2375000000000005e-06, | |
| "loss": 0.668, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 4.225e-06, | |
| "loss": 0.6669, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 4.212500000000001e-06, | |
| "loss": 0.6514, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 4.2000000000000004e-06, | |
| "loss": 0.662, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "learning_rate": 4.1875e-06, | |
| "loss": 0.6617, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 4.175e-06, | |
| "loss": 0.6561, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 5.69, | |
| "learning_rate": 4.1625e-06, | |
| "loss": 0.6551, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 4.15e-06, | |
| "loss": 0.6429, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 4.137500000000001e-06, | |
| "loss": 0.6332, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 4.125e-06, | |
| "loss": 0.6479, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "eval_loss": 0.81136155128479, | |
| "eval_runtime": 28.2662, | |
| "eval_samples_per_second": 415.267, | |
| "eval_steps_per_second": 51.935, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 4.1125e-06, | |
| "loss": 0.6444, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 4.1e-06, | |
| "loss": 0.6345, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 4.0875e-06, | |
| "loss": 0.6274, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 4.075e-06, | |
| "loss": 0.6288, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 4.0625000000000005e-06, | |
| "loss": 0.6392, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 4.05e-06, | |
| "loss": 0.6252, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 4.037500000000001e-06, | |
| "loss": 0.6223, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 4.0250000000000004e-06, | |
| "loss": 0.6155, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "learning_rate": 4.0125e-06, | |
| "loss": 0.6287, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.624, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "eval_loss": 0.7838146090507507, | |
| "eval_runtime": 30.7199, | |
| "eval_samples_per_second": 382.097, | |
| "eval_steps_per_second": 47.787, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 3.9875e-06, | |
| "loss": 0.612, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 3.975000000000001e-06, | |
| "loss": 0.6172, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 7.05, | |
| "learning_rate": 3.962500000000001e-06, | |
| "loss": 0.6094, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 3.95e-06, | |
| "loss": 0.603, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 7.22, | |
| "learning_rate": 3.9375e-06, | |
| "loss": 0.6002, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 3.9250000000000005e-06, | |
| "loss": 0.6095, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 3.9125e-06, | |
| "loss": 0.5925, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 3.900000000000001e-06, | |
| "loss": 0.593, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 7.56, | |
| "learning_rate": 3.8875000000000005e-06, | |
| "loss": 0.5963, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "learning_rate": 3.875e-06, | |
| "loss": 0.6045, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "eval_loss": 0.7607721090316772, | |
| "eval_runtime": 30.9239, | |
| "eval_samples_per_second": 379.577, | |
| "eval_steps_per_second": 47.471, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 7.73, | |
| "learning_rate": 3.8625e-06, | |
| "loss": 0.5967, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 3.85e-06, | |
| "loss": 0.5958, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 3.8375e-06, | |
| "loss": 0.5856, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "learning_rate": 3.825000000000001e-06, | |
| "loss": 0.5957, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "learning_rate": 3.8125e-06, | |
| "loss": 0.5742, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 3.8000000000000005e-06, | |
| "loss": 0.5745, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "learning_rate": 3.7875e-06, | |
| "loss": 0.5847, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 3.7750000000000003e-06, | |
| "loss": 0.5826, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "learning_rate": 3.7625e-06, | |
| "loss": 0.5765, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "loss": 0.571, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "eval_loss": 0.7427138686180115, | |
| "eval_runtime": 32.0182, | |
| "eval_samples_per_second": 366.604, | |
| "eval_steps_per_second": 45.849, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 3.7375000000000006e-06, | |
| "loss": 0.5777, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "learning_rate": 3.7250000000000003e-06, | |
| "loss": 0.573, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 3.7125000000000005e-06, | |
| "loss": 0.5813, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "learning_rate": 3.7e-06, | |
| "loss": 0.5715, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "learning_rate": 3.6875000000000007e-06, | |
| "loss": 0.5636, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 3.6750000000000004e-06, | |
| "loss": 0.5749, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 9.09, | |
| "learning_rate": 3.6625000000000005e-06, | |
| "loss": 0.573, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 3.65e-06, | |
| "loss": 0.5606, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 9.26, | |
| "learning_rate": 3.6375000000000003e-06, | |
| "loss": 0.5553, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 3.625e-06, | |
| "loss": 0.5637, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "eval_loss": 0.7249044179916382, | |
| "eval_runtime": 32.2708, | |
| "eval_samples_per_second": 363.734, | |
| "eval_steps_per_second": 45.49, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "learning_rate": 3.6125000000000006e-06, | |
| "loss": 0.5553, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 3.6000000000000003e-06, | |
| "loss": 0.5648, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 3.5875000000000004e-06, | |
| "loss": 0.5512, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 9.69, | |
| "learning_rate": 3.575e-06, | |
| "loss": 0.5534, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 3.5625e-06, | |
| "loss": 0.5544, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 3.5500000000000003e-06, | |
| "loss": 0.5508, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "learning_rate": 3.5375000000000004e-06, | |
| "loss": 0.5458, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 10.03, | |
| "learning_rate": 3.525e-06, | |
| "loss": 0.557, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 10.11, | |
| "learning_rate": 3.5125000000000003e-06, | |
| "loss": 0.5406, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 10.2, | |
| "learning_rate": 3.5e-06, | |
| "loss": 0.5488, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 10.2, | |
| "eval_loss": 0.7100504636764526, | |
| "eval_runtime": 32.186, | |
| "eval_samples_per_second": 364.692, | |
| "eval_steps_per_second": 45.61, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "learning_rate": 3.4875000000000005e-06, | |
| "loss": 0.5462, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 10.37, | |
| "learning_rate": 3.475e-06, | |
| "loss": 0.5364, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 10.45, | |
| "learning_rate": 3.4625000000000003e-06, | |
| "loss": 0.5452, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 10.54, | |
| "learning_rate": 3.45e-06, | |
| "loss": 0.5449, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 10.62, | |
| "learning_rate": 3.4375e-06, | |
| "loss": 0.5353, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 10.71, | |
| "learning_rate": 3.4250000000000007e-06, | |
| "loss": 0.5359, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 10.79, | |
| "learning_rate": 3.4125000000000004e-06, | |
| "loss": 0.5356, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 10.88, | |
| "learning_rate": 3.4000000000000005e-06, | |
| "loss": 0.5385, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 10.96, | |
| "learning_rate": 3.3875e-06, | |
| "loss": 0.53, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 11.05, | |
| "learning_rate": 3.3750000000000003e-06, | |
| "loss": 0.525, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 11.05, | |
| "eval_loss": 0.6971380114555359, | |
| "eval_runtime": 29.293, | |
| "eval_samples_per_second": 400.71, | |
| "eval_steps_per_second": 50.114, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 11.13, | |
| "learning_rate": 3.3625000000000004e-06, | |
| "loss": 0.519, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 11.22, | |
| "learning_rate": 3.3500000000000005e-06, | |
| "loss": 0.5309, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 11.3, | |
| "learning_rate": 3.3375000000000002e-06, | |
| "loss": 0.5314, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 11.39, | |
| "learning_rate": 3.3250000000000004e-06, | |
| "loss": 0.5255, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 11.47, | |
| "learning_rate": 3.3125e-06, | |
| "loss": 0.5297, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 11.56, | |
| "learning_rate": 3.3000000000000006e-06, | |
| "loss": 0.5238, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 11.64, | |
| "learning_rate": 3.2875000000000003e-06, | |
| "loss": 0.5162, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 11.73, | |
| "learning_rate": 3.2750000000000004e-06, | |
| "loss": 0.5187, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 11.81, | |
| "learning_rate": 3.2625e-06, | |
| "loss": 0.5249, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 11.9, | |
| "learning_rate": 3.2500000000000002e-06, | |
| "loss": 0.5283, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 11.9, | |
| "eval_loss": 0.685904324054718, | |
| "eval_runtime": 29.2682, | |
| "eval_samples_per_second": 401.05, | |
| "eval_steps_per_second": 50.157, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 11.98, | |
| "learning_rate": 3.2375e-06, | |
| "loss": 0.5261, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 12.07, | |
| "learning_rate": 3.2250000000000005e-06, | |
| "loss": 0.5099, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 12.15, | |
| "learning_rate": 3.2125e-06, | |
| "loss": 0.5187, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 12.24, | |
| "learning_rate": 3.2000000000000003e-06, | |
| "loss": 0.5144, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 12.32, | |
| "learning_rate": 3.1875e-06, | |
| "loss": 0.515, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 12.41, | |
| "learning_rate": 3.175e-06, | |
| "loss": 0.5158, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 12.49, | |
| "learning_rate": 3.1625000000000002e-06, | |
| "loss": 0.5078, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 12.58, | |
| "learning_rate": 3.1500000000000003e-06, | |
| "loss": 0.5068, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 12.66, | |
| "learning_rate": 3.1375e-06, | |
| "loss": 0.5114, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 12.75, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.522, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 12.75, | |
| "eval_loss": 0.6754906177520752, | |
| "eval_runtime": 29.2581, | |
| "eval_samples_per_second": 401.188, | |
| "eval_steps_per_second": 50.174, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 12.83, | |
| "learning_rate": 3.1125000000000007e-06, | |
| "loss": 0.5094, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 12.92, | |
| "learning_rate": 3.1000000000000004e-06, | |
| "loss": 0.5108, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 3.0875000000000005e-06, | |
| "loss": 0.5065, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 13.09, | |
| "learning_rate": 3.075e-06, | |
| "loss": 0.5033, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 13.17, | |
| "learning_rate": 3.0625000000000003e-06, | |
| "loss": 0.5032, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 13.26, | |
| "learning_rate": 3.05e-06, | |
| "loss": 0.5011, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 13.34, | |
| "learning_rate": 3.0375000000000006e-06, | |
| "loss": 0.5007, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 13.43, | |
| "learning_rate": 3.0250000000000003e-06, | |
| "loss": 0.4989, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 13.51, | |
| "learning_rate": 3.0125000000000004e-06, | |
| "loss": 0.4992, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "learning_rate": 3e-06, | |
| "loss": 0.4996, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "eval_loss": 0.6659587025642395, | |
| "eval_runtime": 29.4004, | |
| "eval_samples_per_second": 399.247, | |
| "eval_steps_per_second": 49.931, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 13.68, | |
| "learning_rate": 2.9875e-06, | |
| "loss": 0.5022, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 13.77, | |
| "learning_rate": 2.9750000000000003e-06, | |
| "loss": 0.4983, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 13.85, | |
| "learning_rate": 2.9625000000000004e-06, | |
| "loss": 0.502, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 13.94, | |
| "learning_rate": 2.95e-06, | |
| "loss": 0.495, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "learning_rate": 2.9375000000000003e-06, | |
| "loss": 0.4967, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "learning_rate": 2.925e-06, | |
| "loss": 0.4947, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 14.19, | |
| "learning_rate": 2.9125000000000005e-06, | |
| "loss": 0.4841, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 14.28, | |
| "learning_rate": 2.9e-06, | |
| "loss": 0.4922, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 14.36, | |
| "learning_rate": 2.8875000000000003e-06, | |
| "loss": 0.4925, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 14.45, | |
| "learning_rate": 2.875e-06, | |
| "loss": 0.4868, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 14.45, | |
| "eval_loss": 0.6585991978645325, | |
| "eval_runtime": 29.5661, | |
| "eval_samples_per_second": 397.009, | |
| "eval_steps_per_second": 49.652, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 14.53, | |
| "learning_rate": 2.8625e-06, | |
| "loss": 0.4943, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "learning_rate": 2.85e-06, | |
| "loss": 0.4887, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 14.7, | |
| "learning_rate": 2.8375000000000004e-06, | |
| "loss": 0.4832, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 14.79, | |
| "learning_rate": 2.825e-06, | |
| "loss": 0.4922, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 14.87, | |
| "learning_rate": 2.8125e-06, | |
| "loss": 0.483, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 14.96, | |
| "learning_rate": 2.8000000000000003e-06, | |
| "loss": 0.4924, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 15.04, | |
| "learning_rate": 2.7875000000000004e-06, | |
| "loss": 0.4836, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 15.13, | |
| "learning_rate": 2.7750000000000005e-06, | |
| "loss": 0.4736, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 15.21, | |
| "learning_rate": 2.7625000000000002e-06, | |
| "loss": 0.4799, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 15.3, | |
| "learning_rate": 2.7500000000000004e-06, | |
| "loss": 0.4773, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 15.3, | |
| "eval_loss": 0.6510518789291382, | |
| "eval_runtime": 28.0473, | |
| "eval_samples_per_second": 418.507, | |
| "eval_steps_per_second": 52.34, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 15.38, | |
| "learning_rate": 2.7375e-06, | |
| "loss": 0.4791, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 15.47, | |
| "learning_rate": 2.7250000000000006e-06, | |
| "loss": 0.4788, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 15.55, | |
| "learning_rate": 2.7125000000000003e-06, | |
| "loss": 0.4759, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 15.64, | |
| "learning_rate": 2.7000000000000004e-06, | |
| "loss": 0.4784, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 15.72, | |
| "learning_rate": 2.6875e-06, | |
| "loss": 0.4762, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 15.81, | |
| "learning_rate": 2.6750000000000002e-06, | |
| "loss": 0.4827, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 15.89, | |
| "learning_rate": 2.6625e-06, | |
| "loss": 0.4844, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 15.98, | |
| "learning_rate": 2.6500000000000005e-06, | |
| "loss": 0.4787, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 16.06, | |
| "learning_rate": 2.6375e-06, | |
| "loss": 0.4759, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 16.15, | |
| "learning_rate": 2.6250000000000003e-06, | |
| "loss": 0.4724, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 16.15, | |
| "eval_loss": 0.6447970271110535, | |
| "eval_runtime": 27.4195, | |
| "eval_samples_per_second": 428.09, | |
| "eval_steps_per_second": 53.539, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 16.23, | |
| "learning_rate": 2.6125e-06, | |
| "loss": 0.4748, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 16.32, | |
| "learning_rate": 2.6e-06, | |
| "loss": 0.4711, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 16.4, | |
| "learning_rate": 2.5875000000000002e-06, | |
| "loss": 0.4744, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 16.49, | |
| "learning_rate": 2.5750000000000003e-06, | |
| "loss": 0.4751, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 16.57, | |
| "learning_rate": 2.5625e-06, | |
| "loss": 0.4716, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 16.66, | |
| "learning_rate": 2.55e-06, | |
| "loss": 0.4646, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 16.74, | |
| "learning_rate": 2.5375e-06, | |
| "loss": 0.4629, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 16.83, | |
| "learning_rate": 2.5250000000000004e-06, | |
| "loss": 0.4711, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 16.91, | |
| "learning_rate": 2.5125e-06, | |
| "loss": 0.4708, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.4682, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 0.6382132768630981, | |
| "eval_runtime": 27.4181, | |
| "eval_samples_per_second": 428.111, | |
| "eval_steps_per_second": 53.541, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 17.08, | |
| "learning_rate": 2.4875000000000003e-06, | |
| "loss": 0.4575, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 17.17, | |
| "learning_rate": 2.475e-06, | |
| "loss": 0.4609, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 17.25, | |
| "learning_rate": 2.4625e-06, | |
| "loss": 0.4673, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 17.34, | |
| "learning_rate": 2.4500000000000003e-06, | |
| "loss": 0.4653, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 17.42, | |
| "learning_rate": 2.4375e-06, | |
| "loss": 0.4595, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 17.51, | |
| "learning_rate": 2.425e-06, | |
| "loss": 0.4578, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 17.59, | |
| "learning_rate": 2.4125e-06, | |
| "loss": 0.4682, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 17.68, | |
| "learning_rate": 2.4000000000000003e-06, | |
| "loss": 0.4601, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 17.76, | |
| "learning_rate": 2.3875e-06, | |
| "loss": 0.4585, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 17.85, | |
| "learning_rate": 2.375e-06, | |
| "loss": 0.4648, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 17.85, | |
| "eval_loss": 0.6338370442390442, | |
| "eval_runtime": 27.5324, | |
| "eval_samples_per_second": 426.334, | |
| "eval_steps_per_second": 53.319, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 17.93, | |
| "learning_rate": 2.3625000000000003e-06, | |
| "loss": 0.4642, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 18.02, | |
| "learning_rate": 2.35e-06, | |
| "loss": 0.4648, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 18.1, | |
| "learning_rate": 2.3375000000000005e-06, | |
| "loss": 0.4599, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 18.19, | |
| "learning_rate": 2.325e-06, | |
| "loss": 0.4481, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 18.27, | |
| "learning_rate": 2.3125000000000003e-06, | |
| "loss": 0.4601, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 18.36, | |
| "learning_rate": 2.3000000000000004e-06, | |
| "loss": 0.4582, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 18.44, | |
| "learning_rate": 2.2875e-06, | |
| "loss": 0.4589, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 18.53, | |
| "learning_rate": 2.2750000000000002e-06, | |
| "loss": 0.4505, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 18.61, | |
| "learning_rate": 2.2625000000000004e-06, | |
| "loss": 0.4584, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 18.7, | |
| "learning_rate": 2.25e-06, | |
| "loss": 0.4551, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 18.7, | |
| "eval_loss": 0.6278859972953796, | |
| "eval_runtime": 28.5577, | |
| "eval_samples_per_second": 411.027, | |
| "eval_steps_per_second": 51.405, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 18.78, | |
| "learning_rate": 2.2375e-06, | |
| "loss": 0.4512, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 18.87, | |
| "learning_rate": 2.2250000000000003e-06, | |
| "loss": 0.4549, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 18.95, | |
| "learning_rate": 2.2125e-06, | |
| "loss": 0.4607, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 19.04, | |
| "learning_rate": 2.2e-06, | |
| "loss": 0.4493, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 19.12, | |
| "learning_rate": 2.1875000000000002e-06, | |
| "loss": 0.4481, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 19.21, | |
| "learning_rate": 2.1750000000000004e-06, | |
| "loss": 0.4475, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 19.29, | |
| "learning_rate": 2.1625e-06, | |
| "loss": 0.4487, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 19.38, | |
| "learning_rate": 2.15e-06, | |
| "loss": 0.4471, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 19.46, | |
| "learning_rate": 2.1375000000000003e-06, | |
| "loss": 0.4501, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 19.55, | |
| "learning_rate": 2.125e-06, | |
| "loss": 0.4412, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 19.55, | |
| "eval_loss": 0.6246311068534851, | |
| "eval_runtime": 32.3732, | |
| "eval_samples_per_second": 362.584, | |
| "eval_steps_per_second": 45.346, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 19.63, | |
| "learning_rate": 2.1125e-06, | |
| "loss": 0.4557, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 19.72, | |
| "learning_rate": 2.1000000000000002e-06, | |
| "loss": 0.4509, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 19.8, | |
| "learning_rate": 2.0875e-06, | |
| "loss": 0.4484, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 19.89, | |
| "learning_rate": 2.075e-06, | |
| "loss": 0.4464, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 19.97, | |
| "learning_rate": 2.0625e-06, | |
| "loss": 0.4442, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 20.06, | |
| "learning_rate": 2.05e-06, | |
| "loss": 0.4479, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 20.14, | |
| "learning_rate": 2.0375e-06, | |
| "loss": 0.4376, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 20.23, | |
| "learning_rate": 2.025e-06, | |
| "loss": 0.4441, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 20.31, | |
| "learning_rate": 2.0125000000000002e-06, | |
| "loss": 0.4429, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 20.4, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.447, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 20.4, | |
| "eval_loss": 0.620963990688324, | |
| "eval_runtime": 29.0345, | |
| "eval_samples_per_second": 404.278, | |
| "eval_steps_per_second": 50.561, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 20.48, | |
| "learning_rate": 1.9875000000000005e-06, | |
| "loss": 0.4466, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 20.57, | |
| "learning_rate": 1.975e-06, | |
| "loss": 0.4487, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 20.65, | |
| "learning_rate": 1.9625000000000003e-06, | |
| "loss": 0.4406, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 20.74, | |
| "learning_rate": 1.9500000000000004e-06, | |
| "loss": 0.4423, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 20.82, | |
| "learning_rate": 1.9375e-06, | |
| "loss": 0.4454, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 20.91, | |
| "learning_rate": 1.925e-06, | |
| "loss": 0.4396, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 20.99, | |
| "learning_rate": 1.9125000000000003e-06, | |
| "loss": 0.4387, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 21.08, | |
| "learning_rate": 1.9000000000000002e-06, | |
| "loss": 0.4339, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 21.16, | |
| "learning_rate": 1.8875000000000001e-06, | |
| "loss": 0.4407, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 21.25, | |
| "learning_rate": 1.8750000000000003e-06, | |
| "loss": 0.4431, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 21.25, | |
| "eval_loss": 0.6155585646629333, | |
| "eval_runtime": 29.6557, | |
| "eval_samples_per_second": 395.809, | |
| "eval_steps_per_second": 49.501, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 21.33, | |
| "learning_rate": 1.8625000000000002e-06, | |
| "loss": 0.4341, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 21.42, | |
| "learning_rate": 1.85e-06, | |
| "loss": 0.4358, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 21.5, | |
| "learning_rate": 1.8375000000000002e-06, | |
| "loss": 0.4443, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 21.59, | |
| "learning_rate": 1.825e-06, | |
| "loss": 0.4307, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 21.67, | |
| "learning_rate": 1.8125e-06, | |
| "loss": 0.4422, | |
| "step": 127500 | |
| }, | |
| { | |
| "epoch": 21.76, | |
| "learning_rate": 1.8000000000000001e-06, | |
| "loss": 0.4384, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 21.84, | |
| "learning_rate": 1.7875e-06, | |
| "loss": 0.4372, | |
| "step": 128500 | |
| }, | |
| { | |
| "epoch": 21.93, | |
| "learning_rate": 1.7750000000000002e-06, | |
| "loss": 0.4328, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 22.01, | |
| "learning_rate": 1.7625e-06, | |
| "loss": 0.4404, | |
| "step": 129500 | |
| }, | |
| { | |
| "epoch": 22.1, | |
| "learning_rate": 1.75e-06, | |
| "loss": 0.4328, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 22.1, | |
| "eval_loss": 0.6130816340446472, | |
| "eval_runtime": 29.5785, | |
| "eval_samples_per_second": 396.843, | |
| "eval_steps_per_second": 49.631, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 22.18, | |
| "learning_rate": 1.7375e-06, | |
| "loss": 0.427, | |
| "step": 130500 | |
| }, | |
| { | |
| "epoch": 22.27, | |
| "learning_rate": 1.725e-06, | |
| "loss": 0.4246, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 22.35, | |
| "learning_rate": 1.7125000000000003e-06, | |
| "loss": 0.4369, | |
| "step": 131500 | |
| }, | |
| { | |
| "epoch": 22.44, | |
| "learning_rate": 1.7000000000000002e-06, | |
| "loss": 0.4315, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 22.52, | |
| "learning_rate": 1.6875000000000001e-06, | |
| "loss": 0.4356, | |
| "step": 132500 | |
| }, | |
| { | |
| "epoch": 22.61, | |
| "learning_rate": 1.6750000000000003e-06, | |
| "loss": 0.4282, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 22.69, | |
| "learning_rate": 1.6625000000000002e-06, | |
| "loss": 0.4295, | |
| "step": 133500 | |
| }, | |
| { | |
| "epoch": 22.78, | |
| "learning_rate": 1.6500000000000003e-06, | |
| "loss": 0.4303, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 22.86, | |
| "learning_rate": 1.6375000000000002e-06, | |
| "loss": 0.4346, | |
| "step": 134500 | |
| }, | |
| { | |
| "epoch": 22.95, | |
| "learning_rate": 1.6250000000000001e-06, | |
| "loss": 0.4352, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 22.95, | |
| "eval_loss": 0.6105329394340515, | |
| "eval_runtime": 31.3111, | |
| "eval_samples_per_second": 374.883, | |
| "eval_steps_per_second": 46.884, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 23.03, | |
| "learning_rate": 1.6125000000000002e-06, | |
| "loss": 0.4328, | |
| "step": 135500 | |
| }, | |
| { | |
| "epoch": 23.12, | |
| "learning_rate": 1.6000000000000001e-06, | |
| "loss": 0.4291, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 23.2, | |
| "learning_rate": 1.5875e-06, | |
| "loss": 0.4238, | |
| "step": 136500 | |
| }, | |
| { | |
| "epoch": 23.29, | |
| "learning_rate": 1.5750000000000002e-06, | |
| "loss": 0.4239, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 23.37, | |
| "learning_rate": 1.5625e-06, | |
| "loss": 0.4267, | |
| "step": 137500 | |
| }, | |
| { | |
| "epoch": 23.46, | |
| "learning_rate": 1.5500000000000002e-06, | |
| "loss": 0.4306, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 23.54, | |
| "learning_rate": 1.5375e-06, | |
| "loss": 0.4327, | |
| "step": 138500 | |
| }, | |
| { | |
| "epoch": 23.63, | |
| "learning_rate": 1.525e-06, | |
| "loss": 0.429, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 23.71, | |
| "learning_rate": 1.5125000000000001e-06, | |
| "loss": 0.4295, | |
| "step": 139500 | |
| }, | |
| { | |
| "epoch": 23.8, | |
| "learning_rate": 1.5e-06, | |
| "loss": 0.4268, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 23.8, | |
| "eval_loss": 0.6070874929428101, | |
| "eval_runtime": 28.7528, | |
| "eval_samples_per_second": 408.239, | |
| "eval_steps_per_second": 51.056, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 23.88, | |
| "learning_rate": 1.4875000000000002e-06, | |
| "loss": 0.424, | |
| "step": 140500 | |
| }, | |
| { | |
| "epoch": 23.97, | |
| "learning_rate": 1.475e-06, | |
| "loss": 0.423, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 24.05, | |
| "learning_rate": 1.4625e-06, | |
| "loss": 0.4194, | |
| "step": 141500 | |
| }, | |
| { | |
| "epoch": 24.14, | |
| "learning_rate": 1.45e-06, | |
| "loss": 0.4246, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 24.22, | |
| "learning_rate": 1.4375e-06, | |
| "loss": 0.4268, | |
| "step": 142500 | |
| }, | |
| { | |
| "epoch": 24.31, | |
| "learning_rate": 1.425e-06, | |
| "loss": 0.4245, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 24.39, | |
| "learning_rate": 1.4125e-06, | |
| "loss": 0.4183, | |
| "step": 143500 | |
| }, | |
| { | |
| "epoch": 24.48, | |
| "learning_rate": 1.4000000000000001e-06, | |
| "loss": 0.4234, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 24.56, | |
| "learning_rate": 1.3875000000000003e-06, | |
| "loss": 0.4267, | |
| "step": 144500 | |
| }, | |
| { | |
| "epoch": 24.65, | |
| "learning_rate": 1.3750000000000002e-06, | |
| "loss": 0.4212, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 24.65, | |
| "eval_loss": 0.6056092381477356, | |
| "eval_runtime": 29.9328, | |
| "eval_samples_per_second": 392.145, | |
| "eval_steps_per_second": 49.043, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 24.73, | |
| "learning_rate": 1.3625000000000003e-06, | |
| "loss": 0.4223, | |
| "step": 145500 | |
| }, | |
| { | |
| "epoch": 24.82, | |
| "learning_rate": 1.3500000000000002e-06, | |
| "loss": 0.4325, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 24.9, | |
| "learning_rate": 1.3375000000000001e-06, | |
| "loss": 0.4199, | |
| "step": 146500 | |
| }, | |
| { | |
| "epoch": 24.99, | |
| "learning_rate": 1.3250000000000002e-06, | |
| "loss": 0.4301, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 25.07, | |
| "learning_rate": 1.3125000000000001e-06, | |
| "loss": 0.413, | |
| "step": 147500 | |
| }, | |
| { | |
| "epoch": 25.16, | |
| "learning_rate": 1.3e-06, | |
| "loss": 0.4213, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 25.24, | |
| "learning_rate": 1.2875000000000002e-06, | |
| "loss": 0.4211, | |
| "step": 148500 | |
| }, | |
| { | |
| "epoch": 25.33, | |
| "learning_rate": 1.275e-06, | |
| "loss": 0.4288, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 25.41, | |
| "learning_rate": 1.2625000000000002e-06, | |
| "loss": 0.4256, | |
| "step": 149500 | |
| }, | |
| { | |
| "epoch": 25.5, | |
| "learning_rate": 1.25e-06, | |
| "loss": 0.4261, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 25.5, | |
| "eval_loss": 0.6023589372634888, | |
| "eval_runtime": 42.7696, | |
| "eval_samples_per_second": 274.447, | |
| "eval_steps_per_second": 34.323, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 25.58, | |
| "learning_rate": 1.2375e-06, | |
| "loss": 0.4193, | |
| "step": 150500 | |
| }, | |
| { | |
| "epoch": 25.67, | |
| "learning_rate": 1.2250000000000001e-06, | |
| "loss": 0.4186, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 25.75, | |
| "learning_rate": 1.2125e-06, | |
| "loss": 0.4154, | |
| "step": 151500 | |
| }, | |
| { | |
| "epoch": 25.84, | |
| "learning_rate": 1.2000000000000002e-06, | |
| "loss": 0.4238, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 25.92, | |
| "learning_rate": 1.1875e-06, | |
| "loss": 0.4165, | |
| "step": 152500 | |
| }, | |
| { | |
| "epoch": 26.01, | |
| "learning_rate": 1.175e-06, | |
| "loss": 0.4165, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 26.09, | |
| "learning_rate": 1.1625e-06, | |
| "loss": 0.4169, | |
| "step": 153500 | |
| }, | |
| { | |
| "epoch": 26.18, | |
| "learning_rate": 1.1500000000000002e-06, | |
| "loss": 0.4116, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 26.26, | |
| "learning_rate": 1.1375000000000001e-06, | |
| "loss": 0.4138, | |
| "step": 154500 | |
| }, | |
| { | |
| "epoch": 26.35, | |
| "learning_rate": 1.125e-06, | |
| "loss": 0.4192, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 26.35, | |
| "eval_loss": 0.6006730794906616, | |
| "eval_runtime": 27.6694, | |
| "eval_samples_per_second": 424.223, | |
| "eval_steps_per_second": 53.055, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 26.43, | |
| "learning_rate": 1.1125000000000001e-06, | |
| "loss": 0.4216, | |
| "step": 155500 | |
| }, | |
| { | |
| "epoch": 26.52, | |
| "learning_rate": 1.1e-06, | |
| "loss": 0.4186, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 26.6, | |
| "learning_rate": 1.0875000000000002e-06, | |
| "loss": 0.4148, | |
| "step": 156500 | |
| }, | |
| { | |
| "epoch": 26.69, | |
| "learning_rate": 1.075e-06, | |
| "loss": 0.4186, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 26.77, | |
| "learning_rate": 1.0625e-06, | |
| "loss": 0.4202, | |
| "step": 157500 | |
| }, | |
| { | |
| "epoch": 26.86, | |
| "learning_rate": 1.0500000000000001e-06, | |
| "loss": 0.4139, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 26.94, | |
| "learning_rate": 1.0375e-06, | |
| "loss": 0.4196, | |
| "step": 158500 | |
| }, | |
| { | |
| "epoch": 27.03, | |
| "learning_rate": 1.025e-06, | |
| "loss": 0.4191, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 27.11, | |
| "learning_rate": 1.0125e-06, | |
| "loss": 0.4145, | |
| "step": 159500 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.4117, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "eval_loss": 0.599934995174408, | |
| "eval_runtime": 27.3077, | |
| "eval_samples_per_second": 429.842, | |
| "eval_steps_per_second": 53.758, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 27.28, | |
| "learning_rate": 9.875e-07, | |
| "loss": 0.4202, | |
| "step": 160500 | |
| }, | |
| { | |
| "epoch": 27.37, | |
| "learning_rate": 9.750000000000002e-07, | |
| "loss": 0.4166, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 27.45, | |
| "learning_rate": 9.625e-07, | |
| "loss": 0.4119, | |
| "step": 161500 | |
| }, | |
| { | |
| "epoch": 27.54, | |
| "learning_rate": 9.500000000000001e-07, | |
| "loss": 0.4109, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 27.62, | |
| "learning_rate": 9.375000000000001e-07, | |
| "loss": 0.4149, | |
| "step": 162500 | |
| }, | |
| { | |
| "epoch": 27.71, | |
| "learning_rate": 9.25e-07, | |
| "loss": 0.412, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 27.79, | |
| "learning_rate": 9.125e-07, | |
| "loss": 0.4145, | |
| "step": 163500 | |
| }, | |
| { | |
| "epoch": 27.88, | |
| "learning_rate": 9.000000000000001e-07, | |
| "loss": 0.4175, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 27.96, | |
| "learning_rate": 8.875000000000001e-07, | |
| "loss": 0.4112, | |
| "step": 164500 | |
| }, | |
| { | |
| "epoch": 28.05, | |
| "learning_rate": 8.75e-07, | |
| "loss": 0.4087, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 28.05, | |
| "eval_loss": 0.5984655618667603, | |
| "eval_runtime": 27.6329, | |
| "eval_samples_per_second": 424.783, | |
| "eval_steps_per_second": 53.125, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 28.13, | |
| "learning_rate": 8.625e-07, | |
| "loss": 0.4147, | |
| "step": 165500 | |
| }, | |
| { | |
| "epoch": 28.22, | |
| "learning_rate": 8.500000000000001e-07, | |
| "loss": 0.4125, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 28.3, | |
| "learning_rate": 8.375000000000001e-07, | |
| "loss": 0.4117, | |
| "step": 166500 | |
| }, | |
| { | |
| "epoch": 28.39, | |
| "learning_rate": 8.250000000000001e-07, | |
| "loss": 0.4186, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 28.47, | |
| "learning_rate": 8.125000000000001e-07, | |
| "loss": 0.4056, | |
| "step": 167500 | |
| }, | |
| { | |
| "epoch": 28.56, | |
| "learning_rate": 8.000000000000001e-07, | |
| "loss": 0.4177, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 28.64, | |
| "learning_rate": 7.875000000000001e-07, | |
| "loss": 0.414, | |
| "step": 168500 | |
| }, | |
| { | |
| "epoch": 28.73, | |
| "learning_rate": 7.750000000000001e-07, | |
| "loss": 0.4147, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 28.81, | |
| "learning_rate": 7.625e-07, | |
| "loss": 0.4134, | |
| "step": 169500 | |
| }, | |
| { | |
| "epoch": 28.9, | |
| "learning_rate": 7.5e-07, | |
| "loss": 0.4219, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 28.9, | |
| "eval_loss": 0.5966935157775879, | |
| "eval_runtime": 27.6605, | |
| "eval_samples_per_second": 424.36, | |
| "eval_steps_per_second": 53.072, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 28.98, | |
| "learning_rate": 7.375e-07, | |
| "loss": 0.4117, | |
| "step": 170500 | |
| }, | |
| { | |
| "epoch": 29.07, | |
| "learning_rate": 7.25e-07, | |
| "loss": 0.409, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 29.15, | |
| "learning_rate": 7.125e-07, | |
| "loss": 0.4138, | |
| "step": 171500 | |
| }, | |
| { | |
| "epoch": 29.24, | |
| "learning_rate": 7.000000000000001e-07, | |
| "loss": 0.4104, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 29.32, | |
| "learning_rate": 6.875000000000001e-07, | |
| "loss": 0.4015, | |
| "step": 172500 | |
| }, | |
| { | |
| "epoch": 29.41, | |
| "learning_rate": 6.750000000000001e-07, | |
| "loss": 0.4104, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 29.49, | |
| "learning_rate": 6.625000000000001e-07, | |
| "loss": 0.4053, | |
| "step": 173500 | |
| }, | |
| { | |
| "epoch": 29.58, | |
| "learning_rate": 6.5e-07, | |
| "loss": 0.4163, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 29.66, | |
| "learning_rate": 6.375e-07, | |
| "loss": 0.4124, | |
| "step": 174500 | |
| }, | |
| { | |
| "epoch": 29.75, | |
| "learning_rate": 6.25e-07, | |
| "loss": 0.411, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 29.75, | |
| "eval_loss": 0.5960233211517334, | |
| "eval_runtime": 27.9317, | |
| "eval_samples_per_second": 420.239, | |
| "eval_steps_per_second": 52.557, | |
| "step": 175000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 200000, | |
| "num_train_epochs": 34, | |
| "save_steps": 5000, | |
| "total_flos": 9.1304017790976e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |