| { |
| "best_metric": 0.8833928108215332, |
| "best_model_checkpoint": "/gscratch/xlab/hallisky/rewriting/src/models/nontoxic/bart-base_2.5e-06_0_48_jigsaw_randmask/checkpoint-95000", |
| "epoch": 4.742033383915023, |
| "global_step": 100000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.4875000000000003e-06, |
| "loss": 0.9804, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 2.475e-06, |
| "loss": 0.9795, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 2.4625e-06, |
| "loss": 0.98, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2.4500000000000003e-06, |
| "loss": 0.9784, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 2.4375e-06, |
| "loss": 0.973, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 2.425e-06, |
| "loss": 0.9785, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.4125e-06, |
| "loss": 0.9749, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": 0.9747, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 2.3875e-06, |
| "loss": 0.9733, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 2.375e-06, |
| "loss": 0.97, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.24, |
| "eval_loss": 0.8958264589309692, |
| "eval_runtime": 1541.2819, |
| "eval_samples_per_second": 163.861, |
| "eval_steps_per_second": 1.707, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 2.3625000000000003e-06, |
| "loss": 0.9801, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 2.35e-06, |
| "loss": 0.9836, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2.3375000000000005e-06, |
| "loss": 0.9813, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 2.325e-06, |
| "loss": 0.9834, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 2.3125000000000003e-06, |
| "loss": 0.9814, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 2.3000000000000004e-06, |
| "loss": 0.9809, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 2.2875e-06, |
| "loss": 0.9777, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 2.2750000000000002e-06, |
| "loss": 0.9786, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 2.2625000000000004e-06, |
| "loss": 0.979, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 2.25e-06, |
| "loss": 0.9776, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.47, |
| "eval_loss": 0.8941081762313843, |
| "eval_runtime": 1537.7822, |
| "eval_samples_per_second": 164.234, |
| "eval_steps_per_second": 1.711, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 2.2375e-06, |
| "loss": 0.9787, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 2.2250000000000003e-06, |
| "loss": 0.9791, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 2.2125e-06, |
| "loss": 0.9793, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 2.2e-06, |
| "loss": 0.9782, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 2.1875000000000002e-06, |
| "loss": 0.9753, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 2.1750000000000004e-06, |
| "loss": 0.98, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 2.1625e-06, |
| "loss": 0.9804, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 2.15e-06, |
| "loss": 0.9785, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 2.1375000000000003e-06, |
| "loss": 0.9771, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 2.125e-06, |
| "loss": 0.9746, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.71, |
| "eval_loss": 0.8944218158721924, |
| "eval_runtime": 1538.3111, |
| "eval_samples_per_second": 164.177, |
| "eval_steps_per_second": 1.71, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 2.1125e-06, |
| "loss": 0.9802, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.1000000000000002e-06, |
| "loss": 0.9711, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.0875e-06, |
| "loss": 0.9759, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 2.075e-06, |
| "loss": 0.9761, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 2.0625e-06, |
| "loss": 0.9745, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 2.05e-06, |
| "loss": 0.9752, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 2.0375e-06, |
| "loss": 0.9787, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 2.025e-06, |
| "loss": 0.9777, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 2.0125000000000002e-06, |
| "loss": 0.975, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.9794, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.95, |
| "eval_loss": 0.8913277983665466, |
| "eval_runtime": 1541.5464, |
| "eval_samples_per_second": 163.833, |
| "eval_steps_per_second": 1.707, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 1.9875000000000005e-06, |
| "loss": 0.973, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.975e-06, |
| "loss": 0.9733, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 1.9625000000000003e-06, |
| "loss": 0.974, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 1.9500000000000004e-06, |
| "loss": 0.9747, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 1.9375e-06, |
| "loss": 0.976, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 1.925e-06, |
| "loss": 0.9723, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 1.9125000000000003e-06, |
| "loss": 0.971, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 1.9000000000000002e-06, |
| "loss": 0.9753, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 1.8875000000000001e-06, |
| "loss": 0.972, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 1.8750000000000003e-06, |
| "loss": 0.9762, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_loss": 0.8904610872268677, |
| "eval_runtime": 1541.2728, |
| "eval_samples_per_second": 163.862, |
| "eval_steps_per_second": 1.707, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 1.8625000000000002e-06, |
| "loss": 0.9687, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 1.85e-06, |
| "loss": 0.9702, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 1.8375000000000002e-06, |
| "loss": 0.9728, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 1.825e-06, |
| "loss": 0.9764, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 1.8125e-06, |
| "loss": 0.9696, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 0.9704, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 1.7875e-06, |
| "loss": 0.9706, |
| "step": 28500 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 1.7750000000000002e-06, |
| "loss": 0.9715, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 1.7625e-06, |
| "loss": 0.9721, |
| "step": 29500 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 1.75e-06, |
| "loss": 0.9735, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.42, |
| "eval_loss": 0.8899921178817749, |
| "eval_runtime": 1541.1232, |
| "eval_samples_per_second": 163.878, |
| "eval_steps_per_second": 1.707, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 1.7375e-06, |
| "loss": 0.9677, |
| "step": 30500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 1.725e-06, |
| "loss": 0.9704, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 1.7125000000000003e-06, |
| "loss": 0.9744, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 1.7000000000000002e-06, |
| "loss": 0.9711, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 1.6875000000000001e-06, |
| "loss": 0.9695, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 1.6750000000000003e-06, |
| "loss": 0.9678, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 1.6625000000000002e-06, |
| "loss": 0.9701, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 1.6500000000000003e-06, |
| "loss": 0.9668, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 1.6375000000000002e-06, |
| "loss": 0.972, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 1.6250000000000001e-06, |
| "loss": 0.9681, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_loss": 0.8896079063415527, |
| "eval_runtime": 1541.3892, |
| "eval_samples_per_second": 163.85, |
| "eval_steps_per_second": 1.707, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 1.6125000000000002e-06, |
| "loss": 0.9705, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 1.6000000000000001e-06, |
| "loss": 0.9696, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 1.5875e-06, |
| "loss": 0.9714, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 1.5750000000000002e-06, |
| "loss": 0.9658, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 1.5625e-06, |
| "loss": 0.9692, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 1.5500000000000002e-06, |
| "loss": 0.9726, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 1.5375e-06, |
| "loss": 0.9695, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 1.525e-06, |
| "loss": 0.9677, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.5125000000000001e-06, |
| "loss": 0.9694, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 1.5e-06, |
| "loss": 0.9665, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.9, |
| "eval_loss": 0.8887319564819336, |
| "eval_runtime": 1541.0388, |
| "eval_samples_per_second": 163.887, |
| "eval_steps_per_second": 1.707, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.4875000000000002e-06, |
| "loss": 0.9686, |
| "step": 40500 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.475e-06, |
| "loss": 0.9721, |
| "step": 41000 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.4625e-06, |
| "loss": 0.9696, |
| "step": 41500 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.45e-06, |
| "loss": 0.9684, |
| "step": 42000 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.4375e-06, |
| "loss": 0.969, |
| "step": 42500 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.425e-06, |
| "loss": 0.9688, |
| "step": 43000 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.4125e-06, |
| "loss": 0.9655, |
| "step": 43500 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.4000000000000001e-06, |
| "loss": 0.9683, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.3875000000000003e-06, |
| "loss": 0.9692, |
| "step": 44500 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.3750000000000002e-06, |
| "loss": 0.9683, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.13, |
| "eval_loss": 0.8875691294670105, |
| "eval_runtime": 1540.806, |
| "eval_samples_per_second": 163.912, |
| "eval_steps_per_second": 1.708, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.3625000000000003e-06, |
| "loss": 0.9683, |
| "step": 45500 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.3500000000000002e-06, |
| "loss": 0.965, |
| "step": 46000 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.3375000000000001e-06, |
| "loss": 0.9647, |
| "step": 46500 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1.3250000000000002e-06, |
| "loss": 0.9674, |
| "step": 47000 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.3125000000000001e-06, |
| "loss": 0.9666, |
| "step": 47500 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1.3e-06, |
| "loss": 0.965, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.2875000000000002e-06, |
| "loss": 0.9685, |
| "step": 48500 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.275e-06, |
| "loss": 0.9702, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 1.2625000000000002e-06, |
| "loss": 0.9664, |
| "step": 49500 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.25e-06, |
| "loss": 0.964, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.37, |
| "eval_loss": 0.8874109387397766, |
| "eval_runtime": 1540.0854, |
| "eval_samples_per_second": 163.988, |
| "eval_steps_per_second": 1.708, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.2375e-06, |
| "loss": 0.9699, |
| "step": 50500 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.2250000000000001e-06, |
| "loss": 0.9707, |
| "step": 51000 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.2125e-06, |
| "loss": 0.9689, |
| "step": 51500 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": 0.9702, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 1.1875e-06, |
| "loss": 0.9672, |
| "step": 52500 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 1.175e-06, |
| "loss": 0.9636, |
| "step": 53000 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 1.1625e-06, |
| "loss": 0.967, |
| "step": 53500 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 1.1500000000000002e-06, |
| "loss": 0.9655, |
| "step": 54000 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 1.1375000000000001e-06, |
| "loss": 0.9647, |
| "step": 54500 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 1.125e-06, |
| "loss": 0.9658, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.61, |
| "eval_loss": 0.8865290284156799, |
| "eval_runtime": 1540.5867, |
| "eval_samples_per_second": 163.935, |
| "eval_steps_per_second": 1.708, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 1.1125000000000001e-06, |
| "loss": 0.9661, |
| "step": 55500 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 1.1e-06, |
| "loss": 0.9662, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 1.0875000000000002e-06, |
| "loss": 0.9652, |
| "step": 56500 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 1.075e-06, |
| "loss": 0.9652, |
| "step": 57000 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 1.0625e-06, |
| "loss": 0.9644, |
| "step": 57500 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 1.0500000000000001e-06, |
| "loss": 0.967, |
| "step": 58000 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 1.0375e-06, |
| "loss": 0.9659, |
| "step": 58500 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 1.025e-06, |
| "loss": 0.9665, |
| "step": 59000 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 1.0125e-06, |
| "loss": 0.967, |
| "step": 59500 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.9672, |
| "step": 60000 |
| }, |
| { |
| "epoch": 2.85, |
| "eval_loss": 0.8857579827308655, |
| "eval_runtime": 1540.558, |
| "eval_samples_per_second": 163.938, |
| "eval_steps_per_second": 1.708, |
| "step": 60000 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 9.875e-07, |
| "loss": 0.9652, |
| "step": 60500 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 9.750000000000002e-07, |
| "loss": 0.9662, |
| "step": 61000 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 9.625e-07, |
| "loss": 0.9679, |
| "step": 61500 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 9.500000000000001e-07, |
| "loss": 0.961, |
| "step": 62000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 9.375000000000001e-07, |
| "loss": 0.9635, |
| "step": 62500 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 9.25e-07, |
| "loss": 0.9663, |
| "step": 63000 |
| }, |
| { |
| "epoch": 3.01, |
| "learning_rate": 9.125e-07, |
| "loss": 0.9643, |
| "step": 63500 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 9.000000000000001e-07, |
| "loss": 0.964, |
| "step": 64000 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 8.875000000000001e-07, |
| "loss": 0.9653, |
| "step": 64500 |
| }, |
| { |
| "epoch": 3.08, |
| "learning_rate": 8.75e-07, |
| "loss": 0.9612, |
| "step": 65000 |
| }, |
| { |
| "epoch": 3.08, |
| "eval_loss": 0.8849959969520569, |
| "eval_runtime": 1533.6138, |
| "eval_samples_per_second": 164.68, |
| "eval_steps_per_second": 1.716, |
| "step": 65000 |
| }, |
| { |
| "epoch": 3.11, |
| "learning_rate": 8.625e-07, |
| "loss": 0.9634, |
| "step": 65500 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 8.500000000000001e-07, |
| "loss": 0.9611, |
| "step": 66000 |
| }, |
| { |
| "epoch": 3.15, |
| "learning_rate": 8.375000000000001e-07, |
| "loss": 0.9646, |
| "step": 66500 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 8.250000000000001e-07, |
| "loss": 0.965, |
| "step": 67000 |
| }, |
| { |
| "epoch": 3.2, |
| "learning_rate": 8.125000000000001e-07, |
| "loss": 0.9639, |
| "step": 67500 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 8.000000000000001e-07, |
| "loss": 0.9649, |
| "step": 68000 |
| }, |
| { |
| "epoch": 3.25, |
| "learning_rate": 7.875000000000001e-07, |
| "loss": 0.9636, |
| "step": 68500 |
| }, |
| { |
| "epoch": 3.27, |
| "learning_rate": 7.750000000000001e-07, |
| "loss": 0.9657, |
| "step": 69000 |
| }, |
| { |
| "epoch": 3.3, |
| "learning_rate": 7.625e-07, |
| "loss": 0.9696, |
| "step": 69500 |
| }, |
| { |
| "epoch": 3.32, |
| "learning_rate": 7.5e-07, |
| "loss": 0.9635, |
| "step": 70000 |
| }, |
| { |
| "epoch": 3.32, |
| "eval_loss": 0.8842886686325073, |
| "eval_runtime": 1540.0999, |
| "eval_samples_per_second": 163.987, |
| "eval_steps_per_second": 1.708, |
| "step": 70000 |
| }, |
| { |
| "epoch": 3.34, |
| "learning_rate": 7.375e-07, |
| "loss": 0.9658, |
| "step": 70500 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 7.25e-07, |
| "loss": 0.9666, |
| "step": 71000 |
| }, |
| { |
| "epoch": 3.39, |
| "learning_rate": 7.125e-07, |
| "loss": 0.9625, |
| "step": 71500 |
| }, |
| { |
| "epoch": 3.41, |
| "learning_rate": 7.000000000000001e-07, |
| "loss": 0.964, |
| "step": 72000 |
| }, |
| { |
| "epoch": 3.44, |
| "learning_rate": 6.875000000000001e-07, |
| "loss": 0.9612, |
| "step": 72500 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 6.750000000000001e-07, |
| "loss": 0.9647, |
| "step": 73000 |
| }, |
| { |
| "epoch": 3.49, |
| "learning_rate": 6.625000000000001e-07, |
| "loss": 0.9677, |
| "step": 73500 |
| }, |
| { |
| "epoch": 3.51, |
| "learning_rate": 6.5e-07, |
| "loss": 0.9636, |
| "step": 74000 |
| }, |
| { |
| "epoch": 3.53, |
| "learning_rate": 6.375e-07, |
| "loss": 0.9667, |
| "step": 74500 |
| }, |
| { |
| "epoch": 3.56, |
| "learning_rate": 6.25e-07, |
| "loss": 0.9648, |
| "step": 75000 |
| }, |
| { |
| "epoch": 3.56, |
| "eval_loss": 0.8849769234657288, |
| "eval_runtime": 1539.7241, |
| "eval_samples_per_second": 164.027, |
| "eval_steps_per_second": 1.709, |
| "step": 75000 |
| }, |
| { |
| "epoch": 3.58, |
| "learning_rate": 6.125000000000001e-07, |
| "loss": 0.9631, |
| "step": 75500 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 0.968, |
| "step": 76000 |
| }, |
| { |
| "epoch": 3.63, |
| "learning_rate": 5.875e-07, |
| "loss": 0.9626, |
| "step": 76500 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 5.750000000000001e-07, |
| "loss": 0.9656, |
| "step": 77000 |
| }, |
| { |
| "epoch": 3.68, |
| "learning_rate": 5.625e-07, |
| "loss": 0.964, |
| "step": 77500 |
| }, |
| { |
| "epoch": 3.7, |
| "learning_rate": 5.5e-07, |
| "loss": 0.9668, |
| "step": 78000 |
| }, |
| { |
| "epoch": 3.72, |
| "learning_rate": 5.375e-07, |
| "loss": 0.96, |
| "step": 78500 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 5.250000000000001e-07, |
| "loss": 0.9662, |
| "step": 79000 |
| }, |
| { |
| "epoch": 3.77, |
| "learning_rate": 5.125e-07, |
| "loss": 0.9636, |
| "step": 79500 |
| }, |
| { |
| "epoch": 3.79, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": 0.9614, |
| "step": 80000 |
| }, |
| { |
| "epoch": 3.79, |
| "eval_loss": 0.8838475942611694, |
| "eval_runtime": 1537.6849, |
| "eval_samples_per_second": 164.244, |
| "eval_steps_per_second": 1.711, |
| "step": 80000 |
| }, |
| { |
| "epoch": 3.82, |
| "learning_rate": 4.875000000000001e-07, |
| "loss": 0.9617, |
| "step": 80500 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 4.7500000000000006e-07, |
| "loss": 0.9607, |
| "step": 81000 |
| }, |
| { |
| "epoch": 3.86, |
| "learning_rate": 4.625e-07, |
| "loss": 0.9638, |
| "step": 81500 |
| }, |
| { |
| "epoch": 3.89, |
| "learning_rate": 4.5000000000000003e-07, |
| "loss": 0.9628, |
| "step": 82000 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 4.375e-07, |
| "loss": 0.9653, |
| "step": 82500 |
| }, |
| { |
| "epoch": 3.94, |
| "learning_rate": 4.2500000000000006e-07, |
| "loss": 0.9629, |
| "step": 83000 |
| }, |
| { |
| "epoch": 3.96, |
| "learning_rate": 4.125000000000001e-07, |
| "loss": 0.9628, |
| "step": 83500 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 4.0000000000000003e-07, |
| "loss": 0.9671, |
| "step": 84000 |
| }, |
| { |
| "epoch": 4.01, |
| "learning_rate": 3.8750000000000005e-07, |
| "loss": 0.9621, |
| "step": 84500 |
| }, |
| { |
| "epoch": 4.03, |
| "learning_rate": 3.75e-07, |
| "loss": 0.9647, |
| "step": 85000 |
| }, |
| { |
| "epoch": 4.03, |
| "eval_loss": 0.8842937350273132, |
| "eval_runtime": 1538.1328, |
| "eval_samples_per_second": 164.196, |
| "eval_steps_per_second": 1.711, |
| "step": 85000 |
| }, |
| { |
| "epoch": 4.05, |
| "learning_rate": 3.625e-07, |
| "loss": 0.9611, |
| "step": 85500 |
| }, |
| { |
| "epoch": 4.08, |
| "learning_rate": 3.5000000000000004e-07, |
| "loss": 0.9651, |
| "step": 86000 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 3.3750000000000005e-07, |
| "loss": 0.9619, |
| "step": 86500 |
| }, |
| { |
| "epoch": 4.13, |
| "learning_rate": 3.25e-07, |
| "loss": 0.9617, |
| "step": 87000 |
| }, |
| { |
| "epoch": 4.15, |
| "learning_rate": 3.125e-07, |
| "loss": 0.9609, |
| "step": 87500 |
| }, |
| { |
| "epoch": 4.17, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 0.9646, |
| "step": 88000 |
| }, |
| { |
| "epoch": 4.2, |
| "learning_rate": 2.8750000000000005e-07, |
| "loss": 0.9603, |
| "step": 88500 |
| }, |
| { |
| "epoch": 4.22, |
| "learning_rate": 2.75e-07, |
| "loss": 0.9637, |
| "step": 89000 |
| }, |
| { |
| "epoch": 4.24, |
| "learning_rate": 2.6250000000000003e-07, |
| "loss": 0.9626, |
| "step": 89500 |
| }, |
| { |
| "epoch": 4.27, |
| "learning_rate": 2.5000000000000004e-07, |
| "loss": 0.9612, |
| "step": 90000 |
| }, |
| { |
| "epoch": 4.27, |
| "eval_loss": 0.8842394351959229, |
| "eval_runtime": 1537.8664, |
| "eval_samples_per_second": 164.225, |
| "eval_steps_per_second": 1.711, |
| "step": 90000 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 2.3750000000000003e-07, |
| "loss": 0.9641, |
| "step": 90500 |
| }, |
| { |
| "epoch": 4.32, |
| "learning_rate": 2.2500000000000002e-07, |
| "loss": 0.9657, |
| "step": 91000 |
| }, |
| { |
| "epoch": 4.34, |
| "learning_rate": 2.1250000000000003e-07, |
| "loss": 0.9643, |
| "step": 91500 |
| }, |
| { |
| "epoch": 4.36, |
| "learning_rate": 2.0000000000000002e-07, |
| "loss": 0.9611, |
| "step": 92000 |
| }, |
| { |
| "epoch": 4.39, |
| "learning_rate": 1.875e-07, |
| "loss": 0.9652, |
| "step": 92500 |
| }, |
| { |
| "epoch": 4.41, |
| "learning_rate": 1.7500000000000002e-07, |
| "loss": 0.9624, |
| "step": 93000 |
| }, |
| { |
| "epoch": 4.43, |
| "learning_rate": 1.625e-07, |
| "loss": 0.9645, |
| "step": 93500 |
| }, |
| { |
| "epoch": 4.46, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 0.9679, |
| "step": 94000 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 1.375e-07, |
| "loss": 0.9616, |
| "step": 94500 |
| }, |
| { |
| "epoch": 4.5, |
| "learning_rate": 1.2500000000000002e-07, |
| "loss": 0.9654, |
| "step": 95000 |
| }, |
| { |
| "epoch": 4.5, |
| "eval_loss": 0.8833928108215332, |
| "eval_runtime": 1537.0843, |
| "eval_samples_per_second": 164.308, |
| "eval_steps_per_second": 1.712, |
| "step": 95000 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 1.1250000000000001e-07, |
| "loss": 0.9615, |
| "step": 95500 |
| }, |
| { |
| "epoch": 4.55, |
| "learning_rate": 1.0000000000000001e-07, |
| "loss": 0.966, |
| "step": 96000 |
| }, |
| { |
| "epoch": 4.58, |
| "learning_rate": 8.750000000000001e-08, |
| "loss": 0.9694, |
| "step": 96500 |
| }, |
| { |
| "epoch": 4.6, |
| "learning_rate": 7.500000000000001e-08, |
| "loss": 0.9639, |
| "step": 97000 |
| }, |
| { |
| "epoch": 4.62, |
| "learning_rate": 6.250000000000001e-08, |
| "loss": 0.9604, |
| "step": 97500 |
| }, |
| { |
| "epoch": 4.65, |
| "learning_rate": 5.0000000000000004e-08, |
| "loss": 0.9574, |
| "step": 98000 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 3.7500000000000005e-08, |
| "loss": 0.9623, |
| "step": 98500 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 2.5000000000000002e-08, |
| "loss": 0.9616, |
| "step": 99000 |
| }, |
| { |
| "epoch": 4.72, |
| "learning_rate": 1.2500000000000001e-08, |
| "loss": 0.9659, |
| "step": 99500 |
| }, |
| { |
| "epoch": 4.74, |
| "learning_rate": 0.0, |
| "loss": 0.9678, |
| "step": 100000 |
| }, |
| { |
| "epoch": 4.74, |
| "eval_loss": 0.884353518486023, |
| "eval_runtime": 1538.109, |
| "eval_samples_per_second": 164.199, |
| "eval_steps_per_second": 1.711, |
| "step": 100000 |
| } |
| ], |
| "max_steps": 100000, |
| "num_train_epochs": 5, |
| "total_flos": 5.71620307894272e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|