| { | |
| "best_metric": 1.341736078262329, | |
| "best_model_checkpoint": "./dual/flan-t5-base-dual/checkpoint-52010", | |
| "epoch": 10.0, | |
| "global_step": 52010, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.951932320707556e-05, | |
| "loss": 2.0525, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.903864641415113e-05, | |
| "loss": 1.9359, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.855796962122669e-05, | |
| "loss": 1.892, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.807729282830225e-05, | |
| "loss": 1.8455, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.7596616035377816e-05, | |
| "loss": 1.8273, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.7115939242453377e-05, | |
| "loss": 1.8317, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.663526244952894e-05, | |
| "loss": 1.8103, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.61545856566045e-05, | |
| "loss": 1.8032, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.5673908863680064e-05, | |
| "loss": 1.7842, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.5193232070755624e-05, | |
| "loss": 1.7674, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 16.8, | |
| "eval_loss": 1.4587862491607666, | |
| "eval_rouge1": 43.5356, | |
| "eval_rouge2": 25.8338, | |
| "eval_rougeL": 41.1764, | |
| "eval_rougeLsum": 41.816, | |
| "eval_runtime": 629.4032, | |
| "eval_samples_per_second": 16.524, | |
| "eval_steps_per_second": 1.033, | |
| "step": 5201 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.4712555277831184e-05, | |
| "loss": 1.7405, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.423187848490675e-05, | |
| "loss": 1.7135, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 4.375120169198231e-05, | |
| "loss": 1.7161, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.327052489905787e-05, | |
| "loss": 1.7208, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 4.278984810613344e-05, | |
| "loss": 1.6999, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 4.2309171313209e-05, | |
| "loss": 1.694, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 4.182849452028456e-05, | |
| "loss": 1.6837, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 4.1347817727360125e-05, | |
| "loss": 1.6892, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 4.0867140934435685e-05, | |
| "loss": 1.6811, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 4.038646414151125e-05, | |
| "loss": 1.7004, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gen_len": 16.87192307692308, | |
| "eval_loss": 1.4108598232269287, | |
| "eval_rouge1": 44.1953, | |
| "eval_rouge2": 26.6443, | |
| "eval_rougeL": 41.7387, | |
| "eval_rougeLsum": 42.3745, | |
| "eval_runtime": 674.3077, | |
| "eval_samples_per_second": 15.423, | |
| "eval_steps_per_second": 0.964, | |
| "step": 10402 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 3.990578734858681e-05, | |
| "loss": 1.6572, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.942511055566238e-05, | |
| "loss": 1.6449, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 3.894443376273794e-05, | |
| "loss": 1.6235, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 3.84637569698135e-05, | |
| "loss": 1.6573, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.7983080176889066e-05, | |
| "loss": 1.6262, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 3.7502403383964626e-05, | |
| "loss": 1.6377, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 3.7021726591040186e-05, | |
| "loss": 1.6407, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 3.654104979811575e-05, | |
| "loss": 1.6343, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.606037300519131e-05, | |
| "loss": 1.6221, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 3.557969621226687e-05, | |
| "loss": 1.6127, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.509901941934244e-05, | |
| "loss": 1.622, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_gen_len": 16.911923076923078, | |
| "eval_loss": 1.387160301208496, | |
| "eval_rouge1": 44.6617, | |
| "eval_rouge2": 27.2456, | |
| "eval_rougeL": 42.2185, | |
| "eval_rougeLsum": 42.8309, | |
| "eval_runtime": 678.3822, | |
| "eval_samples_per_second": 15.331, | |
| "eval_steps_per_second": 0.958, | |
| "step": 15603 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 3.4618342626418e-05, | |
| "loss": 1.5886, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 3.413766583349356e-05, | |
| "loss": 1.5913, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 3.365698904056912e-05, | |
| "loss": 1.5693, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 3.317631224764469e-05, | |
| "loss": 1.5781, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 3.269563545472025e-05, | |
| "loss": 1.5944, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 3.221495866179581e-05, | |
| "loss": 1.5671, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 3.1734281868871374e-05, | |
| "loss": 1.5979, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 3.1253605075946935e-05, | |
| "loss": 1.6014, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 3.0772928283022495e-05, | |
| "loss": 1.5929, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 3.029225149009806e-05, | |
| "loss": 1.5822, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_gen_len": 16.976153846153846, | |
| "eval_loss": 1.3675929307937622, | |
| "eval_rouge1": 44.7885, | |
| "eval_rouge2": 27.4914, | |
| "eval_rougeL": 42.3527, | |
| "eval_rougeLsum": 42.9959, | |
| "eval_runtime": 672.7849, | |
| "eval_samples_per_second": 15.458, | |
| "eval_steps_per_second": 0.966, | |
| "step": 20804 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 2.981157469717362e-05, | |
| "loss": 1.5658, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 2.9330897904249182e-05, | |
| "loss": 1.5656, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 2.885022111132475e-05, | |
| "loss": 1.5643, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 2.836954431840031e-05, | |
| "loss": 1.5484, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 2.788886752547587e-05, | |
| "loss": 1.5504, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 2.7408190732551436e-05, | |
| "loss": 1.5546, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 2.6927513939626996e-05, | |
| "loss": 1.556, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 4.71, | |
| "learning_rate": 2.644683714670256e-05, | |
| "loss": 1.5448, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 2.596616035377812e-05, | |
| "loss": 1.5519, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 2.5485483560853686e-05, | |
| "loss": 1.5606, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 2.5004806767929246e-05, | |
| "loss": 1.5541, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_gen_len": 17.028846153846153, | |
| "eval_loss": 1.3574897050857544, | |
| "eval_rouge1": 44.7589, | |
| "eval_rouge2": 27.4697, | |
| "eval_rougeL": 42.3549, | |
| "eval_rougeLsum": 42.9704, | |
| "eval_runtime": 665.8005, | |
| "eval_samples_per_second": 15.62, | |
| "eval_steps_per_second": 0.976, | |
| "step": 26005 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 2.452412997500481e-05, | |
| "loss": 1.5154, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 2.404345318208037e-05, | |
| "loss": 1.5163, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 2.3562776389155933e-05, | |
| "loss": 1.5176, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 5.38, | |
| "learning_rate": 2.3082099596231497e-05, | |
| "loss": 1.5293, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 2.2601422803307057e-05, | |
| "loss": 1.5237, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 2.212074601038262e-05, | |
| "loss": 1.5422, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 2.164006921745818e-05, | |
| "loss": 1.5309, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 2.1159392424533744e-05, | |
| "loss": 1.5296, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 2.0678715631609308e-05, | |
| "loss": 1.5137, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "learning_rate": 2.0198038838684868e-05, | |
| "loss": 1.5116, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_gen_len": 16.991923076923076, | |
| "eval_loss": 1.3511042594909668, | |
| "eval_rouge1": 45.0017, | |
| "eval_rouge2": 27.6906, | |
| "eval_rougeL": 42.5834, | |
| "eval_rougeLsum": 43.2073, | |
| "eval_runtime": 658.0426, | |
| "eval_samples_per_second": 15.804, | |
| "eval_steps_per_second": 0.988, | |
| "step": 31206 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 1.971736204576043e-05, | |
| "loss": 1.5018, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 6.15, | |
| "learning_rate": 1.9236685252835995e-05, | |
| "loss": 1.5037, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 1.8756008459911555e-05, | |
| "loss": 1.4991, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 1.8275331666987118e-05, | |
| "loss": 1.4977, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "learning_rate": 1.779465487406268e-05, | |
| "loss": 1.5024, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 1.7313978081138242e-05, | |
| "loss": 1.5043, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 1.6833301288213805e-05, | |
| "loss": 1.506, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 1.635262449528937e-05, | |
| "loss": 1.497, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 1.5871947702364932e-05, | |
| "loss": 1.5132, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 1.5391270909440492e-05, | |
| "loss": 1.5079, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_gen_len": 16.982019230769232, | |
| "eval_loss": 1.347075343132019, | |
| "eval_rouge1": 44.9759, | |
| "eval_rouge2": 27.7179, | |
| "eval_rougeL": 42.5719, | |
| "eval_rougeLsum": 43.1803, | |
| "eval_runtime": 667.8543, | |
| "eval_samples_per_second": 15.572, | |
| "eval_steps_per_second": 0.973, | |
| "step": 36407 | |
| }, | |
| { | |
| "epoch": 7.02, | |
| "learning_rate": 1.4910594116516054e-05, | |
| "loss": 1.5017, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 1.4429917323591618e-05, | |
| "loss": 1.4946, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 7.21, | |
| "learning_rate": 1.394924053066718e-05, | |
| "loss": 1.4941, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 1.3468563737742743e-05, | |
| "loss": 1.5029, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "learning_rate": 1.2987886944818307e-05, | |
| "loss": 1.4855, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 1.2507210151893867e-05, | |
| "loss": 1.4726, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 1.202653335896943e-05, | |
| "loss": 1.4687, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "learning_rate": 1.1545856566044992e-05, | |
| "loss": 1.4915, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 1.1065179773120554e-05, | |
| "loss": 1.4793, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "learning_rate": 1.0584502980196116e-05, | |
| "loss": 1.4818, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 1.0103826187271679e-05, | |
| "loss": 1.4771, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_gen_len": 16.986923076923077, | |
| "eval_loss": 1.3443900346755981, | |
| "eval_rouge1": 45.2057, | |
| "eval_rouge2": 27.9779, | |
| "eval_rougeL": 42.7648, | |
| "eval_rougeLsum": 43.3885, | |
| "eval_runtime": 559.194, | |
| "eval_samples_per_second": 18.598, | |
| "eval_steps_per_second": 1.162, | |
| "step": 41608 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 9.623149394347242e-06, | |
| "loss": 1.4658, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 9.142472601422804e-06, | |
| "loss": 1.469, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 8.661795808498366e-06, | |
| "loss": 1.4966, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "learning_rate": 8.181119015573928e-06, | |
| "loss": 1.4691, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "learning_rate": 7.700442222649491e-06, | |
| "loss": 1.4767, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 7.219765429725053e-06, | |
| "loss": 1.4875, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "learning_rate": 6.739088636800615e-06, | |
| "loss": 1.473, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "learning_rate": 6.258411843876178e-06, | |
| "loss": 1.4826, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "learning_rate": 5.77773505095174e-06, | |
| "loss": 1.4683, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "learning_rate": 5.297058258027303e-06, | |
| "loss": 1.4691, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_gen_len": 16.951634615384616, | |
| "eval_loss": 1.3431836366653442, | |
| "eval_rouge1": 45.197, | |
| "eval_rouge2": 27.8923, | |
| "eval_rougeL": 42.7387, | |
| "eval_rougeLsum": 43.3577, | |
| "eval_runtime": 562.3582, | |
| "eval_samples_per_second": 18.494, | |
| "eval_steps_per_second": 1.156, | |
| "step": 46809 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "learning_rate": 4.816381465102865e-06, | |
| "loss": 1.4663, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 4.335704672178427e-06, | |
| "loss": 1.4658, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "learning_rate": 3.85502787925399e-06, | |
| "loss": 1.4679, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "learning_rate": 3.3743510863295526e-06, | |
| "loss": 1.4573, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 9.42, | |
| "learning_rate": 2.8936742934051144e-06, | |
| "loss": 1.465, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 2.412997500480677e-06, | |
| "loss": 1.4582, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 9.61, | |
| "learning_rate": 1.9323207075562393e-06, | |
| "loss": 1.4736, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 1.4516439146318017e-06, | |
| "loss": 1.4768, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "learning_rate": 9.70967121707364e-07, | |
| "loss": 1.4634, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "learning_rate": 4.902903287829264e-07, | |
| "loss": 1.4832, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 9.613535858488752e-09, | |
| "loss": 1.4719, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_gen_len": 16.988557692307694, | |
| "eval_loss": 1.341736078262329, | |
| "eval_rouge1": 45.2143, | |
| "eval_rouge2": 27.9673, | |
| "eval_rougeL": 42.7712, | |
| "eval_rougeLsum": 43.3892, | |
| "eval_runtime": 603.039, | |
| "eval_samples_per_second": 17.246, | |
| "eval_steps_per_second": 1.078, | |
| "step": 52010 | |
| } | |
| ], | |
| "max_steps": 52010, | |
| "num_train_epochs": 10, | |
| "total_flos": 5.697455075308339e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |