| { |
| "best_metric": 0.8446398659966499, |
| "best_model_checkpoint": "../outputs/selector-flant5-large/checkpoint-2000", |
| "epoch": 9.578544061302683, |
| "eval_steps": 50, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.04, |
| "learning_rate": 4.980842911877395e-05, |
| "loss": 1.778, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.96168582375479e-05, |
| "loss": 0.7701, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.9425287356321845e-05, |
| "loss": 0.7086, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.9233716475095786e-05, |
| "loss": 0.6677, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.904214559386973e-05, |
| "loss": 0.6536, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_diversity": 0.9971524288107202, |
| "eval_diversity_acc_score": 0.4605775246977489, |
| "eval_gen_len": 7.812395309882747, |
| "eval_loss": 0.5738938450813293, |
| "eval_rouge1": 57.8899, |
| "eval_rouge2": 12.6047, |
| "eval_rougeL": 51.8718, |
| "eval_rougeLsum": 51.8989, |
| "eval_runtime": 259.011, |
| "eval_samples_per_second": 9.22, |
| "eval_steps_per_second": 0.29, |
| "eval_top1_acc": 0.2914572864321608, |
| "eval_top5_acc": 0.461892797319933, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 4.885057471264368e-05, |
| "loss": 0.6387, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.865900383141763e-05, |
| "loss": 0.6069, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.846743295019157e-05, |
| "loss": 0.5751, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.827586206896552e-05, |
| "loss": 0.5774, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.8084291187739464e-05, |
| "loss": 0.56, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.38, |
| "eval_diversity": 0.9974036850921274, |
| "eval_diversity_acc_score": 0.563023520730397, |
| "eval_gen_len": 8.209045226130653, |
| "eval_loss": 0.5000574588775635, |
| "eval_rouge1": 63.0013, |
| "eval_rouge2": 15.4523, |
| "eval_rougeL": 54.5133, |
| "eval_rougeLsum": 54.5485, |
| "eval_runtime": 258.5561, |
| "eval_samples_per_second": 9.236, |
| "eval_steps_per_second": 0.29, |
| "eval_top1_acc": 0.3655778894472362, |
| "eval_top5_acc": 0.5644891122278057, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.789272030651341e-05, |
| "loss": 0.5597, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.770114942528736e-05, |
| "loss": 0.5269, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.7509578544061307e-05, |
| "loss": 0.5391, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.7318007662835254e-05, |
| "loss": 0.5269, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.7126436781609195e-05, |
| "loss": 0.522, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_diversity": 0.9991624790619765, |
| "eval_diversity_acc_score": 0.6112966423406816, |
| "eval_gen_len": 9.055862646566164, |
| "eval_loss": 0.4494677782058716, |
| "eval_rouge1": 65.722, |
| "eval_rouge2": 18.757, |
| "eval_rougeL": 55.0562, |
| "eval_rougeLsum": 55.0696, |
| "eval_runtime": 269.4715, |
| "eval_samples_per_second": 8.862, |
| "eval_steps_per_second": 0.278, |
| "eval_top1_acc": 0.440536013400335, |
| "eval_top5_acc": 0.6118090452261307, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 4.693486590038315e-05, |
| "loss": 0.4861, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 4.674329501915709e-05, |
| "loss": 0.4875, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.655172413793104e-05, |
| "loss": 0.4616, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 4.6360153256704985e-05, |
| "loss": 0.4735, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.616858237547893e-05, |
| "loss": 0.4749, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_diversity": 0.9992462311557788, |
| "eval_diversity_acc_score": 0.6958737363534591, |
| "eval_gen_len": 8.375963149078727, |
| "eval_loss": 0.3974936604499817, |
| "eval_rouge1": 70.0759, |
| "eval_rouge2": 21.0497, |
| "eval_rougeL": 57.4729, |
| "eval_rougeLsum": 57.4872, |
| "eval_runtime": 257.1324, |
| "eval_samples_per_second": 9.287, |
| "eval_steps_per_second": 0.292, |
| "eval_top1_acc": 0.5238693467336684, |
| "eval_top5_acc": 0.6963986599664992, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 4.597701149425287e-05, |
| "loss": 0.4621, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 4.578544061302682e-05, |
| "loss": 0.4486, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 4.559386973180077e-05, |
| "loss": 0.4207, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 4.5402298850574716e-05, |
| "loss": 0.4358, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 4.5210727969348656e-05, |
| "loss": 0.4317, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.96, |
| "eval_diversity": 0.9990787269681742, |
| "eval_diversity_acc_score": 0.7442885491107127, |
| "eval_gen_len": 8.662730318257957, |
| "eval_loss": 0.36668309569358826, |
| "eval_rouge1": 72.4745, |
| "eval_rouge2": 22.2432, |
| "eval_rougeL": 58.5635, |
| "eval_rougeLsum": 58.5311, |
| "eval_runtime": 259.7656, |
| "eval_samples_per_second": 9.193, |
| "eval_steps_per_second": 0.289, |
| "eval_top1_acc": 0.5615577889447236, |
| "eval_top5_acc": 0.7449748743718593, |
| "step": 250 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.501915708812261e-05, |
| "loss": 0.3997, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 4.482758620689655e-05, |
| "loss": 0.4008, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 4.46360153256705e-05, |
| "loss": 0.4149, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 4.4444444444444447e-05, |
| "loss": 0.4011, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 4.4252873563218394e-05, |
| "loss": 0.4081, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.15, |
| "eval_diversity": 0.9973199329983251, |
| "eval_diversity_acc_score": 0.7530016077035093, |
| "eval_gen_len": 8.735092127303183, |
| "eval_loss": 0.3589172661304474, |
| "eval_rouge1": 73.4852, |
| "eval_rouge2": 24.0508, |
| "eval_rougeL": 59.7434, |
| "eval_rougeLsum": 59.7093, |
| "eval_runtime": 260.2817, |
| "eval_samples_per_second": 9.175, |
| "eval_steps_per_second": 0.288, |
| "eval_top1_acc": 0.5841708542713567, |
| "eval_top5_acc": 0.7550251256281407, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 4.406130268199234e-05, |
| "loss": 0.4031, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 4.386973180076628e-05, |
| "loss": 0.3817, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 4.367816091954024e-05, |
| "loss": 0.3675, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 4.348659003831418e-05, |
| "loss": 0.3735, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 4.3295019157088125e-05, |
| "loss": 0.3589, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_diversity": 0.9973199329983251, |
| "eval_diversity_acc_score": 0.7513310550519208, |
| "eval_gen_len": 8.730067001675042, |
| "eval_loss": 0.34788036346435547, |
| "eval_rouge1": 74.4994, |
| "eval_rouge2": 24.7767, |
| "eval_rougeL": 60.1356, |
| "eval_rougeLsum": 60.1576, |
| "eval_runtime": 259.0814, |
| "eval_samples_per_second": 9.217, |
| "eval_steps_per_second": 0.289, |
| "eval_top1_acc": 0.6072026800670016, |
| "eval_top5_acc": 0.7533500837520938, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 4.3103448275862066e-05, |
| "loss": 0.363, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 4.291187739463602e-05, |
| "loss": 0.3818, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 4.272030651340996e-05, |
| "loss": 0.3923, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 4.252873563218391e-05, |
| "loss": 0.3658, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 4.2337164750957856e-05, |
| "loss": 0.3577, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.53, |
| "eval_diversity": 0.996398659966499, |
| "eval_diversity_acc_score": 0.774420399035939, |
| "eval_gen_len": 8.874036850921273, |
| "eval_loss": 0.3251946270465851, |
| "eval_rouge1": 75.9238, |
| "eval_rouge2": 25.328, |
| "eval_rougeL": 61.0871, |
| "eval_rougeLsum": 61.0974, |
| "eval_runtime": 261.1155, |
| "eval_samples_per_second": 9.145, |
| "eval_steps_per_second": 0.287, |
| "eval_top1_acc": 0.6210217755443886, |
| "eval_top5_acc": 0.7772194304857621, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 4.21455938697318e-05, |
| "loss": 0.3783, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 4.195402298850575e-05, |
| "loss": 0.3702, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 4.17624521072797e-05, |
| "loss": 0.3672, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 4.1570881226053646e-05, |
| "loss": 0.3559, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 4.1379310344827587e-05, |
| "loss": 0.3505, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.72, |
| "eval_diversity": 0.9944723618090453, |
| "eval_diversity_acc_score": 0.7845837226332669, |
| "eval_gen_len": 8.663400335008376, |
| "eval_loss": 0.3230862319469452, |
| "eval_rouge1": 77.1227, |
| "eval_rouge2": 25.5898, |
| "eval_rougeL": 61.409, |
| "eval_rougeLsum": 61.3954, |
| "eval_runtime": 258.3273, |
| "eval_samples_per_second": 9.244, |
| "eval_steps_per_second": 0.29, |
| "eval_top1_acc": 0.6444723618090452, |
| "eval_top5_acc": 0.7889447236180904, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 4.1187739463601534e-05, |
| "loss": 0.3635, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 4.099616858237548e-05, |
| "loss": 0.3467, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 4.080459770114943e-05, |
| "loss": 0.3584, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 4.061302681992337e-05, |
| "loss": 0.3612, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 4.0421455938697324e-05, |
| "loss": 0.3481, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_diversity": 0.9920435510887772, |
| "eval_diversity_acc_score": 0.7951303839128641, |
| "eval_gen_len": 8.802512562814071, |
| "eval_loss": 0.30857834219932556, |
| "eval_rouge1": 77.5017, |
| "eval_rouge2": 26.6018, |
| "eval_rougeL": 61.8965, |
| "eval_rougeLsum": 61.8749, |
| "eval_runtime": 262.3142, |
| "eval_samples_per_second": 9.104, |
| "eval_steps_per_second": 0.286, |
| "eval_top1_acc": 0.6486599664991625, |
| "eval_top5_acc": 0.8015075376884422, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 4.0229885057471265e-05, |
| "loss": 0.3467, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 4.003831417624521e-05, |
| "loss": 0.3375, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 3.984674329501916e-05, |
| "loss": 0.3157, |
| "step": 530 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 3.965517241379311e-05, |
| "loss": 0.3262, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 3.9463601532567055e-05, |
| "loss": 0.3162, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.11, |
| "eval_diversity": 0.9840871021775545, |
| "eval_diversity_acc_score": 0.7904016172431112, |
| "eval_gen_len": 8.64715242881072, |
| "eval_loss": 0.3194829821586609, |
| "eval_rouge1": 76.978, |
| "eval_rouge2": 27.1217, |
| "eval_rougeL": 61.65, |
| "eval_rougeLsum": 61.6392, |
| "eval_runtime": 260.1985, |
| "eval_samples_per_second": 9.178, |
| "eval_steps_per_second": 0.288, |
| "eval_top1_acc": 0.6507537688442211, |
| "eval_top5_acc": 0.8031825795644891, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 3.9272030651340996e-05, |
| "loss": 0.3136, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 3.908045977011495e-05, |
| "loss": 0.3109, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 0.3214, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 3.869731800766284e-05, |
| "loss": 0.3037, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 3.850574712643678e-05, |
| "loss": 0.3186, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.3, |
| "eval_diversity": 0.9772194304857621, |
| "eval_diversity_acc_score": 0.790205494249584, |
| "eval_gen_len": 8.95427135678392, |
| "eval_loss": 0.3062250316143036, |
| "eval_rouge1": 79.0951, |
| "eval_rouge2": 28.0151, |
| "eval_rougeL": 63.4889, |
| "eval_rougeLsum": 63.5022, |
| "eval_runtime": 264.4315, |
| "eval_samples_per_second": 9.031, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.6654103852596315, |
| "eval_top5_acc": 0.8086264656616415, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 3.831417624521073e-05, |
| "loss": 0.2991, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 3.8122605363984674e-05, |
| "loss": 0.3125, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 3.793103448275862e-05, |
| "loss": 0.3268, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 3.773946360153257e-05, |
| "loss": 0.3072, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 3.7547892720306517e-05, |
| "loss": 0.3036, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.49, |
| "eval_diversity": 0.9886934673366834, |
| "eval_diversity_acc_score": 0.8019678585557604, |
| "eval_gen_len": 8.947319932998324, |
| "eval_loss": 0.29410791397094727, |
| "eval_rouge1": 79.359, |
| "eval_rouge2": 28.0151, |
| "eval_rougeL": 63.4979, |
| "eval_rougeLsum": 63.5317, |
| "eval_runtime": 265.1551, |
| "eval_samples_per_second": 9.006, |
| "eval_steps_per_second": 0.283, |
| "eval_top1_acc": 0.6687604690117253, |
| "eval_top5_acc": 0.8111390284757118, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 3.735632183908046e-05, |
| "loss": 0.303, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 3.716475095785441e-05, |
| "loss": 0.2922, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 3.697318007662835e-05, |
| "loss": 0.3199, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 3.67816091954023e-05, |
| "loss": 0.2995, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 3.659003831417625e-05, |
| "loss": 0.3181, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.68, |
| "eval_diversity": 0.9834170854271357, |
| "eval_diversity_acc_score": 0.800982508859204, |
| "eval_gen_len": 8.873115577889447, |
| "eval_loss": 0.29473522305488586, |
| "eval_rouge1": 80.2598, |
| "eval_rouge2": 28.3396, |
| "eval_rougeL": 63.7071, |
| "eval_rougeLsum": 63.7019, |
| "eval_runtime": 262.2032, |
| "eval_samples_per_second": 9.107, |
| "eval_steps_per_second": 0.286, |
| "eval_top1_acc": 0.681323283082077, |
| "eval_top5_acc": 0.8144891122278057, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 3.6398467432950195e-05, |
| "loss": 0.2876, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 3.620689655172414e-05, |
| "loss": 0.3192, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.601532567049808e-05, |
| "loss": 0.305, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 3.582375478927204e-05, |
| "loss": 0.3119, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 3.563218390804598e-05, |
| "loss": 0.3083, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.87, |
| "eval_diversity": 0.9801507537688444, |
| "eval_diversity_acc_score": 0.809814672188413, |
| "eval_gen_len": 8.773366834170854, |
| "eval_loss": 0.2853524386882782, |
| "eval_rouge1": 80.6082, |
| "eval_rouge2": 29.1806, |
| "eval_rougeL": 64.2514, |
| "eval_rougeLsum": 64.3269, |
| "eval_runtime": 261.3604, |
| "eval_samples_per_second": 9.137, |
| "eval_steps_per_second": 0.287, |
| "eval_top1_acc": 0.6888609715242882, |
| "eval_top5_acc": 0.826214405360134, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 3.5440613026819926e-05, |
| "loss": 0.3051, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 3.5249042145593867e-05, |
| "loss": 0.2947, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 3.505747126436782e-05, |
| "loss": 0.3014, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.03, |
| "learning_rate": 3.486590038314176e-05, |
| "loss": 0.2859, |
| "step": 790 |
| }, |
| { |
| "epoch": 3.07, |
| "learning_rate": 3.467432950191571e-05, |
| "loss": 0.2711, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.07, |
| "eval_diversity": 0.9801507537688442, |
| "eval_diversity_acc_score": 0.7970907721185492, |
| "eval_gen_len": 8.883333333333333, |
| "eval_loss": 0.29576873779296875, |
| "eval_rouge1": 79.1907, |
| "eval_rouge2": 28.2873, |
| "eval_rougeL": 63.1109, |
| "eval_rougeLsum": 63.0771, |
| "eval_runtime": 264.3541, |
| "eval_samples_per_second": 9.033, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.673785594639866, |
| "eval_top5_acc": 0.8132328308207705, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.1, |
| "learning_rate": 3.4482758620689657e-05, |
| "loss": 0.2722, |
| "step": 810 |
| }, |
| { |
| "epoch": 3.14, |
| "learning_rate": 3.4291187739463604e-05, |
| "loss": 0.2732, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.18, |
| "learning_rate": 3.409961685823755e-05, |
| "loss": 0.2813, |
| "step": 830 |
| }, |
| { |
| "epoch": 3.22, |
| "learning_rate": 3.390804597701149e-05, |
| "loss": 0.2695, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 3.371647509578545e-05, |
| "loss": 0.2784, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.26, |
| "eval_diversity": 0.9744556113902848, |
| "eval_diversity_acc_score": 0.8018447556038147, |
| "eval_gen_len": 8.97110552763819, |
| "eval_loss": 0.2884664237499237, |
| "eval_rouge1": 80.383, |
| "eval_rouge2": 29.7334, |
| "eval_rougeL": 64.3104, |
| "eval_rougeLsum": 64.3013, |
| "eval_runtime": 263.9554, |
| "eval_samples_per_second": 9.047, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.6871859296482412, |
| "eval_top5_acc": 0.8228643216080402, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.3, |
| "learning_rate": 3.352490421455939e-05, |
| "loss": 0.2861, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 3.3333333333333335e-05, |
| "loss": 0.2734, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.37, |
| "learning_rate": 3.314176245210728e-05, |
| "loss": 0.2934, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.41, |
| "learning_rate": 3.295019157088123e-05, |
| "loss": 0.2922, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.45, |
| "learning_rate": 3.275862068965517e-05, |
| "loss": 0.2989, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.45, |
| "eval_diversity": 0.9690117252931323, |
| "eval_diversity_acc_score": 0.8002056625955012, |
| "eval_gen_len": 8.771608040201006, |
| "eval_loss": 0.2843467593193054, |
| "eval_rouge1": 80.7629, |
| "eval_rouge2": 29.0655, |
| "eval_rougeL": 64.2694, |
| "eval_rougeLsum": 64.2597, |
| "eval_runtime": 262.4217, |
| "eval_samples_per_second": 9.1, |
| "eval_steps_per_second": 0.286, |
| "eval_top1_acc": 0.6909547738693468, |
| "eval_top5_acc": 0.8257956448911222, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.49, |
| "learning_rate": 3.256704980842912e-05, |
| "loss": 0.2825, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 3.2375478927203066e-05, |
| "loss": 0.2697, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.56, |
| "learning_rate": 3.218390804597701e-05, |
| "loss": 0.2686, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.6, |
| "learning_rate": 3.1992337164750954e-05, |
| "loss": 0.2726, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.64, |
| "learning_rate": 3.180076628352491e-05, |
| "loss": 0.2802, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.64, |
| "eval_diversity": 0.9685929648241206, |
| "eval_diversity_acc_score": 0.798237418247014, |
| "eval_gen_len": 8.875544388609715, |
| "eval_loss": 0.2851749658584595, |
| "eval_rouge1": 80.8877, |
| "eval_rouge2": 29.4312, |
| "eval_rougeL": 64.5336, |
| "eval_rougeLsum": 64.5501, |
| "eval_runtime": 262.1654, |
| "eval_samples_per_second": 9.109, |
| "eval_steps_per_second": 0.286, |
| "eval_top1_acc": 0.6863484087102177, |
| "eval_top5_acc": 0.8241206030150754, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.68, |
| "learning_rate": 3.160919540229885e-05, |
| "loss": 0.2681, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.72, |
| "learning_rate": 3.1417624521072797e-05, |
| "loss": 0.2783, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 3.1226053639846744e-05, |
| "loss": 0.2709, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.79, |
| "learning_rate": 3.103448275862069e-05, |
| "loss": 0.2765, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 3.084291187739464e-05, |
| "loss": 0.26, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.83, |
| "eval_diversity": 0.9541038525963149, |
| "eval_diversity_acc_score": 0.7922897569926686, |
| "eval_gen_len": 8.867169179229482, |
| "eval_loss": 0.2813727855682373, |
| "eval_rouge1": 81.6469, |
| "eval_rouge2": 29.7606, |
| "eval_rougeL": 65.0097, |
| "eval_rougeLsum": 65.0255, |
| "eval_runtime": 263.7716, |
| "eval_samples_per_second": 9.053, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.6989112227805695, |
| "eval_top5_acc": 0.8304020100502513, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.87, |
| "learning_rate": 3.065134099616858e-05, |
| "loss": 0.2776, |
| "step": 1010 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 3.045977011494253e-05, |
| "loss": 0.2688, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.95, |
| "learning_rate": 3.0268199233716475e-05, |
| "loss": 0.2625, |
| "step": 1030 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 3.0076628352490422e-05, |
| "loss": 0.2805, |
| "step": 1040 |
| }, |
| { |
| "epoch": 4.02, |
| "learning_rate": 2.988505747126437e-05, |
| "loss": 0.2624, |
| "step": 1050 |
| }, |
| { |
| "epoch": 4.02, |
| "eval_diversity": 0.9459798994974874, |
| "eval_diversity_acc_score": 0.7803938031867882, |
| "eval_gen_len": 9.172361809045226, |
| "eval_loss": 0.28669536113739014, |
| "eval_rouge1": 81.5126, |
| "eval_rouge2": 29.2881, |
| "eval_rougeL": 64.6202, |
| "eval_rougeLsum": 64.661, |
| "eval_runtime": 269.8422, |
| "eval_samples_per_second": 8.85, |
| "eval_steps_per_second": 0.278, |
| "eval_top1_acc": 0.6922110552763819, |
| "eval_top5_acc": 0.8249581239530989, |
| "step": 1050 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 2.9693486590038317e-05, |
| "loss": 0.2652, |
| "step": 1060 |
| }, |
| { |
| "epoch": 4.1, |
| "learning_rate": 2.950191570881226e-05, |
| "loss": 0.2489, |
| "step": 1070 |
| }, |
| { |
| "epoch": 4.14, |
| "learning_rate": 2.9310344827586206e-05, |
| "loss": 0.2476, |
| "step": 1080 |
| }, |
| { |
| "epoch": 4.18, |
| "learning_rate": 2.9118773946360157e-05, |
| "loss": 0.2497, |
| "step": 1090 |
| }, |
| { |
| "epoch": 4.21, |
| "learning_rate": 2.89272030651341e-05, |
| "loss": 0.2416, |
| "step": 1100 |
| }, |
| { |
| "epoch": 4.21, |
| "eval_diversity": 0.9379396984924624, |
| "eval_diversity_acc_score": 0.7776886947299312, |
| "eval_gen_len": 8.726298157453936, |
| "eval_loss": 0.28321516513824463, |
| "eval_rouge1": 82.226, |
| "eval_rouge2": 29.256, |
| "eval_rougeL": 65.1371, |
| "eval_rougeLsum": 65.1636, |
| "eval_runtime": 261.9648, |
| "eval_samples_per_second": 9.116, |
| "eval_steps_per_second": 0.286, |
| "eval_top1_acc": 0.7043551088777219, |
| "eval_top5_acc": 0.8291457286432161, |
| "step": 1100 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 2.8735632183908045e-05, |
| "loss": 0.2541, |
| "step": 1110 |
| }, |
| { |
| "epoch": 4.29, |
| "learning_rate": 2.8544061302681996e-05, |
| "loss": 0.2628, |
| "step": 1120 |
| }, |
| { |
| "epoch": 4.33, |
| "learning_rate": 2.835249042145594e-05, |
| "loss": 0.2447, |
| "step": 1130 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 2.8160919540229884e-05, |
| "loss": 0.2571, |
| "step": 1140 |
| }, |
| { |
| "epoch": 4.41, |
| "learning_rate": 2.796934865900383e-05, |
| "loss": 0.2654, |
| "step": 1150 |
| }, |
| { |
| "epoch": 4.41, |
| "eval_diversity": 0.9344221105527638, |
| "eval_diversity_acc_score": 0.7759459988384132, |
| "eval_gen_len": 8.95284757118928, |
| "eval_loss": 0.28788745403289795, |
| "eval_rouge1": 81.7831, |
| "eval_rouge2": 28.8072, |
| "eval_rougeL": 64.4422, |
| "eval_rougeLsum": 64.4486, |
| "eval_runtime": 262.7438, |
| "eval_samples_per_second": 9.089, |
| "eval_steps_per_second": 0.285, |
| "eval_top1_acc": 0.6976549413735343, |
| "eval_top5_acc": 0.8304020100502513, |
| "step": 1150 |
| }, |
| { |
| "epoch": 4.44, |
| "learning_rate": 2.777777777777778e-05, |
| "loss": 0.2439, |
| "step": 1160 |
| }, |
| { |
| "epoch": 4.48, |
| "learning_rate": 2.7586206896551727e-05, |
| "loss": 0.2462, |
| "step": 1170 |
| }, |
| { |
| "epoch": 4.52, |
| "learning_rate": 2.739463601532567e-05, |
| "loss": 0.2695, |
| "step": 1180 |
| }, |
| { |
| "epoch": 4.56, |
| "learning_rate": 2.720306513409962e-05, |
| "loss": 0.2608, |
| "step": 1190 |
| }, |
| { |
| "epoch": 4.6, |
| "learning_rate": 2.7011494252873566e-05, |
| "loss": 0.2578, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.6, |
| "eval_diversity": 0.9426298157453936, |
| "eval_diversity_acc_score": 0.7827616937282729, |
| "eval_gen_len": 9.063316582914572, |
| "eval_loss": 0.2836114168167114, |
| "eval_rouge1": 81.8936, |
| "eval_rouge2": 29.0194, |
| "eval_rougeL": 64.7299, |
| "eval_rougeLsum": 64.7349, |
| "eval_runtime": 267.0818, |
| "eval_samples_per_second": 8.941, |
| "eval_steps_per_second": 0.281, |
| "eval_top1_acc": 0.6993299832495813, |
| "eval_top5_acc": 0.8304020100502513, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.64, |
| "learning_rate": 2.681992337164751e-05, |
| "loss": 0.2623, |
| "step": 1210 |
| }, |
| { |
| "epoch": 4.67, |
| "learning_rate": 2.662835249042146e-05, |
| "loss": 0.2567, |
| "step": 1220 |
| }, |
| { |
| "epoch": 4.71, |
| "learning_rate": 2.6436781609195405e-05, |
| "loss": 0.2579, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.75, |
| "learning_rate": 2.624521072796935e-05, |
| "loss": 0.2612, |
| "step": 1240 |
| }, |
| { |
| "epoch": 4.79, |
| "learning_rate": 2.6053639846743293e-05, |
| "loss": 0.2762, |
| "step": 1250 |
| }, |
| { |
| "epoch": 4.79, |
| "eval_diversity": 0.9530988274706867, |
| "eval_diversity_acc_score": 0.794249022892239, |
| "eval_gen_len": 8.855443886097152, |
| "eval_loss": 0.27563899755477905, |
| "eval_rouge1": 81.9006, |
| "eval_rouge2": 29.6943, |
| "eval_rougeL": 65.0267, |
| "eval_rougeLsum": 64.99, |
| "eval_runtime": 263.2049, |
| "eval_samples_per_second": 9.073, |
| "eval_steps_per_second": 0.285, |
| "eval_top1_acc": 0.7022613065326633, |
| "eval_top5_acc": 0.8333333333333334, |
| "step": 1250 |
| }, |
| { |
| "epoch": 4.83, |
| "learning_rate": 2.5862068965517244e-05, |
| "loss": 0.2509, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.87, |
| "learning_rate": 2.5670498084291188e-05, |
| "loss": 0.2418, |
| "step": 1270 |
| }, |
| { |
| "epoch": 4.9, |
| "learning_rate": 2.5478927203065132e-05, |
| "loss": 0.2619, |
| "step": 1280 |
| }, |
| { |
| "epoch": 4.94, |
| "learning_rate": 2.5287356321839083e-05, |
| "loss": 0.2421, |
| "step": 1290 |
| }, |
| { |
| "epoch": 4.98, |
| "learning_rate": 2.5095785440613027e-05, |
| "loss": 0.2552, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.98, |
| "eval_diversity": 0.9605527638190956, |
| "eval_diversity_acc_score": 0.8016673610935752, |
| "eval_gen_len": 8.926716917922947, |
| "eval_loss": 0.2745411694049835, |
| "eval_rouge1": 81.8394, |
| "eval_rouge2": 29.6434, |
| "eval_rougeL": 65.0188, |
| "eval_rougeLsum": 65.0416, |
| "eval_runtime": 262.7573, |
| "eval_samples_per_second": 9.088, |
| "eval_steps_per_second": 0.285, |
| "eval_top1_acc": 0.7001675041876047, |
| "eval_top5_acc": 0.8345896147403685, |
| "step": 1300 |
| }, |
| { |
| "epoch": 5.02, |
| "learning_rate": 2.4904214559386975e-05, |
| "loss": 0.2431, |
| "step": 1310 |
| }, |
| { |
| "epoch": 5.06, |
| "learning_rate": 2.4712643678160922e-05, |
| "loss": 0.235, |
| "step": 1320 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 2.4521072796934867e-05, |
| "loss": 0.2456, |
| "step": 1330 |
| }, |
| { |
| "epoch": 5.13, |
| "learning_rate": 2.4329501915708814e-05, |
| "loss": 0.2337, |
| "step": 1340 |
| }, |
| { |
| "epoch": 5.17, |
| "learning_rate": 2.413793103448276e-05, |
| "loss": 0.2343, |
| "step": 1350 |
| }, |
| { |
| "epoch": 5.17, |
| "eval_diversity": 0.9341708542713568, |
| "eval_diversity_acc_score": 0.7835612316187303, |
| "eval_gen_len": 8.740954773869348, |
| "eval_loss": 0.2820148169994354, |
| "eval_rouge1": 82.1941, |
| "eval_rouge2": 29.6343, |
| "eval_rougeL": 64.9266, |
| "eval_rougeLsum": 64.9399, |
| "eval_runtime": 262.223, |
| "eval_samples_per_second": 9.107, |
| "eval_steps_per_second": 0.286, |
| "eval_top1_acc": 0.708961474036851, |
| "eval_top5_acc": 0.8387772194304858, |
| "step": 1350 |
| }, |
| { |
| "epoch": 5.21, |
| "learning_rate": 2.3946360153256706e-05, |
| "loss": 0.2373, |
| "step": 1360 |
| }, |
| { |
| "epoch": 5.25, |
| "learning_rate": 2.3754789272030653e-05, |
| "loss": 0.2306, |
| "step": 1370 |
| }, |
| { |
| "epoch": 5.29, |
| "learning_rate": 2.3563218390804597e-05, |
| "loss": 0.2409, |
| "step": 1380 |
| }, |
| { |
| "epoch": 5.33, |
| "learning_rate": 2.3371647509578545e-05, |
| "loss": 0.2403, |
| "step": 1390 |
| }, |
| { |
| "epoch": 5.36, |
| "learning_rate": 2.3180076628352492e-05, |
| "loss": 0.2412, |
| "step": 1400 |
| }, |
| { |
| "epoch": 5.36, |
| "eval_diversity": 0.9512562814070353, |
| "eval_diversity_acc_score": 0.7966970531047197, |
| "eval_gen_len": 8.790619765494137, |
| "eval_loss": 0.2815363109111786, |
| "eval_rouge1": 81.4294, |
| "eval_rouge2": 29.2504, |
| "eval_rougeL": 64.4476, |
| "eval_rougeLsum": 64.4682, |
| "eval_runtime": 262.6046, |
| "eval_samples_per_second": 9.094, |
| "eval_steps_per_second": 0.286, |
| "eval_top1_acc": 0.6968174204355109, |
| "eval_top5_acc": 0.8375209380234506, |
| "step": 1400 |
| }, |
| { |
| "epoch": 5.4, |
| "learning_rate": 2.2988505747126437e-05, |
| "loss": 0.2387, |
| "step": 1410 |
| }, |
| { |
| "epoch": 5.44, |
| "learning_rate": 2.2796934865900384e-05, |
| "loss": 0.25, |
| "step": 1420 |
| }, |
| { |
| "epoch": 5.48, |
| "learning_rate": 2.2605363984674328e-05, |
| "loss": 0.243, |
| "step": 1430 |
| }, |
| { |
| "epoch": 5.52, |
| "learning_rate": 2.2413793103448276e-05, |
| "loss": 0.2377, |
| "step": 1440 |
| }, |
| { |
| "epoch": 5.56, |
| "learning_rate": 2.2222222222222223e-05, |
| "loss": 0.2286, |
| "step": 1450 |
| }, |
| { |
| "epoch": 5.56, |
| "eval_diversity": 0.9507537688442211, |
| "eval_diversity_acc_score": 0.7966743264058989, |
| "eval_gen_len": 8.748324958123954, |
| "eval_loss": 0.2779872715473175, |
| "eval_rouge1": 82.237, |
| "eval_rouge2": 29.1436, |
| "eval_rougeL": 64.9086, |
| "eval_rougeLsum": 64.894, |
| "eval_runtime": 260.7535, |
| "eval_samples_per_second": 9.158, |
| "eval_steps_per_second": 0.288, |
| "eval_top1_acc": 0.7056113902847572, |
| "eval_top5_acc": 0.8379396984924623, |
| "step": 1450 |
| }, |
| { |
| "epoch": 5.59, |
| "learning_rate": 2.203065134099617e-05, |
| "loss": 0.2314, |
| "step": 1460 |
| }, |
| { |
| "epoch": 5.63, |
| "learning_rate": 2.183908045977012e-05, |
| "loss": 0.2529, |
| "step": 1470 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 2.1647509578544062e-05, |
| "loss": 0.2422, |
| "step": 1480 |
| }, |
| { |
| "epoch": 5.71, |
| "learning_rate": 2.145593869731801e-05, |
| "loss": 0.2569, |
| "step": 1490 |
| }, |
| { |
| "epoch": 5.75, |
| "learning_rate": 2.1264367816091954e-05, |
| "loss": 0.2448, |
| "step": 1500 |
| }, |
| { |
| "epoch": 5.75, |
| "eval_diversity": 0.9510887772194304, |
| "eval_diversity_acc_score": 0.8001412702821757, |
| "eval_gen_len": 8.686515912897823, |
| "eval_loss": 0.27611294388771057, |
| "eval_rouge1": 82.3455, |
| "eval_rouge2": 29.5652, |
| "eval_rougeL": 65.1557, |
| "eval_rougeLsum": 65.1937, |
| "eval_runtime": 260.5733, |
| "eval_samples_per_second": 9.164, |
| "eval_steps_per_second": 0.288, |
| "eval_top1_acc": 0.708961474036851, |
| "eval_top5_acc": 0.8412897822445561, |
| "step": 1500 |
| }, |
| { |
| "epoch": 5.79, |
| "learning_rate": 2.10727969348659e-05, |
| "loss": 0.2218, |
| "step": 1510 |
| }, |
| { |
| "epoch": 5.82, |
| "learning_rate": 2.088122605363985e-05, |
| "loss": 0.2489, |
| "step": 1520 |
| }, |
| { |
| "epoch": 5.86, |
| "learning_rate": 2.0689655172413793e-05, |
| "loss": 0.236, |
| "step": 1530 |
| }, |
| { |
| "epoch": 5.9, |
| "learning_rate": 2.049808429118774e-05, |
| "loss": 0.2386, |
| "step": 1540 |
| }, |
| { |
| "epoch": 5.94, |
| "learning_rate": 2.0306513409961685e-05, |
| "loss": 0.2403, |
| "step": 1550 |
| }, |
| { |
| "epoch": 5.94, |
| "eval_diversity": 0.9521775544388611, |
| "eval_diversity_acc_score": 0.7950762326428346, |
| "eval_gen_len": 8.964154103852596, |
| "eval_loss": 0.2784104347229004, |
| "eval_rouge1": 82.1858, |
| "eval_rouge2": 30.3783, |
| "eval_rougeL": 65.4133, |
| "eval_rougeLsum": 65.4379, |
| "eval_runtime": 263.695, |
| "eval_samples_per_second": 9.056, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.7051926298157454, |
| "eval_top5_acc": 0.8350083752093802, |
| "step": 1550 |
| }, |
| { |
| "epoch": 5.98, |
| "learning_rate": 2.0114942528735632e-05, |
| "loss": 0.2366, |
| "step": 1560 |
| }, |
| { |
| "epoch": 6.02, |
| "learning_rate": 1.992337164750958e-05, |
| "loss": 0.2359, |
| "step": 1570 |
| }, |
| { |
| "epoch": 6.05, |
| "learning_rate": 1.9731800766283527e-05, |
| "loss": 0.2426, |
| "step": 1580 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 1.9540229885057475e-05, |
| "loss": 0.2197, |
| "step": 1590 |
| }, |
| { |
| "epoch": 6.13, |
| "learning_rate": 1.934865900383142e-05, |
| "loss": 0.228, |
| "step": 1600 |
| }, |
| { |
| "epoch": 6.13, |
| "eval_diversity": 0.933249581239531, |
| "eval_diversity_acc_score": 0.7823976807544141, |
| "eval_gen_len": 8.937688442211055, |
| "eval_loss": 0.28123462200164795, |
| "eval_rouge1": 82.1667, |
| "eval_rouge2": 29.8374, |
| "eval_rougeL": 65.1285, |
| "eval_rougeLsum": 65.1456, |
| "eval_runtime": 264.3876, |
| "eval_samples_per_second": 9.032, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.7043551088777219, |
| "eval_top5_acc": 0.838358458961474, |
| "step": 1600 |
| }, |
| { |
| "epoch": 6.17, |
| "learning_rate": 1.9157088122605367e-05, |
| "loss": 0.2231, |
| "step": 1610 |
| }, |
| { |
| "epoch": 6.21, |
| "learning_rate": 1.896551724137931e-05, |
| "loss": 0.2258, |
| "step": 1620 |
| }, |
| { |
| "epoch": 6.25, |
| "learning_rate": 1.8773946360153258e-05, |
| "loss": 0.2309, |
| "step": 1630 |
| }, |
| { |
| "epoch": 6.28, |
| "learning_rate": 1.8582375478927206e-05, |
| "loss": 0.2233, |
| "step": 1640 |
| }, |
| { |
| "epoch": 6.32, |
| "learning_rate": 1.839080459770115e-05, |
| "loss": 0.2194, |
| "step": 1650 |
| }, |
| { |
| "epoch": 6.32, |
| "eval_diversity": 0.9319095477386935, |
| "eval_diversity_acc_score": 0.7820547460922704, |
| "eval_gen_len": 8.90787269681742, |
| "eval_loss": 0.2776533365249634, |
| "eval_rouge1": 82.5985, |
| "eval_rouge2": 29.903, |
| "eval_rougeL": 65.3189, |
| "eval_rougeLsum": 65.3435, |
| "eval_runtime": 261.2073, |
| "eval_samples_per_second": 9.142, |
| "eval_steps_per_second": 0.287, |
| "eval_top1_acc": 0.7102177554438861, |
| "eval_top5_acc": 0.8391959798994975, |
| "step": 1650 |
| }, |
| { |
| "epoch": 6.36, |
| "learning_rate": 1.8199233716475097e-05, |
| "loss": 0.2266, |
| "step": 1660 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 1.800766283524904e-05, |
| "loss": 0.2282, |
| "step": 1670 |
| }, |
| { |
| "epoch": 6.44, |
| "learning_rate": 1.781609195402299e-05, |
| "loss": 0.2335, |
| "step": 1680 |
| }, |
| { |
| "epoch": 6.48, |
| "learning_rate": 1.7624521072796933e-05, |
| "loss": 0.2283, |
| "step": 1690 |
| }, |
| { |
| "epoch": 6.51, |
| "learning_rate": 1.743295019157088e-05, |
| "loss": 0.2114, |
| "step": 1700 |
| }, |
| { |
| "epoch": 6.51, |
| "eval_diversity": 0.9252931323283081, |
| "eval_diversity_acc_score": 0.7703026579014558, |
| "eval_gen_len": 9.045142378559463, |
| "eval_loss": 0.2881404757499695, |
| "eval_rouge1": 82.6706, |
| "eval_rouge2": 30.2596, |
| "eval_rougeL": 65.4771, |
| "eval_rougeLsum": 65.478, |
| "eval_runtime": 263.9641, |
| "eval_samples_per_second": 9.047, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.7102177554438861, |
| "eval_top5_acc": 0.8324958123953099, |
| "step": 1700 |
| }, |
| { |
| "epoch": 6.55, |
| "learning_rate": 1.7241379310344828e-05, |
| "loss": 0.2419, |
| "step": 1710 |
| }, |
| { |
| "epoch": 6.59, |
| "learning_rate": 1.7049808429118776e-05, |
| "loss": 0.2238, |
| "step": 1720 |
| }, |
| { |
| "epoch": 6.63, |
| "learning_rate": 1.6858237547892723e-05, |
| "loss": 0.2138, |
| "step": 1730 |
| }, |
| { |
| "epoch": 6.67, |
| "learning_rate": 1.6666666666666667e-05, |
| "loss": 0.2143, |
| "step": 1740 |
| }, |
| { |
| "epoch": 6.7, |
| "learning_rate": 1.6475095785440615e-05, |
| "loss": 0.2175, |
| "step": 1750 |
| }, |
| { |
| "epoch": 6.7, |
| "eval_diversity": 0.9329145728643214, |
| "eval_diversity_acc_score": 0.7832888268814759, |
| "eval_gen_len": 8.92286432160804, |
| "eval_loss": 0.28171148896217346, |
| "eval_rouge1": 82.4316, |
| "eval_rouge2": 29.7131, |
| "eval_rougeL": 65.0899, |
| "eval_rougeLsum": 65.0778, |
| "eval_runtime": 263.5754, |
| "eval_samples_per_second": 9.06, |
| "eval_steps_per_second": 0.285, |
| "eval_top1_acc": 0.7114740368509213, |
| "eval_top5_acc": 0.8396147403685092, |
| "step": 1750 |
| }, |
| { |
| "epoch": 6.74, |
| "learning_rate": 1.628352490421456e-05, |
| "loss": 0.2216, |
| "step": 1760 |
| }, |
| { |
| "epoch": 6.78, |
| "learning_rate": 1.6091954022988507e-05, |
| "loss": 0.2214, |
| "step": 1770 |
| }, |
| { |
| "epoch": 6.82, |
| "learning_rate": 1.5900383141762454e-05, |
| "loss": 0.2128, |
| "step": 1780 |
| }, |
| { |
| "epoch": 6.86, |
| "learning_rate": 1.5708812260536398e-05, |
| "loss": 0.2241, |
| "step": 1790 |
| }, |
| { |
| "epoch": 6.9, |
| "learning_rate": 1.5517241379310346e-05, |
| "loss": 0.2319, |
| "step": 1800 |
| }, |
| { |
| "epoch": 6.9, |
| "eval_diversity": 0.9417922948073701, |
| "eval_diversity_acc_score": 0.7911370784688377, |
| "eval_gen_len": 8.944388609715244, |
| "eval_loss": 0.2768917381763458, |
| "eval_rouge1": 82.2516, |
| "eval_rouge2": 29.6531, |
| "eval_rougeL": 65.0499, |
| "eval_rougeLsum": 65.0305, |
| "eval_runtime": 263.7989, |
| "eval_samples_per_second": 9.052, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.7068676716917923, |
| "eval_top5_acc": 0.8400335008375209, |
| "step": 1800 |
| }, |
| { |
| "epoch": 6.93, |
| "learning_rate": 1.532567049808429e-05, |
| "loss": 0.2207, |
| "step": 1810 |
| }, |
| { |
| "epoch": 6.97, |
| "learning_rate": 1.5134099616858237e-05, |
| "loss": 0.2158, |
| "step": 1820 |
| }, |
| { |
| "epoch": 7.01, |
| "learning_rate": 1.4942528735632185e-05, |
| "loss": 0.2125, |
| "step": 1830 |
| }, |
| { |
| "epoch": 7.05, |
| "learning_rate": 1.475095785440613e-05, |
| "loss": 0.2146, |
| "step": 1840 |
| }, |
| { |
| "epoch": 7.09, |
| "learning_rate": 1.4559386973180078e-05, |
| "loss": 0.2169, |
| "step": 1850 |
| }, |
| { |
| "epoch": 7.09, |
| "eval_diversity": 0.9326633165829145, |
| "eval_diversity_acc_score": 0.7803439307088204, |
| "eval_gen_len": 9.041876046901173, |
| "eval_loss": 0.28187987208366394, |
| "eval_rouge1": 82.3565, |
| "eval_rouge2": 29.7711, |
| "eval_rougeL": 65.166, |
| "eval_rougeLsum": 65.1746, |
| "eval_runtime": 265.8812, |
| "eval_samples_per_second": 8.981, |
| "eval_steps_per_second": 0.282, |
| "eval_top1_acc": 0.7081239530988275, |
| "eval_top5_acc": 0.8366834170854272, |
| "step": 1850 |
| }, |
| { |
| "epoch": 7.13, |
| "learning_rate": 1.4367816091954022e-05, |
| "loss": 0.2035, |
| "step": 1860 |
| }, |
| { |
| "epoch": 7.16, |
| "learning_rate": 1.417624521072797e-05, |
| "loss": 0.2261, |
| "step": 1870 |
| }, |
| { |
| "epoch": 7.2, |
| "learning_rate": 1.3984674329501916e-05, |
| "loss": 0.2152, |
| "step": 1880 |
| }, |
| { |
| "epoch": 7.24, |
| "learning_rate": 1.3793103448275863e-05, |
| "loss": 0.2164, |
| "step": 1890 |
| }, |
| { |
| "epoch": 7.28, |
| "learning_rate": 1.360153256704981e-05, |
| "loss": 0.2078, |
| "step": 1900 |
| }, |
| { |
| "epoch": 7.28, |
| "eval_diversity": 0.9261306532663316, |
| "eval_diversity_acc_score": 0.776041640362617, |
| "eval_gen_len": 8.926716917922947, |
| "eval_loss": 0.2834629416465759, |
| "eval_rouge1": 82.539, |
| "eval_rouge2": 30.453, |
| "eval_rougeL": 65.4308, |
| "eval_rougeLsum": 65.4135, |
| "eval_runtime": 264.7002, |
| "eval_samples_per_second": 9.022, |
| "eval_steps_per_second": 0.283, |
| "eval_top1_acc": 0.711892797319933, |
| "eval_top5_acc": 0.8379396984924623, |
| "step": 1900 |
| }, |
| { |
| "epoch": 7.32, |
| "learning_rate": 1.3409961685823755e-05, |
| "loss": 0.222, |
| "step": 1910 |
| }, |
| { |
| "epoch": 7.36, |
| "learning_rate": 1.3218390804597702e-05, |
| "loss": 0.2111, |
| "step": 1920 |
| }, |
| { |
| "epoch": 7.39, |
| "learning_rate": 1.3026819923371647e-05, |
| "loss": 0.2204, |
| "step": 1930 |
| }, |
| { |
| "epoch": 7.43, |
| "learning_rate": 1.2835249042145594e-05, |
| "loss": 0.2036, |
| "step": 1940 |
| }, |
| { |
| "epoch": 7.47, |
| "learning_rate": 1.2643678160919542e-05, |
| "loss": 0.2162, |
| "step": 1950 |
| }, |
| { |
| "epoch": 7.47, |
| "eval_diversity": 0.9309045226130653, |
| "eval_diversity_acc_score": 0.7812113330471453, |
| "eval_gen_len": 8.932914572864322, |
| "eval_loss": 0.28672072291374207, |
| "eval_rouge1": 82.5227, |
| "eval_rouge2": 29.7564, |
| "eval_rougeL": 65.1538, |
| "eval_rougeLsum": 65.2082, |
| "eval_runtime": 264.2788, |
| "eval_samples_per_second": 9.036, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.7102177554438861, |
| "eval_top5_acc": 0.8391959798994975, |
| "step": 1950 |
| }, |
| { |
| "epoch": 7.51, |
| "learning_rate": 1.2452107279693487e-05, |
| "loss": 0.2036, |
| "step": 1960 |
| }, |
| { |
| "epoch": 7.55, |
| "learning_rate": 1.2260536398467433e-05, |
| "loss": 0.2041, |
| "step": 1970 |
| }, |
| { |
| "epoch": 7.59, |
| "learning_rate": 1.206896551724138e-05, |
| "loss": 0.213, |
| "step": 1980 |
| }, |
| { |
| "epoch": 7.62, |
| "learning_rate": 1.1877394636015327e-05, |
| "loss": 0.2082, |
| "step": 1990 |
| }, |
| { |
| "epoch": 7.66, |
| "learning_rate": 1.1685823754789272e-05, |
| "loss": 0.2301, |
| "step": 2000 |
| }, |
| { |
| "epoch": 7.66, |
| "eval_diversity": 0.9356783919597992, |
| "eval_diversity_acc_score": 0.7903112716008857, |
| "eval_gen_len": 8.80393634840871, |
| "eval_loss": 0.28299883008003235, |
| "eval_rouge1": 82.6601, |
| "eval_rouge2": 30.2938, |
| "eval_rougeL": 65.3958, |
| "eval_rougeLsum": 65.4056, |
| "eval_runtime": 262.0892, |
| "eval_samples_per_second": 9.111, |
| "eval_steps_per_second": 0.286, |
| "eval_top1_acc": 0.7156616415410385, |
| "eval_top5_acc": 0.8446398659966499, |
| "step": 2000 |
| }, |
| { |
| "epoch": 7.7, |
| "learning_rate": 1.1494252873563218e-05, |
| "loss": 0.2302, |
| "step": 2010 |
| }, |
| { |
| "epoch": 7.74, |
| "learning_rate": 1.1302681992337164e-05, |
| "loss": 0.2115, |
| "step": 2020 |
| }, |
| { |
| "epoch": 7.78, |
| "learning_rate": 1.1111111111111112e-05, |
| "loss": 0.2132, |
| "step": 2030 |
| }, |
| { |
| "epoch": 7.82, |
| "learning_rate": 1.091954022988506e-05, |
| "loss": 0.1904, |
| "step": 2040 |
| }, |
| { |
| "epoch": 7.85, |
| "learning_rate": 1.0727969348659005e-05, |
| "loss": 0.2085, |
| "step": 2050 |
| }, |
| { |
| "epoch": 7.85, |
| "eval_diversity": 0.9409547738693468, |
| "eval_diversity_acc_score": 0.7896454634983965, |
| "eval_gen_len": 8.924623115577889, |
| "eval_loss": 0.28207850456237793, |
| "eval_rouge1": 82.8141, |
| "eval_rouge2": 30.1884, |
| "eval_rougeL": 65.4939, |
| "eval_rougeLsum": 65.4672, |
| "eval_runtime": 261.4923, |
| "eval_samples_per_second": 9.132, |
| "eval_steps_per_second": 0.287, |
| "eval_top1_acc": 0.7139865996649917, |
| "eval_top5_acc": 0.8391959798994975, |
| "step": 2050 |
| }, |
| { |
| "epoch": 7.89, |
| "learning_rate": 1.053639846743295e-05, |
| "loss": 0.2172, |
| "step": 2060 |
| }, |
| { |
| "epoch": 7.93, |
| "learning_rate": 1.0344827586206897e-05, |
| "loss": 0.2197, |
| "step": 2070 |
| }, |
| { |
| "epoch": 7.97, |
| "learning_rate": 1.0153256704980842e-05, |
| "loss": 0.2146, |
| "step": 2080 |
| }, |
| { |
| "epoch": 8.01, |
| "learning_rate": 9.96168582375479e-06, |
| "loss": 0.2126, |
| "step": 2090 |
| }, |
| { |
| "epoch": 8.05, |
| "learning_rate": 9.770114942528738e-06, |
| "loss": 0.2046, |
| "step": 2100 |
| }, |
| { |
| "epoch": 8.05, |
| "eval_diversity": 0.9336683417085426, |
| "eval_diversity_acc_score": 0.787440552847992, |
| "eval_gen_len": 8.958123953098827, |
| "eval_loss": 0.2814452052116394, |
| "eval_rouge1": 82.9831, |
| "eval_rouge2": 29.9009, |
| "eval_rougeL": 65.3683, |
| "eval_rougeLsum": 65.3521, |
| "eval_runtime": 263.8434, |
| "eval_samples_per_second": 9.051, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.716499162479062, |
| "eval_top5_acc": 0.8433835845896147, |
| "step": 2100 |
| }, |
| { |
| "epoch": 8.08, |
| "learning_rate": 9.578544061302683e-06, |
| "loss": 0.199, |
| "step": 2110 |
| }, |
| { |
| "epoch": 8.12, |
| "learning_rate": 9.386973180076629e-06, |
| "loss": 0.2085, |
| "step": 2120 |
| }, |
| { |
| "epoch": 8.16, |
| "learning_rate": 9.195402298850575e-06, |
| "loss": 0.2072, |
| "step": 2130 |
| }, |
| { |
| "epoch": 8.2, |
| "learning_rate": 9.00383141762452e-06, |
| "loss": 0.2165, |
| "step": 2140 |
| }, |
| { |
| "epoch": 8.24, |
| "learning_rate": 8.812260536398467e-06, |
| "loss": 0.2081, |
| "step": 2150 |
| }, |
| { |
| "epoch": 8.24, |
| "eval_diversity": 0.9404522613065327, |
| "eval_diversity_acc_score": 0.7935558235061405, |
| "eval_gen_len": 8.853852596314908, |
| "eval_loss": 0.28319600224494934, |
| "eval_rouge1": 82.7936, |
| "eval_rouge2": 29.8332, |
| "eval_rougeL": 65.3755, |
| "eval_rougeLsum": 65.3981, |
| "eval_runtime": 262.4445, |
| "eval_samples_per_second": 9.099, |
| "eval_steps_per_second": 0.286, |
| "eval_top1_acc": 0.7139865996649917, |
| "eval_top5_acc": 0.8438023450586265, |
| "step": 2150 |
| }, |
| { |
| "epoch": 8.28, |
| "learning_rate": 8.620689655172414e-06, |
| "loss": 0.21, |
| "step": 2160 |
| }, |
| { |
| "epoch": 8.31, |
| "learning_rate": 8.429118773946362e-06, |
| "loss": 0.2048, |
| "step": 2170 |
| }, |
| { |
| "epoch": 8.35, |
| "learning_rate": 8.237547892720307e-06, |
| "loss": 0.2064, |
| "step": 2180 |
| }, |
| { |
| "epoch": 8.39, |
| "learning_rate": 8.045977011494253e-06, |
| "loss": 0.2117, |
| "step": 2190 |
| }, |
| { |
| "epoch": 8.43, |
| "learning_rate": 7.854406130268199e-06, |
| "loss": 0.2122, |
| "step": 2200 |
| }, |
| { |
| "epoch": 8.43, |
| "eval_diversity": 0.9345896147403686, |
| "eval_diversity_acc_score": 0.7878261702145569, |
| "eval_gen_len": 8.889363484087102, |
| "eval_loss": 0.28129222989082336, |
| "eval_rouge1": 82.8109, |
| "eval_rouge2": 29.6413, |
| "eval_rougeL": 65.1439, |
| "eval_rougeLsum": 65.1444, |
| "eval_runtime": 263.3671, |
| "eval_samples_per_second": 9.067, |
| "eval_steps_per_second": 0.285, |
| "eval_top1_acc": 0.7177554438860971, |
| "eval_top5_acc": 0.842964824120603, |
| "step": 2200 |
| }, |
| { |
| "epoch": 8.47, |
| "learning_rate": 7.662835249042145e-06, |
| "loss": 0.2083, |
| "step": 2210 |
| }, |
| { |
| "epoch": 8.51, |
| "learning_rate": 7.4712643678160925e-06, |
| "loss": 0.2005, |
| "step": 2220 |
| }, |
| { |
| "epoch": 8.54, |
| "learning_rate": 7.279693486590039e-06, |
| "loss": 0.1907, |
| "step": 2230 |
| }, |
| { |
| "epoch": 8.58, |
| "learning_rate": 7.088122605363985e-06, |
| "loss": 0.2218, |
| "step": 2240 |
| }, |
| { |
| "epoch": 8.62, |
| "learning_rate": 6.896551724137932e-06, |
| "loss": 0.2003, |
| "step": 2250 |
| }, |
| { |
| "epoch": 8.62, |
| "eval_diversity": 0.9270519262981576, |
| "eval_diversity_acc_score": 0.7791428878058635, |
| "eval_gen_len": 8.881742043551089, |
| "eval_loss": 0.2844929099082947, |
| "eval_rouge1": 83.034, |
| "eval_rouge2": 30.4788, |
| "eval_rougeL": 65.7633, |
| "eval_rougeLsum": 65.73, |
| "eval_runtime": 261.6511, |
| "eval_samples_per_second": 9.127, |
| "eval_steps_per_second": 0.287, |
| "eval_top1_acc": 0.7173366834170855, |
| "eval_top5_acc": 0.8404522613065326, |
| "step": 2250 |
| }, |
| { |
| "epoch": 8.66, |
| "learning_rate": 6.7049808429118775e-06, |
| "loss": 0.2096, |
| "step": 2260 |
| }, |
| { |
| "epoch": 8.7, |
| "learning_rate": 6.513409961685823e-06, |
| "loss": 0.2095, |
| "step": 2270 |
| }, |
| { |
| "epoch": 8.74, |
| "learning_rate": 6.321839080459771e-06, |
| "loss": 0.2039, |
| "step": 2280 |
| }, |
| { |
| "epoch": 8.77, |
| "learning_rate": 6.130268199233717e-06, |
| "loss": 0.2083, |
| "step": 2290 |
| }, |
| { |
| "epoch": 8.81, |
| "learning_rate": 5.938697318007663e-06, |
| "loss": 0.2006, |
| "step": 2300 |
| }, |
| { |
| "epoch": 8.81, |
| "eval_diversity": 0.9334170854271356, |
| "eval_diversity_acc_score": 0.7837107438364351, |
| "eval_gen_len": 9.044807370184255, |
| "eval_loss": 0.28238752484321594, |
| "eval_rouge1": 82.723, |
| "eval_rouge2": 30.6002, |
| "eval_rougeL": 65.6739, |
| "eval_rougeLsum": 65.6627, |
| "eval_runtime": 264.0211, |
| "eval_samples_per_second": 9.045, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.7114740368509213, |
| "eval_top5_acc": 0.8396147403685092, |
| "step": 2300 |
| }, |
| { |
| "epoch": 8.85, |
| "learning_rate": 5.747126436781609e-06, |
| "loss": 0.2078, |
| "step": 2310 |
| }, |
| { |
| "epoch": 8.89, |
| "learning_rate": 5.555555555555556e-06, |
| "loss": 0.2165, |
| "step": 2320 |
| }, |
| { |
| "epoch": 8.93, |
| "learning_rate": 5.3639846743295025e-06, |
| "loss": 0.1978, |
| "step": 2330 |
| }, |
| { |
| "epoch": 8.97, |
| "learning_rate": 5.172413793103448e-06, |
| "loss": 0.2148, |
| "step": 2340 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 4.980842911877395e-06, |
| "loss": 0.1969, |
| "step": 2350 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_diversity": 0.9299832495812396, |
| "eval_diversity_acc_score": 0.7831642859748211, |
| "eval_gen_len": 8.99782244556114, |
| "eval_loss": 0.28314632177352905, |
| "eval_rouge1": 82.7251, |
| "eval_rouge2": 29.9379, |
| "eval_rougeL": 65.3631, |
| "eval_rougeLsum": 65.3789, |
| "eval_runtime": 263.4212, |
| "eval_samples_per_second": 9.065, |
| "eval_steps_per_second": 0.285, |
| "eval_top1_acc": 0.7123115577889447, |
| "eval_top5_acc": 0.8421273031825796, |
| "step": 2350 |
| }, |
| { |
| "epoch": 9.04, |
| "learning_rate": 4.789272030651342e-06, |
| "loss": 0.1949, |
| "step": 2360 |
| }, |
| { |
| "epoch": 9.08, |
| "learning_rate": 4.5977011494252875e-06, |
| "loss": 0.2049, |
| "step": 2370 |
| }, |
| { |
| "epoch": 9.12, |
| "learning_rate": 4.406130268199233e-06, |
| "loss": 0.1923, |
| "step": 2380 |
| }, |
| { |
| "epoch": 9.16, |
| "learning_rate": 4.214559386973181e-06, |
| "loss": 0.2011, |
| "step": 2390 |
| }, |
| { |
| "epoch": 9.2, |
| "learning_rate": 4.022988505747127e-06, |
| "loss": 0.2116, |
| "step": 2400 |
| }, |
| { |
| "epoch": 9.2, |
| "eval_diversity": 0.9221105527638191, |
| "eval_diversity_acc_score": 0.7761483295876368, |
| "eval_gen_len": 8.993132328308208, |
| "eval_loss": 0.28838592767715454, |
| "eval_rouge1": 82.8682, |
| "eval_rouge2": 29.9749, |
| "eval_rougeL": 65.4914, |
| "eval_rougeLsum": 65.4569, |
| "eval_runtime": 264.9163, |
| "eval_samples_per_second": 9.014, |
| "eval_steps_per_second": 0.283, |
| "eval_top1_acc": 0.7135678391959799, |
| "eval_top5_acc": 0.8417085427135679, |
| "step": 2400 |
| }, |
| { |
| "epoch": 9.23, |
| "learning_rate": 3.8314176245210725e-06, |
| "loss": 0.2055, |
| "step": 2410 |
| }, |
| { |
| "epoch": 9.27, |
| "learning_rate": 3.6398467432950196e-06, |
| "loss": 0.1902, |
| "step": 2420 |
| }, |
| { |
| "epoch": 9.31, |
| "learning_rate": 3.448275862068966e-06, |
| "loss": 0.2033, |
| "step": 2430 |
| }, |
| { |
| "epoch": 9.35, |
| "learning_rate": 3.2567049808429117e-06, |
| "loss": 0.1972, |
| "step": 2440 |
| }, |
| { |
| "epoch": 9.39, |
| "learning_rate": 3.0651340996168583e-06, |
| "loss": 0.1855, |
| "step": 2450 |
| }, |
| { |
| "epoch": 9.39, |
| "eval_diversity": 0.9212730318257957, |
| "eval_diversity_acc_score": 0.7781439301476674, |
| "eval_gen_len": 8.914740368509213, |
| "eval_loss": 0.286485493183136, |
| "eval_rouge1": 82.9214, |
| "eval_rouge2": 29.7397, |
| "eval_rougeL": 65.3115, |
| "eval_rougeLsum": 65.3068, |
| "eval_runtime": 263.7954, |
| "eval_samples_per_second": 9.052, |
| "eval_steps_per_second": 0.284, |
| "eval_top1_acc": 0.7169179229480737, |
| "eval_top5_acc": 0.8446398659966499, |
| "step": 2450 |
| }, |
| { |
| "epoch": 9.43, |
| "learning_rate": 2.8735632183908046e-06, |
| "loss": 0.2004, |
| "step": 2460 |
| }, |
| { |
| "epoch": 9.46, |
| "learning_rate": 2.6819923371647512e-06, |
| "loss": 0.2065, |
| "step": 2470 |
| }, |
| { |
| "epoch": 9.5, |
| "learning_rate": 2.4904214559386975e-06, |
| "loss": 0.2016, |
| "step": 2480 |
| }, |
| { |
| "epoch": 9.54, |
| "learning_rate": 2.2988505747126437e-06, |
| "loss": 0.1982, |
| "step": 2490 |
| }, |
| { |
| "epoch": 9.58, |
| "learning_rate": 2.1072796934865904e-06, |
| "loss": 0.1868, |
| "step": 2500 |
| }, |
| { |
| "epoch": 9.58, |
| "eval_diversity": 0.9215242881072027, |
| "eval_diversity_acc_score": 0.7764266615040585, |
| "eval_gen_len": 8.890954773869346, |
| "eval_loss": 0.2884398102760315, |
| "eval_rouge1": 82.6232, |
| "eval_rouge2": 30.0851, |
| "eval_rougeL": 65.3581, |
| "eval_rougeLsum": 65.3426, |
| "eval_runtime": 262.4143, |
| "eval_samples_per_second": 9.1, |
| "eval_steps_per_second": 0.286, |
| "eval_top1_acc": 0.7135678391959799, |
| "eval_top5_acc": 0.8425460636515912, |
| "step": 2500 |
| }, |
| { |
| "epoch": 9.58, |
| "step": 2500, |
| "total_flos": 3.6097365676179456e+17, |
| "train_loss": 0.29164553117752073, |
| "train_runtime": 24833.869, |
| "train_samples_per_second": 6.725, |
| "train_steps_per_second": 0.105 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 2610, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 50, |
| "total_flos": 3.6097365676179456e+17, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|