| { | |
| "best_metric": 49.33, | |
| "best_model_checkpoint": "outputs/UniPoll-t5/checkpoint-60760", | |
| "epoch": 10.0, | |
| "global_step": 60760, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 2.9753127057274523e-05, | |
| "loss": 16.5215, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.9506254114549046e-05, | |
| "loss": 3.5706, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 2.9259381171823572e-05, | |
| "loss": 2.9107, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 2.901250822909809e-05, | |
| "loss": 2.6763, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.8765635286372614e-05, | |
| "loss": 2.4391, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.8518762343647136e-05, | |
| "loss": 2.2868, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.827188940092166e-05, | |
| "loss": 2.2185, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 2.802501645819618e-05, | |
| "loss": 2.0841, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 2.7778143515470708e-05, | |
| "loss": 2.013, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 2.7531270572745227e-05, | |
| "loss": 1.9524, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.728439763001975e-05, | |
| "loss": 1.9129, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.7037524687294272e-05, | |
| "loss": 1.8501, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_choices_bleu1": 34.2743, | |
| "eval_choices_bleu2": 27.071, | |
| "eval_choices_bleu3": 23.8714, | |
| "eval_choices_bleu4": 19.9582, | |
| "eval_choices_rouge1": 43.056, | |
| "eval_choices_rouge2": 30.797, | |
| "eval_choices_rougeL": 40.79, | |
| "eval_choices_rougeLsum": 30.362, | |
| "eval_loss": 1.7218139171600342, | |
| "eval_mean_rouge1": 44.783500000000004, | |
| "eval_runtime": 613.1086, | |
| "eval_samples_per_second": 3.303, | |
| "eval_steps_per_second": 0.414, | |
| "eval_title_bleu1": 39.1598, | |
| "eval_title_bleu2": 24.8472, | |
| "eval_title_bleu3": 17.1189, | |
| "eval_title_bleu4": 11.4654, | |
| "eval_title_rouge1": 46.511, | |
| "eval_title_rouge2": 28.336, | |
| "eval_title_rougeL": 43.814, | |
| "eval_title_rougeLsum": 28.185, | |
| "step": 6076 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 2.6790651744568795e-05, | |
| "loss": 1.7659, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 2.654377880184332e-05, | |
| "loss": 1.7064, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.6296905859117843e-05, | |
| "loss": 1.6739, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 2.6050032916392366e-05, | |
| "loss": 1.6589, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.5803159973666885e-05, | |
| "loss": 1.6193, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 2.5556287030941408e-05, | |
| "loss": 1.6175, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 2.530941408821593e-05, | |
| "loss": 1.6042, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.5062541145490456e-05, | |
| "loss": 1.5438, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.481566820276498e-05, | |
| "loss": 1.5275, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 2.45687952600395e-05, | |
| "loss": 1.5144, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.432192231731402e-05, | |
| "loss": 1.4911, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 2.4075049374588544e-05, | |
| "loss": 1.4301, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_choices_bleu1": 36.2304, | |
| "eval_choices_bleu2": 29.2436, | |
| "eval_choices_bleu3": 26.0301, | |
| "eval_choices_bleu4": 21.8164, | |
| "eval_choices_rouge1": 45.164, | |
| "eval_choices_rouge2": 33.104, | |
| "eval_choices_rougeL": 42.835, | |
| "eval_choices_rougeLsum": 32.498, | |
| "eval_loss": 1.6042917966842651, | |
| "eval_mean_rouge1": 47.2485, | |
| "eval_runtime": 613.8133, | |
| "eval_samples_per_second": 3.299, | |
| "eval_steps_per_second": 0.414, | |
| "eval_title_bleu1": 41.7754, | |
| "eval_title_bleu2": 27.1102, | |
| "eval_title_bleu3": 18.344, | |
| "eval_title_bleu4": 12.3234, | |
| "eval_title_rouge1": 49.333, | |
| "eval_title_rouge2": 30.676, | |
| "eval_title_rougeL": 46.561, | |
| "eval_title_rougeLsum": 30.748, | |
| "step": 12152 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 2.3828176431863066e-05, | |
| "loss": 1.4002, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 2.3581303489137592e-05, | |
| "loss": 1.3791, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 2.3334430546412115e-05, | |
| "loss": 1.3332, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 2.3087557603686637e-05, | |
| "loss": 1.3577, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 2.284068466096116e-05, | |
| "loss": 1.3483, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 2.259381171823568e-05, | |
| "loss": 1.3584, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 2.2346938775510205e-05, | |
| "loss": 1.3186, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.2100065832784728e-05, | |
| "loss": 1.3283, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 2.185319289005925e-05, | |
| "loss": 1.2867, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 2.1606319947333773e-05, | |
| "loss": 1.2766, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.1359447004608296e-05, | |
| "loss": 1.3025, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 2.111257406188282e-05, | |
| "loss": 1.2736, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_choices_bleu1": 36.9274, | |
| "eval_choices_bleu2": 30.039, | |
| "eval_choices_bleu3": 26.4107, | |
| "eval_choices_bleu4": 22.0831, | |
| "eval_choices_rouge1": 46.229, | |
| "eval_choices_rouge2": 33.898, | |
| "eval_choices_rougeL": 43.746, | |
| "eval_choices_rougeLsum": 33.141, | |
| "eval_loss": 1.5690504312515259, | |
| "eval_mean_rouge1": 48.167500000000004, | |
| "eval_runtime": 614.3575, | |
| "eval_samples_per_second": 3.296, | |
| "eval_steps_per_second": 0.413, | |
| "eval_title_bleu1": 42.3694, | |
| "eval_title_bleu2": 28.0839, | |
| "eval_title_bleu3": 19.3916, | |
| "eval_title_bleu4": 13.1408, | |
| "eval_title_rouge1": 50.106, | |
| "eval_title_rouge2": 31.608, | |
| "eval_title_rougeL": 47.1, | |
| "eval_title_rougeLsum": 31.399, | |
| "step": 18228 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 2.086570111915734e-05, | |
| "loss": 1.2176, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 2.0618828176431864e-05, | |
| "loss": 1.2037, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 2.0371955233706386e-05, | |
| "loss": 1.2104, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 2.012508229098091e-05, | |
| "loss": 1.1522, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 1.987820934825543e-05, | |
| "loss": 1.1969, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 1.9631336405529957e-05, | |
| "loss": 1.1861, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 1.9384463462804477e-05, | |
| "loss": 1.1536, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 1.9137590520079e-05, | |
| "loss": 1.1394, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 1.8890717577353522e-05, | |
| "loss": 1.1229, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 1.8643844634628044e-05, | |
| "loss": 1.1233, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 1.8396971691902567e-05, | |
| "loss": 1.1265, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 1.8150098749177093e-05, | |
| "loss": 1.1438, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_choices_bleu1": 38.2715, | |
| "eval_choices_bleu2": 30.7803, | |
| "eval_choices_bleu3": 26.9954, | |
| "eval_choices_bleu4": 22.6626, | |
| "eval_choices_rouge1": 46.767, | |
| "eval_choices_rouge2": 34.301, | |
| "eval_choices_rougeL": 44.062, | |
| "eval_choices_rougeLsum": 33.605, | |
| "eval_loss": 1.5627468824386597, | |
| "eval_mean_rouge1": 48.6515, | |
| "eval_runtime": 625.5501, | |
| "eval_samples_per_second": 3.237, | |
| "eval_steps_per_second": 0.406, | |
| "eval_title_bleu1": 42.7056, | |
| "eval_title_bleu2": 28.0993, | |
| "eval_title_bleu3": 19.498, | |
| "eval_title_bleu4": 12.7399, | |
| "eval_title_rouge1": 50.536, | |
| "eval_title_rouge2": 31.852, | |
| "eval_title_rougeL": 47.453, | |
| "eval_title_rougeLsum": 31.678, | |
| "step": 24304 | |
| }, | |
| { | |
| "epoch": 4.03, | |
| "learning_rate": 1.7903225806451616e-05, | |
| "loss": 1.0962, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 1.7656352863726135e-05, | |
| "loss": 1.0664, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 1.7409479921000658e-05, | |
| "loss": 1.0874, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 1.716260697827518e-05, | |
| "loss": 1.0608, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 1.6915734035549703e-05, | |
| "loss": 1.0367, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 1.666886109282423e-05, | |
| "loss": 1.0397, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 1.642198815009875e-05, | |
| "loss": 1.0378, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 1.617511520737327e-05, | |
| "loss": 1.0143, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 1.5928242264647793e-05, | |
| "loss": 1.0186, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 1.5681369321922316e-05, | |
| "loss": 1.0243, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 1.5434496379196842e-05, | |
| "loss": 1.038, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 4.94, | |
| "learning_rate": 1.5187623436471364e-05, | |
| "loss": 1.0026, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_choices_bleu1": 37.513, | |
| "eval_choices_bleu2": 30.5119, | |
| "eval_choices_bleu3": 26.8236, | |
| "eval_choices_bleu4": 22.3797, | |
| "eval_choices_rouge1": 46.999, | |
| "eval_choices_rouge2": 34.451, | |
| "eval_choices_rougeL": 44.427, | |
| "eval_choices_rougeLsum": 33.652, | |
| "eval_loss": 1.5795127153396606, | |
| "eval_mean_rouge1": 48.658500000000004, | |
| "eval_runtime": 619.9047, | |
| "eval_samples_per_second": 3.267, | |
| "eval_steps_per_second": 0.41, | |
| "eval_title_bleu1": 42.3561, | |
| "eval_title_bleu2": 27.804, | |
| "eval_title_bleu3": 19.0851, | |
| "eval_title_bleu4": 12.6803, | |
| "eval_title_rouge1": 50.318, | |
| "eval_title_rouge2": 31.494, | |
| "eval_title_rougeL": 47.297, | |
| "eval_title_rougeLsum": 31.461, | |
| "step": 30380 | |
| }, | |
| { | |
| "epoch": 5.02, | |
| "learning_rate": 1.4940750493745885e-05, | |
| "loss": 1.0107, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 1.4693877551020408e-05, | |
| "loss": 0.9664, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 1.4447004608294932e-05, | |
| "loss": 0.9342, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 1.4200131665569453e-05, | |
| "loss": 0.9632, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 1.3953258722843976e-05, | |
| "loss": 0.9448, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 1.37063857801185e-05, | |
| "loss": 0.9669, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 1.3459512837393023e-05, | |
| "loss": 0.9361, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 1.3212639894667544e-05, | |
| "loss": 0.9445, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "learning_rate": 1.2965766951942068e-05, | |
| "loss": 0.9515, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 1.271889400921659e-05, | |
| "loss": 0.9488, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 1.2472021066491112e-05, | |
| "loss": 0.9145, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 1.2225148123765636e-05, | |
| "loss": 0.9493, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_choices_bleu1": 38.7421, | |
| "eval_choices_bleu2": 31.1954, | |
| "eval_choices_bleu3": 27.0903, | |
| "eval_choices_bleu4": 22.4639, | |
| "eval_choices_rouge1": 47.617, | |
| "eval_choices_rouge2": 34.86, | |
| "eval_choices_rougeL": 44.679, | |
| "eval_choices_rougeLsum": 34.003, | |
| "eval_loss": 1.5864702463150024, | |
| "eval_mean_rouge1": 48.966499999999996, | |
| "eval_runtime": 615.3469, | |
| "eval_samples_per_second": 3.291, | |
| "eval_steps_per_second": 0.413, | |
| "eval_title_bleu1": 42.4031, | |
| "eval_title_bleu2": 28.2011, | |
| "eval_title_bleu3": 19.3663, | |
| "eval_title_bleu4": 12.8818, | |
| "eval_title_rouge1": 50.316, | |
| "eval_title_rouge2": 31.841, | |
| "eval_title_rougeL": 47.179, | |
| "eval_title_rougeLsum": 31.509, | |
| "step": 36456 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 1.1978275181040158e-05, | |
| "loss": 0.9359, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 1.1731402238314681e-05, | |
| "loss": 0.9096, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 1.1484529295589204e-05, | |
| "loss": 0.9069, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 1.1237656352863726e-05, | |
| "loss": 0.89, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 6.34, | |
| "learning_rate": 1.099078341013825e-05, | |
| "loss": 0.8761, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 6.42, | |
| "learning_rate": 1.0743910467412772e-05, | |
| "loss": 0.8919, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 1.0497037524687294e-05, | |
| "loss": 0.8869, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 6.58, | |
| "learning_rate": 1.0250164581961818e-05, | |
| "loss": 0.8912, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 1.000329163923634e-05, | |
| "loss": 0.8885, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 6.75, | |
| "learning_rate": 9.756418696510862e-06, | |
| "loss": 0.8691, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 9.509545753785386e-06, | |
| "loss": 0.8882, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 9.262672811059907e-06, | |
| "loss": 0.8822, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 9.01579986833443e-06, | |
| "loss": 0.8622, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_choices_bleu1": 38.5191, | |
| "eval_choices_bleu2": 31.1743, | |
| "eval_choices_bleu3": 27.2188, | |
| "eval_choices_bleu4": 22.5257, | |
| "eval_choices_rouge1": 47.345, | |
| "eval_choices_rouge2": 34.733, | |
| "eval_choices_rougeL": 44.637, | |
| "eval_choices_rougeLsum": 33.956, | |
| "eval_loss": 1.6173765659332275, | |
| "eval_mean_rouge1": 49.0085, | |
| "eval_runtime": 624.2904, | |
| "eval_samples_per_second": 3.244, | |
| "eval_steps_per_second": 0.407, | |
| "eval_title_bleu1": 42.7395, | |
| "eval_title_bleu2": 28.2813, | |
| "eval_title_bleu3": 19.4836, | |
| "eval_title_bleu4": 12.7587, | |
| "eval_title_rouge1": 50.672, | |
| "eval_title_rouge2": 32.013, | |
| "eval_title_rougeL": 47.817, | |
| "eval_title_rougeLsum": 31.899, | |
| "step": 42532 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 8.768926925608954e-06, | |
| "loss": 0.8468, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 8.522053982883475e-06, | |
| "loss": 0.8209, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 8.275181040158e-06, | |
| "loss": 0.8379, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 8.028308097432522e-06, | |
| "loss": 0.8544, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 7.41, | |
| "learning_rate": 7.781435154707045e-06, | |
| "loss": 0.8394, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 7.534562211981568e-06, | |
| "loss": 0.8285, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 7.28768926925609e-06, | |
| "loss": 0.8333, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 7.65, | |
| "learning_rate": 7.0408163265306125e-06, | |
| "loss": 0.8359, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 7.74, | |
| "learning_rate": 6.793943383805135e-06, | |
| "loss": 0.8176, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 6.547070441079658e-06, | |
| "loss": 0.842, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 6.30019749835418e-06, | |
| "loss": 0.8351, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 6.053324555628703e-06, | |
| "loss": 0.8367, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_choices_bleu1": 38.7622, | |
| "eval_choices_bleu2": 31.0867, | |
| "eval_choices_bleu3": 26.9654, | |
| "eval_choices_bleu4": 22.4556, | |
| "eval_choices_rouge1": 47.531, | |
| "eval_choices_rouge2": 34.756, | |
| "eval_choices_rougeL": 44.649, | |
| "eval_choices_rougeLsum": 33.959, | |
| "eval_loss": 1.6293848752975464, | |
| "eval_mean_rouge1": 49.2605, | |
| "eval_runtime": 622.1469, | |
| "eval_samples_per_second": 3.255, | |
| "eval_steps_per_second": 0.408, | |
| "eval_title_bleu1": 43.0901, | |
| "eval_title_bleu2": 28.6471, | |
| "eval_title_bleu3": 19.7616, | |
| "eval_title_bleu4": 12.9726, | |
| "eval_title_rouge1": 50.99, | |
| "eval_title_rouge2": 32.44, | |
| "eval_title_rougeL": 47.948, | |
| "eval_title_rougeLsum": 32.24, | |
| "step": 48608 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 5.8064516129032256e-06, | |
| "loss": 0.8004, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 5.559578670177749e-06, | |
| "loss": 0.789, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 5.312705727452272e-06, | |
| "loss": 0.7843, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 8.31, | |
| "learning_rate": 5.065832784726793e-06, | |
| "loss": 0.8102, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 4.818959842001317e-06, | |
| "loss": 0.7979, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 4.5720868992758395e-06, | |
| "loss": 0.8262, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "learning_rate": 4.325213956550362e-06, | |
| "loss": 0.8149, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "learning_rate": 4.078341013824885e-06, | |
| "loss": 0.8178, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "learning_rate": 3.831468071099407e-06, | |
| "loss": 0.7796, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "learning_rate": 3.5845951283739304e-06, | |
| "loss": 0.8036, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 3.337722185648453e-06, | |
| "loss": 0.8169, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "learning_rate": 3.0908492429229756e-06, | |
| "loss": 0.8138, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_choices_bleu1": 38.8037, | |
| "eval_choices_bleu2": 31.1809, | |
| "eval_choices_bleu3": 27.0585, | |
| "eval_choices_bleu4": 22.3774, | |
| "eval_choices_rouge1": 47.629, | |
| "eval_choices_rouge2": 34.778, | |
| "eval_choices_rougeL": 44.722, | |
| "eval_choices_rougeLsum": 33.944, | |
| "eval_loss": 1.6443856954574585, | |
| "eval_mean_rouge1": 49.32, | |
| "eval_runtime": 624.8802, | |
| "eval_samples_per_second": 3.241, | |
| "eval_steps_per_second": 0.406, | |
| "eval_title_bleu1": 43.0453, | |
| "eval_title_bleu2": 28.6763, | |
| "eval_title_bleu3": 19.7491, | |
| "eval_title_bleu4": 13.004, | |
| "eval_title_rouge1": 51.011, | |
| "eval_title_rouge2": 32.398, | |
| "eval_title_rougeL": 47.974, | |
| "eval_title_rougeLsum": 32.251, | |
| "step": 54684 | |
| }, | |
| { | |
| "epoch": 9.05, | |
| "learning_rate": 2.8439763001974987e-06, | |
| "loss": 0.7764, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 2.5971033574720213e-06, | |
| "loss": 0.7939, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 9.22, | |
| "learning_rate": 2.3502304147465435e-06, | |
| "loss": 0.8069, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "learning_rate": 2.1033574720210665e-06, | |
| "loss": 0.7649, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 9.38, | |
| "learning_rate": 1.8564845292955893e-06, | |
| "loss": 0.7977, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 1.609611586570112e-06, | |
| "loss": 0.803, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 9.55, | |
| "learning_rate": 1.3627386438446346e-06, | |
| "loss": 0.7813, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 1.1158657011191574e-06, | |
| "loss": 0.7645, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 8.689927583936801e-07, | |
| "loss": 0.7865, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "learning_rate": 6.221198156682028e-07, | |
| "loss": 0.7818, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "learning_rate": 3.752468729427255e-07, | |
| "loss": 0.7572, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "learning_rate": 1.283739302172482e-07, | |
| "loss": 0.7788, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_choices_bleu1": 38.8515, | |
| "eval_choices_bleu2": 31.2992, | |
| "eval_choices_bleu3": 27.2073, | |
| "eval_choices_bleu4": 22.4633, | |
| "eval_choices_rouge1": 47.688, | |
| "eval_choices_rouge2": 34.913, | |
| "eval_choices_rougeL": 44.77, | |
| "eval_choices_rougeLsum": 34.038, | |
| "eval_loss": 1.6531261205673218, | |
| "eval_mean_rouge1": 49.33, | |
| "eval_runtime": 620.225, | |
| "eval_samples_per_second": 3.265, | |
| "eval_steps_per_second": 0.41, | |
| "eval_title_bleu1": 43.0602, | |
| "eval_title_bleu2": 28.6495, | |
| "eval_title_bleu3": 19.8146, | |
| "eval_title_bleu4": 13.144, | |
| "eval_title_rouge1": 50.972, | |
| "eval_title_rouge2": 32.375, | |
| "eval_title_rougeL": 47.919, | |
| "eval_title_rougeLsum": 32.201, | |
| "step": 60760 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 60760, | |
| "total_flos": 1.262557342857001e+17, | |
| "train_loss": 1.288209796626761, | |
| "train_runtime": 17551.2786, | |
| "train_samples_per_second": 27.692, | |
| "train_steps_per_second": 3.462 | |
| } | |
| ], | |
| "max_steps": 60760, | |
| "num_train_epochs": 10, | |
| "total_flos": 1.262557342857001e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |