| { |
| "best_metric": 2.63411021232605, |
| "best_model_checkpoint": "model/best_model_test_0423_small/checkpoint-55000", |
| "epoch": 3.0, |
| "global_step": 57807, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05, |
| "learning_rate": 9.827010569654195e-05, |
| "loss": 5.8165, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_gen_len": 18.0056, |
| "eval_loss": 3.6540932655334473, |
| "eval_rouge1": 11.6734, |
| "eval_rouge2": 3.9865, |
| "eval_rougeL": 11.5734, |
| "eval_rougeLsum": 11.5375, |
| "eval_runtime": 831.6458, |
| "eval_samples_per_second": 5.793, |
| "eval_steps_per_second": 2.897, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 9.654021139308389e-05, |
| "loss": 4.306, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_gen_len": 16.8948, |
| "eval_loss": 3.4290754795074463, |
| "eval_rouge1": 12.0417, |
| "eval_rouge2": 3.8419, |
| "eval_rougeL": 11.9231, |
| "eval_rougeLsum": 11.9223, |
| "eval_runtime": 769.6461, |
| "eval_samples_per_second": 6.26, |
| "eval_steps_per_second": 3.13, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 9.481031708962583e-05, |
| "loss": 4.1091, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_gen_len": 19.4016, |
| "eval_loss": 3.364335060119629, |
| "eval_rouge1": 13.661, |
| "eval_rouge2": 4.5171, |
| "eval_rougeL": 13.5123, |
| "eval_rougeLsum": 13.5076, |
| "eval_runtime": 875.4047, |
| "eval_samples_per_second": 5.504, |
| "eval_steps_per_second": 2.752, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 9.308042278616777e-05, |
| "loss": 3.9637, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_gen_len": 18.4288, |
| "eval_loss": 3.2573604583740234, |
| "eval_rouge1": 13.8443, |
| "eval_rouge2": 4.1761, |
| "eval_rougeL": 13.689, |
| "eval_rougeLsum": 13.6927, |
| "eval_runtime": 771.3843, |
| "eval_samples_per_second": 6.246, |
| "eval_steps_per_second": 3.123, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 9.135052848270971e-05, |
| "loss": 3.8205, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_gen_len": 21.5776, |
| "eval_loss": 3.2433691024780273, |
| "eval_rouge1": 13.5371, |
| "eval_rouge2": 4.3639, |
| "eval_rougeL": 13.3551, |
| "eval_rougeLsum": 13.3552, |
| "eval_runtime": 903.4907, |
| "eval_samples_per_second": 5.333, |
| "eval_steps_per_second": 2.666, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 8.962063417925165e-05, |
| "loss": 3.7262, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.31, |
| "eval_gen_len": 21.5548, |
| "eval_loss": 3.1689953804016113, |
| "eval_rouge1": 14.3668, |
| "eval_rouge2": 4.8048, |
| "eval_rougeL": 14.2191, |
| "eval_rougeLsum": 14.1906, |
| "eval_runtime": 870.3487, |
| "eval_samples_per_second": 5.536, |
| "eval_steps_per_second": 2.768, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 8.78907398757936e-05, |
| "loss": 3.6887, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.36, |
| "eval_gen_len": 20.89, |
| "eval_loss": 3.0656516551971436, |
| "eval_rouge1": 14.3265, |
| "eval_rouge2": 4.436, |
| "eval_rougeL": 14.212, |
| "eval_rougeLsum": 14.205, |
| "eval_runtime": 840.9965, |
| "eval_samples_per_second": 5.729, |
| "eval_steps_per_second": 2.864, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 8.616084557233554e-05, |
| "loss": 3.6337, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.42, |
| "eval_gen_len": 20.3651, |
| "eval_loss": 3.031759262084961, |
| "eval_rouge1": 14.6809, |
| "eval_rouge2": 4.8345, |
| "eval_rougeL": 14.5378, |
| "eval_rougeLsum": 14.5331, |
| "eval_runtime": 836.4852, |
| "eval_samples_per_second": 5.76, |
| "eval_steps_per_second": 2.88, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 8.443095126887747e-05, |
| "loss": 3.5443, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.47, |
| "eval_gen_len": 21.7742, |
| "eval_loss": 3.0553905963897705, |
| "eval_rouge1": 15.3372, |
| "eval_rouge2": 4.9163, |
| "eval_rougeL": 15.1794, |
| "eval_rougeLsum": 15.1781, |
| "eval_runtime": 893.8221, |
| "eval_samples_per_second": 5.39, |
| "eval_steps_per_second": 2.695, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 8.270105696541942e-05, |
| "loss": 3.5203, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.52, |
| "eval_gen_len": 20.8113, |
| "eval_loss": 2.9792585372924805, |
| "eval_rouge1": 14.9278, |
| "eval_rouge2": 4.9656, |
| "eval_rougeL": 14.7491, |
| "eval_rougeLsum": 14.743, |
| "eval_runtime": 848.3297, |
| "eval_samples_per_second": 5.679, |
| "eval_steps_per_second": 2.84, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.097116266196136e-05, |
| "loss": 3.4936, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_gen_len": 23.4274, |
| "eval_loss": 3.0078511238098145, |
| "eval_rouge1": 15.7705, |
| "eval_rouge2": 5.1453, |
| "eval_rougeL": 15.5582, |
| "eval_rougeLsum": 15.5756, |
| "eval_runtime": 944.685, |
| "eval_samples_per_second": 5.1, |
| "eval_steps_per_second": 2.55, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 7.92412683585033e-05, |
| "loss": 3.4592, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_gen_len": 22.7007, |
| "eval_loss": 2.972140312194824, |
| "eval_rouge1": 15.0201, |
| "eval_rouge2": 5.1612, |
| "eval_rougeL": 14.8508, |
| "eval_rougeLsum": 14.8198, |
| "eval_runtime": 914.5833, |
| "eval_samples_per_second": 5.268, |
| "eval_steps_per_second": 2.634, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 7.751137405504524e-05, |
| "loss": 3.377, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_gen_len": 23.4427, |
| "eval_loss": 3.01123309135437, |
| "eval_rouge1": 15.9595, |
| "eval_rouge2": 5.1133, |
| "eval_rougeL": 15.78, |
| "eval_rougeLsum": 15.7774, |
| "eval_runtime": 950.6422, |
| "eval_samples_per_second": 5.068, |
| "eval_steps_per_second": 2.534, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 7.578147975158718e-05, |
| "loss": 3.4158, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.73, |
| "eval_gen_len": 21.6009, |
| "eval_loss": 2.9238853454589844, |
| "eval_rouge1": 14.7984, |
| "eval_rouge2": 5.051, |
| "eval_rougeL": 14.6943, |
| "eval_rougeLsum": 14.6581, |
| "eval_runtime": 878.6968, |
| "eval_samples_per_second": 5.483, |
| "eval_steps_per_second": 2.742, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 7.405158544812913e-05, |
| "loss": 3.378, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.78, |
| "eval_gen_len": 22.0828, |
| "eval_loss": 2.889694929122925, |
| "eval_rouge1": 16.5128, |
| "eval_rouge2": 5.1923, |
| "eval_rougeL": 16.3523, |
| "eval_rougeLsum": 16.3265, |
| "eval_runtime": 902.1756, |
| "eval_samples_per_second": 5.34, |
| "eval_steps_per_second": 2.67, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 7.232169114467106e-05, |
| "loss": 3.3231, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_gen_len": 22.5807, |
| "eval_loss": 2.9346752166748047, |
| "eval_rouge1": 16.9997, |
| "eval_rouge2": 5.5524, |
| "eval_rougeL": 16.8534, |
| "eval_rougeLsum": 16.8737, |
| "eval_runtime": 895.2014, |
| "eval_samples_per_second": 5.382, |
| "eval_steps_per_second": 2.691, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.059179684121301e-05, |
| "loss": 3.3268, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_gen_len": 23.6988, |
| "eval_loss": 2.911571741104126, |
| "eval_rouge1": 16.0261, |
| "eval_rouge2": 5.4226, |
| "eval_rougeL": 15.9234, |
| "eval_rougeLsum": 15.914, |
| "eval_runtime": 962.7416, |
| "eval_samples_per_second": 5.004, |
| "eval_steps_per_second": 2.502, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 6.886190253775494e-05, |
| "loss": 3.3127, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_gen_len": 22.9481, |
| "eval_loss": 2.861004590988159, |
| "eval_rouge1": 16.6255, |
| "eval_rouge2": 5.3554, |
| "eval_rougeL": 16.4729, |
| "eval_rougeLsum": 16.4569, |
| "eval_runtime": 922.0145, |
| "eval_samples_per_second": 5.226, |
| "eval_steps_per_second": 2.613, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 6.713200823429688e-05, |
| "loss": 3.2664, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.99, |
| "eval_gen_len": 23.4423, |
| "eval_loss": 2.860574722290039, |
| "eval_rouge1": 17.7703, |
| "eval_rouge2": 5.9475, |
| "eval_rougeL": 17.6229, |
| "eval_rougeLsum": 17.6259, |
| "eval_runtime": 936.5594, |
| "eval_samples_per_second": 5.144, |
| "eval_steps_per_second": 2.572, |
| "step": 19000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 6.540211393083882e-05, |
| "loss": 3.1718, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.04, |
| "eval_gen_len": 23.0093, |
| "eval_loss": 2.8764114379882812, |
| "eval_rouge1": 17.301, |
| "eval_rouge2": 5.6262, |
| "eval_rougeL": 17.122, |
| "eval_rougeLsum": 17.1104, |
| "eval_runtime": 908.2265, |
| "eval_samples_per_second": 5.305, |
| "eval_steps_per_second": 2.652, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 6.367221962738077e-05, |
| "loss": 3.0987, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.09, |
| "eval_gen_len": 20.9697, |
| "eval_loss": 2.82820200920105, |
| "eval_rouge1": 16.4718, |
| "eval_rouge2": 5.2077, |
| "eval_rougeL": 16.3394, |
| "eval_rougeLsum": 16.3401, |
| "eval_runtime": 831.2333, |
| "eval_samples_per_second": 5.796, |
| "eval_steps_per_second": 2.898, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 6.19423253239227e-05, |
| "loss": 3.1486, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.14, |
| "eval_gen_len": 22.7291, |
| "eval_loss": 2.823465347290039, |
| "eval_rouge1": 18.5594, |
| "eval_rouge2": 5.9469, |
| "eval_rougeL": 18.3882, |
| "eval_rougeLsum": 18.3799, |
| "eval_runtime": 901.4834, |
| "eval_samples_per_second": 5.345, |
| "eval_steps_per_second": 2.672, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 6.021243102046465e-05, |
| "loss": 3.1435, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_gen_len": 22.9612, |
| "eval_loss": 2.826120615005493, |
| "eval_rouge1": 18.111, |
| "eval_rouge2": 6.0309, |
| "eval_rougeL": 17.9593, |
| "eval_rougeLsum": 17.9613, |
| "eval_runtime": 912.4414, |
| "eval_samples_per_second": 5.28, |
| "eval_steps_per_second": 2.64, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 5.84825367170066e-05, |
| "loss": 3.1049, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.25, |
| "eval_gen_len": 22.5558, |
| "eval_loss": 2.8067939281463623, |
| "eval_rouge1": 17.124, |
| "eval_rouge2": 5.5675, |
| "eval_rougeL": 16.9714, |
| "eval_rougeLsum": 16.9876, |
| "eval_runtime": 903.448, |
| "eval_samples_per_second": 5.333, |
| "eval_steps_per_second": 2.666, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 5.675264241354854e-05, |
| "loss": 3.1357, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.3, |
| "eval_gen_len": 23.0075, |
| "eval_loss": 2.801447629928589, |
| "eval_rouge1": 17.3916, |
| "eval_rouge2": 5.8671, |
| "eval_rougeL": 17.2148, |
| "eval_rougeLsum": 17.2502, |
| "eval_runtime": 917.374, |
| "eval_samples_per_second": 5.252, |
| "eval_steps_per_second": 2.626, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 5.5022748110090474e-05, |
| "loss": 3.0904, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.35, |
| "eval_gen_len": 22.1492, |
| "eval_loss": 2.7790260314941406, |
| "eval_rouge1": 17.419, |
| "eval_rouge2": 5.6689, |
| "eval_rougeL": 17.3125, |
| "eval_rougeLsum": 17.3058, |
| "eval_runtime": 879.4764, |
| "eval_samples_per_second": 5.478, |
| "eval_steps_per_second": 2.739, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 5.3292853806632414e-05, |
| "loss": 3.0877, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.4, |
| "eval_gen_len": 21.7522, |
| "eval_loss": 2.7462034225463867, |
| "eval_rouge1": 17.0605, |
| "eval_rouge2": 5.4735, |
| "eval_rougeL": 16.9414, |
| "eval_rougeLsum": 16.9378, |
| "eval_runtime": 878.5335, |
| "eval_samples_per_second": 5.484, |
| "eval_steps_per_second": 2.742, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 5.1562959503174354e-05, |
| "loss": 3.0694, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.45, |
| "eval_gen_len": 23.2005, |
| "eval_loss": 2.75631046295166, |
| "eval_rouge1": 17.752, |
| "eval_rouge2": 5.8889, |
| "eval_rougeL": 17.5967, |
| "eval_rougeLsum": 17.619, |
| "eval_runtime": 928.0873, |
| "eval_samples_per_second": 5.191, |
| "eval_steps_per_second": 2.596, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 4.98330651997163e-05, |
| "loss": 3.0498, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.51, |
| "eval_gen_len": 21.9369, |
| "eval_loss": 2.752108335494995, |
| "eval_rouge1": 17.9056, |
| "eval_rouge2": 5.7754, |
| "eval_rougeL": 17.7624, |
| "eval_rougeLsum": 17.7836, |
| "eval_runtime": 872.1773, |
| "eval_samples_per_second": 5.524, |
| "eval_steps_per_second": 2.762, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 4.810317089625824e-05, |
| "loss": 3.0566, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.56, |
| "eval_gen_len": 22.2358, |
| "eval_loss": 2.7468161582946777, |
| "eval_rouge1": 18.6531, |
| "eval_rouge2": 6.0538, |
| "eval_rougeL": 18.5397, |
| "eval_rougeLsum": 18.5038, |
| "eval_runtime": 878.1686, |
| "eval_samples_per_second": 5.486, |
| "eval_steps_per_second": 2.743, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 4.637327659280018e-05, |
| "loss": 3.0489, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.61, |
| "eval_gen_len": 22.0108, |
| "eval_loss": 2.7450203895568848, |
| "eval_rouge1": 18.4869, |
| "eval_rouge2": 5.9297, |
| "eval_rougeL": 18.3139, |
| "eval_rougeLsum": 18.3169, |
| "eval_runtime": 856.3376, |
| "eval_samples_per_second": 5.626, |
| "eval_steps_per_second": 2.813, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 4.464338228934212e-05, |
| "loss": 3.0247, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.66, |
| "eval_gen_len": 22.2071, |
| "eval_loss": 2.744947671890259, |
| "eval_rouge1": 18.5192, |
| "eval_rouge2": 5.9966, |
| "eval_rougeL": 18.3721, |
| "eval_rougeLsum": 18.3569, |
| "eval_runtime": 887.9355, |
| "eval_samples_per_second": 5.426, |
| "eval_steps_per_second": 2.713, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 4.291348798588406e-05, |
| "loss": 2.9877, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.71, |
| "eval_gen_len": 21.4595, |
| "eval_loss": 2.7159626483917236, |
| "eval_rouge1": 18.1655, |
| "eval_rouge2": 5.9294, |
| "eval_rougeL": 18.0304, |
| "eval_rougeLsum": 18.0836, |
| "eval_runtime": 847.8313, |
| "eval_samples_per_second": 5.683, |
| "eval_steps_per_second": 2.841, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 4.118359368242601e-05, |
| "loss": 3.0383, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_gen_len": 22.9732, |
| "eval_loss": 2.720228433609009, |
| "eval_rouge1": 18.4959, |
| "eval_rouge2": 6.2413, |
| "eval_rougeL": 18.3363, |
| "eval_rougeLsum": 18.3431, |
| "eval_runtime": 911.6184, |
| "eval_samples_per_second": 5.285, |
| "eval_steps_per_second": 2.643, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 3.945369937896794e-05, |
| "loss": 3.041, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.82, |
| "eval_gen_len": 21.9435, |
| "eval_loss": 2.6947648525238037, |
| "eval_rouge1": 17.5306, |
| "eval_rouge2": 5.8119, |
| "eval_rougeL": 17.4011, |
| "eval_rougeLsum": 17.4149, |
| "eval_runtime": 881.522, |
| "eval_samples_per_second": 5.466, |
| "eval_steps_per_second": 2.733, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 3.772380507550989e-05, |
| "loss": 2.9285, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.87, |
| "eval_gen_len": 22.5174, |
| "eval_loss": 2.6956820487976074, |
| "eval_rouge1": 18.6418, |
| "eval_rouge2": 6.1394, |
| "eval_rougeL": 18.514, |
| "eval_rougeLsum": 18.4823, |
| "eval_runtime": 891.2207, |
| "eval_samples_per_second": 5.406, |
| "eval_steps_per_second": 2.703, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 3.599391077205183e-05, |
| "loss": 3.0556, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.92, |
| "eval_gen_len": 22.9315, |
| "eval_loss": 2.7000110149383545, |
| "eval_rouge1": 18.7387, |
| "eval_rouge2": 6.0585, |
| "eval_rougeL": 18.5761, |
| "eval_rougeLsum": 18.574, |
| "eval_runtime": 896.5509, |
| "eval_samples_per_second": 5.374, |
| "eval_steps_per_second": 2.687, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 3.426401646859377e-05, |
| "loss": 3.0033, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.97, |
| "eval_gen_len": 22.4726, |
| "eval_loss": 2.697437047958374, |
| "eval_rouge1": 17.9387, |
| "eval_rouge2": 6.1387, |
| "eval_rougeL": 17.8271, |
| "eval_rougeLsum": 17.8111, |
| "eval_runtime": 892.312, |
| "eval_samples_per_second": 5.399, |
| "eval_steps_per_second": 2.7, |
| "step": 38000 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 3.253412216513571e-05, |
| "loss": 2.9207, |
| "step": 39000 |
| }, |
| { |
| "epoch": 2.02, |
| "eval_gen_len": 23.0274, |
| "eval_loss": 2.699842929840088, |
| "eval_rouge1": 18.6073, |
| "eval_rouge2": 6.1906, |
| "eval_rougeL": 18.3891, |
| "eval_rougeLsum": 18.4103, |
| "eval_runtime": 911.8188, |
| "eval_samples_per_second": 5.284, |
| "eval_steps_per_second": 2.642, |
| "step": 39000 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 3.080422786167765e-05, |
| "loss": 2.8922, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.08, |
| "eval_gen_len": 22.0697, |
| "eval_loss": 2.67978572845459, |
| "eval_rouge1": 18.4017, |
| "eval_rouge2": 6.2244, |
| "eval_rougeL": 18.2321, |
| "eval_rougeLsum": 18.2296, |
| "eval_runtime": 869.5208, |
| "eval_samples_per_second": 5.541, |
| "eval_steps_per_second": 2.77, |
| "step": 40000 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 2.9074333558219595e-05, |
| "loss": 2.8938, |
| "step": 41000 |
| }, |
| { |
| "epoch": 2.13, |
| "eval_gen_len": 21.7017, |
| "eval_loss": 2.666600227355957, |
| "eval_rouge1": 18.8016, |
| "eval_rouge2": 6.2066, |
| "eval_rougeL": 18.6411, |
| "eval_rougeLsum": 18.6353, |
| "eval_runtime": 845.165, |
| "eval_samples_per_second": 5.701, |
| "eval_steps_per_second": 2.85, |
| "step": 41000 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 2.7344439254761532e-05, |
| "loss": 2.9124, |
| "step": 42000 |
| }, |
| { |
| "epoch": 2.18, |
| "eval_gen_len": 21.4303, |
| "eval_loss": 2.6605563163757324, |
| "eval_rouge1": 18.7544, |
| "eval_rouge2": 6.3533, |
| "eval_rougeL": 18.5923, |
| "eval_rougeLsum": 18.5739, |
| "eval_runtime": 843.6756, |
| "eval_samples_per_second": 5.711, |
| "eval_steps_per_second": 2.855, |
| "step": 42000 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 2.5614544951303476e-05, |
| "loss": 2.8597, |
| "step": 43000 |
| }, |
| { |
| "epoch": 2.23, |
| "eval_gen_len": 22.3352, |
| "eval_loss": 2.694676399230957, |
| "eval_rouge1": 18.8672, |
| "eval_rouge2": 6.4526, |
| "eval_rougeL": 18.7416, |
| "eval_rougeLsum": 18.7482, |
| "eval_runtime": 884.9439, |
| "eval_samples_per_second": 5.444, |
| "eval_steps_per_second": 2.722, |
| "step": 43000 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 2.388465064784542e-05, |
| "loss": 2.8435, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.28, |
| "eval_gen_len": 21.9081, |
| "eval_loss": 2.6738336086273193, |
| "eval_rouge1": 18.9405, |
| "eval_rouge2": 6.356, |
| "eval_rougeL": 18.7791, |
| "eval_rougeLsum": 18.7729, |
| "eval_runtime": 862.6512, |
| "eval_samples_per_second": 5.585, |
| "eval_steps_per_second": 2.793, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 2.215475634438736e-05, |
| "loss": 2.8672, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.34, |
| "eval_gen_len": 21.8869, |
| "eval_loss": 2.6733603477478027, |
| "eval_rouge1": 18.7509, |
| "eval_rouge2": 6.3991, |
| "eval_rougeL": 18.6175, |
| "eval_rougeLsum": 18.5828, |
| "eval_runtime": 863.0328, |
| "eval_samples_per_second": 5.583, |
| "eval_steps_per_second": 2.791, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 2.04248620409293e-05, |
| "loss": 2.899, |
| "step": 46000 |
| }, |
| { |
| "epoch": 2.39, |
| "eval_gen_len": 21.7694, |
| "eval_loss": 2.6575164794921875, |
| "eval_rouge1": 18.5529, |
| "eval_rouge2": 6.3489, |
| "eval_rougeL": 18.4139, |
| "eval_rougeLsum": 18.401, |
| "eval_runtime": 860.1836, |
| "eval_samples_per_second": 5.601, |
| "eval_steps_per_second": 2.801, |
| "step": 46000 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.869496773747124e-05, |
| "loss": 2.8616, |
| "step": 47000 |
| }, |
| { |
| "epoch": 2.44, |
| "eval_gen_len": 21.5685, |
| "eval_loss": 2.6484768390655518, |
| "eval_rouge1": 18.7563, |
| "eval_rouge2": 6.268, |
| "eval_rougeL": 18.6368, |
| "eval_rougeLsum": 18.6253, |
| "eval_runtime": 854.7636, |
| "eval_samples_per_second": 5.637, |
| "eval_steps_per_second": 2.818, |
| "step": 47000 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 1.696507343401318e-05, |
| "loss": 2.8937, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.49, |
| "eval_gen_len": 22.3337, |
| "eval_loss": 2.648592472076416, |
| "eval_rouge1": 18.6525, |
| "eval_rouge2": 6.3426, |
| "eval_rougeL": 18.5184, |
| "eval_rougeLsum": 18.5129, |
| "eval_runtime": 882.9047, |
| "eval_samples_per_second": 5.457, |
| "eval_steps_per_second": 2.728, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 1.5235179130555125e-05, |
| "loss": 2.8446, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2.54, |
| "eval_gen_len": 22.3331, |
| "eval_loss": 2.657186508178711, |
| "eval_rouge1": 18.6529, |
| "eval_rouge2": 6.2655, |
| "eval_rougeL": 18.4915, |
| "eval_rougeLsum": 18.4764, |
| "eval_runtime": 873.4651, |
| "eval_samples_per_second": 5.516, |
| "eval_steps_per_second": 2.758, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 1.3505284827097065e-05, |
| "loss": 2.8676, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.59, |
| "eval_gen_len": 22.132, |
| "eval_loss": 2.6608123779296875, |
| "eval_rouge1": 19.0913, |
| "eval_rouge2": 6.494, |
| "eval_rougeL": 18.929, |
| "eval_rougeLsum": 18.9233, |
| "eval_runtime": 867.8739, |
| "eval_samples_per_second": 5.551, |
| "eval_steps_per_second": 2.776, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 1.1775390523639005e-05, |
| "loss": 2.8794, |
| "step": 51000 |
| }, |
| { |
| "epoch": 2.65, |
| "eval_gen_len": 22.2414, |
| "eval_loss": 2.6582980155944824, |
| "eval_rouge1": 18.7648, |
| "eval_rouge2": 6.459, |
| "eval_rougeL": 18.6276, |
| "eval_rougeLsum": 18.6125, |
| "eval_runtime": 877.0489, |
| "eval_samples_per_second": 5.493, |
| "eval_steps_per_second": 2.747, |
| "step": 51000 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 1.0045496220180947e-05, |
| "loss": 2.8836, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.7, |
| "eval_gen_len": 22.2551, |
| "eval_loss": 2.6512138843536377, |
| "eval_rouge1": 18.7243, |
| "eval_rouge2": 6.3865, |
| "eval_rougeL": 18.5848, |
| "eval_rougeLsum": 18.5763, |
| "eval_runtime": 879.8072, |
| "eval_samples_per_second": 5.476, |
| "eval_steps_per_second": 2.738, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 8.315601916722888e-06, |
| "loss": 2.8174, |
| "step": 53000 |
| }, |
| { |
| "epoch": 2.75, |
| "eval_gen_len": 22.1243, |
| "eval_loss": 2.640946865081787, |
| "eval_rouge1": 18.9393, |
| "eval_rouge2": 6.3914, |
| "eval_rougeL": 18.7733, |
| "eval_rougeLsum": 18.7715, |
| "eval_runtime": 881.5366, |
| "eval_samples_per_second": 5.465, |
| "eval_steps_per_second": 2.733, |
| "step": 53000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 6.58570761326483e-06, |
| "loss": 2.8494, |
| "step": 54000 |
| }, |
| { |
| "epoch": 2.8, |
| "eval_gen_len": 21.7638, |
| "eval_loss": 2.639634132385254, |
| "eval_rouge1": 18.6126, |
| "eval_rouge2": 6.4389, |
| "eval_rougeL": 18.4673, |
| "eval_rougeLsum": 18.4516, |
| "eval_runtime": 860.8517, |
| "eval_samples_per_second": 5.597, |
| "eval_steps_per_second": 2.798, |
| "step": 54000 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 4.855813309806771e-06, |
| "loss": 2.9025, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.85, |
| "eval_gen_len": 22.1086, |
| "eval_loss": 2.63411021232605, |
| "eval_rouge1": 18.7681, |
| "eval_rouge2": 6.3762, |
| "eval_rougeL": 18.6081, |
| "eval_rougeLsum": 18.6173, |
| "eval_runtime": 872.623, |
| "eval_samples_per_second": 5.521, |
| "eval_steps_per_second": 2.761, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 3.1259190063487117e-06, |
| "loss": 2.8754, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.91, |
| "eval_gen_len": 22.3497, |
| "eval_loss": 2.638812780380249, |
| "eval_rouge1": 19.0828, |
| "eval_rouge2": 6.5203, |
| "eval_rougeL": 18.9334, |
| "eval_rougeLsum": 18.9285, |
| "eval_runtime": 879.763, |
| "eval_samples_per_second": 5.476, |
| "eval_steps_per_second": 2.738, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 1.3960247028906535e-06, |
| "loss": 2.8489, |
| "step": 57000 |
| }, |
| { |
| "epoch": 2.96, |
| "eval_gen_len": 21.9321, |
| "eval_loss": 2.637495756149292, |
| "eval_rouge1": 18.9219, |
| "eval_rouge2": 6.4922, |
| "eval_rougeL": 18.763, |
| "eval_rougeLsum": 18.7437, |
| "eval_runtime": 865.2523, |
| "eval_samples_per_second": 5.568, |
| "eval_steps_per_second": 2.784, |
| "step": 57000 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 57807, |
| "total_flos": 8.129568206380646e+16, |
| "train_loss": 3.210809704903007, |
| "train_runtime": 55943.4873, |
| "train_samples_per_second": 2.067, |
| "train_steps_per_second": 1.033 |
| } |
| ], |
| "max_steps": 57807, |
| "num_train_epochs": 3, |
| "total_flos": 8.129568206380646e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|