| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 17.770034843205575, | |
| "global_step": 5100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 2.4902343750000002e-05, | |
| "loss": 3.791, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_average_rogue": 0.2125, | |
| "eval_loss": 3.328416347503662, | |
| "eval_rouge1_fmeasure": 0.3288, | |
| "eval_rouge1_precision": 0.3115, | |
| "eval_rouge1_recall": 0.3637, | |
| "eval_rouge2_fmeasure": 0.06, | |
| "eval_rouge2_precision": 0.0569, | |
| "eval_rouge2_recall": 0.0664, | |
| "eval_rougeL_fmeasure": 0.145, | |
| "eval_rougeL_precision": 0.1366, | |
| "eval_rougeL_recall": 0.1631, | |
| "eval_rougeLsum_fmeasure": 0.3161, | |
| "eval_rougeLsum_precision": 0.2997, | |
| "eval_rougeLsum_recall": 0.3496, | |
| "eval_runtime": 3350.2964, | |
| "eval_samples_per_second": 0.038, | |
| "eval_steps_per_second": 0.038, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 4.9804687500000004e-05, | |
| "loss": 3.3793, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_average_rogue": 0.2109, | |
| "eval_loss": 3.268017292022705, | |
| "eval_rouge1_fmeasure": 0.3285, | |
| "eval_rouge1_precision": 0.3068, | |
| "eval_rouge1_recall": 0.3699, | |
| "eval_rouge2_fmeasure": 0.0571, | |
| "eval_rouge2_precision": 0.053, | |
| "eval_rouge2_recall": 0.065, | |
| "eval_rougeL_fmeasure": 0.1417, | |
| "eval_rougeL_precision": 0.1315, | |
| "eval_rougeL_recall": 0.1631, | |
| "eval_rougeLsum_fmeasure": 0.3163, | |
| "eval_rougeLsum_precision": 0.2954, | |
| "eval_rougeLsum_recall": 0.356, | |
| "eval_runtime": 3338.0119, | |
| "eval_samples_per_second": 0.038, | |
| "eval_steps_per_second": 0.038, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 4.758033664881408e-05, | |
| "loss": 3.1826, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_average_rogue": 0.214, | |
| "eval_loss": 3.245297431945801, | |
| "eval_rouge1_fmeasure": 0.3293, | |
| "eval_rouge1_precision": 0.3099, | |
| "eval_rouge1_recall": 0.3668, | |
| "eval_rouge2_fmeasure": 0.0644, | |
| "eval_rouge2_precision": 0.0605, | |
| "eval_rouge2_recall": 0.0717, | |
| "eval_rougeL_fmeasure": 0.145, | |
| "eval_rougeL_precision": 0.1356, | |
| "eval_rougeL_recall": 0.1647, | |
| "eval_rougeLsum_fmeasure": 0.3173, | |
| "eval_rougeLsum_precision": 0.2986, | |
| "eval_rougeLsum_recall": 0.3531, | |
| "eval_runtime": 3330.8537, | |
| "eval_samples_per_second": 0.038, | |
| "eval_steps_per_second": 0.038, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 4.5141545524101e-05, | |
| "loss": 3.0133, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "eval_average_rogue": 0.1948, | |
| "eval_loss": 3.2580819129943848, | |
| "eval_rouge1_fmeasure": 0.2983, | |
| "eval_rouge1_precision": 0.2893, | |
| "eval_rouge1_recall": 0.3239, | |
| "eval_rouge2_fmeasure": 0.0545, | |
| "eval_rouge2_precision": 0.0527, | |
| "eval_rouge2_recall": 0.0591, | |
| "eval_rougeL_fmeasure": 0.1397, | |
| "eval_rougeL_precision": 0.1356, | |
| "eval_rougeL_recall": 0.1534, | |
| "eval_rougeLsum_fmeasure": 0.2865, | |
| "eval_rougeLsum_precision": 0.2776, | |
| "eval_rougeLsum_recall": 0.3105, | |
| "eval_runtime": 3289.5263, | |
| "eval_samples_per_second": 0.039, | |
| "eval_steps_per_second": 0.039, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 4.2702754399387915e-05, | |
| "loss": 2.8569, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "eval_average_rogue": 0.2256, | |
| "eval_loss": 3.2716007232666016, | |
| "eval_rouge1_fmeasure": 0.3491, | |
| "eval_rouge1_precision": 0.3326, | |
| "eval_rouge1_recall": 0.3835, | |
| "eval_rouge2_fmeasure": 0.0671, | |
| "eval_rouge2_precision": 0.0641, | |
| "eval_rouge2_recall": 0.0732, | |
| "eval_rougeL_fmeasure": 0.1516, | |
| "eval_rougeL_precision": 0.1436, | |
| "eval_rougeL_recall": 0.1701, | |
| "eval_rougeLsum_fmeasure": 0.3346, | |
| "eval_rougeLsum_precision": 0.3189, | |
| "eval_rougeLsum_recall": 0.368, | |
| "eval_runtime": 3297.3203, | |
| "eval_samples_per_second": 0.039, | |
| "eval_steps_per_second": 0.039, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 4.026396327467483e-05, | |
| "loss": 2.7008, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "eval_average_rogue": 0.2285, | |
| "eval_loss": 3.313244104385376, | |
| "eval_rouge1_fmeasure": 0.3531, | |
| "eval_rouge1_precision": 0.3326, | |
| "eval_rouge1_recall": 0.3925, | |
| "eval_rouge2_fmeasure": 0.0693, | |
| "eval_rouge2_precision": 0.0653, | |
| "eval_rouge2_recall": 0.0772, | |
| "eval_rougeL_fmeasure": 0.1521, | |
| "eval_rougeL_precision": 0.1427, | |
| "eval_rougeL_recall": 0.1725, | |
| "eval_rougeLsum_fmeasure": 0.3396, | |
| "eval_rougeLsum_precision": 0.3202, | |
| "eval_rougeLsum_recall": 0.3776, | |
| "eval_runtime": 3297.2909, | |
| "eval_samples_per_second": 0.039, | |
| "eval_steps_per_second": 0.039, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "learning_rate": 3.7825172149961744e-05, | |
| "loss": 2.5657, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "eval_average_rogue": 0.2241, | |
| "eval_loss": 3.349586009979248, | |
| "eval_rouge1_fmeasure": 0.3473, | |
| "eval_rouge1_precision": 0.3291, | |
| "eval_rouge1_recall": 0.385, | |
| "eval_rouge2_fmeasure": 0.0658, | |
| "eval_rouge2_precision": 0.0622, | |
| "eval_rouge2_recall": 0.0731, | |
| "eval_rougeL_fmeasure": 0.1504, | |
| "eval_rougeL_precision": 0.1416, | |
| "eval_rougeL_recall": 0.17, | |
| "eval_rougeLsum_fmeasure": 0.3326, | |
| "eval_rougeLsum_precision": 0.3154, | |
| "eval_rougeLsum_recall": 0.3683, | |
| "eval_runtime": 3292.4741, | |
| "eval_samples_per_second": 0.039, | |
| "eval_steps_per_second": 0.039, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "learning_rate": 3.538638102524866e-05, | |
| "loss": 2.4327, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 7.11, | |
| "eval_average_rogue": 0.2298, | |
| "eval_loss": 3.437542200088501, | |
| "eval_rouge1_fmeasure": 0.3551, | |
| "eval_rouge1_precision": 0.3347, | |
| "eval_rouge1_recall": 0.3946, | |
| "eval_rouge2_fmeasure": 0.0703, | |
| "eval_rouge2_precision": 0.0664, | |
| "eval_rouge2_recall": 0.0778, | |
| "eval_rougeL_fmeasure": 0.1527, | |
| "eval_rougeL_precision": 0.1433, | |
| "eval_rougeL_recall": 0.1732, | |
| "eval_rougeLsum_fmeasure": 0.341, | |
| "eval_rougeLsum_precision": 0.3218, | |
| "eval_rougeLsum_recall": 0.3788, | |
| "eval_runtime": 3284.3798, | |
| "eval_samples_per_second": 0.039, | |
| "eval_steps_per_second": 0.039, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 3.294758990053558e-05, | |
| "loss": 2.3352, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_average_rogue": 0.2284, | |
| "eval_loss": 3.4355413913726807, | |
| "eval_rouge1_fmeasure": 0.3551, | |
| "eval_rouge1_precision": 0.3334, | |
| "eval_rouge1_recall": 0.397, | |
| "eval_rouge2_fmeasure": 0.0682, | |
| "eval_rouge2_precision": 0.064, | |
| "eval_rouge2_recall": 0.0762, | |
| "eval_rougeL_fmeasure": 0.1507, | |
| "eval_rougeL_precision": 0.1406, | |
| "eval_rougeL_recall": 0.1721, | |
| "eval_rougeLsum_fmeasure": 0.3396, | |
| "eval_rougeLsum_precision": 0.3189, | |
| "eval_rougeLsum_recall": 0.3797, | |
| "eval_runtime": 3319.5759, | |
| "eval_samples_per_second": 0.039, | |
| "eval_steps_per_second": 0.039, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 3.0508798775822494e-05, | |
| "loss": 2.2068, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "eval_average_rogue": 0.2319, | |
| "eval_loss": 3.517580032348633, | |
| "eval_rouge1_fmeasure": 0.3593, | |
| "eval_rouge1_precision": 0.3378, | |
| "eval_rouge1_recall": 0.4005, | |
| "eval_rouge2_fmeasure": 0.0711, | |
| "eval_rouge2_precision": 0.0669, | |
| "eval_rouge2_recall": 0.0793, | |
| "eval_rougeL_fmeasure": 0.153, | |
| "eval_rougeL_precision": 0.1431, | |
| "eval_rougeL_recall": 0.1738, | |
| "eval_rougeLsum_fmeasure": 0.3441, | |
| "eval_rougeLsum_precision": 0.3239, | |
| "eval_rougeLsum_recall": 0.383, | |
| "eval_runtime": 3268.2597, | |
| "eval_samples_per_second": 0.039, | |
| "eval_steps_per_second": 0.039, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "learning_rate": 2.8070007651109415e-05, | |
| "loss": 2.1177, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 9.77, | |
| "eval_average_rogue": 0.2322, | |
| "eval_loss": 3.5942444801330566, | |
| "eval_rouge1_fmeasure": 0.3615, | |
| "eval_rouge1_precision": 0.3407, | |
| "eval_rouge1_recall": 0.402, | |
| "eval_rouge2_fmeasure": 0.0691, | |
| "eval_rouge2_precision": 0.0652, | |
| "eval_rouge2_recall": 0.0771, | |
| "eval_rougeL_fmeasure": 0.1516, | |
| "eval_rougeL_precision": 0.1422, | |
| "eval_rougeL_recall": 0.1722, | |
| "eval_rougeLsum_fmeasure": 0.3465, | |
| "eval_rougeLsum_precision": 0.3267, | |
| "eval_rougeLsum_recall": 0.3853, | |
| "eval_runtime": 3329.9183, | |
| "eval_samples_per_second": 0.038, | |
| "eval_steps_per_second": 0.038, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 10.66, | |
| "learning_rate": 2.563121652639633e-05, | |
| "loss": 2.0452, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 10.66, | |
| "eval_average_rogue": 0.2329, | |
| "eval_loss": 3.618927001953125, | |
| "eval_rouge1_fmeasure": 0.3627, | |
| "eval_rouge1_precision": 0.3451, | |
| "eval_rouge1_recall": 0.3992, | |
| "eval_rouge2_fmeasure": 0.0688, | |
| "eval_rouge2_precision": 0.0655, | |
| "eval_rouge2_recall": 0.0756, | |
| "eval_rougeL_fmeasure": 0.152, | |
| "eval_rougeL_precision": 0.1437, | |
| "eval_rougeL_recall": 0.1712, | |
| "eval_rougeLsum_fmeasure": 0.3482, | |
| "eval_rougeLsum_precision": 0.3317, | |
| "eval_rougeLsum_recall": 0.383, | |
| "eval_runtime": 3288.5395, | |
| "eval_samples_per_second": 0.039, | |
| "eval_steps_per_second": 0.039, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 11.55, | |
| "learning_rate": 2.3192425401683247e-05, | |
| "loss": 1.9276, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 11.55, | |
| "eval_average_rogue": 0.235, | |
| "eval_loss": 3.685786247253418, | |
| "eval_rouge1_fmeasure": 0.3662, | |
| "eval_rouge1_precision": 0.3468, | |
| "eval_rouge1_recall": 0.4051, | |
| "eval_rouge2_fmeasure": 0.0706, | |
| "eval_rouge2_precision": 0.0668, | |
| "eval_rouge2_recall": 0.0788, | |
| "eval_rougeL_fmeasure": 0.1525, | |
| "eval_rougeL_precision": 0.1437, | |
| "eval_rougeL_recall": 0.1723, | |
| "eval_rougeLsum_fmeasure": 0.3508, | |
| "eval_rougeLsum_precision": 0.3328, | |
| "eval_rougeLsum_recall": 0.3883, | |
| "eval_runtime": 3283.2817, | |
| "eval_samples_per_second": 0.039, | |
| "eval_steps_per_second": 0.039, | |
| "step": 3315 | |
| }, | |
| { | |
| "epoch": 12.44, | |
| "learning_rate": 2.0753634276970162e-05, | |
| "loss": 1.9006, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 12.44, | |
| "eval_average_rogue": 0.2331, | |
| "eval_loss": 3.7175817489624023, | |
| "eval_rouge1_fmeasure": 0.3627, | |
| "eval_rouge1_precision": 0.3429, | |
| "eval_rouge1_recall": 0.4025, | |
| "eval_rouge2_fmeasure": 0.0698, | |
| "eval_rouge2_precision": 0.0659, | |
| "eval_rouge2_recall": 0.0783, | |
| "eval_rougeL_fmeasure": 0.152, | |
| "eval_rougeL_precision": 0.1429, | |
| "eval_rougeL_recall": 0.1722, | |
| "eval_rougeLsum_fmeasure": 0.3477, | |
| "eval_rougeLsum_precision": 0.329, | |
| "eval_rougeLsum_recall": 0.386, | |
| "eval_runtime": 3384.322, | |
| "eval_samples_per_second": 0.038, | |
| "eval_steps_per_second": 0.038, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "learning_rate": 1.8314843152257076e-05, | |
| "loss": 1.8247, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "eval_average_rogue": 0.2358, | |
| "eval_loss": 3.7242326736450195, | |
| "eval_rouge1_fmeasure": 0.3679, | |
| "eval_rouge1_precision": 0.3481, | |
| "eval_rouge1_recall": 0.4077, | |
| "eval_rouge2_fmeasure": 0.0698, | |
| "eval_rouge2_precision": 0.066, | |
| "eval_rouge2_recall": 0.0773, | |
| "eval_rougeL_fmeasure": 0.1537, | |
| "eval_rougeL_precision": 0.1444, | |
| "eval_rougeL_recall": 0.1741, | |
| "eval_rougeLsum_fmeasure": 0.3517, | |
| "eval_rougeLsum_precision": 0.3323, | |
| "eval_rougeLsum_recall": 0.3894, | |
| "eval_runtime": 3438.3144, | |
| "eval_samples_per_second": 0.037, | |
| "eval_steps_per_second": 0.037, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 14.22, | |
| "learning_rate": 1.5876052027543994e-05, | |
| "loss": 1.7352, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 14.22, | |
| "eval_average_rogue": 0.2336, | |
| "eval_loss": 3.790210723876953, | |
| "eval_rouge1_fmeasure": 0.3648, | |
| "eval_rouge1_precision": 0.3457, | |
| "eval_rouge1_recall": 0.4025, | |
| "eval_rouge2_fmeasure": 0.0702, | |
| "eval_rouge2_precision": 0.0667, | |
| "eval_rouge2_recall": 0.0776, | |
| "eval_rougeL_fmeasure": 0.1508, | |
| "eval_rougeL_precision": 0.1422, | |
| "eval_rougeL_recall": 0.1702, | |
| "eval_rougeLsum_fmeasure": 0.3486, | |
| "eval_rougeLsum_precision": 0.3307, | |
| "eval_rougeLsum_recall": 0.3849, | |
| "eval_runtime": 3429.0398, | |
| "eval_samples_per_second": 0.037, | |
| "eval_steps_per_second": 0.037, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 15.1, | |
| "learning_rate": 1.3437260902830912e-05, | |
| "loss": 1.7091, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 15.1, | |
| "eval_average_rogue": 0.2351, | |
| "eval_loss": 3.8391542434692383, | |
| "eval_rouge1_fmeasure": 0.3664, | |
| "eval_rouge1_precision": 0.346, | |
| "eval_rouge1_recall": 0.4069, | |
| "eval_rouge2_fmeasure": 0.0706, | |
| "eval_rouge2_precision": 0.0666, | |
| "eval_rouge2_recall": 0.0786, | |
| "eval_rougeL_fmeasure": 0.1527, | |
| "eval_rougeL_precision": 0.1435, | |
| "eval_rougeL_recall": 0.1728, | |
| "eval_rougeLsum_fmeasure": 0.3509, | |
| "eval_rougeLsum_precision": 0.331, | |
| "eval_rougeLsum_recall": 0.3892, | |
| "eval_runtime": 3445.0892, | |
| "eval_samples_per_second": 0.037, | |
| "eval_steps_per_second": 0.037, | |
| "step": 4335 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "learning_rate": 1.0998469778117827e-05, | |
| "loss": 1.654, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "eval_average_rogue": 0.2347, | |
| "eval_loss": 3.8251237869262695, | |
| "eval_rouge1_fmeasure": 0.3674, | |
| "eval_rouge1_precision": 0.3475, | |
| "eval_rouge1_recall": 0.4065, | |
| "eval_rouge2_fmeasure": 0.0692, | |
| "eval_rouge2_precision": 0.0655, | |
| "eval_rouge2_recall": 0.0767, | |
| "eval_rougeL_fmeasure": 0.1515, | |
| "eval_rougeL_precision": 0.1425, | |
| "eval_rougeL_recall": 0.1713, | |
| "eval_rougeLsum_fmeasure": 0.3508, | |
| "eval_rougeLsum_precision": 0.3318, | |
| "eval_rougeLsum_recall": 0.3883, | |
| "eval_runtime": 3431.7145, | |
| "eval_samples_per_second": 0.037, | |
| "eval_steps_per_second": 0.037, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 16.88, | |
| "learning_rate": 8.559678653404744e-06, | |
| "loss": 1.6034, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 16.88, | |
| "eval_average_rogue": 0.2342, | |
| "eval_loss": 3.8599014282226562, | |
| "eval_rouge1_fmeasure": 0.3653, | |
| "eval_rouge1_precision": 0.3449, | |
| "eval_rouge1_recall": 0.4056, | |
| "eval_rouge2_fmeasure": 0.0694, | |
| "eval_rouge2_precision": 0.0655, | |
| "eval_rouge2_recall": 0.0771, | |
| "eval_rougeL_fmeasure": 0.1531, | |
| "eval_rougeL_precision": 0.1438, | |
| "eval_rougeL_recall": 0.1738, | |
| "eval_rougeLsum_fmeasure": 0.3491, | |
| "eval_rougeLsum_precision": 0.3295, | |
| "eval_rougeLsum_recall": 0.3877, | |
| "eval_runtime": 3434.4477, | |
| "eval_samples_per_second": 0.037, | |
| "eval_steps_per_second": 0.037, | |
| "step": 4845 | |
| }, | |
| { | |
| "epoch": 17.77, | |
| "learning_rate": 6.120887528691661e-06, | |
| "loss": 1.5801, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 17.77, | |
| "eval_average_rogue": 0.2336, | |
| "eval_loss": 3.8798491954803467, | |
| "eval_rouge1_fmeasure": 0.3647, | |
| "eval_rouge1_precision": 0.3432, | |
| "eval_rouge1_recall": 0.4057, | |
| "eval_rouge2_fmeasure": 0.0682, | |
| "eval_rouge2_precision": 0.0643, | |
| "eval_rouge2_recall": 0.0758, | |
| "eval_rougeL_fmeasure": 0.1523, | |
| "eval_rougeL_precision": 0.1426, | |
| "eval_rougeL_recall": 0.1732, | |
| "eval_rougeLsum_fmeasure": 0.3491, | |
| "eval_rougeLsum_precision": 0.329, | |
| "eval_rougeLsum_recall": 0.3882, | |
| "eval_runtime": 3446.3776, | |
| "eval_samples_per_second": 0.037, | |
| "eval_steps_per_second": 0.037, | |
| "step": 5100 | |
| } | |
| ], | |
| "max_steps": 5740, | |
| "num_train_epochs": 20, | |
| "total_flos": 1.37710446575616e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |