| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 19.78531558608845, | |
| "eval_steps": 500, | |
| "global_step": 720, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.0001, | |
| "loss": 8.9608, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_bleu": 0.2352, | |
| "eval_bp": 0.828, | |
| "eval_counts_1": 2306, | |
| "eval_counts_2": 50, | |
| "eval_counts_3": 12, | |
| "eval_counts_4": 2, | |
| "eval_exact_match": 0.0, | |
| "eval_f1": 0.0092, | |
| "eval_gen_len": 3.1969, | |
| "eval_loss": 2.8882896900177, | |
| "eval_precisions_1": 12.9, | |
| "eval_precisions_2": 0.319, | |
| "eval_precisions_3": 0.0891, | |
| "eval_precisions_4": 0.0178, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.0081, | |
| "eval_rouge2": 0.0022, | |
| "eval_rougeL": 0.0078, | |
| "eval_rougeLsum": 0.0078, | |
| "eval_runtime": 386.3015, | |
| "eval_samples_per_second": 5.705, | |
| "eval_steps_per_second": 1.426, | |
| "eval_sys_len": 17876, | |
| "eval_totals_1": 17876, | |
| "eval_totals_2": 15672, | |
| "eval_totals_3": 13468, | |
| "eval_totals_4": 11264, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 0.0001, | |
| "loss": 3.2364, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_bleu": 6.7083, | |
| "eval_bp": 0.9954, | |
| "eval_counts_1": 6125, | |
| "eval_counts_2": 1727, | |
| "eval_counts_3": 687, | |
| "eval_counts_4": 277, | |
| "eval_exact_match": 0.0018, | |
| "eval_f1": 0.2514, | |
| "eval_gen_len": 11.8072, | |
| "eval_loss": 1.9241770505905151, | |
| "eval_precisions_1": 28.9571, | |
| "eval_precisions_2": 9.1144, | |
| "eval_precisions_3": 4.103, | |
| "eval_precisions_4": 1.9051, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.2457, | |
| "eval_rouge2": 0.1026, | |
| "eval_rougeL": 0.2345, | |
| "eval_rougeLsum": 0.2346, | |
| "eval_runtime": 440.0537, | |
| "eval_samples_per_second": 5.008, | |
| "eval_steps_per_second": 1.252, | |
| "eval_sys_len": 21152, | |
| "eval_totals_1": 21152, | |
| "eval_totals_2": 18948, | |
| "eval_totals_3": 16744, | |
| "eval_totals_4": 14540, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 0.0001, | |
| "loss": 2.4963, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bleu": 9.1493, | |
| "eval_bp": 0.752, | |
| "eval_counts_1": 6903, | |
| "eval_counts_2": 2271, | |
| "eval_counts_3": 975, | |
| "eval_counts_4": 409, | |
| "eval_exact_match": 0.01, | |
| "eval_f1": 0.2909, | |
| "eval_gen_len": 12.176, | |
| "eval_loss": 1.6558014154434204, | |
| "eval_precisions_1": 41.7428, | |
| "eval_precisions_2": 15.8446, | |
| "eval_precisions_3": 8.0386, | |
| "eval_precisions_4": 4.1209, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.2966, | |
| "eval_rouge2": 0.1415, | |
| "eval_rougeL": 0.2854, | |
| "eval_rougeLsum": 0.2852, | |
| "eval_runtime": 434.1741, | |
| "eval_samples_per_second": 5.076, | |
| "eval_steps_per_second": 1.269, | |
| "eval_sys_len": 16537, | |
| "eval_totals_1": 16537, | |
| "eval_totals_2": 14333, | |
| "eval_totals_3": 12129, | |
| "eval_totals_4": 9925, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 0.0001, | |
| "loss": 2.2314, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "eval_bleu": 10.187, | |
| "eval_bp": 0.7573, | |
| "eval_counts_1": 7160, | |
| "eval_counts_2": 2440, | |
| "eval_counts_3": 1098, | |
| "eval_counts_4": 501, | |
| "eval_exact_match": 0.0136, | |
| "eval_f1": 0.3069, | |
| "eval_gen_len": 12.157, | |
| "eval_loss": 1.5771422386169434, | |
| "eval_precisions_1": 43.0625, | |
| "eval_precisions_2": 16.9174, | |
| "eval_precisions_3": 8.986, | |
| "eval_precisions_4": 5.0025, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.314, | |
| "eval_rouge2": 0.1535, | |
| "eval_rougeL": 0.3028, | |
| "eval_rougeLsum": 0.3028, | |
| "eval_runtime": 436.5308, | |
| "eval_samples_per_second": 5.049, | |
| "eval_steps_per_second": 1.262, | |
| "eval_sys_len": 16627, | |
| "eval_totals_1": 16627, | |
| "eval_totals_2": 14423, | |
| "eval_totals_3": 12219, | |
| "eval_totals_4": 10015, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 0.0001, | |
| "loss": 2.0578, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "eval_bleu": 11.0621, | |
| "eval_bp": 0.7961, | |
| "eval_counts_1": 7447, | |
| "eval_counts_2": 2625, | |
| "eval_counts_3": 1214, | |
| "eval_counts_4": 566, | |
| "eval_exact_match": 0.0163, | |
| "eval_f1": 0.32, | |
| "eval_gen_len": 12.5585, | |
| "eval_loss": 1.5346813201904297, | |
| "eval_precisions_1": 43.0338, | |
| "eval_precisions_2": 17.383, | |
| "eval_precisions_3": 9.413, | |
| "eval_precisions_4": 5.2932, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3286, | |
| "eval_rouge2": 0.1628, | |
| "eval_rougeL": 0.3146, | |
| "eval_rougeLsum": 0.3146, | |
| "eval_runtime": 444.2911, | |
| "eval_samples_per_second": 4.961, | |
| "eval_steps_per_second": 1.24, | |
| "eval_sys_len": 17305, | |
| "eval_totals_1": 17305, | |
| "eval_totals_2": 15101, | |
| "eval_totals_3": 12897, | |
| "eval_totals_4": 10693, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8928, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 5.99, | |
| "eval_bleu": 11.4063, | |
| "eval_bp": 0.7556, | |
| "eval_counts_1": 7396, | |
| "eval_counts_2": 2659, | |
| "eval_counts_3": 1257, | |
| "eval_counts_4": 611, | |
| "eval_exact_match": 0.0177, | |
| "eval_f1": 0.3234, | |
| "eval_gen_len": 12.1692, | |
| "eval_loss": 1.512817144393921, | |
| "eval_precisions_1": 44.5596, | |
| "eval_precisions_2": 18.473, | |
| "eval_precisions_3": 10.3117, | |
| "eval_precisions_4": 6.1186, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3326, | |
| "eval_rouge2": 0.1684, | |
| "eval_rougeL": 0.3198, | |
| "eval_rougeLsum": 0.3198, | |
| "eval_runtime": 441.07, | |
| "eval_samples_per_second": 4.997, | |
| "eval_steps_per_second": 1.249, | |
| "eval_sys_len": 16598, | |
| "eval_totals_1": 16598, | |
| "eval_totals_2": 14394, | |
| "eval_totals_3": 12190, | |
| "eval_totals_4": 9986, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8573, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "eval_bleu": 11.8292, | |
| "eval_bp": 0.7631, | |
| "eval_counts_1": 7531, | |
| "eval_counts_2": 2758, | |
| "eval_counts_3": 1313, | |
| "eval_counts_4": 641, | |
| "eval_exact_match": 0.0163, | |
| "eval_f1": 0.327, | |
| "eval_gen_len": 12.3035, | |
| "eval_loss": 1.4735780954360962, | |
| "eval_precisions_1": 45.0203, | |
| "eval_precisions_2": 18.9893, | |
| "eval_precisions_3": 10.6575, | |
| "eval_precisions_4": 6.3365, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3349, | |
| "eval_rouge2": 0.1717, | |
| "eval_rougeL": 0.3216, | |
| "eval_rougeLsum": 0.3216, | |
| "eval_runtime": 442.6304, | |
| "eval_samples_per_second": 4.979, | |
| "eval_steps_per_second": 1.245, | |
| "eval_sys_len": 16728, | |
| "eval_totals_1": 16728, | |
| "eval_totals_2": 14524, | |
| "eval_totals_3": 12320, | |
| "eval_totals_4": 10116, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7361, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bleu": 12.2208, | |
| "eval_bp": 0.7747, | |
| "eval_counts_1": 7658, | |
| "eval_counts_2": 2849, | |
| "eval_counts_3": 1368, | |
| "eval_counts_4": 668, | |
| "eval_exact_match": 0.0181, | |
| "eval_f1": 0.3334, | |
| "eval_gen_len": 12.4628, | |
| "eval_loss": 1.4544174671173096, | |
| "eval_precisions_1": 45.2387, | |
| "eval_precisions_2": 19.3494, | |
| "eval_precisions_3": 10.9265, | |
| "eval_precisions_4": 6.4754, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3414, | |
| "eval_rouge2": 0.1762, | |
| "eval_rougeL": 0.3283, | |
| "eval_rougeLsum": 0.3284, | |
| "eval_runtime": 442.3648, | |
| "eval_samples_per_second": 4.982, | |
| "eval_steps_per_second": 1.246, | |
| "eval_sys_len": 16928, | |
| "eval_totals_1": 16928, | |
| "eval_totals_2": 14724, | |
| "eval_totals_3": 12520, | |
| "eval_totals_4": 10316, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7162, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "eval_bleu": 12.4536, | |
| "eval_bp": 0.767, | |
| "eval_counts_1": 7703, | |
| "eval_counts_2": 2891, | |
| "eval_counts_3": 1390, | |
| "eval_counts_4": 694, | |
| "eval_exact_match": 0.0159, | |
| "eval_f1": 0.3374, | |
| "eval_gen_len": 12.4174, | |
| "eval_loss": 1.4459445476531982, | |
| "eval_precisions_1": 45.8648, | |
| "eval_precisions_2": 19.8136, | |
| "eval_precisions_3": 11.2214, | |
| "eval_precisions_4": 6.8153, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3454, | |
| "eval_rouge2": 0.1785, | |
| "eval_rougeL": 0.3325, | |
| "eval_rougeLsum": 0.3323, | |
| "eval_runtime": 436.4836, | |
| "eval_samples_per_second": 5.049, | |
| "eval_steps_per_second": 1.262, | |
| "eval_sys_len": 16795, | |
| "eval_totals_1": 16795, | |
| "eval_totals_2": 14591, | |
| "eval_totals_3": 12387, | |
| "eval_totals_4": 10183, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "learning_rate": 0.0001, | |
| "loss": 1.6589, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "eval_bleu": 12.8553, | |
| "eval_bp": 0.8002, | |
| "eval_counts_1": 7889, | |
| "eval_counts_2": 2983, | |
| "eval_counts_3": 1449, | |
| "eval_counts_4": 719, | |
| "eval_exact_match": 0.0172, | |
| "eval_f1": 0.3435, | |
| "eval_gen_len": 12.7101, | |
| "eval_loss": 1.438312292098999, | |
| "eval_precisions_1": 45.4017, | |
| "eval_precisions_2": 19.6612, | |
| "eval_precisions_3": 11.1737, | |
| "eval_precisions_4": 6.6797, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3519, | |
| "eval_rouge2": 0.1816, | |
| "eval_rougeL": 0.3375, | |
| "eval_rougeLsum": 0.3372, | |
| "eval_runtime": 449.3427, | |
| "eval_samples_per_second": 4.905, | |
| "eval_steps_per_second": 1.226, | |
| "eval_sys_len": 17376, | |
| "eval_totals_1": 17376, | |
| "eval_totals_2": 15172, | |
| "eval_totals_3": 12968, | |
| "eval_totals_4": 10764, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.5571, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "eval_bleu": 12.9671, | |
| "eval_bp": 0.7894, | |
| "eval_counts_1": 7889, | |
| "eval_counts_2": 2994, | |
| "eval_counts_3": 1457, | |
| "eval_counts_4": 736, | |
| "eval_exact_match": 0.02, | |
| "eval_f1": 0.3457, | |
| "eval_gen_len": 12.6466, | |
| "eval_loss": 1.4213731288909912, | |
| "eval_precisions_1": 45.9063, | |
| "eval_precisions_2": 19.9853, | |
| "eval_precisions_3": 11.4033, | |
| "eval_precisions_4": 6.9611, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3529, | |
| "eval_rouge2": 0.1845, | |
| "eval_rougeL": 0.3392, | |
| "eval_rougeLsum": 0.3393, | |
| "eval_runtime": 440.5687, | |
| "eval_samples_per_second": 5.003, | |
| "eval_steps_per_second": 1.251, | |
| "eval_sys_len": 17185, | |
| "eval_totals_1": 17185, | |
| "eval_totals_2": 14981, | |
| "eval_totals_3": 12777, | |
| "eval_totals_4": 10573, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 11.98, | |
| "learning_rate": 0.0001, | |
| "loss": 1.5502, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 11.98, | |
| "eval_bleu": 13.0741, | |
| "eval_bp": 0.7712, | |
| "eval_counts_1": 7930, | |
| "eval_counts_2": 3008, | |
| "eval_counts_3": 1477, | |
| "eval_counts_4": 741, | |
| "eval_exact_match": 0.0213, | |
| "eval_f1": 0.3541, | |
| "eval_gen_len": 12.4483, | |
| "eval_loss": 1.4135174751281738, | |
| "eval_precisions_1": 47.0121, | |
| "eval_precisions_2": 20.5128, | |
| "eval_precisions_3": 11.8539, | |
| "eval_precisions_4": 7.225, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3619, | |
| "eval_rouge2": 0.189, | |
| "eval_rougeL": 0.3492, | |
| "eval_rougeLsum": 0.3491, | |
| "eval_runtime": 443.1145, | |
| "eval_samples_per_second": 4.974, | |
| "eval_steps_per_second": 1.243, | |
| "eval_sys_len": 16868, | |
| "eval_totals_1": 16868, | |
| "eval_totals_2": 14664, | |
| "eval_totals_3": 12460, | |
| "eval_totals_4": 10256, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.4564, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_bleu": 14.1014, | |
| "eval_bp": 0.8309, | |
| "eval_counts_1": 8268, | |
| "eval_counts_2": 3200, | |
| "eval_counts_3": 1616, | |
| "eval_counts_4": 837, | |
| "eval_exact_match": 0.0218, | |
| "eval_f1": 0.3647, | |
| "eval_gen_len": 13.2441, | |
| "eval_loss": 1.3942722082138062, | |
| "eval_precisions_1": 46.1152, | |
| "eval_precisions_2": 20.3498, | |
| "eval_precisions_3": 11.9518, | |
| "eval_precisions_4": 7.396, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3729, | |
| "eval_rouge2": 0.1974, | |
| "eval_rougeL": 0.3578, | |
| "eval_rougeLsum": 0.3576, | |
| "eval_runtime": 460.2282, | |
| "eval_samples_per_second": 4.789, | |
| "eval_steps_per_second": 1.197, | |
| "eval_sys_len": 17929, | |
| "eval_totals_1": 17929, | |
| "eval_totals_2": 15725, | |
| "eval_totals_3": 13521, | |
| "eval_totals_4": 11317, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.4522, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 13.99, | |
| "eval_bleu": 13.7526, | |
| "eval_bp": 0.7667, | |
| "eval_counts_1": 8047, | |
| "eval_counts_2": 3130, | |
| "eval_counts_3": 1564, | |
| "eval_counts_4": 811, | |
| "eval_exact_match": 0.0227, | |
| "eval_f1": 0.3627, | |
| "eval_gen_len": 12.515, | |
| "eval_loss": 1.3952871561050415, | |
| "eval_precisions_1": 47.9302, | |
| "eval_precisions_2": 21.4604, | |
| "eval_precisions_3": 12.6323, | |
| "eval_precisions_4": 7.9689, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3712, | |
| "eval_rouge2": 0.197, | |
| "eval_rougeL": 0.3582, | |
| "eval_rougeLsum": 0.3581, | |
| "eval_runtime": 437.5396, | |
| "eval_samples_per_second": 5.037, | |
| "eval_steps_per_second": 1.259, | |
| "eval_sys_len": 16789, | |
| "eval_totals_1": 16789, | |
| "eval_totals_2": 14585, | |
| "eval_totals_3": 12381, | |
| "eval_totals_4": 10177, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 14.98, | |
| "learning_rate": 0.0001, | |
| "loss": 1.407, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 14.98, | |
| "eval_bleu": 14.7315, | |
| "eval_bp": 0.8306, | |
| "eval_counts_1": 8498, | |
| "eval_counts_2": 3358, | |
| "eval_counts_3": 1703, | |
| "eval_counts_4": 877, | |
| "eval_exact_match": 0.0213, | |
| "eval_f1": 0.3772, | |
| "eval_gen_len": 13.2849, | |
| "eval_loss": 1.3759350776672363, | |
| "eval_precisions_1": 47.4139, | |
| "eval_precisions_2": 21.3627, | |
| "eval_precisions_3": 12.6008, | |
| "eval_precisions_4": 7.7535, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3856, | |
| "eval_rouge2": 0.2063, | |
| "eval_rougeL": 0.3709, | |
| "eval_rougeLsum": 0.3706, | |
| "eval_runtime": 453.6157, | |
| "eval_samples_per_second": 4.859, | |
| "eval_steps_per_second": 1.215, | |
| "eval_sys_len": 17923, | |
| "eval_totals_1": 17923, | |
| "eval_totals_2": 15719, | |
| "eval_totals_3": 13515, | |
| "eval_totals_4": 11311, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.3294, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 15.99, | |
| "eval_bleu": 14.868, | |
| "eval_bp": 0.8044, | |
| "eval_counts_1": 8481, | |
| "eval_counts_2": 3407, | |
| "eval_counts_3": 1721, | |
| "eval_counts_4": 883, | |
| "eval_exact_match": 0.024, | |
| "eval_f1": 0.3822, | |
| "eval_gen_len": 12.9142, | |
| "eval_loss": 1.3775662183761597, | |
| "eval_precisions_1": 48.5989, | |
| "eval_precisions_2": 22.3454, | |
| "eval_precisions_3": 13.1948, | |
| "eval_precisions_4": 8.1465, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3907, | |
| "eval_rouge2": 0.211, | |
| "eval_rougeL": 0.3766, | |
| "eval_rougeLsum": 0.3766, | |
| "eval_runtime": 448.6685, | |
| "eval_samples_per_second": 4.912, | |
| "eval_steps_per_second": 1.228, | |
| "eval_sys_len": 17451, | |
| "eval_totals_1": 17451, | |
| "eval_totals_2": 15247, | |
| "eval_totals_3": 13043, | |
| "eval_totals_4": 10839, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 16.98, | |
| "learning_rate": 0.0001, | |
| "loss": 1.3294, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 16.98, | |
| "eval_bleu": 15.2312, | |
| "eval_bp": 0.835, | |
| "eval_counts_1": 8633, | |
| "eval_counts_2": 3464, | |
| "eval_counts_3": 1767, | |
| "eval_counts_4": 923, | |
| "eval_exact_match": 0.0263, | |
| "eval_f1": 0.3868, | |
| "eval_gen_len": 13.3103, | |
| "eval_loss": 1.380259394645691, | |
| "eval_precisions_1": 47.9505, | |
| "eval_precisions_2": 21.9241, | |
| "eval_precisions_3": 12.9965, | |
| "eval_precisions_4": 8.1022, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3946, | |
| "eval_rouge2": 0.2133, | |
| "eval_rougeL": 0.3801, | |
| "eval_rougeLsum": 0.3798, | |
| "eval_runtime": 456.612, | |
| "eval_samples_per_second": 4.827, | |
| "eval_steps_per_second": 1.207, | |
| "eval_sys_len": 18004, | |
| "eval_totals_1": 18004, | |
| "eval_totals_2": 15800, | |
| "eval_totals_3": 13596, | |
| "eval_totals_4": 11392, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 0.0001, | |
| "loss": 1.2605, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_bleu": 14.779, | |
| "eval_bp": 0.8255, | |
| "eval_counts_1": 8560, | |
| "eval_counts_2": 3376, | |
| "eval_counts_3": 1695, | |
| "eval_counts_4": 880, | |
| "eval_exact_match": 0.0231, | |
| "eval_f1": 0.3846, | |
| "eval_gen_len": 13.1665, | |
| "eval_loss": 1.3709588050842285, | |
| "eval_precisions_1": 48.009, | |
| "eval_precisions_2": 21.605, | |
| "eval_precisions_3": 12.6285, | |
| "eval_precisions_4": 7.8445, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3922, | |
| "eval_rouge2": 0.2092, | |
| "eval_rougeL": 0.3778, | |
| "eval_rougeLsum": 0.3775, | |
| "eval_runtime": 456.164, | |
| "eval_samples_per_second": 4.832, | |
| "eval_steps_per_second": 1.208, | |
| "eval_sys_len": 17830, | |
| "eval_totals_1": 17830, | |
| "eval_totals_2": 15626, | |
| "eval_totals_3": 13422, | |
| "eval_totals_4": 11218, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "learning_rate": 0.0001, | |
| "loss": 1.2667, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "eval_bleu": 15.0008, | |
| "eval_bp": 0.8257, | |
| "eval_counts_1": 8664, | |
| "eval_counts_2": 3455, | |
| "eval_counts_3": 1733, | |
| "eval_counts_4": 882, | |
| "eval_exact_match": 0.0227, | |
| "eval_f1": 0.3906, | |
| "eval_gen_len": 13.2232, | |
| "eval_loss": 1.3694192171096802, | |
| "eval_precisions_1": 48.5814, | |
| "eval_precisions_2": 22.1049, | |
| "eval_precisions_3": 12.9078, | |
| "eval_precisions_4": 7.8596, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.3987, | |
| "eval_rouge2": 0.2138, | |
| "eval_rougeL": 0.3853, | |
| "eval_rougeLsum": 0.3851, | |
| "eval_runtime": 454.2362, | |
| "eval_samples_per_second": 4.852, | |
| "eval_steps_per_second": 1.213, | |
| "eval_sys_len": 17834, | |
| "eval_totals_1": 17834, | |
| "eval_totals_2": 15630, | |
| "eval_totals_3": 13426, | |
| "eval_totals_4": 11222, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 19.79, | |
| "learning_rate": 0.0001, | |
| "loss": 1.2074, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 19.79, | |
| "eval_bleu": 15.0442, | |
| "eval_bp": 0.8369, | |
| "eval_counts_1": 8770, | |
| "eval_counts_2": 3465, | |
| "eval_counts_3": 1737, | |
| "eval_counts_4": 880, | |
| "eval_exact_match": 0.0227, | |
| "eval_f1": 0.3941, | |
| "eval_gen_len": 13.4424, | |
| "eval_loss": 1.365785837173462, | |
| "eval_precisions_1": 48.6169, | |
| "eval_precisions_2": 21.8819, | |
| "eval_precisions_3": 12.743, | |
| "eval_precisions_4": 7.7011, | |
| "eval_ref_len": 21250, | |
| "eval_rouge1": 0.4025, | |
| "eval_rouge2": 0.215, | |
| "eval_rougeL": 0.3883, | |
| "eval_rougeLsum": 0.3879, | |
| "eval_runtime": 459.1457, | |
| "eval_samples_per_second": 4.8, | |
| "eval_steps_per_second": 1.2, | |
| "eval_sys_len": 18039, | |
| "eval_totals_1": 18039, | |
| "eval_totals_2": 15835, | |
| "eval_totals_3": 13631, | |
| "eval_totals_4": 11427, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 19.79, | |
| "step": 720, | |
| "total_flos": 4.419252384883016e+17, | |
| "train_loss": 2.0875697082943385, | |
| "train_runtime": 23544.6757, | |
| "train_samples_per_second": 7.912, | |
| "train_steps_per_second": 0.031 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 720, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "total_flos": 4.419252384883016e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |