| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 22.123893805309734, | |
| "eval_steps": 500, | |
| "global_step": 2500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_bleu": 52.477, | |
| "eval_gen_len": 15.9297, | |
| "eval_loss": 0.23837900161743164, | |
| "eval_meteor": 0.6883, | |
| "eval_runtime": 19.7008, | |
| "eval_samples_per_second": 26.699, | |
| "eval_steps_per_second": 0.863, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bleu": 61.8918, | |
| "eval_gen_len": 17.5057, | |
| "eval_loss": 0.19293558597564697, | |
| "eval_meteor": 0.7932, | |
| "eval_runtime": 14.8562, | |
| "eval_samples_per_second": 35.406, | |
| "eval_steps_per_second": 1.144, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_bleu": 65.2499, | |
| "eval_gen_len": 17.6084, | |
| "eval_loss": 0.1683175414800644, | |
| "eval_meteor": 0.8119, | |
| "eval_runtime": 14.8084, | |
| "eval_samples_per_second": 35.52, | |
| "eval_steps_per_second": 1.148, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bleu": 65.682, | |
| "eval_gen_len": 17.5951, | |
| "eval_loss": 0.17639563977718353, | |
| "eval_meteor": 0.8201, | |
| "eval_runtime": 15.1305, | |
| "eval_samples_per_second": 34.764, | |
| "eval_steps_per_second": 1.124, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 0.0002469026548672566, | |
| "loss": 0.2514, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_bleu": 65.9583, | |
| "eval_gen_len": 17.6046, | |
| "eval_loss": 0.18957138061523438, | |
| "eval_meteor": 0.8191, | |
| "eval_runtime": 15.1997, | |
| "eval_samples_per_second": 34.606, | |
| "eval_steps_per_second": 1.118, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_bleu": 66.7228, | |
| "eval_gen_len": 17.616, | |
| "eval_loss": 0.18413878977298737, | |
| "eval_meteor": 0.8247, | |
| "eval_runtime": 14.9673, | |
| "eval_samples_per_second": 35.143, | |
| "eval_steps_per_second": 1.136, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_bleu": 65.9782, | |
| "eval_gen_len": 17.6065, | |
| "eval_loss": 0.18860693275928497, | |
| "eval_meteor": 0.82, | |
| "eval_runtime": 14.8993, | |
| "eval_samples_per_second": 35.304, | |
| "eval_steps_per_second": 1.141, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bleu": 65.6854, | |
| "eval_gen_len": 17.4943, | |
| "eval_loss": 0.17572908103466034, | |
| "eval_meteor": 0.8259, | |
| "eval_runtime": 14.925, | |
| "eval_samples_per_second": 35.243, | |
| "eval_steps_per_second": 1.139, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 0.00019380530973451326, | |
| "loss": 0.0582, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_bleu": 65.5495, | |
| "eval_gen_len": 17.5494, | |
| "eval_loss": 0.19610682129859924, | |
| "eval_meteor": 0.8194, | |
| "eval_runtime": 15.0953, | |
| "eval_samples_per_second": 34.845, | |
| "eval_steps_per_second": 1.126, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_bleu": 66.5886, | |
| "eval_gen_len": 17.6008, | |
| "eval_loss": 0.20674178004264832, | |
| "eval_meteor": 0.8255, | |
| "eval_runtime": 14.9084, | |
| "eval_samples_per_second": 35.282, | |
| "eval_steps_per_second": 1.14, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_bleu": 66.8651, | |
| "eval_gen_len": 17.5494, | |
| "eval_loss": 0.21284210681915283, | |
| "eval_meteor": 0.8279, | |
| "eval_runtime": 15.4057, | |
| "eval_samples_per_second": 34.143, | |
| "eval_steps_per_second": 1.103, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_bleu": 66.0318, | |
| "eval_gen_len": 17.6008, | |
| "eval_loss": 0.2158810794353485, | |
| "eval_meteor": 0.8188, | |
| "eval_runtime": 15.3536, | |
| "eval_samples_per_second": 34.259, | |
| "eval_steps_per_second": 1.107, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_bleu": 65.7773, | |
| "eval_gen_len": 17.635, | |
| "eval_loss": 0.21044525504112244, | |
| "eval_meteor": 0.8116, | |
| "eval_runtime": 14.9886, | |
| "eval_samples_per_second": 35.093, | |
| "eval_steps_per_second": 1.134, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 13.27, | |
| "learning_rate": 0.0001407079646017699, | |
| "loss": 0.0208, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_bleu": 65.4921, | |
| "eval_gen_len": 17.6559, | |
| "eval_loss": 0.231339693069458, | |
| "eval_meteor": 0.8044, | |
| "eval_runtime": 14.9755, | |
| "eval_samples_per_second": 35.124, | |
| "eval_steps_per_second": 1.135, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_bleu": 66.0398, | |
| "eval_gen_len": 17.6464, | |
| "eval_loss": 0.23520441353321075, | |
| "eval_meteor": 0.8128, | |
| "eval_runtime": 15.0741, | |
| "eval_samples_per_second": 34.894, | |
| "eval_steps_per_second": 1.128, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_bleu": 67.0346, | |
| "eval_gen_len": 17.5304, | |
| "eval_loss": 0.21621014177799225, | |
| "eval_meteor": 0.8352, | |
| "eval_runtime": 15.0483, | |
| "eval_samples_per_second": 34.954, | |
| "eval_steps_per_second": 1.13, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_bleu": 66.4929, | |
| "eval_gen_len": 17.5551, | |
| "eval_loss": 0.25872427225112915, | |
| "eval_meteor": 0.8265, | |
| "eval_runtime": 14.9009, | |
| "eval_samples_per_second": 35.3, | |
| "eval_steps_per_second": 1.141, | |
| "step": 1921 | |
| }, | |
| { | |
| "epoch": 17.7, | |
| "learning_rate": 8.761061946902654e-05, | |
| "loss": 0.0085, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_bleu": 66.3935, | |
| "eval_gen_len": 17.5875, | |
| "eval_loss": 0.24429753422737122, | |
| "eval_meteor": 0.8231, | |
| "eval_runtime": 15.0363, | |
| "eval_samples_per_second": 34.982, | |
| "eval_steps_per_second": 1.131, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_bleu": 66.582, | |
| "eval_gen_len": 17.5475, | |
| "eval_loss": 0.2678430378437042, | |
| "eval_meteor": 0.8292, | |
| "eval_runtime": 14.967, | |
| "eval_samples_per_second": 35.144, | |
| "eval_steps_per_second": 1.136, | |
| "step": 2147 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_bleu": 66.9099, | |
| "eval_gen_len": 17.5513, | |
| "eval_loss": 0.2873556911945343, | |
| "eval_meteor": 0.8307, | |
| "eval_runtime": 15.2473, | |
| "eval_samples_per_second": 34.498, | |
| "eval_steps_per_second": 1.115, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_bleu": 67.0252, | |
| "eval_gen_len": 17.5703, | |
| "eval_loss": 0.2893502712249756, | |
| "eval_meteor": 0.8283, | |
| "eval_runtime": 15.0711, | |
| "eval_samples_per_second": 34.901, | |
| "eval_steps_per_second": 1.128, | |
| "step": 2373 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_bleu": 66.8167, | |
| "eval_gen_len": 17.5722, | |
| "eval_loss": 0.3005259931087494, | |
| "eval_meteor": 0.8266, | |
| "eval_runtime": 15.0539, | |
| "eval_samples_per_second": 34.941, | |
| "eval_steps_per_second": 1.129, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 22.12, | |
| "learning_rate": 3.451327433628318e-05, | |
| "loss": 0.0034, | |
| "step": 2500 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2825, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 25, | |
| "save_steps": 500, | |
| "total_flos": 9808303958876160.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |