{ "best_metric": null, "best_model_checkpoint": null, "epoch": 22.123893805309734, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 52.477, "eval_gen_len": 15.9297, "eval_loss": 0.23837900161743164, "eval_meteor": 0.6883, "eval_runtime": 19.7008, "eval_samples_per_second": 26.699, "eval_steps_per_second": 0.863, "step": 113 }, { "epoch": 2.0, "eval_bleu": 61.8918, "eval_gen_len": 17.5057, "eval_loss": 0.19293558597564697, "eval_meteor": 0.7932, "eval_runtime": 14.8562, "eval_samples_per_second": 35.406, "eval_steps_per_second": 1.144, "step": 226 }, { "epoch": 3.0, "eval_bleu": 65.2499, "eval_gen_len": 17.6084, "eval_loss": 0.1683175414800644, "eval_meteor": 0.8119, "eval_runtime": 14.8084, "eval_samples_per_second": 35.52, "eval_steps_per_second": 1.148, "step": 339 }, { "epoch": 4.0, "eval_bleu": 65.682, "eval_gen_len": 17.5951, "eval_loss": 0.17639563977718353, "eval_meteor": 0.8201, "eval_runtime": 15.1305, "eval_samples_per_second": 34.764, "eval_steps_per_second": 1.124, "step": 452 }, { "epoch": 4.42, "learning_rate": 0.0002469026548672566, "loss": 0.2514, "step": 500 }, { "epoch": 5.0, "eval_bleu": 65.9583, "eval_gen_len": 17.6046, "eval_loss": 0.18957138061523438, "eval_meteor": 0.8191, "eval_runtime": 15.1997, "eval_samples_per_second": 34.606, "eval_steps_per_second": 1.118, "step": 565 }, { "epoch": 6.0, "eval_bleu": 66.7228, "eval_gen_len": 17.616, "eval_loss": 0.18413878977298737, "eval_meteor": 0.8247, "eval_runtime": 14.9673, "eval_samples_per_second": 35.143, "eval_steps_per_second": 1.136, "step": 678 }, { "epoch": 7.0, "eval_bleu": 65.9782, "eval_gen_len": 17.6065, "eval_loss": 0.18860693275928497, "eval_meteor": 0.82, "eval_runtime": 14.8993, "eval_samples_per_second": 35.304, "eval_steps_per_second": 1.141, "step": 791 }, { "epoch": 8.0, "eval_bleu": 65.6854, "eval_gen_len": 17.4943, "eval_loss": 0.17572908103466034, "eval_meteor": 0.8259, "eval_runtime": 14.925, "eval_samples_per_second": 35.243, "eval_steps_per_second": 1.139, "step": 904 }, { "epoch": 8.85, "learning_rate": 0.00019380530973451326, "loss": 0.0582, "step": 1000 }, { "epoch": 9.0, "eval_bleu": 65.5495, "eval_gen_len": 17.5494, "eval_loss": 0.19610682129859924, "eval_meteor": 0.8194, "eval_runtime": 15.0953, "eval_samples_per_second": 34.845, "eval_steps_per_second": 1.126, "step": 1017 }, { "epoch": 10.0, "eval_bleu": 66.5886, "eval_gen_len": 17.6008, "eval_loss": 0.20674178004264832, "eval_meteor": 0.8255, "eval_runtime": 14.9084, "eval_samples_per_second": 35.282, "eval_steps_per_second": 1.14, "step": 1130 }, { "epoch": 11.0, "eval_bleu": 66.8651, "eval_gen_len": 17.5494, "eval_loss": 0.21284210681915283, "eval_meteor": 0.8279, "eval_runtime": 15.4057, "eval_samples_per_second": 34.143, "eval_steps_per_second": 1.103, "step": 1243 }, { "epoch": 12.0, "eval_bleu": 66.0318, "eval_gen_len": 17.6008, "eval_loss": 0.2158810794353485, "eval_meteor": 0.8188, "eval_runtime": 15.3536, "eval_samples_per_second": 34.259, "eval_steps_per_second": 1.107, "step": 1356 }, { "epoch": 13.0, "eval_bleu": 65.7773, "eval_gen_len": 17.635, "eval_loss": 0.21044525504112244, "eval_meteor": 0.8116, "eval_runtime": 14.9886, "eval_samples_per_second": 35.093, "eval_steps_per_second": 1.134, "step": 1469 }, { "epoch": 13.27, "learning_rate": 0.0001407079646017699, "loss": 0.0208, "step": 1500 }, { "epoch": 14.0, "eval_bleu": 65.4921, "eval_gen_len": 17.6559, "eval_loss": 0.231339693069458, "eval_meteor": 0.8044, "eval_runtime": 14.9755, "eval_samples_per_second": 35.124, "eval_steps_per_second": 1.135, "step": 1582 }, { "epoch": 15.0, "eval_bleu": 66.0398, "eval_gen_len": 17.6464, "eval_loss": 0.23520441353321075, "eval_meteor": 0.8128, "eval_runtime": 15.0741, "eval_samples_per_second": 34.894, "eval_steps_per_second": 1.128, "step": 1695 }, { "epoch": 16.0, "eval_bleu": 67.0346, "eval_gen_len": 17.5304, "eval_loss": 0.21621014177799225, "eval_meteor": 0.8352, "eval_runtime": 15.0483, "eval_samples_per_second": 34.954, "eval_steps_per_second": 1.13, "step": 1808 }, { "epoch": 17.0, "eval_bleu": 66.4929, "eval_gen_len": 17.5551, "eval_loss": 0.25872427225112915, "eval_meteor": 0.8265, "eval_runtime": 14.9009, "eval_samples_per_second": 35.3, "eval_steps_per_second": 1.141, "step": 1921 }, { "epoch": 17.7, "learning_rate": 8.761061946902654e-05, "loss": 0.0085, "step": 2000 }, { "epoch": 18.0, "eval_bleu": 66.3935, "eval_gen_len": 17.5875, "eval_loss": 0.24429753422737122, "eval_meteor": 0.8231, "eval_runtime": 15.0363, "eval_samples_per_second": 34.982, "eval_steps_per_second": 1.131, "step": 2034 }, { "epoch": 19.0, "eval_bleu": 66.582, "eval_gen_len": 17.5475, "eval_loss": 0.2678430378437042, "eval_meteor": 0.8292, "eval_runtime": 14.967, "eval_samples_per_second": 35.144, "eval_steps_per_second": 1.136, "step": 2147 }, { "epoch": 20.0, "eval_bleu": 66.9099, "eval_gen_len": 17.5513, "eval_loss": 0.2873556911945343, "eval_meteor": 0.8307, "eval_runtime": 15.2473, "eval_samples_per_second": 34.498, "eval_steps_per_second": 1.115, "step": 2260 }, { "epoch": 21.0, "eval_bleu": 67.0252, "eval_gen_len": 17.5703, "eval_loss": 0.2893502712249756, "eval_meteor": 0.8283, "eval_runtime": 15.0711, "eval_samples_per_second": 34.901, "eval_steps_per_second": 1.128, "step": 2373 }, { "epoch": 22.0, "eval_bleu": 66.8167, "eval_gen_len": 17.5722, "eval_loss": 0.3005259931087494, "eval_meteor": 0.8266, "eval_runtime": 15.0539, "eval_samples_per_second": 34.941, "eval_steps_per_second": 1.129, "step": 2486 }, { "epoch": 22.12, "learning_rate": 3.451327433628318e-05, "loss": 0.0034, "step": 2500 } ], "logging_steps": 500, "max_steps": 2825, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 500, "total_flos": 9808303958876160.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }