{ "best_metric": 0.29641835422253565, "best_model_checkpoint": "./summary/checkpoint-168", "epoch": 2.986666666666667, "eval_steps": 500, "global_step": 168, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14222222222222222, "grad_norm": 8.576995849609375, "learning_rate": 4.761904761904762e-05, "loss": 6.1269, "step": 8 }, { "epoch": 0.28444444444444444, "grad_norm": 7.30542516708374, "learning_rate": 4.523809523809524e-05, "loss": 4.2774, "step": 16 }, { "epoch": 0.4266666666666667, "grad_norm": 7.0902628898620605, "learning_rate": 4.2857142857142856e-05, "loss": 3.9068, "step": 24 }, { "epoch": 0.5688888888888889, "grad_norm": 8.697046279907227, "learning_rate": 4.047619047619048e-05, "loss": 3.3875, "step": 32 }, { "epoch": 0.7111111111111111, "grad_norm": 7.340813159942627, "learning_rate": 3.809523809523809e-05, "loss": 3.8833, "step": 40 }, { "epoch": 0.8533333333333334, "grad_norm": 8.722084045410156, "learning_rate": 3.571428571428572e-05, "loss": 3.4346, "step": 48 }, { "epoch": 0.9955555555555555, "grad_norm": 7.050210952758789, "learning_rate": 3.3333333333333335e-05, "loss": 3.5247, "step": 56 }, { "epoch": 0.9955555555555555, "eval_loss": 3.147299289703369, "eval_rouge-1": 0.3681179246506479, "eval_rouge-2": 0.20816045958286267, "eval_rouge-l": 0.2931656876171607, "eval_runtime": 217.5697, "eval_samples_per_second": 0.23, "eval_steps_per_second": 0.06, "step": 56 }, { "epoch": 1.1377777777777778, "grad_norm": 6.299299240112305, "learning_rate": 3.095238095238095e-05, "loss": 3.3798, "step": 64 }, { "epoch": 1.28, "grad_norm": 8.227340698242188, "learning_rate": 2.857142857142857e-05, "loss": 3.0201, "step": 72 }, { "epoch": 1.4222222222222223, "grad_norm": 7.490102291107178, "learning_rate": 2.6190476190476192e-05, "loss": 2.7197, "step": 80 }, { "epoch": 1.5644444444444443, "grad_norm": 5.702718734741211, "learning_rate": 2.380952380952381e-05, "loss": 2.7057, "step": 88 }, { "epoch": 1.7066666666666666, "grad_norm": 7.035086631774902, "learning_rate": 2.1428571428571428e-05, "loss": 2.6395, "step": 96 }, { "epoch": 1.8488888888888888, "grad_norm": 6.97080659866333, "learning_rate": 1.9047619047619046e-05, "loss": 2.8412, "step": 104 }, { "epoch": 1.991111111111111, "grad_norm": 5.344171047210693, "learning_rate": 1.6666666666666667e-05, "loss": 2.8042, "step": 112 }, { "epoch": 1.991111111111111, "eval_loss": 2.980454683303833, "eval_rouge-1": 0.358032604043401, "eval_rouge-2": 0.20148071328827782, "eval_rouge-l": 0.2943552293948257, "eval_runtime": 218.2497, "eval_samples_per_second": 0.229, "eval_steps_per_second": 0.06, "step": 112 }, { "epoch": 2.1333333333333333, "grad_norm": 6.205347537994385, "learning_rate": 1.4285714285714285e-05, "loss": 2.497, "step": 120 }, { "epoch": 2.2755555555555556, "grad_norm": 6.757753372192383, "learning_rate": 1.1904761904761905e-05, "loss": 2.4279, "step": 128 }, { "epoch": 2.417777777777778, "grad_norm": 6.1174821853637695, "learning_rate": 9.523809523809523e-06, "loss": 2.4113, "step": 136 }, { "epoch": 2.56, "grad_norm": 5.852235317230225, "learning_rate": 7.142857142857143e-06, "loss": 2.6189, "step": 144 }, { "epoch": 2.7022222222222223, "grad_norm": 6.898237228393555, "learning_rate": 4.7619047619047615e-06, "loss": 2.4112, "step": 152 }, { "epoch": 2.8444444444444446, "grad_norm": 7.234611511230469, "learning_rate": 2.3809523809523808e-06, "loss": 2.2202, "step": 160 }, { "epoch": 2.986666666666667, "grad_norm": 6.406458377838135, "learning_rate": 0.0, "loss": 2.3927, "step": 168 }, { "epoch": 2.986666666666667, "eval_loss": 2.968492269515991, "eval_rouge-1": 0.372908062309785, "eval_rouge-2": 0.2065141199985535, "eval_rouge-l": 0.29641835422253565, "eval_runtime": 217.7138, "eval_samples_per_second": 0.23, "eval_steps_per_second": 0.06, "step": 168 } ], "logging_steps": 8, "max_steps": 168, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 654660126941184.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }