{ "best_metric": 1.161091685295105, "best_model_checkpoint": "./BART_Legal/checkpoint-5984", "epoch": 20.0, "eval_steps": 500, "global_step": 7040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_gen_len": 21.0, "eval_loss": 1.371184229850769, "eval_rouge1": 16.2547, "eval_rouge2": 9.6949, "eval_rougeL": 14.0623, "eval_rougeLsum": 15.5626, "eval_runtime": 96.7786, "eval_samples_per_second": 14.528, "eval_steps_per_second": 0.909, "step": 352 }, { "epoch": 1.4204545454545454, "grad_norm": 4.283212184906006, "learning_rate": 1.975604259192243e-06, "loss": 1.6871, "step": 500 }, { "epoch": 2.0, "eval_gen_len": 21.0, "eval_loss": 1.296962857246399, "eval_rouge1": 15.9413, "eval_rouge2": 9.6091, "eval_rougeL": 13.8601, "eval_rougeLsum": 15.3188, "eval_runtime": 96.896, "eval_samples_per_second": 14.51, "eval_steps_per_second": 0.908, "step": 704 }, { "epoch": 2.840909090909091, "grad_norm": 3.6476199626922607, "learning_rate": 1.9028412780702233e-06, "loss": 1.4446, "step": 1000 }, { "epoch": 3.0, "eval_gen_len": 21.0, "eval_loss": 1.260762095451355, "eval_rouge1": 16.5266, "eval_rouge2": 10.0679, "eval_rougeL": 14.3339, "eval_rougeLsum": 15.8563, "eval_runtime": 96.5071, "eval_samples_per_second": 14.569, "eval_steps_per_second": 0.912, "step": 1056 }, { "epoch": 4.0, "eval_gen_len": 21.0, "eval_loss": 1.2416234016418457, "eval_rouge1": 16.6338, "eval_rouge2": 10.1766, "eval_rougeL": 14.5447, "eval_rougeLsum": 15.9669, "eval_runtime": 96.5225, "eval_samples_per_second": 14.567, "eval_steps_per_second": 0.912, "step": 1408 }, { "epoch": 4.261363636363637, "grad_norm": 4.507070064544678, "learning_rate": 1.7853169308807447e-06, "loss": 1.3688, "step": 1500 }, { "epoch": 5.0, "eval_gen_len": 21.0, "eval_loss": 1.232724905014038, "eval_rouge1": 16.7571, "eval_rouge2": 10.1866, "eval_rougeL": 14.6125, "eval_rougeLsum": 16.0794, "eval_runtime": 96.7579, "eval_samples_per_second": 14.531, "eval_steps_per_second": 0.909, "step": 1760 }, { "epoch": 5.681818181818182, "grad_norm": 7.366779327392578, "learning_rate": 1.6292047838965046e-06, "loss": 1.3173, "step": 2000 }, { "epoch": 6.0, "eval_gen_len": 21.0, "eval_loss": 1.2103159427642822, "eval_rouge1": 16.5799, "eval_rouge2": 10.1068, "eval_rougeL": 14.4487, "eval_rougeLsum": 15.9372, "eval_runtime": 96.3022, "eval_samples_per_second": 14.6, "eval_steps_per_second": 0.914, "step": 2112 }, { "epoch": 7.0, "eval_gen_len": 21.0, "eval_loss": 1.2007499933242798, "eval_rouge1": 16.5958, "eval_rouge2": 10.1498, "eval_rougeL": 14.4406, "eval_rougeLsum": 15.945, "eval_runtime": 96.3959, "eval_samples_per_second": 14.586, "eval_steps_per_second": 0.913, "step": 2464 }, { "epoch": 7.1022727272727275, "grad_norm": 3.5551233291625977, "learning_rate": 1.4416215204103047e-06, "loss": 1.2818, "step": 2500 }, { "epoch": 8.0, "eval_gen_len": 21.0, "eval_loss": 1.193101167678833, "eval_rouge1": 16.6412, "eval_rouge2": 10.1469, "eval_rougeL": 14.4385, "eval_rougeLsum": 15.9981, "eval_runtime": 96.0432, "eval_samples_per_second": 14.639, "eval_steps_per_second": 0.916, "step": 2816 }, { "epoch": 8.522727272727273, "grad_norm": 3.9627692699432373, "learning_rate": 1.2321433978624924e-06, "loss": 1.2533, "step": 3000 }, { "epoch": 9.0, "eval_gen_len": 21.0, "eval_loss": 1.1837595701217651, "eval_rouge1": 16.6114, "eval_rouge2": 10.1789, "eval_rougeL": 14.4309, "eval_rougeLsum": 15.9703, "eval_runtime": 95.5751, "eval_samples_per_second": 14.711, "eval_steps_per_second": 0.921, "step": 3168 }, { "epoch": 9.943181818181818, "grad_norm": 3.2599680423736572, "learning_rate": 1.0111559924935877e-06, "loss": 1.2269, "step": 3500 }, { "epoch": 10.0, "eval_gen_len": 21.0, "eval_loss": 1.1798343658447266, "eval_rouge1": 16.6425, "eval_rouge2": 10.1569, "eval_rougeL": 14.5234, "eval_rougeLsum": 15.9968, "eval_runtime": 96.0029, "eval_samples_per_second": 14.645, "eval_steps_per_second": 0.917, "step": 3520 }, { "epoch": 11.0, "eval_gen_len": 21.0, "eval_loss": 1.1735297441482544, "eval_rouge1": 16.7312, "eval_rouge2": 10.2697, "eval_rougeL": 14.6245, "eval_rougeLsum": 16.0927, "eval_runtime": 439.0443, "eval_samples_per_second": 3.202, "eval_steps_per_second": 0.2, "step": 3872 }, { "epoch": 11.363636363636363, "grad_norm": 3.0771420001983643, "learning_rate": 7.896154916072532e-07, "loss": 1.2102, "step": 4000 }, { "epoch": 12.0, "eval_gen_len": 21.0, "eval_loss": 1.1649082899093628, "eval_rouge1": 16.5778, "eval_rouge2": 10.126, "eval_rougeL": 14.4729, "eval_rougeLsum": 15.948, "eval_runtime": 428.9181, "eval_samples_per_second": 3.278, "eval_steps_per_second": 0.205, "step": 4224 }, { "epoch": 12.784090909090908, "grad_norm": 3.9918179512023926, "learning_rate": 5.785055040612244e-07, "loss": 1.2001, "step": 4500 }, { "epoch": 13.0, "eval_gen_len": 21.0, "eval_loss": 1.1660524606704712, "eval_rouge1": 16.6632, "eval_rouge2": 10.1667, "eval_rougeL": 14.5389, "eval_rougeLsum": 16.0237, "eval_runtime": 433.402, "eval_samples_per_second": 3.244, "eval_steps_per_second": 0.203, "step": 4576 }, { "epoch": 14.0, "eval_gen_len": 21.0, "eval_loss": 1.1648682355880737, "eval_rouge1": 16.7154, "eval_rouge2": 10.2296, "eval_rougeL": 14.5912, "eval_rougeLsum": 16.0608, "eval_runtime": 438.5153, "eval_samples_per_second": 3.206, "eval_steps_per_second": 0.201, "step": 4928 }, { "epoch": 14.204545454545455, "grad_norm": 3.055126667022705, "learning_rate": 3.8864559256318376e-07, "loss": 1.1922, "step": 5000 }, { "epoch": 15.0, "eval_gen_len": 21.0, "eval_loss": 1.1646767854690552, "eval_rouge1": 16.6678, "eval_rouge2": 10.1662, "eval_rougeL": 14.5472, "eval_rougeLsum": 16.0314, "eval_runtime": 427.2745, "eval_samples_per_second": 3.291, "eval_steps_per_second": 0.206, "step": 5280 }, { "epoch": 15.625, "grad_norm": 3.330857038497925, "learning_rate": 2.2869089949783792e-07, "loss": 1.1791, "step": 5500 }, { "epoch": 16.0, "eval_gen_len": 21.0, "eval_loss": 1.162706971168518, "eval_rouge1": 16.6631, "eval_rouge2": 10.2047, "eval_rougeL": 14.5382, "eval_rougeLsum": 16.031, "eval_runtime": 437.8499, "eval_samples_per_second": 3.211, "eval_steps_per_second": 0.201, "step": 5632 }, { "epoch": 17.0, "eval_gen_len": 21.0, "eval_loss": 1.161091685295105, "eval_rouge1": 16.6575, "eval_rouge2": 10.1867, "eval_rougeL": 14.5325, "eval_rougeLsum": 16.0221, "eval_runtime": 436.8448, "eval_samples_per_second": 3.219, "eval_steps_per_second": 0.201, "step": 5984 }, { "epoch": 17.045454545454547, "grad_norm": 3.5640358924865723, "learning_rate": 1.0697642632201121e-07, "loss": 1.1809, "step": 6000 }, { "epoch": 18.0, "eval_gen_len": 21.0, "eval_loss": 1.1619925498962402, "eval_rouge1": 16.6763, "eval_rouge2": 10.2071, "eval_rougeL": 14.5592, "eval_rougeLsum": 16.0364, "eval_runtime": 433.2267, "eval_samples_per_second": 3.245, "eval_steps_per_second": 0.203, "step": 6336 }, { "epoch": 18.46590909090909, "grad_norm": 4.443442344665527, "learning_rate": 2.953657379014829e-08, "loss": 1.1743, "step": 6500 }, { "epoch": 19.0, "eval_gen_len": 21.0, "eval_loss": 1.1618391275405884, "eval_rouge1": 16.6734, "eval_rouge2": 10.1981, "eval_rougeL": 14.5583, "eval_rougeLsum": 16.032, "eval_runtime": 435.6258, "eval_samples_per_second": 3.228, "eval_steps_per_second": 0.202, "step": 6688 }, { "epoch": 19.886363636363637, "grad_norm": 3.133742094039917, "learning_rate": 2.1068074461783049e-10, "loss": 1.1759, "step": 7000 }, { "epoch": 20.0, "eval_gen_len": 21.0, "eval_loss": 1.1616698503494263, "eval_rouge1": 16.6639, "eval_rouge2": 10.1849, "eval_rougeL": 14.5427, "eval_rougeLsum": 16.0297, "eval_runtime": 431.0391, "eval_samples_per_second": 3.262, "eval_steps_per_second": 0.204, "step": 7040 } ], "logging_steps": 500, "max_steps": 7040, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.4375592564555776e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }