| { | |
| "best_metric": 1.161091685295105, | |
| "best_model_checkpoint": "./BART_Legal/checkpoint-5984", | |
| "epoch": 20.0, | |
| "eval_steps": 500, | |
| "global_step": 7040, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.371184229850769, | |
| "eval_rouge1": 16.2547, | |
| "eval_rouge2": 9.6949, | |
| "eval_rougeL": 14.0623, | |
| "eval_rougeLsum": 15.5626, | |
| "eval_runtime": 96.7786, | |
| "eval_samples_per_second": 14.528, | |
| "eval_steps_per_second": 0.909, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.4204545454545454, | |
| "grad_norm": 4.283212184906006, | |
| "learning_rate": 1.975604259192243e-06, | |
| "loss": 1.6871, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.296962857246399, | |
| "eval_rouge1": 15.9413, | |
| "eval_rouge2": 9.6091, | |
| "eval_rougeL": 13.8601, | |
| "eval_rougeLsum": 15.3188, | |
| "eval_runtime": 96.896, | |
| "eval_samples_per_second": 14.51, | |
| "eval_steps_per_second": 0.908, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.840909090909091, | |
| "grad_norm": 3.6476199626922607, | |
| "learning_rate": 1.9028412780702233e-06, | |
| "loss": 1.4446, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.260762095451355, | |
| "eval_rouge1": 16.5266, | |
| "eval_rouge2": 10.0679, | |
| "eval_rougeL": 14.3339, | |
| "eval_rougeLsum": 15.8563, | |
| "eval_runtime": 96.5071, | |
| "eval_samples_per_second": 14.569, | |
| "eval_steps_per_second": 0.912, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.2416234016418457, | |
| "eval_rouge1": 16.6338, | |
| "eval_rouge2": 10.1766, | |
| "eval_rougeL": 14.5447, | |
| "eval_rougeLsum": 15.9669, | |
| "eval_runtime": 96.5225, | |
| "eval_samples_per_second": 14.567, | |
| "eval_steps_per_second": 0.912, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 4.261363636363637, | |
| "grad_norm": 4.507070064544678, | |
| "learning_rate": 1.7853169308807447e-06, | |
| "loss": 1.3688, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.232724905014038, | |
| "eval_rouge1": 16.7571, | |
| "eval_rouge2": 10.1866, | |
| "eval_rougeL": 14.6125, | |
| "eval_rougeLsum": 16.0794, | |
| "eval_runtime": 96.7579, | |
| "eval_samples_per_second": 14.531, | |
| "eval_steps_per_second": 0.909, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 5.681818181818182, | |
| "grad_norm": 7.366779327392578, | |
| "learning_rate": 1.6292047838965046e-06, | |
| "loss": 1.3173, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.2103159427642822, | |
| "eval_rouge1": 16.5799, | |
| "eval_rouge2": 10.1068, | |
| "eval_rougeL": 14.4487, | |
| "eval_rougeLsum": 15.9372, | |
| "eval_runtime": 96.3022, | |
| "eval_samples_per_second": 14.6, | |
| "eval_steps_per_second": 0.914, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.2007499933242798, | |
| "eval_rouge1": 16.5958, | |
| "eval_rouge2": 10.1498, | |
| "eval_rougeL": 14.4406, | |
| "eval_rougeLsum": 15.945, | |
| "eval_runtime": 96.3959, | |
| "eval_samples_per_second": 14.586, | |
| "eval_steps_per_second": 0.913, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 7.1022727272727275, | |
| "grad_norm": 3.5551233291625977, | |
| "learning_rate": 1.4416215204103047e-06, | |
| "loss": 1.2818, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.193101167678833, | |
| "eval_rouge1": 16.6412, | |
| "eval_rouge2": 10.1469, | |
| "eval_rougeL": 14.4385, | |
| "eval_rougeLsum": 15.9981, | |
| "eval_runtime": 96.0432, | |
| "eval_samples_per_second": 14.639, | |
| "eval_steps_per_second": 0.916, | |
| "step": 2816 | |
| }, | |
| { | |
| "epoch": 8.522727272727273, | |
| "grad_norm": 3.9627692699432373, | |
| "learning_rate": 1.2321433978624924e-06, | |
| "loss": 1.2533, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.1837595701217651, | |
| "eval_rouge1": 16.6114, | |
| "eval_rouge2": 10.1789, | |
| "eval_rougeL": 14.4309, | |
| "eval_rougeLsum": 15.9703, | |
| "eval_runtime": 95.5751, | |
| "eval_samples_per_second": 14.711, | |
| "eval_steps_per_second": 0.921, | |
| "step": 3168 | |
| }, | |
| { | |
| "epoch": 9.943181818181818, | |
| "grad_norm": 3.2599680423736572, | |
| "learning_rate": 1.0111559924935877e-06, | |
| "loss": 1.2269, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.1798343658447266, | |
| "eval_rouge1": 16.6425, | |
| "eval_rouge2": 10.1569, | |
| "eval_rougeL": 14.5234, | |
| "eval_rougeLsum": 15.9968, | |
| "eval_runtime": 96.0029, | |
| "eval_samples_per_second": 14.645, | |
| "eval_steps_per_second": 0.917, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.1735297441482544, | |
| "eval_rouge1": 16.7312, | |
| "eval_rouge2": 10.2697, | |
| "eval_rougeL": 14.6245, | |
| "eval_rougeLsum": 16.0927, | |
| "eval_runtime": 439.0443, | |
| "eval_samples_per_second": 3.202, | |
| "eval_steps_per_second": 0.2, | |
| "step": 3872 | |
| }, | |
| { | |
| "epoch": 11.363636363636363, | |
| "grad_norm": 3.0771420001983643, | |
| "learning_rate": 7.896154916072532e-07, | |
| "loss": 1.2102, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.1649082899093628, | |
| "eval_rouge1": 16.5778, | |
| "eval_rouge2": 10.126, | |
| "eval_rougeL": 14.4729, | |
| "eval_rougeLsum": 15.948, | |
| "eval_runtime": 428.9181, | |
| "eval_samples_per_second": 3.278, | |
| "eval_steps_per_second": 0.205, | |
| "step": 4224 | |
| }, | |
| { | |
| "epoch": 12.784090909090908, | |
| "grad_norm": 3.9918179512023926, | |
| "learning_rate": 5.785055040612244e-07, | |
| "loss": 1.2001, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.1660524606704712, | |
| "eval_rouge1": 16.6632, | |
| "eval_rouge2": 10.1667, | |
| "eval_rougeL": 14.5389, | |
| "eval_rougeLsum": 16.0237, | |
| "eval_runtime": 433.402, | |
| "eval_samples_per_second": 3.244, | |
| "eval_steps_per_second": 0.203, | |
| "step": 4576 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.1648682355880737, | |
| "eval_rouge1": 16.7154, | |
| "eval_rouge2": 10.2296, | |
| "eval_rougeL": 14.5912, | |
| "eval_rougeLsum": 16.0608, | |
| "eval_runtime": 438.5153, | |
| "eval_samples_per_second": 3.206, | |
| "eval_steps_per_second": 0.201, | |
| "step": 4928 | |
| }, | |
| { | |
| "epoch": 14.204545454545455, | |
| "grad_norm": 3.055126667022705, | |
| "learning_rate": 3.8864559256318376e-07, | |
| "loss": 1.1922, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.1646767854690552, | |
| "eval_rouge1": 16.6678, | |
| "eval_rouge2": 10.1662, | |
| "eval_rougeL": 14.5472, | |
| "eval_rougeLsum": 16.0314, | |
| "eval_runtime": 427.2745, | |
| "eval_samples_per_second": 3.291, | |
| "eval_steps_per_second": 0.206, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 15.625, | |
| "grad_norm": 3.330857038497925, | |
| "learning_rate": 2.2869089949783792e-07, | |
| "loss": 1.1791, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.162706971168518, | |
| "eval_rouge1": 16.6631, | |
| "eval_rouge2": 10.2047, | |
| "eval_rougeL": 14.5382, | |
| "eval_rougeLsum": 16.031, | |
| "eval_runtime": 437.8499, | |
| "eval_samples_per_second": 3.211, | |
| "eval_steps_per_second": 0.201, | |
| "step": 5632 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.161091685295105, | |
| "eval_rouge1": 16.6575, | |
| "eval_rouge2": 10.1867, | |
| "eval_rougeL": 14.5325, | |
| "eval_rougeLsum": 16.0221, | |
| "eval_runtime": 436.8448, | |
| "eval_samples_per_second": 3.219, | |
| "eval_steps_per_second": 0.201, | |
| "step": 5984 | |
| }, | |
| { | |
| "epoch": 17.045454545454547, | |
| "grad_norm": 3.5640358924865723, | |
| "learning_rate": 1.0697642632201121e-07, | |
| "loss": 1.1809, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.1619925498962402, | |
| "eval_rouge1": 16.6763, | |
| "eval_rouge2": 10.2071, | |
| "eval_rougeL": 14.5592, | |
| "eval_rougeLsum": 16.0364, | |
| "eval_runtime": 433.2267, | |
| "eval_samples_per_second": 3.245, | |
| "eval_steps_per_second": 0.203, | |
| "step": 6336 | |
| }, | |
| { | |
| "epoch": 18.46590909090909, | |
| "grad_norm": 4.443442344665527, | |
| "learning_rate": 2.953657379014829e-08, | |
| "loss": 1.1743, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.1618391275405884, | |
| "eval_rouge1": 16.6734, | |
| "eval_rouge2": 10.1981, | |
| "eval_rougeL": 14.5583, | |
| "eval_rougeLsum": 16.032, | |
| "eval_runtime": 435.6258, | |
| "eval_samples_per_second": 3.228, | |
| "eval_steps_per_second": 0.202, | |
| "step": 6688 | |
| }, | |
| { | |
| "epoch": 19.886363636363637, | |
| "grad_norm": 3.133742094039917, | |
| "learning_rate": 2.1068074461783049e-10, | |
| "loss": 1.1759, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_gen_len": 21.0, | |
| "eval_loss": 1.1616698503494263, | |
| "eval_rouge1": 16.6639, | |
| "eval_rouge2": 10.1849, | |
| "eval_rougeL": 14.5427, | |
| "eval_rougeLsum": 16.0297, | |
| "eval_runtime": 431.0391, | |
| "eval_samples_per_second": 3.262, | |
| "eval_steps_per_second": 0.204, | |
| "step": 7040 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 7040, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 4, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 3 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.4375592564555776e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |