| { | |
| "best_metric": 0.0059661865234375, | |
| "best_model_checkpoint": "model_fewrel_1_4-task5/checkpoint-630", | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 2100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.017303466796875, | |
| "eval_rouge1": 98.4539, | |
| "eval_rouge2": 97.5762, | |
| "eval_rougeL": 98.1882, | |
| "eval_rougeLsum": 98.4482, | |
| "eval_runtime": 34.4843, | |
| "eval_samples_per_second": 32.479, | |
| "eval_steps_per_second": 1.015, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.00751495361328125, | |
| "eval_rouge1": 99.1583, | |
| "eval_rouge2": 98.6851, | |
| "eval_rougeL": 99.0047, | |
| "eval_rougeLsum": 99.1647, | |
| "eval_runtime": 32.741, | |
| "eval_samples_per_second": 34.208, | |
| "eval_steps_per_second": 1.069, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 0.014579183422029018, | |
| "learning_rate": 0.0008665259359149131, | |
| "loss": 0.0573, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.0059661865234375, | |
| "eval_rouge1": 99.3646, | |
| "eval_rouge2": 98.9205, | |
| "eval_rougeL": 99.2221, | |
| "eval_rougeLsum": 99.3603, | |
| "eval_runtime": 32.9254, | |
| "eval_samples_per_second": 34.016, | |
| "eval_steps_per_second": 1.063, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.00925445556640625, | |
| "eval_rouge1": 99.1633, | |
| "eval_rouge2": 98.6891, | |
| "eval_rougeL": 99.0235, | |
| "eval_rougeLsum": 99.1601, | |
| "eval_runtime": 33.2811, | |
| "eval_samples_per_second": 33.653, | |
| "eval_steps_per_second": 1.052, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 4.761904761904762, | |
| "grad_norm": 0.1038060188293457, | |
| "learning_rate": 0.0005373650467932121, | |
| "loss": 0.0102, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.007312774658203125, | |
| "eval_rouge1": 99.3523, | |
| "eval_rouge2": 98.8747, | |
| "eval_rougeL": 99.1909, | |
| "eval_rougeLsum": 99.3521, | |
| "eval_runtime": 33.1107, | |
| "eval_samples_per_second": 33.826, | |
| "eval_steps_per_second": 1.057, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.008575439453125, | |
| "eval_rouge1": 99.4615, | |
| "eval_rouge2": 99.0736, | |
| "eval_rougeL": 99.3351, | |
| "eval_rougeLsum": 99.4494, | |
| "eval_runtime": 32.8562, | |
| "eval_samples_per_second": 34.088, | |
| "eval_steps_per_second": 1.065, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.00952911376953125, | |
| "eval_rouge1": 99.3799, | |
| "eval_rouge2": 99.0097, | |
| "eval_rougeL": 99.2562, | |
| "eval_rougeLsum": 99.371, | |
| "eval_runtime": 33.1813, | |
| "eval_samples_per_second": 33.754, | |
| "eval_steps_per_second": 1.055, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 7.142857142857143, | |
| "grad_norm": 0.025980567559599876, | |
| "learning_rate": 0.00018825509907063325, | |
| "loss": 0.0042, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.00867462158203125, | |
| "eval_rouge1": 99.4488, | |
| "eval_rouge2": 99.0827, | |
| "eval_rougeL": 99.3346, | |
| "eval_rougeLsum": 99.4596, | |
| "eval_runtime": 32.7477, | |
| "eval_samples_per_second": 34.201, | |
| "eval_steps_per_second": 1.069, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.00786590576171875, | |
| "eval_rouge1": 99.3538, | |
| "eval_rouge2": 98.9422, | |
| "eval_rougeL": 99.2192, | |
| "eval_rougeLsum": 99.3563, | |
| "eval_runtime": 33.0744, | |
| "eval_samples_per_second": 33.863, | |
| "eval_steps_per_second": 1.058, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 9.523809523809524, | |
| "grad_norm": 0.18442556262016296, | |
| "learning_rate": 5.5845868874357386e-06, | |
| "loss": 0.0023, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.007965087890625, | |
| "eval_rouge1": 99.3538, | |
| "eval_rouge2": 98.9422, | |
| "eval_rougeL": 99.2192, | |
| "eval_rougeLsum": 99.3563, | |
| "eval_runtime": 33.0331, | |
| "eval_samples_per_second": 33.905, | |
| "eval_steps_per_second": 1.06, | |
| "step": 2100 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.3099168784384e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |