| { | |
| "best_metric": 0.016448974609375, | |
| "best_model_checkpoint": "model_fewrel_1_3-task4/checkpoint-1890", | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 2100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.0394287109375, | |
| "eval_rouge1": 98.8133, | |
| "eval_rouge2": 98.2303, | |
| "eval_rougeL": 98.6613, | |
| "eval_rougeLsum": 98.8046, | |
| "eval_runtime": 26.1194, | |
| "eval_samples_per_second": 42.88, | |
| "eval_steps_per_second": 1.34, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.032379150390625, | |
| "eval_rouge1": 98.0957, | |
| "eval_rouge2": 97.1666, | |
| "eval_rougeL": 97.8621, | |
| "eval_rougeLsum": 98.1056, | |
| "eval_runtime": 25.8031, | |
| "eval_samples_per_second": 43.406, | |
| "eval_steps_per_second": 1.356, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.380952380952381, | |
| "grad_norm": 0.016574041917920113, | |
| "learning_rate": 0.0008665259359149131, | |
| "loss": 0.0725, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.0189056396484375, | |
| "eval_rouge1": 99.423, | |
| "eval_rouge2": 99.1539, | |
| "eval_rougeL": 99.359, | |
| "eval_rougeLsum": 99.4285, | |
| "eval_runtime": 25.7666, | |
| "eval_samples_per_second": 43.467, | |
| "eval_steps_per_second": 1.358, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.03497314453125, | |
| "eval_rouge1": 98.9701, | |
| "eval_rouge2": 98.497, | |
| "eval_rougeL": 98.8414, | |
| "eval_rougeLsum": 98.9712, | |
| "eval_runtime": 25.9235, | |
| "eval_samples_per_second": 43.204, | |
| "eval_steps_per_second": 1.35, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 4.761904761904762, | |
| "grad_norm": 0.016128525137901306, | |
| "learning_rate": 0.0005373650467932121, | |
| "loss": 0.0226, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.0195159912109375, | |
| "eval_rouge1": 99.2315, | |
| "eval_rouge2": 98.8414, | |
| "eval_rougeL": 99.1293, | |
| "eval_rougeLsum": 99.2314, | |
| "eval_runtime": 26.0919, | |
| "eval_samples_per_second": 42.925, | |
| "eval_steps_per_second": 1.341, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.020782470703125, | |
| "eval_rouge1": 99.5165, | |
| "eval_rouge2": 99.2985, | |
| "eval_rougeL": 99.4726, | |
| "eval_rougeLsum": 99.5153, | |
| "eval_runtime": 25.9934, | |
| "eval_samples_per_second": 43.088, | |
| "eval_steps_per_second": 1.346, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.0180206298828125, | |
| "eval_rouge1": 99.5187, | |
| "eval_rouge2": 99.3048, | |
| "eval_rougeL": 99.4708, | |
| "eval_rougeLsum": 99.5346, | |
| "eval_runtime": 25.8971, | |
| "eval_samples_per_second": 43.248, | |
| "eval_steps_per_second": 1.352, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 7.142857142857143, | |
| "grad_norm": 0.08678867667913437, | |
| "learning_rate": 0.00018825509907063325, | |
| "loss": 0.0096, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.016693115234375, | |
| "eval_rouge1": 99.4715, | |
| "eval_rouge2": 99.2326, | |
| "eval_rougeL": 99.4096, | |
| "eval_rougeLsum": 99.484, | |
| "eval_runtime": 25.9209, | |
| "eval_samples_per_second": 43.208, | |
| "eval_steps_per_second": 1.35, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.016448974609375, | |
| "eval_rouge1": 99.5187, | |
| "eval_rouge2": 99.3048, | |
| "eval_rougeL": 99.4708, | |
| "eval_rougeLsum": 99.5346, | |
| "eval_runtime": 25.8641, | |
| "eval_samples_per_second": 43.303, | |
| "eval_steps_per_second": 1.353, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 9.523809523809524, | |
| "grad_norm": 0.015132551081478596, | |
| "learning_rate": 5.5845868874357386e-06, | |
| "loss": 0.0063, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.0164794921875, | |
| "eval_rouge1": 99.5187, | |
| "eval_rouge2": 99.3048, | |
| "eval_rougeL": 99.4708, | |
| "eval_rougeLsum": 99.5346, | |
| "eval_runtime": 25.9133, | |
| "eval_samples_per_second": 43.221, | |
| "eval_steps_per_second": 1.351, | |
| "step": 2100 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 2100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.3099168784384e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |