| { | |
| "best_metric": 84.05797101449275, | |
| "best_model_checkpoint": "outputs/bitfit/t5-base/rte/checkpoint-300", | |
| "epoch": 20.0, | |
| "global_step": 1560, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.28, | |
| "eval_accuracy": 76.81159420289855, | |
| "eval_average_metrics": 76.81159420289855, | |
| "eval_loss": 0.21982769668102264, | |
| "eval_runtime": 1.0149, | |
| "eval_samples_per_second": 135.972, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_accuracy": 78.26086956521739, | |
| "eval_average_metrics": 78.26086956521739, | |
| "eval_loss": 0.18932242691516876, | |
| "eval_runtime": 0.6715, | |
| "eval_samples_per_second": 205.52, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "eval_accuracy": 84.05797101449275, | |
| "eval_average_metrics": 84.05797101449275, | |
| "eval_loss": 0.16963252425193787, | |
| "eval_runtime": 0.8653, | |
| "eval_samples_per_second": 159.484, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 5.13, | |
| "eval_accuracy": 80.43478260869566, | |
| "eval_average_metrics": 80.43478260869566, | |
| "eval_loss": 0.18479692935943604, | |
| "eval_runtime": 0.7171, | |
| "eval_samples_per_second": 192.431, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 0.00020384615384615385, | |
| "loss": 0.2327, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "eval_accuracy": 81.88405797101449, | |
| "eval_average_metrics": 81.88405797101449, | |
| "eval_loss": 0.23633617162704468, | |
| "eval_runtime": 0.6787, | |
| "eval_samples_per_second": 203.343, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 7.69, | |
| "eval_accuracy": 79.71014492753623, | |
| "eval_average_metrics": 79.71014492753623, | |
| "eval_loss": 0.23751917481422424, | |
| "eval_runtime": 0.8803, | |
| "eval_samples_per_second": 156.767, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "eval_accuracy": 84.05797101449275, | |
| "eval_average_metrics": 84.05797101449275, | |
| "eval_loss": 0.23690848052501678, | |
| "eval_runtime": 0.5629, | |
| "eval_samples_per_second": 245.142, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 10.26, | |
| "eval_accuracy": 83.33333333333334, | |
| "eval_average_metrics": 83.33333333333334, | |
| "eval_loss": 0.2846378684043884, | |
| "eval_runtime": 0.9472, | |
| "eval_samples_per_second": 145.689, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 11.54, | |
| "eval_accuracy": 82.6086956521739, | |
| "eval_average_metrics": 82.6086956521739, | |
| "eval_loss": 0.2770601212978363, | |
| "eval_runtime": 0.8799, | |
| "eval_samples_per_second": 156.839, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 12.82, | |
| "learning_rate": 0.00010769230769230768, | |
| "loss": 0.0774, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 12.82, | |
| "eval_accuracy": 82.6086956521739, | |
| "eval_average_metrics": 82.6086956521739, | |
| "eval_loss": 0.3579100966453552, | |
| "eval_runtime": 0.9343, | |
| "eval_samples_per_second": 147.71, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 14.1, | |
| "eval_accuracy": 81.15942028985508, | |
| "eval_average_metrics": 81.15942028985508, | |
| "eval_loss": 0.4031548798084259, | |
| "eval_runtime": 0.6763, | |
| "eval_samples_per_second": 204.046, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 15.38, | |
| "eval_accuracy": 81.15942028985508, | |
| "eval_average_metrics": 81.15942028985508, | |
| "eval_loss": 0.4019993841648102, | |
| "eval_runtime": 0.9288, | |
| "eval_samples_per_second": 148.583, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 16.67, | |
| "eval_accuracy": 78.98550724637681, | |
| "eval_average_metrics": 78.98550724637681, | |
| "eval_loss": 0.47033587098121643, | |
| "eval_runtime": 0.6015, | |
| "eval_samples_per_second": 229.417, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 17.95, | |
| "eval_accuracy": 81.88405797101449, | |
| "eval_average_metrics": 81.88405797101449, | |
| "eval_loss": 0.4693973958492279, | |
| "eval_runtime": 0.8377, | |
| "eval_samples_per_second": 164.743, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 19.23, | |
| "learning_rate": 1.1538461538461538e-05, | |
| "loss": 0.0312, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 19.23, | |
| "eval_accuracy": 79.71014492753623, | |
| "eval_average_metrics": 79.71014492753623, | |
| "eval_loss": 0.5053138136863708, | |
| "eval_runtime": 1.053, | |
| "eval_samples_per_second": 131.051, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 1560, | |
| "total_flos": 7649465032433664.0, | |
| "train_loss": 0.1103338890350782, | |
| "train_runtime": 529.9082, | |
| "train_samples_per_second": 93.979, | |
| "train_steps_per_second": 2.944 | |
| } | |
| ], | |
| "max_steps": 1560, | |
| "num_train_epochs": 20, | |
| "total_flos": 7649465032433664.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |