| { | |
| "best_metric": 0.6503068804740906, | |
| "best_model_checkpoint": "tiny_bert_rand_100_v1_qnli/checkpoint-410", | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 2460, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.4773553609848022, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.6657, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.61925681859784, | |
| "eval_loss": 0.6503068804740906, | |
| "eval_runtime": 1.9736, | |
| "eval_samples_per_second": 2768.051, | |
| "eval_steps_per_second": 11.147, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.6680256128311157, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.6367, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6212703642687168, | |
| "eval_loss": 0.6538947820663452, | |
| "eval_runtime": 1.9585, | |
| "eval_samples_per_second": 2789.444, | |
| "eval_steps_per_second": 11.233, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.4384894371032715, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.5927, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6128500823723229, | |
| "eval_loss": 0.6680212616920471, | |
| "eval_runtime": 2.1951, | |
| "eval_samples_per_second": 2488.7, | |
| "eval_steps_per_second": 10.022, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.7538044452667236, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.531, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6115687351272194, | |
| "eval_loss": 0.7184457182884216, | |
| "eval_runtime": 1.9482, | |
| "eval_samples_per_second": 2804.177, | |
| "eval_steps_per_second": 11.293, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 5.5930867195129395, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.4631, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.6058941973274757, | |
| "eval_loss": 0.8040358424186707, | |
| "eval_runtime": 1.9417, | |
| "eval_samples_per_second": 2813.526, | |
| "eval_steps_per_second": 11.33, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 5.124334335327148, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.3972, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.6069924949661358, | |
| "eval_loss": 0.9130538702011108, | |
| "eval_runtime": 1.9358, | |
| "eval_samples_per_second": 2822.07, | |
| "eval_steps_per_second": 11.365, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "step": 2460, | |
| "total_flos": 1.6480417453111296e+16, | |
| "train_loss": 0.5477293495240251, | |
| "train_runtime": 433.0019, | |
| "train_samples_per_second": 12094.98, | |
| "train_steps_per_second": 47.344 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 20500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "total_flos": 1.6480417453111296e+16, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |