| { | |
| "best_metric": 0.8321161270141602, | |
| "best_model_checkpoint": "tiny_bert_km_50_v1_mnli/checkpoint-7670", | |
| "epoch": 10.0, | |
| "eval_steps": 500, | |
| "global_step": 15340, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.9514528512954712, | |
| "learning_rate": 4.9e-05, | |
| "loss": 1.0082, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.5437595517065715, | |
| "eval_loss": 0.942550778388977, | |
| "eval_runtime": 3.5158, | |
| "eval_samples_per_second": 2791.707, | |
| "eval_steps_per_second": 11.093, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.3989537954330444, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.9147, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.5845134997452879, | |
| "eval_loss": 0.8889986276626587, | |
| "eval_runtime": 3.4266, | |
| "eval_samples_per_second": 2864.392, | |
| "eval_steps_per_second": 11.382, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.4162970781326294, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.8604, | |
| "step": 4602 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.606316861946001, | |
| "eval_loss": 0.8645786046981812, | |
| "eval_runtime": 3.4993, | |
| "eval_samples_per_second": 2804.851, | |
| "eval_steps_per_second": 11.145, | |
| "step": 4602 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.4847444295883179, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.8145, | |
| "step": 6136 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.615588385124809, | |
| "eval_loss": 0.8447086811065674, | |
| "eval_runtime": 3.4418, | |
| "eval_samples_per_second": 2851.689, | |
| "eval_steps_per_second": 11.331, | |
| "step": 6136 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.7801928520202637, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.7652, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.6298522669383596, | |
| "eval_loss": 0.8321161270141602, | |
| "eval_runtime": 3.451, | |
| "eval_samples_per_second": 2844.112, | |
| "eval_steps_per_second": 11.301, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 2.2125303745269775, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.7172, | |
| "step": 9204 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.6405501782985227, | |
| "eval_loss": 0.84904545545578, | |
| "eval_runtime": 3.4667, | |
| "eval_samples_per_second": 2831.233, | |
| "eval_steps_per_second": 11.25, | |
| "step": 9204 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 2.1476242542266846, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.6722, | |
| "step": 10738 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.6512480896586856, | |
| "eval_loss": 0.8362392783164978, | |
| "eval_runtime": 3.4933, | |
| "eval_samples_per_second": 2809.683, | |
| "eval_steps_per_second": 11.164, | |
| "step": 10738 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 2.0134174823760986, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.6289, | |
| "step": 12272 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.6459500764136525, | |
| "eval_loss": 0.8634655475616455, | |
| "eval_runtime": 3.4256, | |
| "eval_samples_per_second": 2865.208, | |
| "eval_steps_per_second": 11.385, | |
| "step": 12272 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 2.7003607749938965, | |
| "learning_rate": 4.1e-05, | |
| "loss": 0.5887, | |
| "step": 13806 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.6463576158940397, | |
| "eval_loss": 0.9014362096786499, | |
| "eval_runtime": 3.4293, | |
| "eval_samples_per_second": 2862.09, | |
| "eval_steps_per_second": 11.373, | |
| "step": 13806 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 3.0219602584838867, | |
| "learning_rate": 4e-05, | |
| "loss": 0.5489, | |
| "step": 15340 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.6449312277126846, | |
| "eval_loss": 0.9343048334121704, | |
| "eval_runtime": 3.4187, | |
| "eval_samples_per_second": 2870.96, | |
| "eval_steps_per_second": 11.408, | |
| "step": 15340 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 15340, | |
| "total_flos": 1.029836100834816e+17, | |
| "train_loss": 0.751894306048576, | |
| "train_runtime": 2625.2366, | |
| "train_samples_per_second": 7479.364, | |
| "train_steps_per_second": 29.216 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 76700, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "total_flos": 1.029836100834816e+17, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |