| { | |
| "best_global_step": 6136, | |
| "best_metric": 0.6076479554176331, | |
| "best_model_checkpoint": "tinybert_base_train_kd_mnli/checkpoint-6136", | |
| "epoch": 9.0, | |
| "eval_steps": 500, | |
| "global_step": 13806, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.6455118656158447, | |
| "learning_rate": 4.90006518904824e-05, | |
| "loss": 0.8098, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6904737646459501, | |
| "eval_loss": 0.7136619091033936, | |
| "eval_runtime": 4.8793, | |
| "eval_samples_per_second": 2011.543, | |
| "eval_steps_per_second": 7.993, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.2584753036499023, | |
| "learning_rate": 4.80006518904824e-05, | |
| "loss": 0.6666, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7276617422312787, | |
| "eval_loss": 0.6575940847396851, | |
| "eval_runtime": 4.9057, | |
| "eval_samples_per_second": 2000.715, | |
| "eval_steps_per_second": 7.95, | |
| "step": 3068 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.2152528762817383, | |
| "learning_rate": 4.70006518904824e-05, | |
| "loss": 0.5873, | |
| "step": 4602 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7377483443708609, | |
| "eval_loss": 0.627395749092102, | |
| "eval_runtime": 4.9208, | |
| "eval_samples_per_second": 1994.577, | |
| "eval_steps_per_second": 7.925, | |
| "step": 4602 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.7104854583740234, | |
| "learning_rate": 4.60006518904824e-05, | |
| "loss": 0.522, | |
| "step": 6136 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7492613346917982, | |
| "eval_loss": 0.6076479554176331, | |
| "eval_runtime": 4.8283, | |
| "eval_samples_per_second": 2032.81, | |
| "eval_steps_per_second": 8.077, | |
| "step": 6136 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.4938905239105225, | |
| "learning_rate": 4.50006518904824e-05, | |
| "loss": 0.4623, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7570045848191543, | |
| "eval_loss": 0.6132704615592957, | |
| "eval_runtime": 4.9556, | |
| "eval_samples_per_second": 1980.583, | |
| "eval_steps_per_second": 7.87, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 2.4697744846343994, | |
| "learning_rate": 4.40006518904824e-05, | |
| "loss": 0.4069, | |
| "step": 9204 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7575140091696383, | |
| "eval_loss": 0.6447588801383972, | |
| "eval_runtime": 5.0012, | |
| "eval_samples_per_second": 1962.533, | |
| "eval_steps_per_second": 7.798, | |
| "step": 9204 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 3.08911395072937, | |
| "learning_rate": 4.30006518904824e-05, | |
| "loss": 0.3547, | |
| "step": 10738 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.760570555272542, | |
| "eval_loss": 0.6818413734436035, | |
| "eval_runtime": 4.9938, | |
| "eval_samples_per_second": 1965.428, | |
| "eval_steps_per_second": 7.81, | |
| "step": 10738 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 3.197273015975952, | |
| "learning_rate": 4.20006518904824e-05, | |
| "loss": 0.3073, | |
| "step": 12272 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7602649006622516, | |
| "eval_loss": 0.703414261341095, | |
| "eval_runtime": 5.5899, | |
| "eval_samples_per_second": 1755.83, | |
| "eval_steps_per_second": 6.977, | |
| "step": 12272 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 2.992473602294922, | |
| "learning_rate": 4.10006518904824e-05, | |
| "loss": 0.2658, | |
| "step": 13806 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.7489556800815079, | |
| "eval_loss": 0.8077275156974792, | |
| "eval_runtime": 5.4298, | |
| "eval_samples_per_second": 1807.619, | |
| "eval_steps_per_second": 7.183, | |
| "step": 13806 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "step": 13806, | |
| "total_flos": 2.340951304214108e+17, | |
| "train_loss": 0.4869576942811889, | |
| "train_runtime": 3465.4123, | |
| "train_samples_per_second": 5666.021, | |
| "train_steps_per_second": 22.133 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 76700, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 5 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.340951304214108e+17, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |