{ "best_global_step": 1230, "best_metric": 0.646436333656311, "best_model_checkpoint": "tiny_bert_km_50_v2_qnli/checkpoint-1230", "epoch": 8.0, "eval_steps": 500, "global_step": 3280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.2369039058685303, "learning_rate": 4.9e-05, "loss": 0.6669, "step": 410 }, { "epoch": 1.0, "eval_accuracy": 0.6196229178107268, "eval_loss": 0.648975670337677, "eval_runtime": 1.6228, "eval_samples_per_second": 3366.355, "eval_steps_per_second": 13.557, "step": 410 }, { "epoch": 2.0, "grad_norm": 1.6113338470458984, "learning_rate": 4.8e-05, "loss": 0.6405, "step": 820 }, { "epoch": 2.0, "eval_accuracy": 0.633534687900421, "eval_loss": 0.6481720805168152, "eval_runtime": 1.571, "eval_samples_per_second": 3477.482, "eval_steps_per_second": 14.004, "step": 820 }, { "epoch": 3.0, "grad_norm": 2.5955770015716553, "learning_rate": 4.7e-05, "loss": 0.5995, "step": 1230 }, { "epoch": 3.0, "eval_accuracy": 0.6285923485264506, "eval_loss": 0.646436333656311, "eval_runtime": 1.5557, "eval_samples_per_second": 3511.528, "eval_steps_per_second": 14.141, "step": 1230 }, { "epoch": 4.0, "grad_norm": 3.301100969314575, "learning_rate": 4.600000000000001e-05, "loss": 0.5338, "step": 1640 }, { "epoch": 4.0, "eval_accuracy": 0.6223686619073769, "eval_loss": 0.6942920684814453, "eval_runtime": 1.597, "eval_samples_per_second": 3420.686, "eval_steps_per_second": 13.775, "step": 1640 }, { "epoch": 5.0, "grad_norm": 3.8189849853515625, "learning_rate": 4.5e-05, "loss": 0.4599, "step": 2050 }, { "epoch": 5.0, "eval_accuracy": 0.6231008603331503, "eval_loss": 0.7389899492263794, "eval_runtime": 1.5849, "eval_samples_per_second": 3446.901, "eval_steps_per_second": 13.881, "step": 2050 }, { "epoch": 6.0, "grad_norm": 4.4279351234436035, "learning_rate": 4.4000000000000006e-05, "loss": 0.3895, "step": 2460 }, { "epoch": 6.0, "eval_accuracy": 0.6198059674171701, "eval_loss": 0.8551831841468811, "eval_runtime": 1.5541, "eval_samples_per_second": 3515.177, "eval_steps_per_second": 14.156, "step": 2460 }, { "epoch": 7.0, "grad_norm": 6.780888080596924, "learning_rate": 4.3e-05, "loss": 0.3245, "step": 2870 }, { "epoch": 7.0, "eval_accuracy": 0.6088229910305692, "eval_loss": 0.9642999768257141, "eval_runtime": 1.6103, "eval_samples_per_second": 3392.576, "eval_steps_per_second": 13.662, "step": 2870 }, { "epoch": 8.0, "grad_norm": 4.599832057952881, "learning_rate": 4.2e-05, "loss": 0.2692, "step": 3280 }, { "epoch": 8.0, "eval_accuracy": 0.6168771737140765, "eval_loss": 1.008509874343872, "eval_runtime": 1.5621, "eval_samples_per_second": 3497.225, "eval_steps_per_second": 14.084, "step": 3280 }, { "epoch": 8.0, "step": 3280, "total_flos": 2.197388993748173e+16, "train_loss": 0.48549206431319075, "train_runtime": 454.2765, "train_samples_per_second": 11528.551, "train_steps_per_second": 45.127 } ], "logging_steps": 1, "max_steps": 20500, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.197388993748173e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }