{ "best_global_step": 1230, "best_metric": 0.5834688544273376, "best_model_checkpoint": "tiny_bert_km_5_v1_qnli/checkpoint-1230", "epoch": 8.0, "eval_steps": 500, "global_step": 3280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.540867567062378, "learning_rate": 4.9e-05, "loss": 0.6651, "step": 410 }, { "epoch": 1.0, "eval_accuracy": 0.61870766977851, "eval_loss": 0.6433318853378296, "eval_runtime": 1.5735, "eval_samples_per_second": 3471.792, "eval_steps_per_second": 13.981, "step": 410 }, { "epoch": 2.0, "grad_norm": 1.6758782863616943, "learning_rate": 4.8e-05, "loss": 0.63, "step": 820 }, { "epoch": 2.0, "eval_accuracy": 0.6426871682225883, "eval_loss": 0.6245182156562805, "eval_runtime": 1.5486, "eval_samples_per_second": 3527.65, "eval_steps_per_second": 14.206, "step": 820 }, { "epoch": 3.0, "grad_norm": 3.1263701915740967, "learning_rate": 4.7e-05, "loss": 0.5711, "step": 1230 }, { "epoch": 3.0, "eval_accuracy": 0.6994325462200256, "eval_loss": 0.5834688544273376, "eval_runtime": 1.5561, "eval_samples_per_second": 3510.739, "eval_steps_per_second": 14.138, "step": 1230 }, { "epoch": 4.0, "grad_norm": 5.254637241363525, "learning_rate": 4.600000000000001e-05, "loss": 0.4752, "step": 1640 }, { "epoch": 4.0, "eval_accuracy": 0.7082189273293062, "eval_loss": 0.5910441279411316, "eval_runtime": 1.5491, "eval_samples_per_second": 3526.58, "eval_steps_per_second": 14.202, "step": 1640 }, { "epoch": 5.0, "grad_norm": 5.907124042510986, "learning_rate": 4.5e-05, "loss": 0.3883, "step": 2050 }, { "epoch": 5.0, "eval_accuracy": 0.7129782170968333, "eval_loss": 0.6258730888366699, "eval_runtime": 1.5681, "eval_samples_per_second": 3483.82, "eval_steps_per_second": 14.03, "step": 2050 }, { "epoch": 6.0, "grad_norm": 6.061448574066162, "learning_rate": 4.4000000000000006e-05, "loss": 0.3147, "step": 2460 }, { "epoch": 6.0, "eval_accuracy": 0.7067545304777595, "eval_loss": 0.688542902469635, "eval_runtime": 1.554, "eval_samples_per_second": 3515.533, "eval_steps_per_second": 14.157, "step": 2460 }, { "epoch": 7.0, "grad_norm": 7.78029727935791, "learning_rate": 4.3e-05, "loss": 0.2538, "step": 2870 }, { "epoch": 7.0, "eval_accuracy": 0.6972359509427055, "eval_loss": 0.8067191243171692, "eval_runtime": 1.5468, "eval_samples_per_second": 3531.879, "eval_steps_per_second": 14.223, "step": 2870 }, { "epoch": 8.0, "grad_norm": 4.341258525848389, "learning_rate": 4.2e-05, "loss": 0.2075, "step": 3280 }, { "epoch": 8.0, "eval_accuracy": 0.6996155958264689, "eval_loss": 0.8747366070747375, "eval_runtime": 1.5509, "eval_samples_per_second": 3522.396, "eval_steps_per_second": 14.185, "step": 3280 }, { "epoch": 8.0, "step": 3280, "total_flos": 2.197388993748173e+16, "train_loss": 0.43822247574969037, "train_runtime": 458.6842, "train_samples_per_second": 11417.769, "train_steps_per_second": 44.693 } ], "logging_steps": 1, "max_steps": 20500, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.197388993748173e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }