{ "best_global_step": 2844, "best_metric": 0.41833943128585815, "best_model_checkpoint": "tiny_bert_rand_10_v1_qqp/checkpoint-2844", "epoch": 7.0, "eval_steps": 500, "global_step": 9954, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.6271923780441284, "learning_rate": 4.9e-05, "loss": 0.4957, "step": 1422 }, { "epoch": 1.0, "eval_accuracy": 0.7827850605985655, "eval_combined_score": 0.7276225765472314, "eval_f1": 0.6724600924958973, "eval_loss": 0.4516511857509613, "eval_runtime": 12.0702, "eval_samples_per_second": 3349.569, "eval_steps_per_second": 13.09, "step": 1422 }, { "epoch": 2.0, "grad_norm": 2.40657639503479, "learning_rate": 4.8e-05, "loss": 0.4048, "step": 2844 }, { "epoch": 2.0, "eval_accuracy": 0.8025476131585456, "eval_combined_score": 0.7654761676856587, "eval_f1": 0.7284047222127717, "eval_loss": 0.41833943128585815, "eval_runtime": 11.8057, "eval_samples_per_second": 3424.62, "eval_steps_per_second": 13.383, "step": 2844 }, { "epoch": 3.0, "grad_norm": 3.13236141204834, "learning_rate": 4.7e-05, "loss": 0.3455, "step": 4266 }, { "epoch": 3.0, "eval_accuracy": 0.8124412564927035, "eval_combined_score": 0.7738034871853382, "eval_f1": 0.7351657178779729, "eval_loss": 0.4275418519973755, "eval_runtime": 11.7509, "eval_samples_per_second": 3440.578, "eval_steps_per_second": 13.446, "step": 4266 }, { "epoch": 4.0, "grad_norm": 2.7595160007476807, "learning_rate": 4.600000000000001e-05, "loss": 0.2989, "step": 5688 }, { "epoch": 4.0, "eval_accuracy": 0.8182785060598565, "eval_combined_score": 0.7767561040876241, "eval_f1": 0.7352337021153915, "eval_loss": 0.4458482563495636, "eval_runtime": 11.8253, "eval_samples_per_second": 3418.932, "eval_steps_per_second": 13.361, "step": 5688 }, { "epoch": 5.0, "grad_norm": 2.8723201751708984, "learning_rate": 4.5e-05, "loss": 0.2624, "step": 7110 }, { "epoch": 5.0, "eval_accuracy": 0.8240168191936681, "eval_combined_score": 0.7922474265973389, "eval_f1": 0.7604780340010099, "eval_loss": 0.4298754334449768, "eval_runtime": 11.8409, "eval_samples_per_second": 3414.438, "eval_steps_per_second": 13.344, "step": 7110 }, { "epoch": 6.0, "grad_norm": 3.1317880153656006, "learning_rate": 4.4000000000000006e-05, "loss": 0.2308, "step": 8532 }, { "epoch": 6.0, "eval_accuracy": 0.823695275785308, "eval_combined_score": 0.791588493608675, "eval_f1": 0.7594817114320421, "eval_loss": 0.4603184163570404, "eval_runtime": 11.8708, "eval_samples_per_second": 3405.843, "eval_steps_per_second": 13.31, "step": 8532 }, { "epoch": 7.0, "grad_norm": 3.599036455154419, "learning_rate": 4.3e-05, "loss": 0.2067, "step": 9954 }, { "epoch": 7.0, "eval_accuracy": 0.824956715310413, "eval_combined_score": 0.79490850019433, "eval_f1": 0.764860285078247, "eval_loss": 0.46773019433021545, "eval_runtime": 11.8891, "eval_samples_per_second": 3400.585, "eval_steps_per_second": 13.289, "step": 9954 }, { "epoch": 7.0, "step": 9954, "total_flos": 6.678940801217126e+16, "train_loss": 0.32066750828223894, "train_runtime": 1423.9001, "train_samples_per_second": 12776.388, "train_steps_per_second": 49.933 } ], "logging_steps": 1, "max_steps": 71100, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.678940801217126e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }