| { | |
| "best_global_step": 4266, | |
| "best_metric": 0.3953316807746887, | |
| "best_model_checkpoint": "bert_base_rand_50_v2_qqp/checkpoint-4266", | |
| "epoch": 8.0, | |
| "eval_steps": 500, | |
| "global_step": 11376, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.5561025142669678, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.4729, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7941380163245115, | |
| "eval_combined_score": 0.7416441715609485, | |
| "eval_f1": 0.6891503267973856, | |
| "eval_loss": 0.43494340777397156, | |
| "eval_runtime": 25.783, | |
| "eval_samples_per_second": 1568.086, | |
| "eval_steps_per_second": 6.128, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.604124069213867, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.3717, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8183032401681919, | |
| "eval_combined_score": 0.7890482913286189, | |
| "eval_f1": 0.7597933424890458, | |
| "eval_loss": 0.39567166566848755, | |
| "eval_runtime": 25.6617, | |
| "eval_samples_per_second": 1575.5, | |
| "eval_steps_per_second": 6.157, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.7459185123443604, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.2951, | |
| "step": 4266 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.827652733118971, | |
| "eval_combined_score": 0.7986680403877396, | |
| "eval_f1": 0.7696833476565083, | |
| "eval_loss": 0.3953316807746887, | |
| "eval_runtime": 25.8037, | |
| "eval_samples_per_second": 1566.833, | |
| "eval_steps_per_second": 6.123, | |
| "step": 4266 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 5.060959339141846, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.2327, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8347514222112293, | |
| "eval_combined_score": 0.7992742438968468, | |
| "eval_f1": 0.7637970655824642, | |
| "eval_loss": 0.4646459221839905, | |
| "eval_runtime": 25.6379, | |
| "eval_samples_per_second": 1576.964, | |
| "eval_steps_per_second": 6.163, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.4146206378936768, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.1833, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8385110066782092, | |
| "eval_combined_score": 0.8084103707459838, | |
| "eval_f1": 0.7783097348137584, | |
| "eval_loss": 0.4751463830471039, | |
| "eval_runtime": 25.5271, | |
| "eval_samples_per_second": 1583.807, | |
| "eval_steps_per_second": 6.19, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 4.024450302124023, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.145, | |
| "step": 8532 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8344051446945338, | |
| "eval_combined_score": 0.8098179767036009, | |
| "eval_f1": 0.785230808712668, | |
| "eval_loss": 0.5040478706359863, | |
| "eval_runtime": 25.6967, | |
| "eval_samples_per_second": 1573.356, | |
| "eval_steps_per_second": 6.149, | |
| "step": 8532 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 7.6686692237854, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.1174, | |
| "step": 9954 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8348256245362354, | |
| "eval_combined_score": 0.8105853231460292, | |
| "eval_f1": 0.7863450217558229, | |
| "eval_loss": 0.6121538877487183, | |
| "eval_runtime": 25.7118, | |
| "eval_samples_per_second": 1572.427, | |
| "eval_steps_per_second": 6.145, | |
| "step": 9954 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 5.302459239959717, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.0944, | |
| "step": 11376 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8387830818698986, | |
| "eval_combined_score": 0.8108233485170666, | |
| "eval_f1": 0.7828636151642347, | |
| "eval_loss": 0.6166710257530212, | |
| "eval_runtime": 25.6523, | |
| "eval_samples_per_second": 1576.075, | |
| "eval_steps_per_second": 6.159, | |
| "step": 11376 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 11376, | |
| "total_flos": 3.8292762019405824e+17, | |
| "train_loss": 0.2390665867157626, | |
| "train_runtime": 4931.5872, | |
| "train_samples_per_second": 3688.934, | |
| "train_steps_per_second": 14.417 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 71100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 5 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.8292762019405824e+17, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |