| { | |
| "best_global_step": 5688, | |
| "best_metric": 0.4171031415462494, | |
| "best_model_checkpoint": "tiny_bert_km_10_v1_qqp/checkpoint-5688", | |
| "epoch": 9.0, | |
| "eval_steps": 500, | |
| "global_step": 12798, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.6181977987289429, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.494, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.783700222606975, | |
| "eval_combined_score": 0.7331329164869788, | |
| "eval_f1": 0.6825656103669825, | |
| "eval_loss": 0.4529588520526886, | |
| "eval_runtime": 11.6668, | |
| "eval_samples_per_second": 3465.375, | |
| "eval_steps_per_second": 13.543, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.4298322200775146, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.4064, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8001484046500124, | |
| "eval_combined_score": 0.7520346273891323, | |
| "eval_f1": 0.7039208501282521, | |
| "eval_loss": 0.42547887563705444, | |
| "eval_runtime": 11.3788, | |
| "eval_samples_per_second": 3553.11, | |
| "eval_steps_per_second": 13.886, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.6808485984802246, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.3487, | |
| "step": 4266 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8137768983428147, | |
| "eval_combined_score": 0.7720519578930665, | |
| "eval_f1": 0.7303270174433182, | |
| "eval_loss": 0.4186139702796936, | |
| "eval_runtime": 11.3626, | |
| "eval_samples_per_second": 3558.152, | |
| "eval_steps_per_second": 13.905, | |
| "step": 4266 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.833254814147949, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.3007, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8211723967350977, | |
| "eval_combined_score": 0.782321934955684, | |
| "eval_f1": 0.7434714731762703, | |
| "eval_loss": 0.4171031415462494, | |
| "eval_runtime": 11.3784, | |
| "eval_samples_per_second": 3553.225, | |
| "eval_steps_per_second": 13.886, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.4554388523101807, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.261, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8216423447934702, | |
| "eval_combined_score": 0.7901753634624871, | |
| "eval_f1": 0.758708382131504, | |
| "eval_loss": 0.4420517385005951, | |
| "eval_runtime": 11.3653, | |
| "eval_samples_per_second": 3557.305, | |
| "eval_steps_per_second": 13.902, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 3.096228837966919, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.2272, | |
| "step": 8532 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8227801137768983, | |
| "eval_combined_score": 0.7922236396978827, | |
| "eval_f1": 0.761667165618867, | |
| "eval_loss": 0.45790544152259827, | |
| "eval_runtime": 11.3923, | |
| "eval_samples_per_second": 3548.873, | |
| "eval_steps_per_second": 13.869, | |
| "step": 8532 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 4.3414082527160645, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.2, | |
| "step": 9954 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.827207519168934, | |
| "eval_combined_score": 0.7969059111279656, | |
| "eval_f1": 0.7666043030869972, | |
| "eval_loss": 0.48471689224243164, | |
| "eval_runtime": 11.3867, | |
| "eval_samples_per_second": 3550.629, | |
| "eval_steps_per_second": 13.876, | |
| "step": 9954 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 1.463023066520691, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.176, | |
| "step": 11376 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8280484788523373, | |
| "eval_combined_score": 0.7961457803919854, | |
| "eval_f1": 0.7642430819316333, | |
| "eval_loss": 0.5298035144805908, | |
| "eval_runtime": 11.3676, | |
| "eval_samples_per_second": 3556.591, | |
| "eval_steps_per_second": 13.899, | |
| "step": 11376 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 6.6194353103637695, | |
| "learning_rate": 4.1e-05, | |
| "loss": 0.1561, | |
| "step": 12798 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.8292604501607717, | |
| "eval_combined_score": 0.799653043961761, | |
| "eval_f1": 0.7700456377627503, | |
| "eval_loss": 0.5476874709129333, | |
| "eval_runtime": 11.3611, | |
| "eval_samples_per_second": 3558.634, | |
| "eval_steps_per_second": 13.907, | |
| "step": 12798 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "step": 12798, | |
| "total_flos": 8.587209601564877e+16, | |
| "train_loss": 0.2855648035406228, | |
| "train_runtime": 1814.373, | |
| "train_samples_per_second": 10026.769, | |
| "train_steps_per_second": 39.187 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 71100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 5 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8.587209601564877e+16, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |