| { | |
| "best_metric": 0.4193161725997925, | |
| "best_model_checkpoint": "tiny_bert_km_50_v1_qqp/checkpoint-4266", | |
| "epoch": 8.0, | |
| "eval_steps": 500, | |
| "global_step": 11376, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.748605489730835, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.5011, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7840217660153351, | |
| "eval_combined_score": 0.732481505632631, | |
| "eval_f1": 0.6809412452499269, | |
| "eval_loss": 0.4513613283634186, | |
| "eval_runtime": 15.0073, | |
| "eval_samples_per_second": 2694.026, | |
| "eval_steps_per_second": 10.528, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.0197439193725586, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.4146, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7996784565916398, | |
| "eval_combined_score": 0.7551677267417489, | |
| "eval_f1": 0.7106569968918581, | |
| "eval_loss": 0.42334410548210144, | |
| "eval_runtime": 14.0744, | |
| "eval_samples_per_second": 2872.589, | |
| "eval_steps_per_second": 11.226, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.5988855361938477, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.3605, | |
| "step": 4266 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8108582735592382, | |
| "eval_combined_score": 0.7698776779218888, | |
| "eval_f1": 0.7288970822845393, | |
| "eval_loss": 0.4193161725997925, | |
| "eval_runtime": 14.0194, | |
| "eval_samples_per_second": 2883.855, | |
| "eval_steps_per_second": 11.27, | |
| "step": 4266 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 2.5036709308624268, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.3153, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8142715805095226, | |
| "eval_combined_score": 0.7768661951678455, | |
| "eval_f1": 0.7394608098261685, | |
| "eval_loss": 0.42982715368270874, | |
| "eval_runtime": 14.0649, | |
| "eval_samples_per_second": 2874.526, | |
| "eval_steps_per_second": 11.234, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.8022961616516113, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.2768, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8161019045263418, | |
| "eval_combined_score": 0.7811781492871632, | |
| "eval_f1": 0.7462543940479847, | |
| "eval_loss": 0.4367225170135498, | |
| "eval_runtime": 14.2004, | |
| "eval_samples_per_second": 2847.095, | |
| "eval_steps_per_second": 11.126, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 3.0807063579559326, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.245, | |
| "step": 8532 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8136779619094732, | |
| "eval_combined_score": 0.7862144813550569, | |
| "eval_f1": 0.7587510008006405, | |
| "eval_loss": 0.4542311131954193, | |
| "eval_runtime": 14.0405, | |
| "eval_samples_per_second": 2879.525, | |
| "eval_steps_per_second": 11.253, | |
| "step": 8532 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 4.5525922775268555, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.2174, | |
| "step": 9954 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8195152114766263, | |
| "eval_combined_score": 0.7896423794008647, | |
| "eval_f1": 0.7597695473251029, | |
| "eval_loss": 0.47380971908569336, | |
| "eval_runtime": 14.0747, | |
| "eval_samples_per_second": 2872.521, | |
| "eval_steps_per_second": 11.226, | |
| "step": 9954 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 3.2192418575286865, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.1927, | |
| "step": 11376 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.823695275785308, | |
| "eval_combined_score": 0.7905984033549933, | |
| "eval_f1": 0.7575015309246785, | |
| "eval_loss": 0.5163273811340332, | |
| "eval_runtime": 14.1681, | |
| "eval_samples_per_second": 2853.585, | |
| "eval_steps_per_second": 11.152, | |
| "step": 11376 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 11376, | |
| "total_flos": 7.633075201391002e+16, | |
| "train_loss": 0.315434974960134, | |
| "train_runtime": 2030.1953, | |
| "train_samples_per_second": 8960.862, | |
| "train_steps_per_second": 35.021 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 71100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "total_flos": 7.633075201391002e+16, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |