{ "best_metric": 0.4193161725997925, "best_model_checkpoint": "tiny_bert_km_50_v1_qqp/checkpoint-4266", "epoch": 8.0, "eval_steps": 500, "global_step": 11376, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.748605489730835, "learning_rate": 4.9e-05, "loss": 0.5011, "step": 1422 }, { "epoch": 1.0, "eval_accuracy": 0.7840217660153351, "eval_combined_score": 0.732481505632631, "eval_f1": 0.6809412452499269, "eval_loss": 0.4513613283634186, "eval_runtime": 15.0073, "eval_samples_per_second": 2694.026, "eval_steps_per_second": 10.528, "step": 1422 }, { "epoch": 2.0, "grad_norm": 2.0197439193725586, "learning_rate": 4.8e-05, "loss": 0.4146, "step": 2844 }, { "epoch": 2.0, "eval_accuracy": 0.7996784565916398, "eval_combined_score": 0.7551677267417489, "eval_f1": 0.7106569968918581, "eval_loss": 0.42334410548210144, "eval_runtime": 14.0744, "eval_samples_per_second": 2872.589, "eval_steps_per_second": 11.226, "step": 2844 }, { "epoch": 3.0, "grad_norm": 2.5988855361938477, "learning_rate": 4.7e-05, "loss": 0.3605, "step": 4266 }, { "epoch": 3.0, "eval_accuracy": 0.8108582735592382, "eval_combined_score": 0.7698776779218888, "eval_f1": 0.7288970822845393, "eval_loss": 0.4193161725997925, "eval_runtime": 14.0194, "eval_samples_per_second": 2883.855, "eval_steps_per_second": 11.27, "step": 4266 }, { "epoch": 4.0, "grad_norm": 2.5036709308624268, "learning_rate": 4.600000000000001e-05, "loss": 0.3153, "step": 5688 }, { "epoch": 4.0, "eval_accuracy": 0.8142715805095226, "eval_combined_score": 0.7768661951678455, "eval_f1": 0.7394608098261685, "eval_loss": 0.42982715368270874, "eval_runtime": 14.0649, "eval_samples_per_second": 2874.526, "eval_steps_per_second": 11.234, "step": 5688 }, { "epoch": 5.0, "grad_norm": 2.8022961616516113, "learning_rate": 4.5e-05, "loss": 0.2768, "step": 7110 }, { "epoch": 5.0, "eval_accuracy": 0.8161019045263418, "eval_combined_score": 0.7811781492871632, "eval_f1": 0.7462543940479847, "eval_loss": 0.4367225170135498, "eval_runtime": 14.2004, "eval_samples_per_second": 2847.095, "eval_steps_per_second": 11.126, "step": 7110 }, { "epoch": 6.0, "grad_norm": 3.0807063579559326, "learning_rate": 4.4000000000000006e-05, "loss": 0.245, "step": 8532 }, { "epoch": 6.0, "eval_accuracy": 0.8136779619094732, "eval_combined_score": 0.7862144813550569, "eval_f1": 0.7587510008006405, "eval_loss": 0.4542311131954193, "eval_runtime": 14.0405, "eval_samples_per_second": 2879.525, "eval_steps_per_second": 11.253, "step": 8532 }, { "epoch": 7.0, "grad_norm": 4.5525922775268555, "learning_rate": 4.3e-05, "loss": 0.2174, "step": 9954 }, { "epoch": 7.0, "eval_accuracy": 0.8195152114766263, "eval_combined_score": 0.7896423794008647, "eval_f1": 0.7597695473251029, "eval_loss": 0.47380971908569336, "eval_runtime": 14.0747, "eval_samples_per_second": 2872.521, "eval_steps_per_second": 11.226, "step": 9954 }, { "epoch": 8.0, "grad_norm": 3.2192418575286865, "learning_rate": 4.2e-05, "loss": 0.1927, "step": 11376 }, { "epoch": 8.0, "eval_accuracy": 0.823695275785308, "eval_combined_score": 0.7905984033549933, "eval_f1": 0.7575015309246785, "eval_loss": 0.5163273811340332, "eval_runtime": 14.1681, "eval_samples_per_second": 2853.585, "eval_steps_per_second": 11.152, "step": 11376 }, { "epoch": 8.0, "step": 11376, "total_flos": 7.633075201391002e+16, "train_loss": 0.315434974960134, "train_runtime": 2030.1953, "train_samples_per_second": 8960.862, "train_steps_per_second": 35.021 } ], "logging_steps": 1, "max_steps": 71100, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 7.633075201391002e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }