{ "best_metric": 0.4153307378292084, "best_model_checkpoint": "tiny_bert_rand_100_v1_qqp/checkpoint-4266", "epoch": 8.0, "eval_steps": 500, "global_step": 11376, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.7541351318359375, "learning_rate": 4.9e-05, "loss": 0.496, "step": 1422 }, { "epoch": 1.0, "eval_accuracy": 0.7838486272569873, "eval_combined_score": 0.7322145289141401, "eval_f1": 0.6805804305712928, "eval_loss": 0.4491816461086273, "eval_runtime": 14.8017, "eval_samples_per_second": 2731.443, "eval_steps_per_second": 10.674, "step": 1422 }, { "epoch": 2.0, "grad_norm": 2.7397406101226807, "learning_rate": 4.8e-05, "loss": 0.4089, "step": 2844 }, { "epoch": 2.0, "eval_accuracy": 0.8033638387336136, "eval_combined_score": 0.7612324811645597, "eval_f1": 0.7191011235955056, "eval_loss": 0.4195970892906189, "eval_runtime": 14.1267, "eval_samples_per_second": 2861.947, "eval_steps_per_second": 11.184, "step": 2844 }, { "epoch": 3.0, "grad_norm": 3.006162405014038, "learning_rate": 4.7e-05, "loss": 0.3518, "step": 4266 }, { "epoch": 3.0, "eval_accuracy": 0.8110066782092505, "eval_combined_score": 0.7764890548375805, "eval_f1": 0.7419714314659103, "eval_loss": 0.4153307378292084, "eval_runtime": 14.1896, "eval_samples_per_second": 2849.272, "eval_steps_per_second": 11.135, "step": 4266 }, { "epoch": 4.0, "grad_norm": 3.5791120529174805, "learning_rate": 4.600000000000001e-05, "loss": 0.3062, "step": 5688 }, { "epoch": 4.0, "eval_accuracy": 0.8186495176848875, "eval_combined_score": 0.780485228372211, "eval_f1": 0.7423209390595347, "eval_loss": 0.4295639395713806, "eval_runtime": 14.2354, "eval_samples_per_second": 2840.098, "eval_steps_per_second": 11.099, "step": 5688 }, { "epoch": 5.0, "grad_norm": 2.346965789794922, "learning_rate": 4.5e-05, "loss": 0.2689, "step": 7110 }, { "epoch": 5.0, "eval_accuracy": 0.8198367548849864, "eval_combined_score": 0.7865778193281611, "eval_f1": 0.7533188837713357, "eval_loss": 0.4466039538383484, "eval_runtime": 13.8417, "eval_samples_per_second": 2920.893, "eval_steps_per_second": 11.415, "step": 7110 }, { "epoch": 6.0, "grad_norm": 2.1513712406158447, "learning_rate": 4.4000000000000006e-05, "loss": 0.239, "step": 8532 }, { "epoch": 6.0, "eval_accuracy": 0.8201582982933465, "eval_combined_score": 0.7912292357791229, "eval_f1": 0.7623001732648992, "eval_loss": 0.4361380636692047, "eval_runtime": 13.8654, "eval_samples_per_second": 2915.885, "eval_steps_per_second": 11.395, "step": 8532 }, { "epoch": 7.0, "grad_norm": 4.002156734466553, "learning_rate": 4.3e-05, "loss": 0.2131, "step": 9954 }, { "epoch": 7.0, "eval_accuracy": 0.8231016571852585, "eval_combined_score": 0.7940583854332128, "eval_f1": 0.765015113681167, "eval_loss": 0.46639078855514526, "eval_runtime": 13.9853, "eval_samples_per_second": 2890.887, "eval_steps_per_second": 11.298, "step": 9954 }, { "epoch": 8.0, "grad_norm": 3.098388910293579, "learning_rate": 4.2e-05, "loss": 0.1896, "step": 11376 }, { "epoch": 8.0, "eval_accuracy": 0.8200840959683403, "eval_combined_score": 0.7939181654809773, "eval_f1": 0.7677522349936143, "eval_loss": 0.5052474141120911, "eval_runtime": 13.808, "eval_samples_per_second": 2928.009, "eval_steps_per_second": 11.443, "step": 11376 }, { "epoch": 8.0, "step": 11376, "total_flos": 7.633075201391002e+16, "train_loss": 0.3091887569293359, "train_runtime": 2020.2759, "train_samples_per_second": 9004.859, "train_steps_per_second": 35.193 } ], "logging_steps": 1, "max_steps": 71100, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 7.633075201391002e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }