| { | |
| "best_metric": 0.4153307378292084, | |
| "best_model_checkpoint": "tiny_bert_rand_100_v1_qqp/checkpoint-4266", | |
| "epoch": 8.0, | |
| "eval_steps": 500, | |
| "global_step": 11376, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.7541351318359375, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.496, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7838486272569873, | |
| "eval_combined_score": 0.7322145289141401, | |
| "eval_f1": 0.6805804305712928, | |
| "eval_loss": 0.4491816461086273, | |
| "eval_runtime": 14.8017, | |
| "eval_samples_per_second": 2731.443, | |
| "eval_steps_per_second": 10.674, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.7397406101226807, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.4089, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8033638387336136, | |
| "eval_combined_score": 0.7612324811645597, | |
| "eval_f1": 0.7191011235955056, | |
| "eval_loss": 0.4195970892906189, | |
| "eval_runtime": 14.1267, | |
| "eval_samples_per_second": 2861.947, | |
| "eval_steps_per_second": 11.184, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 3.006162405014038, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.3518, | |
| "step": 4266 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8110066782092505, | |
| "eval_combined_score": 0.7764890548375805, | |
| "eval_f1": 0.7419714314659103, | |
| "eval_loss": 0.4153307378292084, | |
| "eval_runtime": 14.1896, | |
| "eval_samples_per_second": 2849.272, | |
| "eval_steps_per_second": 11.135, | |
| "step": 4266 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 3.5791120529174805, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.3062, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8186495176848875, | |
| "eval_combined_score": 0.780485228372211, | |
| "eval_f1": 0.7423209390595347, | |
| "eval_loss": 0.4295639395713806, | |
| "eval_runtime": 14.2354, | |
| "eval_samples_per_second": 2840.098, | |
| "eval_steps_per_second": 11.099, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.346965789794922, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.2689, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8198367548849864, | |
| "eval_combined_score": 0.7865778193281611, | |
| "eval_f1": 0.7533188837713357, | |
| "eval_loss": 0.4466039538383484, | |
| "eval_runtime": 13.8417, | |
| "eval_samples_per_second": 2920.893, | |
| "eval_steps_per_second": 11.415, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 2.1513712406158447, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.239, | |
| "step": 8532 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8201582982933465, | |
| "eval_combined_score": 0.7912292357791229, | |
| "eval_f1": 0.7623001732648992, | |
| "eval_loss": 0.4361380636692047, | |
| "eval_runtime": 13.8654, | |
| "eval_samples_per_second": 2915.885, | |
| "eval_steps_per_second": 11.395, | |
| "step": 8532 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 4.002156734466553, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.2131, | |
| "step": 9954 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8231016571852585, | |
| "eval_combined_score": 0.7940583854332128, | |
| "eval_f1": 0.765015113681167, | |
| "eval_loss": 0.46639078855514526, | |
| "eval_runtime": 13.9853, | |
| "eval_samples_per_second": 2890.887, | |
| "eval_steps_per_second": 11.298, | |
| "step": 9954 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 3.098388910293579, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.1896, | |
| "step": 11376 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8200840959683403, | |
| "eval_combined_score": 0.7939181654809773, | |
| "eval_f1": 0.7677522349936143, | |
| "eval_loss": 0.5052474141120911, | |
| "eval_runtime": 13.808, | |
| "eval_samples_per_second": 2928.009, | |
| "eval_steps_per_second": 11.443, | |
| "step": 11376 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 11376, | |
| "total_flos": 7.633075201391002e+16, | |
| "train_loss": 0.3091887569293359, | |
| "train_runtime": 2020.2759, | |
| "train_samples_per_second": 9004.859, | |
| "train_steps_per_second": 35.193 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 71100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "total_flos": 7.633075201391002e+16, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |