File size: 4,630 Bytes
4eba8be | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 | {
"best_metric": 0.4153307378292084,
"best_model_checkpoint": "tiny_bert_rand_100_v1_qqp/checkpoint-4266",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 11376,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 1.7541351318359375,
"learning_rate": 4.9e-05,
"loss": 0.496,
"step": 1422
},
{
"epoch": 1.0,
"eval_accuracy": 0.7838486272569873,
"eval_combined_score": 0.7322145289141401,
"eval_f1": 0.6805804305712928,
"eval_loss": 0.4491816461086273,
"eval_runtime": 14.8017,
"eval_samples_per_second": 2731.443,
"eval_steps_per_second": 10.674,
"step": 1422
},
{
"epoch": 2.0,
"grad_norm": 2.7397406101226807,
"learning_rate": 4.8e-05,
"loss": 0.4089,
"step": 2844
},
{
"epoch": 2.0,
"eval_accuracy": 0.8033638387336136,
"eval_combined_score": 0.7612324811645597,
"eval_f1": 0.7191011235955056,
"eval_loss": 0.4195970892906189,
"eval_runtime": 14.1267,
"eval_samples_per_second": 2861.947,
"eval_steps_per_second": 11.184,
"step": 2844
},
{
"epoch": 3.0,
"grad_norm": 3.006162405014038,
"learning_rate": 4.7e-05,
"loss": 0.3518,
"step": 4266
},
{
"epoch": 3.0,
"eval_accuracy": 0.8110066782092505,
"eval_combined_score": 0.7764890548375805,
"eval_f1": 0.7419714314659103,
"eval_loss": 0.4153307378292084,
"eval_runtime": 14.1896,
"eval_samples_per_second": 2849.272,
"eval_steps_per_second": 11.135,
"step": 4266
},
{
"epoch": 4.0,
"grad_norm": 3.5791120529174805,
"learning_rate": 4.600000000000001e-05,
"loss": 0.3062,
"step": 5688
},
{
"epoch": 4.0,
"eval_accuracy": 0.8186495176848875,
"eval_combined_score": 0.780485228372211,
"eval_f1": 0.7423209390595347,
"eval_loss": 0.4295639395713806,
"eval_runtime": 14.2354,
"eval_samples_per_second": 2840.098,
"eval_steps_per_second": 11.099,
"step": 5688
},
{
"epoch": 5.0,
"grad_norm": 2.346965789794922,
"learning_rate": 4.5e-05,
"loss": 0.2689,
"step": 7110
},
{
"epoch": 5.0,
"eval_accuracy": 0.8198367548849864,
"eval_combined_score": 0.7865778193281611,
"eval_f1": 0.7533188837713357,
"eval_loss": 0.4466039538383484,
"eval_runtime": 13.8417,
"eval_samples_per_second": 2920.893,
"eval_steps_per_second": 11.415,
"step": 7110
},
{
"epoch": 6.0,
"grad_norm": 2.1513712406158447,
"learning_rate": 4.4000000000000006e-05,
"loss": 0.239,
"step": 8532
},
{
"epoch": 6.0,
"eval_accuracy": 0.8201582982933465,
"eval_combined_score": 0.7912292357791229,
"eval_f1": 0.7623001732648992,
"eval_loss": 0.4361380636692047,
"eval_runtime": 13.8654,
"eval_samples_per_second": 2915.885,
"eval_steps_per_second": 11.395,
"step": 8532
},
{
"epoch": 7.0,
"grad_norm": 4.002156734466553,
"learning_rate": 4.3e-05,
"loss": 0.2131,
"step": 9954
},
{
"epoch": 7.0,
"eval_accuracy": 0.8231016571852585,
"eval_combined_score": 0.7940583854332128,
"eval_f1": 0.765015113681167,
"eval_loss": 0.46639078855514526,
"eval_runtime": 13.9853,
"eval_samples_per_second": 2890.887,
"eval_steps_per_second": 11.298,
"step": 9954
},
{
"epoch": 8.0,
"grad_norm": 3.098388910293579,
"learning_rate": 4.2e-05,
"loss": 0.1896,
"step": 11376
},
{
"epoch": 8.0,
"eval_accuracy": 0.8200840959683403,
"eval_combined_score": 0.7939181654809773,
"eval_f1": 0.7677522349936143,
"eval_loss": 0.5052474141120911,
"eval_runtime": 13.808,
"eval_samples_per_second": 2928.009,
"eval_steps_per_second": 11.443,
"step": 11376
},
{
"epoch": 8.0,
"step": 11376,
"total_flos": 7.633075201391002e+16,
"train_loss": 0.3091887569293359,
"train_runtime": 2020.2759,
"train_samples_per_second": 9004.859,
"train_steps_per_second": 35.193
}
],
"logging_steps": 1,
"max_steps": 71100,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 7.633075201391002e+16,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}
|