File size: 5,322 Bytes
642da08 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
{
"best_global_step": 2000,
"best_metric": 0.9999500948197181,
"best_model_checkpoint": "trained_models/intent_classifier/checkpoint-2000",
"epoch": 1.596169193934557,
"eval_steps": 1000,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0007980845969672786,
"grad_norm": 51713.75390625,
"learning_rate": 0.0,
"loss": 0.695,
"step": 1
},
{
"epoch": 0.07980845969672785,
"grad_norm": 91372.3984375,
"learning_rate": 1.98e-06,
"loss": 0.6689,
"step": 100
},
{
"epoch": 0.1596169193934557,
"grad_norm": 47582.55078125,
"learning_rate": 3.980000000000001e-06,
"loss": 0.2889,
"step": 200
},
{
"epoch": 0.23942537909018355,
"grad_norm": 2023.180908203125,
"learning_rate": 5.98e-06,
"loss": 0.0206,
"step": 300
},
{
"epoch": 0.3192338387869114,
"grad_norm": 105.5784912109375,
"learning_rate": 7.980000000000002e-06,
"loss": 0.0006,
"step": 400
},
{
"epoch": 0.39904229848363926,
"grad_norm": 2.8826353549957275,
"learning_rate": 9.980000000000001e-06,
"loss": 0.0016,
"step": 500
},
{
"epoch": 0.4788507581803671,
"grad_norm": 0.18906153738498688,
"learning_rate": 1.198e-05,
"loss": 0.0,
"step": 600
},
{
"epoch": 0.5586592178770949,
"grad_norm": 0.06490982323884964,
"learning_rate": 1.398e-05,
"loss": 0.001,
"step": 700
},
{
"epoch": 0.6384676775738228,
"grad_norm": 0.07483379542827606,
"learning_rate": 1.5980000000000003e-05,
"loss": 0.0026,
"step": 800
},
{
"epoch": 0.7182761372705507,
"grad_norm": 0.029934018850326538,
"learning_rate": 1.798e-05,
"loss": 0.0027,
"step": 900
},
{
"epoch": 0.7980845969672785,
"grad_norm": 0.023010307922959328,
"learning_rate": 1.9980000000000002e-05,
"loss": 0.0049,
"step": 1000
},
{
"epoch": 0.7980845969672785,
"eval_accuracy": 0.9999001896396846,
"eval_f1": 0.9999001896386902,
"eval_f1_macro": 0.9999001896386903,
"eval_loss": 0.0016707783797755837,
"eval_runtime": 88.2175,
"eval_samples_per_second": 227.143,
"eval_steps_per_second": 1.78,
"step": 1000
},
{
"epoch": 0.8778930566640064,
"grad_norm": 0.030565178021788597,
"learning_rate": 1.9282348677056906e-05,
"loss": 0.0091,
"step": 1100
},
{
"epoch": 0.9577015163607342,
"grad_norm": 0.025367770344018936,
"learning_rate": 1.8557448350851758e-05,
"loss": 0.0029,
"step": 1200
},
{
"epoch": 1.037509976057462,
"grad_norm": 0.01835496723651886,
"learning_rate": 1.7832548024646613e-05,
"loss": 0.0025,
"step": 1300
},
{
"epoch": 1.1173184357541899,
"grad_norm": 0.013528961688280106,
"learning_rate": 1.7107647698441465e-05,
"loss": 0.0004,
"step": 1400
},
{
"epoch": 1.1971268954509178,
"grad_norm": 0.013094124384224415,
"learning_rate": 1.638274737223632e-05,
"loss": 0.0043,
"step": 1500
},
{
"epoch": 1.2769353551476457,
"grad_norm": 0.010992957279086113,
"learning_rate": 1.565784704603117e-05,
"loss": 0.0001,
"step": 1600
},
{
"epoch": 1.3567438148443736,
"grad_norm": 0.009696166031062603,
"learning_rate": 1.4932946719826025e-05,
"loss": 0.0001,
"step": 1700
},
{
"epoch": 1.4365522745411012,
"grad_norm": 0.008244643919169903,
"learning_rate": 1.4208046393620877e-05,
"loss": 0.0,
"step": 1800
},
{
"epoch": 1.5163607342378294,
"grad_norm": 0.00833104643970728,
"learning_rate": 1.348314606741573e-05,
"loss": 0.0,
"step": 1900
},
{
"epoch": 1.596169193934557,
"grad_norm": 0.008447665721178055,
"learning_rate": 1.2758245741210584e-05,
"loss": 0.0031,
"step": 2000
},
{
"epoch": 1.596169193934557,
"eval_accuracy": 0.9999500948198423,
"eval_f1": 0.9999500948197181,
"eval_f1_macro": 0.999950094819718,
"eval_loss": 0.000961420766543597,
"eval_runtime": 88.1534,
"eval_samples_per_second": 227.308,
"eval_steps_per_second": 1.781,
"step": 2000
}
],
"logging_steps": 100,
"max_steps": 3759,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 8475131698612224.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}
|