Training in progress, step 20500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891558696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f364037fe3d6208b2c05dda635ce09c71590d8662e232f0b7b434a1610e5c6b
|
| 3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1783272762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b999ec0e9bc401face62bc16ac08f4e745f2cd6b0ffba6a9f05615f8c9650e5f
|
| 3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91d0e4637157719f5fcffcd5d4a99e903acaab012174cc7599b33a508d13c5ca
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3a5ed47396b325271b233c59cffa14dc5086d4af5c552b3c7216a7a0ac3fa86
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.08186879754066467,
|
| 3 |
"best_model_checkpoint": "./fine-tuned/checkpoint-19000",
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3127,6 +3127,84 @@
|
|
| 3127 |
"eval_samples_per_second": 22.716,
|
| 3128 |
"eval_steps_per_second": 5.679,
|
| 3129 |
"step": 20000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3130 |
}
|
| 3131 |
],
|
| 3132 |
"logging_steps": 50,
|
|
@@ -3146,7 +3224,7 @@
|
|
| 3146 |
"attributes": {}
|
| 3147 |
}
|
| 3148 |
},
|
| 3149 |
-
"total_flos": 4.
|
| 3150 |
"train_batch_size": 4,
|
| 3151 |
"trial_name": null,
|
| 3152 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.08186879754066467,
|
| 3 |
"best_model_checkpoint": "./fine-tuned/checkpoint-19000",
|
| 4 |
+
"epoch": 1.6400000000000001,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 20500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3127 |
"eval_samples_per_second": 22.716,
|
| 3128 |
"eval_steps_per_second": 5.679,
|
| 3129 |
"step": 20000
|
| 3130 |
+
},
|
| 3131 |
+
{
|
| 3132 |
+
"epoch": 1.604,
|
| 3133 |
+
"grad_norm": 0.18323849141597748,
|
| 3134 |
+
"learning_rate": 5.9472e-06,
|
| 3135 |
+
"loss": 0.0555,
|
| 3136 |
+
"step": 20050
|
| 3137 |
+
},
|
| 3138 |
+
{
|
| 3139 |
+
"epoch": 1.608,
|
| 3140 |
+
"grad_norm": 0.10411707311868668,
|
| 3141 |
+
"learning_rate": 5.8872e-06,
|
| 3142 |
+
"loss": 0.0506,
|
| 3143 |
+
"step": 20100
|
| 3144 |
+
},
|
| 3145 |
+
{
|
| 3146 |
+
"epoch": 1.612,
|
| 3147 |
+
"grad_norm": 0.18998867273330688,
|
| 3148 |
+
"learning_rate": 5.8272e-06,
|
| 3149 |
+
"loss": 0.0522,
|
| 3150 |
+
"step": 20150
|
| 3151 |
+
},
|
| 3152 |
+
{
|
| 3153 |
+
"epoch": 1.616,
|
| 3154 |
+
"grad_norm": 0.159571573138237,
|
| 3155 |
+
"learning_rate": 5.7672e-06,
|
| 3156 |
+
"loss": 0.0527,
|
| 3157 |
+
"step": 20200
|
| 3158 |
+
},
|
| 3159 |
+
{
|
| 3160 |
+
"epoch": 1.62,
|
| 3161 |
+
"grad_norm": 0.10246822983026505,
|
| 3162 |
+
"learning_rate": 5.7072e-06,
|
| 3163 |
+
"loss": 0.0517,
|
| 3164 |
+
"step": 20250
|
| 3165 |
+
},
|
| 3166 |
+
{
|
| 3167 |
+
"epoch": 1.624,
|
| 3168 |
+
"grad_norm": 0.17232735455036163,
|
| 3169 |
+
"learning_rate": 5.6472e-06,
|
| 3170 |
+
"loss": 0.0559,
|
| 3171 |
+
"step": 20300
|
| 3172 |
+
},
|
| 3173 |
+
{
|
| 3174 |
+
"epoch": 1.6280000000000001,
|
| 3175 |
+
"grad_norm": 0.13078981637954712,
|
| 3176 |
+
"learning_rate": 5.5872e-06,
|
| 3177 |
+
"loss": 0.0497,
|
| 3178 |
+
"step": 20350
|
| 3179 |
+
},
|
| 3180 |
+
{
|
| 3181 |
+
"epoch": 1.6320000000000001,
|
| 3182 |
+
"grad_norm": 0.06083545461297035,
|
| 3183 |
+
"learning_rate": 5.527199999999999e-06,
|
| 3184 |
+
"loss": 0.0502,
|
| 3185 |
+
"step": 20400
|
| 3186 |
+
},
|
| 3187 |
+
{
|
| 3188 |
+
"epoch": 1.6360000000000001,
|
| 3189 |
+
"grad_norm": 0.1311408132314682,
|
| 3190 |
+
"learning_rate": 5.467200000000001e-06,
|
| 3191 |
+
"loss": 0.0572,
|
| 3192 |
+
"step": 20450
|
| 3193 |
+
},
|
| 3194 |
+
{
|
| 3195 |
+
"epoch": 1.6400000000000001,
|
| 3196 |
+
"grad_norm": 0.16154557466506958,
|
| 3197 |
+
"learning_rate": 5.4072000000000005e-06,
|
| 3198 |
+
"loss": 0.0545,
|
| 3199 |
+
"step": 20500
|
| 3200 |
+
},
|
| 3201 |
+
{
|
| 3202 |
+
"epoch": 1.6400000000000001,
|
| 3203 |
+
"eval_loss": 0.08202797174453735,
|
| 3204 |
+
"eval_runtime": 87.9943,
|
| 3205 |
+
"eval_samples_per_second": 22.729,
|
| 3206 |
+
"eval_steps_per_second": 5.682,
|
| 3207 |
+
"step": 20500
|
| 3208 |
}
|
| 3209 |
],
|
| 3210 |
"logging_steps": 50,
|
|
|
|
| 3224 |
"attributes": {}
|
| 3225 |
}
|
| 3226 |
},
|
| 3227 |
+
"total_flos": 4.993454702592e+16,
|
| 3228 |
"train_batch_size": 4,
|
| 3229 |
"trial_name": null,
|
| 3230 |
"trial_params": null
|