Training in progress, step 21000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891558696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a93f403a198a0abf134a3fd5cbeca3aa8c16276f10e0b35daa2bc2bf8a2a6957
|
| 3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1783272762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18372560aadc54215809cfae0eaf7225bb168ffc940aa3b172c422f28f9cfff5
|
| 3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d9c5f7443e1222c25c8a224aeec2cab3e754343ab09e424a8f337440ada3c79
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67a88db37888ba561bfce26ae8fef54113ba48b68f86826f4ed7d7cb198ed4fd
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "./fine-tuned/checkpoint-
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3205,6 +3205,84 @@
|
|
| 3205 |
"eval_samples_per_second": 22.729,
|
| 3206 |
"eval_steps_per_second": 5.682,
|
| 3207 |
"step": 20500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3208 |
}
|
| 3209 |
],
|
| 3210 |
"logging_steps": 50,
|
|
@@ -3224,7 +3302,7 @@
|
|
| 3224 |
"attributes": {}
|
| 3225 |
}
|
| 3226 |
},
|
| 3227 |
-
"total_flos":
|
| 3228 |
"train_batch_size": 4,
|
| 3229 |
"trial_name": null,
|
| 3230 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.0817028358578682,
|
| 3 |
+
"best_model_checkpoint": "./fine-tuned/checkpoint-21000",
|
| 4 |
+
"epoch": 1.6800000000000002,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 21000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3205 |
"eval_samples_per_second": 22.729,
|
| 3206 |
"eval_steps_per_second": 5.682,
|
| 3207 |
"step": 20500
|
| 3208 |
+
},
|
| 3209 |
+
{
|
| 3210 |
+
"epoch": 1.6440000000000001,
|
| 3211 |
+
"grad_norm": 0.13027295470237732,
|
| 3212 |
+
"learning_rate": 5.3472e-06,
|
| 3213 |
+
"loss": 0.0551,
|
| 3214 |
+
"step": 20550
|
| 3215 |
+
},
|
| 3216 |
+
{
|
| 3217 |
+
"epoch": 1.6480000000000001,
|
| 3218 |
+
"grad_norm": 0.1394919753074646,
|
| 3219 |
+
"learning_rate": 5.2872e-06,
|
| 3220 |
+
"loss": 0.054,
|
| 3221 |
+
"step": 20600
|
| 3222 |
+
},
|
| 3223 |
+
{
|
| 3224 |
+
"epoch": 1.6520000000000001,
|
| 3225 |
+
"grad_norm": 0.16753709316253662,
|
| 3226 |
+
"learning_rate": 5.2272000000000005e-06,
|
| 3227 |
+
"loss": 0.0501,
|
| 3228 |
+
"step": 20650
|
| 3229 |
+
},
|
| 3230 |
+
{
|
| 3231 |
+
"epoch": 1.6560000000000001,
|
| 3232 |
+
"grad_norm": 0.1509876549243927,
|
| 3233 |
+
"learning_rate": 5.1672e-06,
|
| 3234 |
+
"loss": 0.0527,
|
| 3235 |
+
"step": 20700
|
| 3236 |
+
},
|
| 3237 |
+
{
|
| 3238 |
+
"epoch": 1.6600000000000001,
|
| 3239 |
+
"grad_norm": 0.13625292479991913,
|
| 3240 |
+
"learning_rate": 5.1072e-06,
|
| 3241 |
+
"loss": 0.0508,
|
| 3242 |
+
"step": 20750
|
| 3243 |
+
},
|
| 3244 |
+
{
|
| 3245 |
+
"epoch": 1.6640000000000001,
|
| 3246 |
+
"grad_norm": 0.1552583873271942,
|
| 3247 |
+
"learning_rate": 5.0472000000000006e-06,
|
| 3248 |
+
"loss": 0.0548,
|
| 3249 |
+
"step": 20800
|
| 3250 |
+
},
|
| 3251 |
+
{
|
| 3252 |
+
"epoch": 1.6680000000000001,
|
| 3253 |
+
"grad_norm": 0.1763962060213089,
|
| 3254 |
+
"learning_rate": 4.9872e-06,
|
| 3255 |
+
"loss": 0.0585,
|
| 3256 |
+
"step": 20850
|
| 3257 |
+
},
|
| 3258 |
+
{
|
| 3259 |
+
"epoch": 1.6720000000000002,
|
| 3260 |
+
"grad_norm": 0.11216771602630615,
|
| 3261 |
+
"learning_rate": 4.9272e-06,
|
| 3262 |
+
"loss": 0.0567,
|
| 3263 |
+
"step": 20900
|
| 3264 |
+
},
|
| 3265 |
+
{
|
| 3266 |
+
"epoch": 1.6760000000000002,
|
| 3267 |
+
"grad_norm": 0.08550629019737244,
|
| 3268 |
+
"learning_rate": 4.8672e-06,
|
| 3269 |
+
"loss": 0.0523,
|
| 3270 |
+
"step": 20950
|
| 3271 |
+
},
|
| 3272 |
+
{
|
| 3273 |
+
"epoch": 1.6800000000000002,
|
| 3274 |
+
"grad_norm": 0.11488083750009537,
|
| 3275 |
+
"learning_rate": 4.8072e-06,
|
| 3276 |
+
"loss": 0.0503,
|
| 3277 |
+
"step": 21000
|
| 3278 |
+
},
|
| 3279 |
+
{
|
| 3280 |
+
"epoch": 1.6800000000000002,
|
| 3281 |
+
"eval_loss": 0.0817028358578682,
|
| 3282 |
+
"eval_runtime": 88.097,
|
| 3283 |
+
"eval_samples_per_second": 22.702,
|
| 3284 |
+
"eval_steps_per_second": 5.676,
|
| 3285 |
+
"step": 21000
|
| 3286 |
}
|
| 3287 |
],
|
| 3288 |
"logging_steps": 50,
|
|
|
|
| 3302 |
"attributes": {}
|
| 3303 |
}
|
| 3304 |
},
|
| 3305 |
+
"total_flos": 5.115246280704e+16,
|
| 3306 |
"train_batch_size": 4,
|
| 3307 |
"trial_name": null,
|
| 3308 |
"trial_params": null
|