Training in progress, step 22000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891558696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b24490e1afd177c9d7aa64e2bc93a14c723f2b07f34c096656faca30a819b31
|
| 3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1783272762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9cb51a493e883d2bba3783d9a9c610f4b7ed8a2fdbc2ae094434c3938af33f10
|
| 3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75610a62f0c2e3c2144553b21cd56625818792160128bd8489e6566f2e9cc991
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16420615b82a27af4c6aa0fff49c44fb5eed4e6ef5c3ebd2f44a0387c672c1ca
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "./fine-tuned/checkpoint-
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3361,6 +3361,84 @@
|
|
| 3361 |
"eval_samples_per_second": 22.713,
|
| 3362 |
"eval_steps_per_second": 5.678,
|
| 3363 |
"step": 21500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3364 |
}
|
| 3365 |
],
|
| 3366 |
"logging_steps": 50,
|
|
@@ -3380,7 +3458,7 @@
|
|
| 3380 |
"attributes": {}
|
| 3381 |
}
|
| 3382 |
},
|
| 3383 |
-
"total_flos": 5.
|
| 3384 |
"train_batch_size": 4,
|
| 3385 |
"trial_name": null,
|
| 3386 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.08166228979825974,
|
| 3 |
+
"best_model_checkpoint": "./fine-tuned/checkpoint-22000",
|
| 4 |
+
"epoch": 1.76,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 22000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3361 |
"eval_samples_per_second": 22.713,
|
| 3362 |
"eval_steps_per_second": 5.678,
|
| 3363 |
"step": 21500
|
| 3364 |
+
},
|
| 3365 |
+
{
|
| 3366 |
+
"epoch": 1.724,
|
| 3367 |
+
"grad_norm": 0.14097870886325836,
|
| 3368 |
+
"learning_rate": 4.1472e-06,
|
| 3369 |
+
"loss": 0.0488,
|
| 3370 |
+
"step": 21550
|
| 3371 |
+
},
|
| 3372 |
+
{
|
| 3373 |
+
"epoch": 1.728,
|
| 3374 |
+
"grad_norm": 0.1706978976726532,
|
| 3375 |
+
"learning_rate": 4.0872000000000004e-06,
|
| 3376 |
+
"loss": 0.0496,
|
| 3377 |
+
"step": 21600
|
| 3378 |
+
},
|
| 3379 |
+
{
|
| 3380 |
+
"epoch": 1.732,
|
| 3381 |
+
"grad_norm": 0.14371682703495026,
|
| 3382 |
+
"learning_rate": 4.0272e-06,
|
| 3383 |
+
"loss": 0.0557,
|
| 3384 |
+
"step": 21650
|
| 3385 |
+
},
|
| 3386 |
+
{
|
| 3387 |
+
"epoch": 1.736,
|
| 3388 |
+
"grad_norm": 0.1176629364490509,
|
| 3389 |
+
"learning_rate": 3.9672e-06,
|
| 3390 |
+
"loss": 0.0531,
|
| 3391 |
+
"step": 21700
|
| 3392 |
+
},
|
| 3393 |
+
{
|
| 3394 |
+
"epoch": 1.74,
|
| 3395 |
+
"grad_norm": 0.17272049188613892,
|
| 3396 |
+
"learning_rate": 3.9072e-06,
|
| 3397 |
+
"loss": 0.0604,
|
| 3398 |
+
"step": 21750
|
| 3399 |
+
},
|
| 3400 |
+
{
|
| 3401 |
+
"epoch": 1.744,
|
| 3402 |
+
"grad_norm": 0.15085135400295258,
|
| 3403 |
+
"learning_rate": 3.8472e-06,
|
| 3404 |
+
"loss": 0.0537,
|
| 3405 |
+
"step": 21800
|
| 3406 |
+
},
|
| 3407 |
+
{
|
| 3408 |
+
"epoch": 1.748,
|
| 3409 |
+
"grad_norm": 0.11613863706588745,
|
| 3410 |
+
"learning_rate": 3.7884e-06,
|
| 3411 |
+
"loss": 0.0544,
|
| 3412 |
+
"step": 21850
|
| 3413 |
+
},
|
| 3414 |
+
{
|
| 3415 |
+
"epoch": 1.752,
|
| 3416 |
+
"grad_norm": 0.13247713446617126,
|
| 3417 |
+
"learning_rate": 3.7284e-06,
|
| 3418 |
+
"loss": 0.0556,
|
| 3419 |
+
"step": 21900
|
| 3420 |
+
},
|
| 3421 |
+
{
|
| 3422 |
+
"epoch": 1.756,
|
| 3423 |
+
"grad_norm": 0.1755180060863495,
|
| 3424 |
+
"learning_rate": 3.6684e-06,
|
| 3425 |
+
"loss": 0.0563,
|
| 3426 |
+
"step": 21950
|
| 3427 |
+
},
|
| 3428 |
+
{
|
| 3429 |
+
"epoch": 1.76,
|
| 3430 |
+
"grad_norm": 0.07918363809585571,
|
| 3431 |
+
"learning_rate": 3.6084e-06,
|
| 3432 |
+
"loss": 0.0508,
|
| 3433 |
+
"step": 22000
|
| 3434 |
+
},
|
| 3435 |
+
{
|
| 3436 |
+
"epoch": 1.76,
|
| 3437 |
+
"eval_loss": 0.08166228979825974,
|
| 3438 |
+
"eval_runtime": 88.0895,
|
| 3439 |
+
"eval_samples_per_second": 22.704,
|
| 3440 |
+
"eval_steps_per_second": 5.676,
|
| 3441 |
+
"step": 22000
|
| 3442 |
}
|
| 3443 |
],
|
| 3444 |
"logging_steps": 50,
|
|
|
|
| 3458 |
"attributes": {}
|
| 3459 |
}
|
| 3460 |
},
|
| 3461 |
+
"total_flos": 5.358829436928e+16,
|
| 3462 |
"train_batch_size": 4,
|
| 3463 |
"trial_name": null,
|
| 3464 |
"trial_params": null
|