Training in progress, step 23000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 891558696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40edcfcd3eadd3934df8237241fafe734256be703a0e3946a4e6ee14e6bf5a22
|
| 3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1783272762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d296490c405857ea96e4fbe1deae357928a09b842487dd9e87ba7f64056b04e
|
| 3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b5df9c2ed680cbe1eadef613230eab99b9589effb616e86f824a6c4d292f402
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5dcf2c6dc0ad63a4d7cbede289213f75c3cbabb91c5c15238e6438903f0efdfd
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.0815029963850975,
|
| 3 |
"best_model_checkpoint": "./fine-tuned/checkpoint-22500",
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -3517,6 +3517,84 @@
|
|
| 3517 |
"eval_samples_per_second": 22.709,
|
| 3518 |
"eval_steps_per_second": 5.677,
|
| 3519 |
"step": 22500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3520 |
}
|
| 3521 |
],
|
| 3522 |
"logging_steps": 50,
|
|
@@ -3536,7 +3614,7 @@
|
|
| 3536 |
"attributes": {}
|
| 3537 |
}
|
| 3538 |
},
|
| 3539 |
-
"total_flos": 5.
|
| 3540 |
"train_batch_size": 4,
|
| 3541 |
"trial_name": null,
|
| 3542 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.0815029963850975,
|
| 3 |
"best_model_checkpoint": "./fine-tuned/checkpoint-22500",
|
| 4 |
+
"epoch": 1.8399999999999999,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 23000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 3517 |
"eval_samples_per_second": 22.709,
|
| 3518 |
"eval_steps_per_second": 5.677,
|
| 3519 |
"step": 22500
|
| 3520 |
+
},
|
| 3521 |
+
{
|
| 3522 |
+
"epoch": 1.804,
|
| 3523 |
+
"grad_norm": 0.10228476673364639,
|
| 3524 |
+
"learning_rate": 2.9496e-06,
|
| 3525 |
+
"loss": 0.0534,
|
| 3526 |
+
"step": 22550
|
| 3527 |
+
},
|
| 3528 |
+
{
|
| 3529 |
+
"epoch": 1.808,
|
| 3530 |
+
"grad_norm": 0.12501686811447144,
|
| 3531 |
+
"learning_rate": 2.8896000000000003e-06,
|
| 3532 |
+
"loss": 0.0529,
|
| 3533 |
+
"step": 22600
|
| 3534 |
+
},
|
| 3535 |
+
{
|
| 3536 |
+
"epoch": 1.812,
|
| 3537 |
+
"grad_norm": 0.10296665877103806,
|
| 3538 |
+
"learning_rate": 2.8296e-06,
|
| 3539 |
+
"loss": 0.0575,
|
| 3540 |
+
"step": 22650
|
| 3541 |
+
},
|
| 3542 |
+
{
|
| 3543 |
+
"epoch": 1.8159999999999998,
|
| 3544 |
+
"grad_norm": 0.15849712491035461,
|
| 3545 |
+
"learning_rate": 2.7696e-06,
|
| 3546 |
+
"loss": 0.054,
|
| 3547 |
+
"step": 22700
|
| 3548 |
+
},
|
| 3549 |
+
{
|
| 3550 |
+
"epoch": 1.8199999999999998,
|
| 3551 |
+
"grad_norm": 0.0910249873995781,
|
| 3552 |
+
"learning_rate": 2.7096e-06,
|
| 3553 |
+
"loss": 0.0533,
|
| 3554 |
+
"step": 22750
|
| 3555 |
+
},
|
| 3556 |
+
{
|
| 3557 |
+
"epoch": 1.8239999999999998,
|
| 3558 |
+
"grad_norm": 0.14466793835163116,
|
| 3559 |
+
"learning_rate": 2.6496e-06,
|
| 3560 |
+
"loss": 0.0512,
|
| 3561 |
+
"step": 22800
|
| 3562 |
+
},
|
| 3563 |
+
{
|
| 3564 |
+
"epoch": 1.8279999999999998,
|
| 3565 |
+
"grad_norm": 0.21741582453250885,
|
| 3566 |
+
"learning_rate": 2.5895999999999997e-06,
|
| 3567 |
+
"loss": 0.0574,
|
| 3568 |
+
"step": 22850
|
| 3569 |
+
},
|
| 3570 |
+
{
|
| 3571 |
+
"epoch": 1.8319999999999999,
|
| 3572 |
+
"grad_norm": 0.14835171401500702,
|
| 3573 |
+
"learning_rate": 2.5296000000000003e-06,
|
| 3574 |
+
"loss": 0.0569,
|
| 3575 |
+
"step": 22900
|
| 3576 |
+
},
|
| 3577 |
+
{
|
| 3578 |
+
"epoch": 1.8359999999999999,
|
| 3579 |
+
"grad_norm": 0.13074947893619537,
|
| 3580 |
+
"learning_rate": 2.4696e-06,
|
| 3581 |
+
"loss": 0.0555,
|
| 3582 |
+
"step": 22950
|
| 3583 |
+
},
|
| 3584 |
+
{
|
| 3585 |
+
"epoch": 1.8399999999999999,
|
| 3586 |
+
"grad_norm": 0.14285966753959656,
|
| 3587 |
+
"learning_rate": 2.4096e-06,
|
| 3588 |
+
"loss": 0.0531,
|
| 3589 |
+
"step": 23000
|
| 3590 |
+
},
|
| 3591 |
+
{
|
| 3592 |
+
"epoch": 1.8399999999999999,
|
| 3593 |
+
"eval_loss": 0.08154193311929703,
|
| 3594 |
+
"eval_runtime": 88.0742,
|
| 3595 |
+
"eval_samples_per_second": 22.708,
|
| 3596 |
+
"eval_steps_per_second": 5.677,
|
| 3597 |
+
"step": 23000
|
| 3598 |
}
|
| 3599 |
],
|
| 3600 |
"logging_steps": 50,
|
|
|
|
| 3614 |
"attributes": {}
|
| 3615 |
}
|
| 3616 |
},
|
| 3617 |
+
"total_flos": 5.602412593152e+16,
|
| 3618 |
"train_batch_size": 4,
|
| 3619 |
"trial_name": null,
|
| 3620 |
"trial_params": null
|