Training in progress, step 440000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a4ad94b9d5ed920cecf2394888d2a87ad3ee893c3c5ddbd5617ff00b81d3e84
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c579869ee7eca1f3cfb28c1e8c5c1a2c4c07c47eb9cbbb5f2453c93bc559b64a
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1894f32b2441ea8820978bbb44f8f2d9ce0a579e669301efbd1655378591798a
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -8606,11 +8606,211 @@
|
|
| 8606 |
"eval_samples_per_second": 1115.617,
|
| 8607 |
"eval_steps_per_second": 17.485,
|
| 8608 |
"step": 430000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8609 |
}
|
| 8610 |
],
|
| 8611 |
"max_steps": 500000,
|
| 8612 |
"num_train_epochs": 12,
|
| 8613 |
-
"total_flos": 1.
|
| 8614 |
"trial_name": null,
|
| 8615 |
"trial_params": null
|
| 8616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.814418272662383,
|
| 5 |
+
"global_step": 440000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 8606 |
"eval_samples_per_second": 1115.617,
|
| 8607 |
"eval_steps_per_second": 17.485,
|
| 8608 |
"step": 430000
|
| 8609 |
+
},
|
| 8610 |
+
{
|
| 8611 |
+
"epoch": 9.6,
|
| 8612 |
+
"learning_rate": 2.5050805238106804e-05,
|
| 8613 |
+
"loss": 0.2561,
|
| 8614 |
+
"step": 430500
|
| 8615 |
+
},
|
| 8616 |
+
{
|
| 8617 |
+
"epoch": 9.61,
|
| 8618 |
+
"learning_rate": 2.4838784403798542e-05,
|
| 8619 |
+
"loss": 0.256,
|
| 8620 |
+
"step": 431000
|
| 8621 |
+
},
|
| 8622 |
+
{
|
| 8623 |
+
"epoch": 9.61,
|
| 8624 |
+
"eval_loss": 0.23836444318294525,
|
| 8625 |
+
"eval_runtime": 2.1211,
|
| 8626 |
+
"eval_samples_per_second": 1082.938,
|
| 8627 |
+
"eval_steps_per_second": 16.972,
|
| 8628 |
+
"step": 431000
|
| 8629 |
+
},
|
| 8630 |
+
{
|
| 8631 |
+
"epoch": 9.62,
|
| 8632 |
+
"learning_rate": 2.4628186991690346e-05,
|
| 8633 |
+
"loss": 0.256,
|
| 8634 |
+
"step": 431500
|
| 8635 |
+
},
|
| 8636 |
+
{
|
| 8637 |
+
"epoch": 9.64,
|
| 8638 |
+
"learning_rate": 2.4419015304841797e-05,
|
| 8639 |
+
"loss": 0.2562,
|
| 8640 |
+
"step": 432000
|
| 8641 |
+
},
|
| 8642 |
+
{
|
| 8643 |
+
"epoch": 9.64,
|
| 8644 |
+
"eval_loss": 0.2407396286725998,
|
| 8645 |
+
"eval_runtime": 2.084,
|
| 8646 |
+
"eval_samples_per_second": 1102.19,
|
| 8647 |
+
"eval_steps_per_second": 17.274,
|
| 8648 |
+
"step": 432000
|
| 8649 |
+
},
|
| 8650 |
+
{
|
| 8651 |
+
"epoch": 9.65,
|
| 8652 |
+
"learning_rate": 2.4211271630720957e-05,
|
| 8653 |
+
"loss": 0.2559,
|
| 8654 |
+
"step": 432500
|
| 8655 |
+
},
|
| 8656 |
+
{
|
| 8657 |
+
"epoch": 9.66,
|
| 8658 |
+
"learning_rate": 2.4004958241179347e-05,
|
| 8659 |
+
"loss": 0.2558,
|
| 8660 |
+
"step": 433000
|
| 8661 |
+
},
|
| 8662 |
+
{
|
| 8663 |
+
"epoch": 9.66,
|
| 8664 |
+
"eval_loss": 0.23972494900226593,
|
| 8665 |
+
"eval_runtime": 2.0364,
|
| 8666 |
+
"eval_samples_per_second": 1127.958,
|
| 8667 |
+
"eval_steps_per_second": 17.678,
|
| 8668 |
+
"step": 433000
|
| 8669 |
+
},
|
| 8670 |
+
{
|
| 8671 |
+
"epoch": 9.67,
|
| 8672 |
+
"learning_rate": 2.3800077392427193e-05,
|
| 8673 |
+
"loss": 0.2559,
|
| 8674 |
+
"step": 433500
|
| 8675 |
+
},
|
| 8676 |
+
{
|
| 8677 |
+
"epoch": 9.68,
|
| 8678 |
+
"learning_rate": 2.3596631325008536e-05,
|
| 8679 |
+
"loss": 0.256,
|
| 8680 |
+
"step": 434000
|
| 8681 |
+
},
|
| 8682 |
+
{
|
| 8683 |
+
"epoch": 9.68,
|
| 8684 |
+
"eval_loss": 0.24173137545585632,
|
| 8685 |
+
"eval_runtime": 2.0453,
|
| 8686 |
+
"eval_samples_per_second": 1123.05,
|
| 8687 |
+
"eval_steps_per_second": 17.601,
|
| 8688 |
+
"step": 434000
|
| 8689 |
+
},
|
| 8690 |
+
{
|
| 8691 |
+
"epoch": 9.69,
|
| 8692 |
+
"learning_rate": 2.3394622263777042e-05,
|
| 8693 |
+
"loss": 0.2566,
|
| 8694 |
+
"step": 434500
|
| 8695 |
+
},
|
| 8696 |
+
{
|
| 8697 |
+
"epoch": 9.7,
|
| 8698 |
+
"learning_rate": 2.3194052417871433e-05,
|
| 8699 |
+
"loss": 0.2558,
|
| 8700 |
+
"step": 435000
|
| 8701 |
+
},
|
| 8702 |
+
{
|
| 8703 |
+
"epoch": 9.7,
|
| 8704 |
+
"eval_loss": 0.23999714851379395,
|
| 8705 |
+
"eval_runtime": 2.0454,
|
| 8706 |
+
"eval_samples_per_second": 1123.023,
|
| 8707 |
+
"eval_steps_per_second": 17.601,
|
| 8708 |
+
"step": 435000
|
| 8709 |
+
},
|
| 8710 |
+
{
|
| 8711 |
+
"epoch": 9.71,
|
| 8712 |
+
"learning_rate": 2.2994923980691425e-05,
|
| 8713 |
+
"loss": 0.2556,
|
| 8714 |
+
"step": 435500
|
| 8715 |
+
},
|
| 8716 |
+
{
|
| 8717 |
+
"epoch": 9.73,
|
| 8718 |
+
"learning_rate": 2.279723912987365e-05,
|
| 8719 |
+
"loss": 0.2552,
|
| 8720 |
+
"step": 436000
|
| 8721 |
+
},
|
| 8722 |
+
{
|
| 8723 |
+
"epoch": 9.73,
|
| 8724 |
+
"eval_loss": 0.23865634202957153,
|
| 8725 |
+
"eval_runtime": 2.0599,
|
| 8726 |
+
"eval_samples_per_second": 1115.091,
|
| 8727 |
+
"eval_steps_per_second": 17.476,
|
| 8728 |
+
"step": 436000
|
| 8729 |
+
},
|
| 8730 |
+
{
|
| 8731 |
+
"epoch": 9.74,
|
| 8732 |
+
"learning_rate": 2.2601000027268006e-05,
|
| 8733 |
+
"loss": 0.2555,
|
| 8734 |
+
"step": 436500
|
| 8735 |
+
},
|
| 8736 |
+
{
|
| 8737 |
+
"epoch": 9.75,
|
| 8738 |
+
"learning_rate": 2.2406208818913857e-05,
|
| 8739 |
+
"loss": 0.2556,
|
| 8740 |
+
"step": 437000
|
| 8741 |
+
},
|
| 8742 |
+
{
|
| 8743 |
+
"epoch": 9.75,
|
| 8744 |
+
"eval_loss": 0.2411411553621292,
|
| 8745 |
+
"eval_runtime": 2.0137,
|
| 8746 |
+
"eval_samples_per_second": 1140.659,
|
| 8747 |
+
"eval_steps_per_second": 17.877,
|
| 8748 |
+
"step": 437000
|
| 8749 |
+
},
|
| 8750 |
+
{
|
| 8751 |
+
"epoch": 9.76,
|
| 8752 |
+
"learning_rate": 2.221286763501666e-05,
|
| 8753 |
+
"loss": 0.2571,
|
| 8754 |
+
"step": 437500
|
| 8755 |
+
},
|
| 8756 |
+
{
|
| 8757 |
+
"epoch": 9.77,
|
| 8758 |
+
"learning_rate": 2.2020978589924673e-05,
|
| 8759 |
+
"loss": 0.258,
|
| 8760 |
+
"step": 438000
|
| 8761 |
+
},
|
| 8762 |
+
{
|
| 8763 |
+
"epoch": 9.77,
|
| 8764 |
+
"eval_loss": 0.24090658128261566,
|
| 8765 |
+
"eval_runtime": 2.0077,
|
| 8766 |
+
"eval_samples_per_second": 1144.123,
|
| 8767 |
+
"eval_steps_per_second": 17.931,
|
| 8768 |
+
"step": 438000
|
| 8769 |
+
},
|
| 8770 |
+
{
|
| 8771 |
+
"epoch": 9.78,
|
| 8772 |
+
"learning_rate": 2.1830543782105647e-05,
|
| 8773 |
+
"loss": 0.2566,
|
| 8774 |
+
"step": 438500
|
| 8775 |
+
},
|
| 8776 |
+
{
|
| 8777 |
+
"epoch": 9.79,
|
| 8778 |
+
"learning_rate": 2.1641565294124206e-05,
|
| 8779 |
+
"loss": 0.2565,
|
| 8780 |
+
"step": 439000
|
| 8781 |
+
},
|
| 8782 |
+
{
|
| 8783 |
+
"epoch": 9.79,
|
| 8784 |
+
"eval_loss": 0.23793531954288483,
|
| 8785 |
+
"eval_runtime": 1.9748,
|
| 8786 |
+
"eval_samples_per_second": 1163.127,
|
| 8787 |
+
"eval_steps_per_second": 18.229,
|
| 8788 |
+
"step": 439000
|
| 8789 |
+
},
|
| 8790 |
+
{
|
| 8791 |
+
"epoch": 9.8,
|
| 8792 |
+
"learning_rate": 2.1454045192618794e-05,
|
| 8793 |
+
"loss": 0.2564,
|
| 8794 |
+
"step": 439500
|
| 8795 |
+
},
|
| 8796 |
+
{
|
| 8797 |
+
"epoch": 9.81,
|
| 8798 |
+
"learning_rate": 2.1267985528279212e-05,
|
| 8799 |
+
"loss": 0.2569,
|
| 8800 |
+
"step": 440000
|
| 8801 |
+
},
|
| 8802 |
+
{
|
| 8803 |
+
"epoch": 9.81,
|
| 8804 |
+
"eval_loss": 0.23971830308437347,
|
| 8805 |
+
"eval_runtime": 1.9911,
|
| 8806 |
+
"eval_samples_per_second": 1153.611,
|
| 8807 |
+
"eval_steps_per_second": 18.08,
|
| 8808 |
+
"step": 440000
|
| 8809 |
}
|
| 8810 |
],
|
| 8811 |
"max_steps": 500000,
|
| 8812 |
"num_train_epochs": 12,
|
| 8813 |
+
"total_flos": 1.4057178017725262e+22,
|
| 8814 |
"trial_name": null,
|
| 8815 |
"trial_params": null
|
| 8816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c579869ee7eca1f3cfb28c1e8c5c1a2c4c07c47eb9cbbb5f2453c93bc559b64a
|
| 3 |
size 102501541
|