Training in progress, step 490000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/rng_state_6.pth +2 -2
- last-checkpoint/rng_state_7.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2e2116038f17a755ee0cd7f714c11f53fb07e5f62178c545ddce403c568ffbe
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6673bee6889f96f4d0585736b0bbd0104ce06075881649e694ae573ad1d2887a
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:644e2fc163f368345d150d3fc83a57447c36fc56a8c5b1ac9505e0d54bf78bd0
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a9725305d551fdabba33dd56f81db3b2c581f84aafc700effd4e6475b7ec812
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cbe151312b54baa9a4e64116f572138b7b71dec85430cd296cb3640e4c2da8a4
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:368b6ac98a14548e894c6dec52e2926741be3331b9c05acbb8210a7533733def
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb22d9d447646758c04766cf11ca8c2e8fc19a36cd697a9779fc029a142baac3
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:978268c9f7cbaa0bf6afcfb4a36b2945ad3b7df5c70456c1598647d77f700d81
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea73efb8fdbffbda3c38be20fe382b5f7cbbd8a80c55ee21bf5ba148273fa6a6
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:363c6a12aae6b2a1c1924495d50178e5b2b9c07c6657605723562b286ea4de81
|
| 3 |
+
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d798ff13d72fe751bc0ea721c37eb1e98064dde5819b90f3504db53fdceee97
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -9606,11 +9606,211 @@
|
|
| 9606 |
"eval_samples_per_second": 1946.296,
|
| 9607 |
"eval_steps_per_second": 31.141,
|
| 9608 |
"step": 480000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9609 |
}
|
| 9610 |
],
|
| 9611 |
"max_steps": 500000,
|
| 9612 |
"num_train_epochs": 16,
|
| 9613 |
-
"total_flos": 1.
|
| 9614 |
"trial_name": null,
|
| 9615 |
"trial_params": null
|
| 9616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 15.012714850332424,
|
| 5 |
+
"global_step": 490000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 9606 |
"eval_samples_per_second": 1946.296,
|
| 9607 |
"eval_steps_per_second": 31.141,
|
| 9608 |
"step": 480000
|
| 9609 |
+
},
|
| 9610 |
+
{
|
| 9611 |
+
"epoch": 14.72,
|
| 9612 |
+
"learning_rate": 1.1204252864868377e-05,
|
| 9613 |
+
"loss": 0.3127,
|
| 9614 |
+
"step": 480500
|
| 9615 |
+
},
|
| 9616 |
+
{
|
| 9617 |
+
"epoch": 14.74,
|
| 9618 |
+
"learning_rate": 1.1143368309400725e-05,
|
| 9619 |
+
"loss": 0.3125,
|
| 9620 |
+
"step": 481000
|
| 9621 |
+
},
|
| 9622 |
+
{
|
| 9623 |
+
"epoch": 14.74,
|
| 9624 |
+
"eval_loss": 0.7771226763725281,
|
| 9625 |
+
"eval_runtime": 0.5065,
|
| 9626 |
+
"eval_samples_per_second": 1974.24,
|
| 9627 |
+
"eval_steps_per_second": 31.588,
|
| 9628 |
+
"step": 481000
|
| 9629 |
+
},
|
| 9630 |
+
{
|
| 9631 |
+
"epoch": 14.75,
|
| 9632 |
+
"learning_rate": 1.1084056947009348e-05,
|
| 9633 |
+
"loss": 0.3125,
|
| 9634 |
+
"step": 481500
|
| 9635 |
+
},
|
| 9636 |
+
{
|
| 9637 |
+
"epoch": 14.77,
|
| 9638 |
+
"learning_rate": 1.1026319426313837e-05,
|
| 9639 |
+
"loss": 0.3124,
|
| 9640 |
+
"step": 482000
|
| 9641 |
+
},
|
| 9642 |
+
{
|
| 9643 |
+
"epoch": 14.77,
|
| 9644 |
+
"eval_loss": 0.7745999693870544,
|
| 9645 |
+
"eval_runtime": 0.4972,
|
| 9646 |
+
"eval_samples_per_second": 2011.082,
|
| 9647 |
+
"eval_steps_per_second": 32.177,
|
| 9648 |
+
"step": 482000
|
| 9649 |
+
},
|
| 9650 |
+
{
|
| 9651 |
+
"epoch": 14.78,
|
| 9652 |
+
"learning_rate": 1.097015637872247e-05,
|
| 9653 |
+
"loss": 0.3126,
|
| 9654 |
+
"step": 482500
|
| 9655 |
+
},
|
| 9656 |
+
{
|
| 9657 |
+
"epoch": 14.8,
|
| 9658 |
+
"learning_rate": 1.0915568418425301e-05,
|
| 9659 |
+
"loss": 0.3125,
|
| 9660 |
+
"step": 483000
|
| 9661 |
+
},
|
| 9662 |
+
{
|
| 9663 |
+
"epoch": 14.8,
|
| 9664 |
+
"eval_loss": 0.7761328220367432,
|
| 9665 |
+
"eval_runtime": 0.5155,
|
| 9666 |
+
"eval_samples_per_second": 1939.967,
|
| 9667 |
+
"eval_steps_per_second": 31.039,
|
| 9668 |
+
"step": 483000
|
| 9669 |
+
},
|
| 9670 |
+
{
|
| 9671 |
+
"epoch": 14.81,
|
| 9672 |
+
"learning_rate": 1.0862556142387571e-05,
|
| 9673 |
+
"loss": 0.3126,
|
| 9674 |
+
"step": 483500
|
| 9675 |
+
},
|
| 9676 |
+
{
|
| 9677 |
+
"epoch": 14.83,
|
| 9678 |
+
"learning_rate": 1.081112013034298e-05,
|
| 9679 |
+
"loss": 0.3127,
|
| 9680 |
+
"step": 484000
|
| 9681 |
+
},
|
| 9682 |
+
{
|
| 9683 |
+
"epoch": 14.83,
|
| 9684 |
+
"eval_loss": 0.7775337100028992,
|
| 9685 |
+
"eval_runtime": 0.5093,
|
| 9686 |
+
"eval_samples_per_second": 1963.507,
|
| 9687 |
+
"eval_steps_per_second": 31.416,
|
| 9688 |
+
"step": 484000
|
| 9689 |
+
},
|
| 9690 |
+
{
|
| 9691 |
+
"epoch": 14.84,
|
| 9692 |
+
"learning_rate": 1.0761260944787561e-05,
|
| 9693 |
+
"loss": 0.313,
|
| 9694 |
+
"step": 484500
|
| 9695 |
+
},
|
| 9696 |
+
{
|
| 9697 |
+
"epoch": 14.86,
|
| 9698 |
+
"learning_rate": 1.0712979130973347e-05,
|
| 9699 |
+
"loss": 0.3126,
|
| 9700 |
+
"step": 485000
|
| 9701 |
+
},
|
| 9702 |
+
{
|
| 9703 |
+
"epoch": 14.86,
|
| 9704 |
+
"eval_loss": 0.7740907073020935,
|
| 9705 |
+
"eval_runtime": 0.5188,
|
| 9706 |
+
"eval_samples_per_second": 1927.581,
|
| 9707 |
+
"eval_steps_per_second": 30.841,
|
| 9708 |
+
"step": 485000
|
| 9709 |
+
},
|
| 9710 |
+
{
|
| 9711 |
+
"epoch": 14.87,
|
| 9712 |
+
"learning_rate": 1.0666275216902535e-05,
|
| 9713 |
+
"loss": 0.3125,
|
| 9714 |
+
"step": 485500
|
| 9715 |
+
},
|
| 9716 |
+
{
|
| 9717 |
+
"epoch": 14.89,
|
| 9718 |
+
"learning_rate": 1.0621149713321656e-05,
|
| 9719 |
+
"loss": 0.3128,
|
| 9720 |
+
"step": 486000
|
| 9721 |
+
},
|
| 9722 |
+
{
|
| 9723 |
+
"epoch": 14.89,
|
| 9724 |
+
"eval_loss": 0.7765258550643921,
|
| 9725 |
+
"eval_runtime": 0.52,
|
| 9726 |
+
"eval_samples_per_second": 1923.021,
|
| 9727 |
+
"eval_steps_per_second": 30.768,
|
| 9728 |
+
"step": 486000
|
| 9729 |
+
},
|
| 9730 |
+
{
|
| 9731 |
+
"epoch": 14.91,
|
| 9732 |
+
"learning_rate": 1.0577603113715964e-05,
|
| 9733 |
+
"loss": 0.3125,
|
| 9734 |
+
"step": 486500
|
| 9735 |
+
},
|
| 9736 |
+
{
|
| 9737 |
+
"epoch": 14.92,
|
| 9738 |
+
"learning_rate": 1.0535635894304106e-05,
|
| 9739 |
+
"loss": 0.3126,
|
| 9740 |
+
"step": 487000
|
| 9741 |
+
},
|
| 9742 |
+
{
|
| 9743 |
+
"epoch": 14.92,
|
| 9744 |
+
"eval_loss": 0.7741805911064148,
|
| 9745 |
+
"eval_runtime": 0.5052,
|
| 9746 |
+
"eval_samples_per_second": 1979.424,
|
| 9747 |
+
"eval_steps_per_second": 31.671,
|
| 9748 |
+
"step": 487000
|
| 9749 |
+
},
|
| 9750 |
+
{
|
| 9751 |
+
"epoch": 14.94,
|
| 9752 |
+
"learning_rate": 1.0495248514032875e-05,
|
| 9753 |
+
"loss": 0.3123,
|
| 9754 |
+
"step": 487500
|
| 9755 |
+
},
|
| 9756 |
+
{
|
| 9757 |
+
"epoch": 14.95,
|
| 9758 |
+
"learning_rate": 1.045644141457218e-05,
|
| 9759 |
+
"loss": 0.3126,
|
| 9760 |
+
"step": 488000
|
| 9761 |
+
},
|
| 9762 |
+
{
|
| 9763 |
+
"epoch": 14.95,
|
| 9764 |
+
"eval_loss": 0.7744404077529907,
|
| 9765 |
+
"eval_runtime": 0.5273,
|
| 9766 |
+
"eval_samples_per_second": 1896.451,
|
| 9767 |
+
"eval_steps_per_second": 30.343,
|
| 9768 |
+
"step": 488000
|
| 9769 |
+
},
|
| 9770 |
+
{
|
| 9771 |
+
"epoch": 14.97,
|
| 9772 |
+
"learning_rate": 1.0419215020310254e-05,
|
| 9773 |
+
"loss": 0.3123,
|
| 9774 |
+
"step": 488500
|
| 9775 |
+
},
|
| 9776 |
+
{
|
| 9777 |
+
"epoch": 14.98,
|
| 9778 |
+
"learning_rate": 1.0383569738348988e-05,
|
| 9779 |
+
"loss": 0.3125,
|
| 9780 |
+
"step": 489000
|
| 9781 |
+
},
|
| 9782 |
+
{
|
| 9783 |
+
"epoch": 14.98,
|
| 9784 |
+
"eval_loss": 0.7724484205245972,
|
| 9785 |
+
"eval_runtime": 0.5078,
|
| 9786 |
+
"eval_samples_per_second": 1969.112,
|
| 9787 |
+
"eval_steps_per_second": 31.506,
|
| 9788 |
+
"step": 489000
|
| 9789 |
+
},
|
| 9790 |
+
{
|
| 9791 |
+
"epoch": 15.0,
|
| 9792 |
+
"learning_rate": 1.0349505958499436e-05,
|
| 9793 |
+
"loss": 0.3126,
|
| 9794 |
+
"step": 489500
|
| 9795 |
+
},
|
| 9796 |
+
{
|
| 9797 |
+
"epoch": 15.01,
|
| 9798 |
+
"learning_rate": 1.0317024053277693e-05,
|
| 9799 |
+
"loss": 0.3124,
|
| 9800 |
+
"step": 490000
|
| 9801 |
+
},
|
| 9802 |
+
{
|
| 9803 |
+
"epoch": 15.01,
|
| 9804 |
+
"eval_loss": 0.7753793001174927,
|
| 9805 |
+
"eval_runtime": 0.498,
|
| 9806 |
+
"eval_samples_per_second": 2008.198,
|
| 9807 |
+
"eval_steps_per_second": 32.131,
|
| 9808 |
+
"step": 490000
|
| 9809 |
}
|
| 9810 |
],
|
| 9811 |
"max_steps": 500000,
|
| 9812 |
"num_train_epochs": 16,
|
| 9813 |
+
"total_flos": 1.5654749980670862e+22,
|
| 9814 |
"trial_name": null,
|
| 9815 |
"trial_params": null
|
| 9816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6673bee6889f96f4d0585736b0bbd0104ce06075881649e694ae573ad1d2887a
|
| 3 |
size 102501541
|