Training in progress, step 490000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cab54be7cb4d2572a34bddaadf5aa44b09e63a53da564cdebcbf1c0114515cb4
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cd7e757b48942dd6939c67ce3bb195396690b3f6c6d27ddc20a0b96e1fdb0e9
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e90597af9402f8b4ddc86423edd1595c0f03275adba0e3f54a96b077337ac052
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d798ff13d72fe751bc0ea721c37eb1e98064dde5819b90f3504db53fdceee97
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 12.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -9606,11 +9606,211 @@
|
|
| 9606 |
"eval_samples_per_second": 759.093,
|
| 9607 |
"eval_steps_per_second": 12.145,
|
| 9608 |
"step": 480000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9609 |
}
|
| 9610 |
],
|
| 9611 |
"max_steps": 500000,
|
| 9612 |
"num_train_epochs": 13,
|
| 9613 |
-
"total_flos": 1.
|
| 9614 |
"trial_name": null,
|
| 9615 |
"trial_params": null
|
| 9616 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 12.487894388093174,
|
| 5 |
+
"global_step": 490000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 9606 |
"eval_samples_per_second": 759.093,
|
| 9607 |
"eval_steps_per_second": 12.145,
|
| 9608 |
"step": 480000
|
| 9609 |
+
},
|
| 9610 |
+
{
|
| 9611 |
+
"epoch": 12.25,
|
| 9612 |
+
"learning_rate": 1.1204252864868377e-05,
|
| 9613 |
+
"loss": 0.2669,
|
| 9614 |
+
"step": 480500
|
| 9615 |
+
},
|
| 9616 |
+
{
|
| 9617 |
+
"epoch": 12.26,
|
| 9618 |
+
"learning_rate": 1.1143368309400725e-05,
|
| 9619 |
+
"loss": 0.2672,
|
| 9620 |
+
"step": 481000
|
| 9621 |
+
},
|
| 9622 |
+
{
|
| 9623 |
+
"epoch": 12.26,
|
| 9624 |
+
"eval_loss": 0.8035285472869873,
|
| 9625 |
+
"eval_runtime": 1.597,
|
| 9626 |
+
"eval_samples_per_second": 626.161,
|
| 9627 |
+
"eval_steps_per_second": 10.019,
|
| 9628 |
+
"step": 481000
|
| 9629 |
+
},
|
| 9630 |
+
{
|
| 9631 |
+
"epoch": 12.27,
|
| 9632 |
+
"learning_rate": 1.1084056947009348e-05,
|
| 9633 |
+
"loss": 0.2671,
|
| 9634 |
+
"step": 481500
|
| 9635 |
+
},
|
| 9636 |
+
{
|
| 9637 |
+
"epoch": 12.28,
|
| 9638 |
+
"learning_rate": 1.1026319426313837e-05,
|
| 9639 |
+
"loss": 0.267,
|
| 9640 |
+
"step": 482000
|
| 9641 |
+
},
|
| 9642 |
+
{
|
| 9643 |
+
"epoch": 12.28,
|
| 9644 |
+
"eval_loss": 0.8039422035217285,
|
| 9645 |
+
"eval_runtime": 1.2756,
|
| 9646 |
+
"eval_samples_per_second": 783.963,
|
| 9647 |
+
"eval_steps_per_second": 12.543,
|
| 9648 |
+
"step": 482000
|
| 9649 |
+
},
|
| 9650 |
+
{
|
| 9651 |
+
"epoch": 12.3,
|
| 9652 |
+
"learning_rate": 1.097015637872247e-05,
|
| 9653 |
+
"loss": 0.2669,
|
| 9654 |
+
"step": 482500
|
| 9655 |
+
},
|
| 9656 |
+
{
|
| 9657 |
+
"epoch": 12.31,
|
| 9658 |
+
"learning_rate": 1.0915568418425301e-05,
|
| 9659 |
+
"loss": 0.2669,
|
| 9660 |
+
"step": 483000
|
| 9661 |
+
},
|
| 9662 |
+
{
|
| 9663 |
+
"epoch": 12.31,
|
| 9664 |
+
"eval_loss": 0.8074455857276917,
|
| 9665 |
+
"eval_runtime": 1.3487,
|
| 9666 |
+
"eval_samples_per_second": 741.458,
|
| 9667 |
+
"eval_steps_per_second": 11.863,
|
| 9668 |
+
"step": 483000
|
| 9669 |
+
},
|
| 9670 |
+
{
|
| 9671 |
+
"epoch": 12.32,
|
| 9672 |
+
"learning_rate": 1.0862556142387571e-05,
|
| 9673 |
+
"loss": 0.2673,
|
| 9674 |
+
"step": 483500
|
| 9675 |
+
},
|
| 9676 |
+
{
|
| 9677 |
+
"epoch": 12.33,
|
| 9678 |
+
"learning_rate": 1.081112013034298e-05,
|
| 9679 |
+
"loss": 0.267,
|
| 9680 |
+
"step": 484000
|
| 9681 |
+
},
|
| 9682 |
+
{
|
| 9683 |
+
"epoch": 12.33,
|
| 9684 |
+
"eval_loss": 0.8040180802345276,
|
| 9685 |
+
"eval_runtime": 1.3584,
|
| 9686 |
+
"eval_samples_per_second": 736.142,
|
| 9687 |
+
"eval_steps_per_second": 11.778,
|
| 9688 |
+
"step": 484000
|
| 9689 |
+
},
|
| 9690 |
+
{
|
| 9691 |
+
"epoch": 12.35,
|
| 9692 |
+
"learning_rate": 1.0761260944787561e-05,
|
| 9693 |
+
"loss": 0.267,
|
| 9694 |
+
"step": 484500
|
| 9695 |
+
},
|
| 9696 |
+
{
|
| 9697 |
+
"epoch": 12.36,
|
| 9698 |
+
"learning_rate": 1.0712979130973347e-05,
|
| 9699 |
+
"loss": 0.267,
|
| 9700 |
+
"step": 485000
|
| 9701 |
+
},
|
| 9702 |
+
{
|
| 9703 |
+
"epoch": 12.36,
|
| 9704 |
+
"eval_loss": 0.8028098940849304,
|
| 9705 |
+
"eval_runtime": 1.3341,
|
| 9706 |
+
"eval_samples_per_second": 749.592,
|
| 9707 |
+
"eval_steps_per_second": 11.993,
|
| 9708 |
+
"step": 485000
|
| 9709 |
+
},
|
| 9710 |
+
{
|
| 9711 |
+
"epoch": 12.37,
|
| 9712 |
+
"learning_rate": 1.0666275216902535e-05,
|
| 9713 |
+
"loss": 0.2668,
|
| 9714 |
+
"step": 485500
|
| 9715 |
+
},
|
| 9716 |
+
{
|
| 9717 |
+
"epoch": 12.39,
|
| 9718 |
+
"learning_rate": 1.0621149713321656e-05,
|
| 9719 |
+
"loss": 0.2668,
|
| 9720 |
+
"step": 486000
|
| 9721 |
+
},
|
| 9722 |
+
{
|
| 9723 |
+
"epoch": 12.39,
|
| 9724 |
+
"eval_loss": 0.8054640889167786,
|
| 9725 |
+
"eval_runtime": 1.3289,
|
| 9726 |
+
"eval_samples_per_second": 752.527,
|
| 9727 |
+
"eval_steps_per_second": 12.04,
|
| 9728 |
+
"step": 486000
|
| 9729 |
+
},
|
| 9730 |
+
{
|
| 9731 |
+
"epoch": 12.4,
|
| 9732 |
+
"learning_rate": 1.0577603113715964e-05,
|
| 9733 |
+
"loss": 0.2669,
|
| 9734 |
+
"step": 486500
|
| 9735 |
+
},
|
| 9736 |
+
{
|
| 9737 |
+
"epoch": 12.41,
|
| 9738 |
+
"learning_rate": 1.0535635894304106e-05,
|
| 9739 |
+
"loss": 0.2669,
|
| 9740 |
+
"step": 487000
|
| 9741 |
+
},
|
| 9742 |
+
{
|
| 9743 |
+
"epoch": 12.41,
|
| 9744 |
+
"eval_loss": 0.8062050342559814,
|
| 9745 |
+
"eval_runtime": 1.3114,
|
| 9746 |
+
"eval_samples_per_second": 762.54,
|
| 9747 |
+
"eval_steps_per_second": 12.201,
|
| 9748 |
+
"step": 487000
|
| 9749 |
+
},
|
| 9750 |
+
{
|
| 9751 |
+
"epoch": 12.42,
|
| 9752 |
+
"learning_rate": 1.0495248514032875e-05,
|
| 9753 |
+
"loss": 0.2669,
|
| 9754 |
+
"step": 487500
|
| 9755 |
+
},
|
| 9756 |
+
{
|
| 9757 |
+
"epoch": 12.44,
|
| 9758 |
+
"learning_rate": 1.045644141457218e-05,
|
| 9759 |
+
"loss": 0.2669,
|
| 9760 |
+
"step": 488000
|
| 9761 |
+
},
|
| 9762 |
+
{
|
| 9763 |
+
"epoch": 12.44,
|
| 9764 |
+
"eval_loss": 0.8053330779075623,
|
| 9765 |
+
"eval_runtime": 1.3085,
|
| 9766 |
+
"eval_samples_per_second": 764.21,
|
| 9767 |
+
"eval_steps_per_second": 12.227,
|
| 9768 |
+
"step": 488000
|
| 9769 |
+
},
|
| 9770 |
+
{
|
| 9771 |
+
"epoch": 12.45,
|
| 9772 |
+
"learning_rate": 1.0419215020310254e-05,
|
| 9773 |
+
"loss": 0.2671,
|
| 9774 |
+
"step": 488500
|
| 9775 |
+
},
|
| 9776 |
+
{
|
| 9777 |
+
"epoch": 12.46,
|
| 9778 |
+
"learning_rate": 1.0383569738348988e-05,
|
| 9779 |
+
"loss": 0.267,
|
| 9780 |
+
"step": 489000
|
| 9781 |
+
},
|
| 9782 |
+
{
|
| 9783 |
+
"epoch": 12.46,
|
| 9784 |
+
"eval_loss": 0.8089292049407959,
|
| 9785 |
+
"eval_runtime": 1.3008,
|
| 9786 |
+
"eval_samples_per_second": 768.73,
|
| 9787 |
+
"eval_steps_per_second": 12.3,
|
| 9788 |
+
"step": 489000
|
| 9789 |
+
},
|
| 9790 |
+
{
|
| 9791 |
+
"epoch": 12.48,
|
| 9792 |
+
"learning_rate": 1.0349505958499436e-05,
|
| 9793 |
+
"loss": 0.2671,
|
| 9794 |
+
"step": 489500
|
| 9795 |
+
},
|
| 9796 |
+
{
|
| 9797 |
+
"epoch": 12.49,
|
| 9798 |
+
"learning_rate": 1.0317024053277693e-05,
|
| 9799 |
+
"loss": 0.267,
|
| 9800 |
+
"step": 490000
|
| 9801 |
+
},
|
| 9802 |
+
{
|
| 9803 |
+
"epoch": 12.49,
|
| 9804 |
+
"eval_loss": 0.8080971837043762,
|
| 9805 |
+
"eval_runtime": 1.3533,
|
| 9806 |
+
"eval_samples_per_second": 738.941,
|
| 9807 |
+
"eval_steps_per_second": 11.823,
|
| 9808 |
+
"step": 490000
|
| 9809 |
}
|
| 9810 |
],
|
| 9811 |
"max_steps": 500000,
|
| 9812 |
"num_train_epochs": 13,
|
| 9813 |
+
"total_flos": 1.565470805299396e+22,
|
| 9814 |
"trial_name": null,
|
| 9815 |
"trial_params": null
|
| 9816 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0cd7e757b48942dd6939c67ce3bb195396690b3f6c6d27ddc20a0b96e1fdb0e9
|
| 3 |
size 102501541
|