Training in progress, step 820000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb7fd5efb2ff87d094795926832b73ee7aa1e6614b8bfcc85208c1fac61c542d
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dd2e6a7da537ff760743918bcaefe9c986fc2c76a2e5c7636f4801ac5b5cd7b
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0609e713a668aa906084c6d7021e86eb2ccb60af4342d1b4397b833b19fbbc89
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3592882666e2655598af900b2cb26f9f1ba0f86a99376f61e0440aabceaa6a07
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ca8c6c74101f99bae3f401fe7213e04d3ffd2cde1efe24238143ecf20918b53
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a67149e6f2c803c5510419511ac904f9537eb4fbff5066e666ed08b44f9664f
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e59f03ce343bc2279a4caef20772b97445f7cad5f56f03c6fe562ef9297aba5
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49b75ef2c8e55abd504706b58ef3e0a8e29500c910d22cedece78b4bbdd0c43a
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a3e48ab19d013994e76ce74871eebcf792a7f0de0b1bc5e31a34b55c3911660
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83f313ad3307a61df4799cc55bebfd321f79a2d67e43e4af68c3ea5f2e8a2721
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:735207f6708877a90cb07d7ba5bda4e331561a0f2cd24f0dff22c21fa9a1d464
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -9726,11 +9726,131 @@
|
|
| 9726 |
"learning_rate": 2.3368810393753687e-05,
|
| 9727 |
"loss": 0.2895,
|
| 9728 |
"step": 810000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9729 |
}
|
| 9730 |
],
|
| 9731 |
"max_steps": 1000000,
|
| 9732 |
"num_train_epochs": 2,
|
| 9733 |
-
"total_flos": 5.
|
| 9734 |
"trial_name": null,
|
| 9735 |
"trial_params": null
|
| 9736 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.1793671925447022,
|
| 5 |
+
"global_step": 820000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 9726 |
"learning_rate": 2.3368810393753687e-05,
|
| 9727 |
"loss": 0.2895,
|
| 9728 |
"step": 810000
|
| 9729 |
+
},
|
| 9730 |
+
{
|
| 9731 |
+
"epoch": 1.16,
|
| 9732 |
+
"learning_rate": 2.3300855876332162e-05,
|
| 9733 |
+
"loss": 0.2894,
|
| 9734 |
+
"step": 810500
|
| 9735 |
+
},
|
| 9736 |
+
{
|
| 9737 |
+
"epoch": 1.16,
|
| 9738 |
+
"learning_rate": 2.32330563722056e-05,
|
| 9739 |
+
"loss": 0.2896,
|
| 9740 |
+
"step": 811000
|
| 9741 |
+
},
|
| 9742 |
+
{
|
| 9743 |
+
"epoch": 1.16,
|
| 9744 |
+
"learning_rate": 2.316541206673529e-05,
|
| 9745 |
+
"loss": 0.2896,
|
| 9746 |
+
"step": 811500
|
| 9747 |
+
},
|
| 9748 |
+
{
|
| 9749 |
+
"epoch": 1.16,
|
| 9750 |
+
"learning_rate": 2.309792314485815e-05,
|
| 9751 |
+
"loss": 0.2895,
|
| 9752 |
+
"step": 812000
|
| 9753 |
+
},
|
| 9754 |
+
{
|
| 9755 |
+
"epoch": 1.16,
|
| 9756 |
+
"learning_rate": 2.3030589791086353e-05,
|
| 9757 |
+
"loss": 0.2893,
|
| 9758 |
+
"step": 812500
|
| 9759 |
+
},
|
| 9760 |
+
{
|
| 9761 |
+
"epoch": 1.17,
|
| 9762 |
+
"learning_rate": 2.2963412189506695e-05,
|
| 9763 |
+
"loss": 0.2893,
|
| 9764 |
+
"step": 813000
|
| 9765 |
+
},
|
| 9766 |
+
{
|
| 9767 |
+
"epoch": 1.17,
|
| 9768 |
+
"learning_rate": 2.2896390523780156e-05,
|
| 9769 |
+
"loss": 0.2894,
|
| 9770 |
+
"step": 813500
|
| 9771 |
+
},
|
| 9772 |
+
{
|
| 9773 |
+
"epoch": 1.17,
|
| 9774 |
+
"learning_rate": 2.282952497714145e-05,
|
| 9775 |
+
"loss": 0.2894,
|
| 9776 |
+
"step": 814000
|
| 9777 |
+
},
|
| 9778 |
+
{
|
| 9779 |
+
"epoch": 1.17,
|
| 9780 |
+
"learning_rate": 2.2762815732398387e-05,
|
| 9781 |
+
"loss": 0.2896,
|
| 9782 |
+
"step": 814500
|
| 9783 |
+
},
|
| 9784 |
+
{
|
| 9785 |
+
"epoch": 1.17,
|
| 9786 |
+
"learning_rate": 2.2696262971931538e-05,
|
| 9787 |
+
"loss": 0.2891,
|
| 9788 |
+
"step": 815000
|
| 9789 |
+
},
|
| 9790 |
+
{
|
| 9791 |
+
"epoch": 1.17,
|
| 9792 |
+
"learning_rate": 2.2629866877693577e-05,
|
| 9793 |
+
"loss": 0.2892,
|
| 9794 |
+
"step": 815500
|
| 9795 |
+
},
|
| 9796 |
+
{
|
| 9797 |
+
"epoch": 1.17,
|
| 9798 |
+
"learning_rate": 2.2563627631208887e-05,
|
| 9799 |
+
"loss": 0.2892,
|
| 9800 |
+
"step": 816000
|
| 9801 |
+
},
|
| 9802 |
+
{
|
| 9803 |
+
"epoch": 1.17,
|
| 9804 |
+
"learning_rate": 2.2497545413573065e-05,
|
| 9805 |
+
"loss": 0.2902,
|
| 9806 |
+
"step": 816500
|
| 9807 |
+
},
|
| 9808 |
+
{
|
| 9809 |
+
"epoch": 1.17,
|
| 9810 |
+
"learning_rate": 2.2431620405452336e-05,
|
| 9811 |
+
"loss": 0.2889,
|
| 9812 |
+
"step": 817000
|
| 9813 |
+
},
|
| 9814 |
+
{
|
| 9815 |
+
"epoch": 1.17,
|
| 9816 |
+
"learning_rate": 2.23658527870832e-05,
|
| 9817 |
+
"loss": 0.2892,
|
| 9818 |
+
"step": 817500
|
| 9819 |
+
},
|
| 9820 |
+
{
|
| 9821 |
+
"epoch": 1.18,
|
| 9822 |
+
"learning_rate": 2.230024273827179e-05,
|
| 9823 |
+
"loss": 0.2885,
|
| 9824 |
+
"step": 818000
|
| 9825 |
+
},
|
| 9826 |
+
{
|
| 9827 |
+
"epoch": 1.18,
|
| 9828 |
+
"learning_rate": 2.223479043839345e-05,
|
| 9829 |
+
"loss": 0.2888,
|
| 9830 |
+
"step": 818500
|
| 9831 |
+
},
|
| 9832 |
+
{
|
| 9833 |
+
"epoch": 1.18,
|
| 9834 |
+
"learning_rate": 2.216949606639231e-05,
|
| 9835 |
+
"loss": 0.2892,
|
| 9836 |
+
"step": 819000
|
| 9837 |
+
},
|
| 9838 |
+
{
|
| 9839 |
+
"epoch": 1.18,
|
| 9840 |
+
"learning_rate": 2.2104359800780665e-05,
|
| 9841 |
+
"loss": 0.2885,
|
| 9842 |
+
"step": 819500
|
| 9843 |
+
},
|
| 9844 |
+
{
|
| 9845 |
+
"epoch": 1.18,
|
| 9846 |
+
"learning_rate": 2.2039381819638596e-05,
|
| 9847 |
+
"loss": 0.2894,
|
| 9848 |
+
"step": 820000
|
| 9849 |
}
|
| 9850 |
],
|
| 9851 |
"max_steps": 1000000,
|
| 9852 |
"num_train_epochs": 2,
|
| 9853 |
+
"total_flos": 5.543772678797676e+22,
|
| 9854 |
"trial_name": null,
|
| 9855 |
"trial_params": null
|
| 9856 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dd2e6a7da537ff760743918bcaefe9c986fc2c76a2e5c7636f4801ac5b5cd7b
|
| 3 |
size 449450757
|