Training in progress, step 450000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 202194449
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7cf25044f894af33b58ee33e839ca67a9010ce216694a7f4c91e8f90caf02e3
|
| 3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f48e297dd8d43fe26955c08bdc18374b43b5c5c7bf58df74b63ccfce891d130
|
| 3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d0eaf38f8d76dc97fc60763011f1de34c7a2cb3c95faaa0610cc0f4af72cd60
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1aee1e0607f38b87a3b735ae98b8e01339f7cc72ffa6ccf3c213d28824ed54a9
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cceb842179d613224b3b5f8d750e75368fc012474b9befae3962586a3fa07c34
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e777f0df33e0c44b8c16c09cacb56ca419e02a262aa3b9ece5534f0249ba6105
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44ab8fd99918dee712abc63025e4d8d70437de212ab6324f5ae0cbe74ed24f94
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef227953f09853ae7340813887aac1a30150643cccf7844f37d1f0ff5cb9042d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7caa6c821a93c7fa5649d2fba3a2ebec3c3cd1a1620660f06157dc5569333b5a
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee0e57d5d9717a7b27c7acd3b2ffc6fc1518aa4d2ebf016d3b2d036634f60df0
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bc29c434fb0390a8f4f90d65ac745a0b4f381dbd06e857762d450d4a464c7045
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 13.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -8806,11 +8806,211 @@
|
|
| 8806 |
"eval_samples_per_second": 1931.767,
|
| 8807 |
"eval_steps_per_second": 30.908,
|
| 8808 |
"step": 440000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8809 |
}
|
| 8810 |
],
|
| 8811 |
"max_steps": 500000,
|
| 8812 |
"num_train_epochs": 16,
|
| 8813 |
-
"total_flos": 1.
|
| 8814 |
"trial_name": null,
|
| 8815 |
"trial_params": null
|
| 8816 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 13.787187107448144,
|
| 5 |
+
"global_step": 450000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 8806 |
"eval_samples_per_second": 1931.767,
|
| 8807 |
"eval_steps_per_second": 30.908,
|
| 8808 |
"step": 440000
|
| 8809 |
+
},
|
| 8810 |
+
{
|
| 8811 |
+
"epoch": 13.5,
|
| 8812 |
+
"learning_rate": 2.1083388335824145e-05,
|
| 8813 |
+
"loss": 0.315,
|
| 8814 |
+
"step": 440500
|
| 8815 |
+
},
|
| 8816 |
+
{
|
| 8817 |
+
"epoch": 13.51,
|
| 8818 |
+
"learning_rate": 2.0900255633978873e-05,
|
| 8819 |
+
"loss": 0.3147,
|
| 8820 |
+
"step": 441000
|
| 8821 |
+
},
|
| 8822 |
+
{
|
| 8823 |
+
"epoch": 13.51,
|
| 8824 |
+
"eval_loss": 0.7775884866714478,
|
| 8825 |
+
"eval_runtime": 0.5109,
|
| 8826 |
+
"eval_samples_per_second": 1957.402,
|
| 8827 |
+
"eval_steps_per_second": 31.318,
|
| 8828 |
+
"step": 441000
|
| 8829 |
+
},
|
| 8830 |
+
{
|
| 8831 |
+
"epoch": 13.53,
|
| 8832 |
+
"learning_rate": 2.0718589425453314e-05,
|
| 8833 |
+
"loss": 0.3145,
|
| 8834 |
+
"step": 441500
|
| 8835 |
+
},
|
| 8836 |
+
{
|
| 8837 |
+
"epoch": 13.54,
|
| 8838 |
+
"learning_rate": 2.0538391696920015e-05,
|
| 8839 |
+
"loss": 0.3145,
|
| 8840 |
+
"step": 442000
|
| 8841 |
+
},
|
| 8842 |
+
{
|
| 8843 |
+
"epoch": 13.54,
|
| 8844 |
+
"eval_loss": 0.7738910913467407,
|
| 8845 |
+
"eval_runtime": 0.5137,
|
| 8846 |
+
"eval_samples_per_second": 1946.745,
|
| 8847 |
+
"eval_steps_per_second": 31.148,
|
| 8848 |
+
"step": 442000
|
| 8849 |
+
},
|
| 8850 |
+
{
|
| 8851 |
+
"epoch": 13.56,
|
| 8852 |
+
"learning_rate": 2.035966441899249e-05,
|
| 8853 |
+
"loss": 0.3148,
|
| 8854 |
+
"step": 442500
|
| 8855 |
+
},
|
| 8856 |
+
{
|
| 8857 |
+
"epoch": 13.57,
|
| 8858 |
+
"learning_rate": 2.0182409546203555e-05,
|
| 8859 |
+
"loss": 0.3145,
|
| 8860 |
+
"step": 443000
|
| 8861 |
+
},
|
| 8862 |
+
{
|
| 8863 |
+
"epoch": 13.57,
|
| 8864 |
+
"eval_loss": 0.7752822041511536,
|
| 8865 |
+
"eval_runtime": 0.5041,
|
| 8866 |
+
"eval_samples_per_second": 1983.895,
|
| 8867 |
+
"eval_steps_per_second": 31.742,
|
| 8868 |
+
"step": 443000
|
| 8869 |
+
},
|
| 8870 |
+
{
|
| 8871 |
+
"epoch": 13.59,
|
| 8872 |
+
"learning_rate": 2.000662901698415e-05,
|
| 8873 |
+
"loss": 0.3143,
|
| 8874 |
+
"step": 443500
|
| 8875 |
+
},
|
| 8876 |
+
{
|
| 8877 |
+
"epoch": 13.6,
|
| 8878 |
+
"learning_rate": 1.983232475364195e-05,
|
| 8879 |
+
"loss": 0.3145,
|
| 8880 |
+
"step": 444000
|
| 8881 |
+
},
|
| 8882 |
+
{
|
| 8883 |
+
"epoch": 13.6,
|
| 8884 |
+
"eval_loss": 0.7770563960075378,
|
| 8885 |
+
"eval_runtime": 0.5102,
|
| 8886 |
+
"eval_samples_per_second": 1959.954,
|
| 8887 |
+
"eval_steps_per_second": 31.359,
|
| 8888 |
+
"step": 444000
|
| 8889 |
+
},
|
| 8890 |
+
{
|
| 8891 |
+
"epoch": 13.62,
|
| 8892 |
+
"learning_rate": 1.9659498662340474e-05,
|
| 8893 |
+
"loss": 0.3144,
|
| 8894 |
+
"step": 444500
|
| 8895 |
+
},
|
| 8896 |
+
{
|
| 8897 |
+
"epoch": 13.63,
|
| 8898 |
+
"learning_rate": 1.948815263307819e-05,
|
| 8899 |
+
"loss": 0.3146,
|
| 8900 |
+
"step": 445000
|
| 8901 |
+
},
|
| 8902 |
+
{
|
| 8903 |
+
"epoch": 13.63,
|
| 8904 |
+
"eval_loss": 0.7755433917045593,
|
| 8905 |
+
"eval_runtime": 0.5167,
|
| 8906 |
+
"eval_samples_per_second": 1935.403,
|
| 8907 |
+
"eval_steps_per_second": 30.966,
|
| 8908 |
+
"step": 445000
|
| 8909 |
+
},
|
| 8910 |
+
{
|
| 8911 |
+
"epoch": 13.65,
|
| 8912 |
+
"learning_rate": 1.9318288539667765e-05,
|
| 8913 |
+
"loss": 0.3144,
|
| 8914 |
+
"step": 445500
|
| 8915 |
+
},
|
| 8916 |
+
{
|
| 8917 |
+
"epoch": 13.66,
|
| 8918 |
+
"learning_rate": 1.914990823971574e-05,
|
| 8919 |
+
"loss": 0.3144,
|
| 8920 |
+
"step": 446000
|
| 8921 |
+
},
|
| 8922 |
+
{
|
| 8923 |
+
"epoch": 13.66,
|
| 8924 |
+
"eval_loss": 0.7735174298286438,
|
| 8925 |
+
"eval_runtime": 0.5138,
|
| 8926 |
+
"eval_samples_per_second": 1946.362,
|
| 8927 |
+
"eval_steps_per_second": 31.142,
|
| 8928 |
+
"step": 446000
|
| 8929 |
+
},
|
| 8930 |
+
{
|
| 8931 |
+
"epoch": 13.68,
|
| 8932 |
+
"learning_rate": 1.8983013574602096e-05,
|
| 8933 |
+
"loss": 0.314,
|
| 8934 |
+
"step": 446500
|
| 8935 |
+
},
|
| 8936 |
+
{
|
| 8937 |
+
"epoch": 13.7,
|
| 8938 |
+
"learning_rate": 1.8817606369460156e-05,
|
| 8939 |
+
"loss": 0.3143,
|
| 8940 |
+
"step": 447000
|
| 8941 |
+
},
|
| 8942 |
+
{
|
| 8943 |
+
"epoch": 13.7,
|
| 8944 |
+
"eval_loss": 0.7771323323249817,
|
| 8945 |
+
"eval_runtime": 0.5128,
|
| 8946 |
+
"eval_samples_per_second": 1950.249,
|
| 8947 |
+
"eval_steps_per_second": 31.204,
|
| 8948 |
+
"step": 447000
|
| 8949 |
+
},
|
| 8950 |
+
{
|
| 8951 |
+
"epoch": 13.71,
|
| 8952 |
+
"learning_rate": 1.865368843315663e-05,
|
| 8953 |
+
"loss": 0.3147,
|
| 8954 |
+
"step": 447500
|
| 8955 |
+
},
|
| 8956 |
+
{
|
| 8957 |
+
"epoch": 13.73,
|
| 8958 |
+
"learning_rate": 1.8491261558271762e-05,
|
| 8959 |
+
"loss": 0.314,
|
| 8960 |
+
"step": 448000
|
| 8961 |
+
},
|
| 8962 |
+
{
|
| 8963 |
+
"epoch": 13.73,
|
| 8964 |
+
"eval_loss": 0.7759497165679932,
|
| 8965 |
+
"eval_runtime": 0.5133,
|
| 8966 |
+
"eval_samples_per_second": 1948.013,
|
| 8967 |
+
"eval_steps_per_second": 31.168,
|
| 8968 |
+
"step": 448000
|
| 8969 |
+
},
|
| 8970 |
+
{
|
| 8971 |
+
"epoch": 13.74,
|
| 8972 |
+
"learning_rate": 1.833032752107986e-05,
|
| 8973 |
+
"loss": 0.3143,
|
| 8974 |
+
"step": 448500
|
| 8975 |
+
},
|
| 8976 |
+
{
|
| 8977 |
+
"epoch": 13.76,
|
| 8978 |
+
"learning_rate": 1.817088808152978e-05,
|
| 8979 |
+
"loss": 0.3143,
|
| 8980 |
+
"step": 449000
|
| 8981 |
+
},
|
| 8982 |
+
{
|
| 8983 |
+
"epoch": 13.76,
|
| 8984 |
+
"eval_loss": 0.7774102687835693,
|
| 8985 |
+
"eval_runtime": 0.5028,
|
| 8986 |
+
"eval_samples_per_second": 1988.858,
|
| 8987 |
+
"eval_steps_per_second": 31.822,
|
| 8988 |
+
"step": 449000
|
| 8989 |
+
},
|
| 8990 |
+
{
|
| 8991 |
+
"epoch": 13.77,
|
| 8992 |
+
"learning_rate": 1.801294498322569e-05,
|
| 8993 |
+
"loss": 0.3141,
|
| 8994 |
+
"step": 449500
|
| 8995 |
+
},
|
| 8996 |
+
{
|
| 8997 |
+
"epoch": 13.79,
|
| 8998 |
+
"learning_rate": 1.7856499953407978e-05,
|
| 8999 |
+
"loss": 0.3142,
|
| 9000 |
+
"step": 450000
|
| 9001 |
+
},
|
| 9002 |
+
{
|
| 9003 |
+
"epoch": 13.79,
|
| 9004 |
+
"eval_loss": 0.7757880687713623,
|
| 9005 |
+
"eval_runtime": 0.5025,
|
| 9006 |
+
"eval_samples_per_second": 1989.909,
|
| 9007 |
+
"eval_steps_per_second": 31.839,
|
| 9008 |
+
"step": 450000
|
| 9009 |
}
|
| 9010 |
],
|
| 9011 |
"max_steps": 500000,
|
| 9012 |
"num_train_epochs": 16,
|
| 9013 |
+
"total_flos": 1.4376817401476814e+22,
|
| 9014 |
"trial_name": null,
|
| 9015 |
"trial_params": null
|
| 9016 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 102501541
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f48e297dd8d43fe26955c08bdc18374b43b5c5c7bf58df74b63ccfce891d130
|
| 3 |
size 102501541
|