Training in progress, step 850000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e51d88aff5257549a283e52c14c7816f109ed1c8f1cd4c0209be1013bd750037
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c2b61d52c517c0fe4a2eaad25ea86eca5fed6ebf386f54b8df5e3364654d10f
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3998272c6f0774d9c932f650afe844739a9faa8bc3cb7003f4f32bc8a833e115
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fbbdae9c95471a40e6c6d019353b081fa6055bc839ed4f2163c0c1b80837934
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -16806,11 +16806,211 @@
|
|
| 16806 |
"eval_samples_per_second": 870.984,
|
| 16807 |
"eval_steps_per_second": 13.651,
|
| 16808 |
"step": 840000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16809 |
}
|
| 16810 |
],
|
| 16811 |
"max_steps": 1000000,
|
| 16812 |
"num_train_epochs": 12,
|
| 16813 |
-
"total_flos": 5.
|
| 16814 |
"trial_name": null,
|
| 16815 |
"trial_params": null
|
| 16816 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.122804278241862,
|
| 5 |
+
"global_step": 850000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 16806 |
"eval_samples_per_second": 870.984,
|
| 16807 |
"eval_steps_per_second": 13.651,
|
| 16808 |
"step": 840000
|
| 16809 |
+
},
|
| 16810 |
+
{
|
| 16811 |
+
"epoch": 9.02,
|
| 16812 |
+
"learning_rate": 1.95137059427344e-05,
|
| 16813 |
+
"loss": 0.1851,
|
| 16814 |
+
"step": 840500
|
| 16815 |
+
},
|
| 16816 |
+
{
|
| 16817 |
+
"epoch": 9.02,
|
| 16818 |
+
"learning_rate": 1.945553091971727e-05,
|
| 16819 |
+
"loss": 0.1852,
|
| 16820 |
+
"step": 841000
|
| 16821 |
+
},
|
| 16822 |
+
{
|
| 16823 |
+
"epoch": 9.02,
|
| 16824 |
+
"eval_loss": 0.17605267465114594,
|
| 16825 |
+
"eval_runtime": 2.6415,
|
| 16826 |
+
"eval_samples_per_second": 869.593,
|
| 16827 |
+
"eval_steps_per_second": 13.629,
|
| 16828 |
+
"step": 841000
|
| 16829 |
+
},
|
| 16830 |
+
{
|
| 16831 |
+
"epoch": 9.03,
|
| 16832 |
+
"learning_rate": 1.93975214229667e-05,
|
| 16833 |
+
"loss": 0.185,
|
| 16834 |
+
"step": 841500
|
| 16835 |
+
},
|
| 16836 |
+
{
|
| 16837 |
+
"epoch": 9.03,
|
| 16838 |
+
"learning_rate": 1.933967761107847e-05,
|
| 16839 |
+
"loss": 0.1846,
|
| 16840 |
+
"step": 842000
|
| 16841 |
+
},
|
| 16842 |
+
{
|
| 16843 |
+
"epoch": 9.03,
|
| 16844 |
+
"eval_loss": 0.17334681749343872,
|
| 16845 |
+
"eval_runtime": 2.679,
|
| 16846 |
+
"eval_samples_per_second": 857.418,
|
| 16847 |
+
"eval_steps_per_second": 13.438,
|
| 16848 |
+
"step": 842000
|
| 16849 |
+
},
|
| 16850 |
+
{
|
| 16851 |
+
"epoch": 9.04,
|
| 16852 |
+
"learning_rate": 1.928199964219533e-05,
|
| 16853 |
+
"loss": 0.1851,
|
| 16854 |
+
"step": 842500
|
| 16855 |
+
},
|
| 16856 |
+
{
|
| 16857 |
+
"epoch": 9.04,
|
| 16858 |
+
"learning_rate": 1.9224487674006694e-05,
|
| 16859 |
+
"loss": 0.1848,
|
| 16860 |
+
"step": 843000
|
| 16861 |
+
},
|
| 16862 |
+
{
|
| 16863 |
+
"epoch": 9.04,
|
| 16864 |
+
"eval_loss": 0.17702366411685944,
|
| 16865 |
+
"eval_runtime": 2.7205,
|
| 16866 |
+
"eval_samples_per_second": 844.336,
|
| 16867 |
+
"eval_steps_per_second": 13.233,
|
| 16868 |
+
"step": 843000
|
| 16869 |
+
},
|
| 16870 |
+
{
|
| 16871 |
+
"epoch": 9.05,
|
| 16872 |
+
"learning_rate": 1.9167141863748015e-05,
|
| 16873 |
+
"loss": 0.1848,
|
| 16874 |
+
"step": 843500
|
| 16875 |
+
},
|
| 16876 |
+
{
|
| 16877 |
+
"epoch": 9.06,
|
| 16878 |
+
"learning_rate": 1.9109962368200602e-05,
|
| 16879 |
+
"loss": 0.1846,
|
| 16880 |
+
"step": 844000
|
| 16881 |
+
},
|
| 16882 |
+
{
|
| 16883 |
+
"epoch": 9.06,
|
| 16884 |
+
"eval_loss": 0.17539054155349731,
|
| 16885 |
+
"eval_runtime": 2.6244,
|
| 16886 |
+
"eval_samples_per_second": 875.248,
|
| 16887 |
+
"eval_steps_per_second": 13.717,
|
| 16888 |
+
"step": 844000
|
| 16889 |
+
},
|
| 16890 |
+
{
|
| 16891 |
+
"epoch": 9.06,
|
| 16892 |
+
"learning_rate": 1.9052949343690977e-05,
|
| 16893 |
+
"loss": 0.1848,
|
| 16894 |
+
"step": 844500
|
| 16895 |
+
},
|
| 16896 |
+
{
|
| 16897 |
+
"epoch": 9.07,
|
| 16898 |
+
"learning_rate": 1.8996102946090586e-05,
|
| 16899 |
+
"loss": 0.1846,
|
| 16900 |
+
"step": 845000
|
| 16901 |
+
},
|
| 16902 |
+
{
|
| 16903 |
+
"epoch": 9.07,
|
| 16904 |
+
"eval_loss": 0.17318959534168243,
|
| 16905 |
+
"eval_runtime": 2.7201,
|
| 16906 |
+
"eval_samples_per_second": 844.445,
|
| 16907 |
+
"eval_steps_per_second": 13.235,
|
| 16908 |
+
"step": 845000
|
| 16909 |
+
},
|
| 16910 |
+
{
|
| 16911 |
+
"epoch": 9.07,
|
| 16912 |
+
"learning_rate": 1.8939423330815345e-05,
|
| 16913 |
+
"loss": 0.1847,
|
| 16914 |
+
"step": 845500
|
| 16915 |
+
},
|
| 16916 |
+
{
|
| 16917 |
+
"epoch": 9.08,
|
| 16918 |
+
"learning_rate": 1.888291065282509e-05,
|
| 16919 |
+
"loss": 0.1847,
|
| 16920 |
+
"step": 846000
|
| 16921 |
+
},
|
| 16922 |
+
{
|
| 16923 |
+
"epoch": 9.08,
|
| 16924 |
+
"eval_loss": 0.17560191452503204,
|
| 16925 |
+
"eval_runtime": 2.7101,
|
| 16926 |
+
"eval_samples_per_second": 847.565,
|
| 16927 |
+
"eval_steps_per_second": 13.284,
|
| 16928 |
+
"step": 846000
|
| 16929 |
+
},
|
| 16930 |
+
{
|
| 16931 |
+
"epoch": 9.08,
|
| 16932 |
+
"learning_rate": 1.882656506662338e-05,
|
| 16933 |
+
"loss": 0.1846,
|
| 16934 |
+
"step": 846500
|
| 16935 |
+
},
|
| 16936 |
+
{
|
| 16937 |
+
"epoch": 9.09,
|
| 16938 |
+
"learning_rate": 1.8770386726256865e-05,
|
| 16939 |
+
"loss": 0.1844,
|
| 16940 |
+
"step": 847000
|
| 16941 |
+
},
|
| 16942 |
+
{
|
| 16943 |
+
"epoch": 9.09,
|
| 16944 |
+
"eval_loss": 0.17197825014591217,
|
| 16945 |
+
"eval_runtime": 2.7201,
|
| 16946 |
+
"eval_samples_per_second": 844.459,
|
| 16947 |
+
"eval_steps_per_second": 13.235,
|
| 16948 |
+
"step": 847000
|
| 16949 |
+
},
|
| 16950 |
+
{
|
| 16951 |
+
"epoch": 9.09,
|
| 16952 |
+
"learning_rate": 1.8714375785315006e-05,
|
| 16953 |
+
"loss": 0.1845,
|
| 16954 |
+
"step": 847500
|
| 16955 |
+
},
|
| 16956 |
+
{
|
| 16957 |
+
"epoch": 9.1,
|
| 16958 |
+
"learning_rate": 1.8658532396929565e-05,
|
| 16959 |
+
"loss": 0.184,
|
| 16960 |
+
"step": 848000
|
| 16961 |
+
},
|
| 16962 |
+
{
|
| 16963 |
+
"epoch": 9.1,
|
| 16964 |
+
"eval_loss": 0.17653484642505646,
|
| 16965 |
+
"eval_runtime": 2.6616,
|
| 16966 |
+
"eval_samples_per_second": 863.031,
|
| 16967 |
+
"eval_steps_per_second": 13.526,
|
| 16968 |
+
"step": 848000
|
| 16969 |
+
},
|
| 16970 |
+
{
|
| 16971 |
+
"epoch": 9.11,
|
| 16972 |
+
"learning_rate": 1.8602856713774208e-05,
|
| 16973 |
+
"loss": 0.1843,
|
| 16974 |
+
"step": 848500
|
| 16975 |
+
},
|
| 16976 |
+
{
|
| 16977 |
+
"epoch": 9.11,
|
| 16978 |
+
"learning_rate": 1.8547348888064178e-05,
|
| 16979 |
+
"loss": 0.1848,
|
| 16980 |
+
"step": 849000
|
| 16981 |
+
},
|
| 16982 |
+
{
|
| 16983 |
+
"epoch": 9.11,
|
| 16984 |
+
"eval_loss": 0.1734277456998825,
|
| 16985 |
+
"eval_runtime": 2.6737,
|
| 16986 |
+
"eval_samples_per_second": 859.112,
|
| 16987 |
+
"eval_steps_per_second": 13.465,
|
| 16988 |
+
"step": 849000
|
| 16989 |
+
},
|
| 16990 |
+
{
|
| 16991 |
+
"epoch": 9.12,
|
| 16992 |
+
"learning_rate": 1.8492009071555703e-05,
|
| 16993 |
+
"loss": 0.1846,
|
| 16994 |
+
"step": 849500
|
| 16995 |
+
},
|
| 16996 |
+
{
|
| 16997 |
+
"epoch": 9.12,
|
| 16998 |
+
"learning_rate": 1.8436837415545772e-05,
|
| 16999 |
+
"loss": 0.1848,
|
| 17000 |
+
"step": 850000
|
| 17001 |
+
},
|
| 17002 |
+
{
|
| 17003 |
+
"epoch": 9.12,
|
| 17004 |
+
"eval_loss": 0.17425018548965454,
|
| 17005 |
+
"eval_runtime": 2.6779,
|
| 17006 |
+
"eval_samples_per_second": 857.76,
|
| 17007 |
+
"eval_steps_per_second": 13.443,
|
| 17008 |
+
"step": 850000
|
| 17009 |
}
|
| 17010 |
],
|
| 17011 |
"max_steps": 1000000,
|
| 17012 |
"num_train_epochs": 12,
|
| 17013 |
+
"total_flos": 5.958472899551867e+22,
|
| 17014 |
"trial_name": null,
|
| 17015 |
"trial_params": null
|
| 17016 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c2b61d52c517c0fe4a2eaad25ea86eca5fed6ebf386f54b8df5e3364654d10f
|
| 3 |
size 449471589
|