Training in progress, step 260000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +2 -2
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80fb9af3e1214502ea925d3599d3b6efda5d8711659b2c90eec052ea369703f7
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ab3051ab890d6ef671960c94245505fe32b7894ead10b5d7877e9fabce99ffc
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:72757db14fb233c9a7d81cef99ff1e84d48a76f7d40a8342399239cffc8c3e9f
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8fbef78331483305594b3f85c6f32ce7bab8b9f165e2fb904f362fb3239105f
|
| 3 |
+
size 14567
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b31003ab7fcda6d6f6ff162f700b04d49c90c118129057bd1f4ee624c393e588
|
| 3 |
+
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1da43c666b0af3cbe77f19371961622ab99b1114e6beec7303d695cd496caee4
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04396ff3c1feb0819b380c26b60d419205385a8e624d7c59a71eaea179611c46
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 3.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -1856,11 +1856,85 @@
|
|
| 1856 |
"eval_samples_per_second": 983.434,
|
| 1857 |
"eval_steps_per_second": 15.735,
|
| 1858 |
"step": 250000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1859 |
}
|
| 1860 |
],
|
| 1861 |
"max_steps": 1000000,
|
| 1862 |
"num_train_epochs": 16,
|
| 1863 |
-
"total_flos": 1.
|
| 1864 |
"trial_name": null,
|
| 1865 |
"trial_params": null
|
| 1866 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 3.970253638126651,
|
| 5 |
+
"global_step": 260000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 1856 |
"eval_samples_per_second": 983.434,
|
| 1857 |
"eval_steps_per_second": 15.735,
|
| 1858 |
"step": 250000
|
| 1859 |
+
},
|
| 1860 |
+
{
|
| 1861 |
+
"epoch": 3.83,
|
| 1862 |
+
"learning_rate": 0.00013509735236440766,
|
| 1863 |
+
"loss": 0.3003,
|
| 1864 |
+
"step": 251000
|
| 1865 |
+
},
|
| 1866 |
+
{
|
| 1867 |
+
"epoch": 3.85,
|
| 1868 |
+
"learning_rate": 0.00013495426653524972,
|
| 1869 |
+
"loss": 0.3,
|
| 1870 |
+
"step": 252000
|
| 1871 |
+
},
|
| 1872 |
+
{
|
| 1873 |
+
"epoch": 3.86,
|
| 1874 |
+
"learning_rate": 0.00013481057973503742,
|
| 1875 |
+
"loss": 0.3,
|
| 1876 |
+
"step": 253000
|
| 1877 |
+
},
|
| 1878 |
+
{
|
| 1879 |
+
"epoch": 3.88,
|
| 1880 |
+
"learning_rate": 0.00013466629353510651,
|
| 1881 |
+
"loss": 0.2997,
|
| 1882 |
+
"step": 254000
|
| 1883 |
+
},
|
| 1884 |
+
{
|
| 1885 |
+
"epoch": 3.89,
|
| 1886 |
+
"learning_rate": 0.00013452140951334787,
|
| 1887 |
+
"loss": 0.2995,
|
| 1888 |
+
"step": 255000
|
| 1889 |
+
},
|
| 1890 |
+
{
|
| 1891 |
+
"epoch": 3.89,
|
| 1892 |
+
"eval_runtime": 0.8192,
|
| 1893 |
+
"eval_samples_per_second": 1220.744,
|
| 1894 |
+
"eval_steps_per_second": 19.532,
|
| 1895 |
+
"step": 255000
|
| 1896 |
+
},
|
| 1897 |
+
{
|
| 1898 |
+
"epoch": 3.91,
|
| 1899 |
+
"learning_rate": 0.00013437592925418985,
|
| 1900 |
+
"loss": 0.2996,
|
| 1901 |
+
"step": 256000
|
| 1902 |
+
},
|
| 1903 |
+
{
|
| 1904 |
+
"epoch": 3.92,
|
| 1905 |
+
"learning_rate": 0.00013422985434858133,
|
| 1906 |
+
"loss": 0.299,
|
| 1907 |
+
"step": 257000
|
| 1908 |
+
},
|
| 1909 |
+
{
|
| 1910 |
+
"epoch": 3.94,
|
| 1911 |
+
"learning_rate": 0.00013408318639397405,
|
| 1912 |
+
"loss": 0.2987,
|
| 1913 |
+
"step": 258000
|
| 1914 |
+
},
|
| 1915 |
+
{
|
| 1916 |
+
"epoch": 3.95,
|
| 1917 |
+
"learning_rate": 0.00013393592699430525,
|
| 1918 |
+
"loss": 0.2986,
|
| 1919 |
+
"step": 259000
|
| 1920 |
+
},
|
| 1921 |
+
{
|
| 1922 |
+
"epoch": 3.97,
|
| 1923 |
+
"learning_rate": 0.00013378807775998012,
|
| 1924 |
+
"loss": 0.2984,
|
| 1925 |
+
"step": 260000
|
| 1926 |
+
},
|
| 1927 |
+
{
|
| 1928 |
+
"epoch": 3.97,
|
| 1929 |
+
"eval_runtime": 1.0461,
|
| 1930 |
+
"eval_samples_per_second": 955.963,
|
| 1931 |
+
"eval_steps_per_second": 15.295,
|
| 1932 |
+
"step": 260000
|
| 1933 |
}
|
| 1934 |
],
|
| 1935 |
"max_steps": 1000000,
|
| 1936 |
"num_train_epochs": 16,
|
| 1937 |
+
"total_flos": 1.8226048155845795e+22,
|
| 1938 |
"trial_name": null,
|
| 1939 |
"trial_params": null
|
| 1940 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ab3051ab890d6ef671960c94245505fe32b7894ead10b5d7877e9fabce99ffc
|
| 3 |
size 449471589
|