Training in progress, step 250000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:981f9cd48efb9aa651b5446906cfef65e3f5e49845175ea67dca9f635e9c1038
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22078b7cadf342376e128bb1b061228e01c38cac2488c2ab30ca05231e48ad59
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac406c207b2f7429e3395f91a987edf514c0aeff745dd88acf0897c2d8c0d2f5
|
| 3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a150bbb8b25e860b9b4e4d20308c75cd4211d4f66e73ca0ae830a1483ae3793d
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14439
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba99bff5574a5463122f2331dbbdbc3c75f9c55701d70a2ea0f51810fd185527
|
| 3 |
size 14439
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b4ebfa40194a9690e4a04456095f6b6e0d44abd914d615940586748348af8ed
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1301777ffe2dc7d45e2808f549b6c6f37d8616c9227a415a7757312ea67a80a1
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 3.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -1782,11 +1782,85 @@
|
|
| 1782 |
"eval_samples_per_second": 990.396,
|
| 1783 |
"eval_steps_per_second": 15.846,
|
| 1784 |
"step": 240000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1785 |
}
|
| 1786 |
],
|
| 1787 |
"max_steps": 1000000,
|
| 1788 |
"num_train_epochs": 16,
|
| 1789 |
-
"total_flos": 1.
|
| 1790 |
"trial_name": null,
|
| 1791 |
"trial_params": null
|
| 1792 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 3.81755157512178,
|
| 5 |
+
"global_step": 250000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 1782 |
"eval_samples_per_second": 990.396,
|
| 1783 |
"eval_steps_per_second": 15.846,
|
| 1784 |
"step": 240000
|
| 1785 |
+
},
|
| 1786 |
+
{
|
| 1787 |
+
"epoch": 3.68,
|
| 1788 |
+
"learning_rate": 0.00013649481627508181,
|
| 1789 |
+
"loss": 0.3031,
|
| 1790 |
+
"step": 241000
|
| 1791 |
+
},
|
| 1792 |
+
{
|
| 1793 |
+
"epoch": 3.7,
|
| 1794 |
+
"learning_rate": 0.0001363578251256578,
|
| 1795 |
+
"loss": 0.3023,
|
| 1796 |
+
"step": 242000
|
| 1797 |
+
},
|
| 1798 |
+
{
|
| 1799 |
+
"epoch": 3.71,
|
| 1800 |
+
"learning_rate": 0.00013622021765608754,
|
| 1801 |
+
"loss": 0.3022,
|
| 1802 |
+
"step": 243000
|
| 1803 |
+
},
|
| 1804 |
+
{
|
| 1805 |
+
"epoch": 3.73,
|
| 1806 |
+
"learning_rate": 0.00013608199537122425,
|
| 1807 |
+
"loss": 0.3017,
|
| 1808 |
+
"step": 244000
|
| 1809 |
+
},
|
| 1810 |
+
{
|
| 1811 |
+
"epoch": 3.74,
|
| 1812 |
+
"learning_rate": 0.0001359431597826447,
|
| 1813 |
+
"loss": 0.3019,
|
| 1814 |
+
"step": 245000
|
| 1815 |
+
},
|
| 1816 |
+
{
|
| 1817 |
+
"epoch": 3.74,
|
| 1818 |
+
"eval_runtime": 1.0744,
|
| 1819 |
+
"eval_samples_per_second": 930.717,
|
| 1820 |
+
"eval_steps_per_second": 14.891,
|
| 1821 |
+
"step": 245000
|
| 1822 |
+
},
|
| 1823 |
+
{
|
| 1824 |
+
"epoch": 3.76,
|
| 1825 |
+
"learning_rate": 0.0001358037124086327,
|
| 1826 |
+
"loss": 0.3015,
|
| 1827 |
+
"step": 246000
|
| 1828 |
+
},
|
| 1829 |
+
{
|
| 1830 |
+
"epoch": 3.77,
|
| 1831 |
+
"learning_rate": 0.00013566365477416233,
|
| 1832 |
+
"loss": 0.3018,
|
| 1833 |
+
"step": 247000
|
| 1834 |
+
},
|
| 1835 |
+
{
|
| 1836 |
+
"epoch": 3.79,
|
| 1837 |
+
"learning_rate": 0.00013552298841088144,
|
| 1838 |
+
"loss": 0.3013,
|
| 1839 |
+
"step": 248000
|
| 1840 |
+
},
|
| 1841 |
+
{
|
| 1842 |
+
"epoch": 3.8,
|
| 1843 |
+
"learning_rate": 0.00013538171485709486,
|
| 1844 |
+
"loss": 0.3006,
|
| 1845 |
+
"step": 249000
|
| 1846 |
+
},
|
| 1847 |
+
{
|
| 1848 |
+
"epoch": 3.82,
|
| 1849 |
+
"learning_rate": 0.00013523983565774753,
|
| 1850 |
+
"loss": 0.3008,
|
| 1851 |
+
"step": 250000
|
| 1852 |
+
},
|
| 1853 |
+
{
|
| 1854 |
+
"epoch": 3.82,
|
| 1855 |
+
"eval_runtime": 1.0168,
|
| 1856 |
+
"eval_samples_per_second": 983.434,
|
| 1857 |
+
"eval_steps_per_second": 15.735,
|
| 1858 |
+
"step": 250000
|
| 1859 |
}
|
| 1860 |
],
|
| 1861 |
"max_steps": 1000000,
|
| 1862 |
"num_train_epochs": 16,
|
| 1863 |
+
"total_flos": 1.7525045545542324e+22,
|
| 1864 |
"trial_name": null,
|
| 1865 |
"trial_params": null
|
| 1866 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22078b7cadf342376e128bb1b061228e01c38cac2488c2ab30ca05231e48ad59
|
| 3 |
size 449471589
|