Training in progress, epoch 1, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step1200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step1200/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -4
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 98088784
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01f2d5d0f7a2b7fede001e37991bd6985fc274f063f73ff62dc59d392b4e63a6
|
| 3 |
size 98088784
|
last-checkpoint/global_step1200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fab8ffa94bdc27c1cc20ad5cc46550ded319ddd7deec8f5a4a8a5fe810936ac4
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0b05b9eadac0276f908244e2a084bbb3b6806cbe8f7998440b46725b88d99b2
|
| 3 |
+
size 73939813
|
last-checkpoint/global_step1200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b84ae5befcf9368f35047b2404051cfe9aa23d0602a2b9b5e44ad1dc94e35bbf
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4d9cfbca81ff0d2f09b172c91877eee8aef7d78f584e23310e2f9d6aba5d9d0
|
| 3 |
+
size 73939877
|
last-checkpoint/global_step1200/mp_rank_00_model_states.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab095ce3b82f509fc00d1719d519d97ecfb3a34cd304cff3cee56d691d7ae983
|
| 3 |
+
size 564993061
|
last-checkpoint/latest
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
|
|
|
|
| 1 |
+
global_step1200
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb8226f3cefe922b522e2875b7ca4cafd422d0b379b34caed43be50f8a6af00c
|
| 3 |
size 15429
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27e5beba2802aecc2c31190f0e1445fda449914542cb3a995952912264b92bf2
|
| 3 |
size 15429
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7ccf030e1a7531894174f97468eb482cc1210a67efd80cadbf1d6b45c1e05c6
|
| 3 |
size 15429
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15429
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b14d6df95725c0e3824b9ffbf675c3cdedc21103310c246d38cae48315d53791
|
| 3 |
size 15429
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8dd9ccd3b73af1b44ab373f6253ca88811f20b0e9b7b73611705899de6d0dbb
|
| 3 |
size 1401
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 50,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1802,6 +1802,84 @@
|
|
| 1802 |
"eval_samples_per_second": 125.864,
|
| 1803 |
"eval_steps_per_second": 15.741,
|
| 1804 |
"step": 1150
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1805 |
}
|
| 1806 |
],
|
| 1807 |
"logging_steps": 5,
|
|
@@ -1830,7 +1908,7 @@
|
|
| 1830 |
"attributes": {}
|
| 1831 |
}
|
| 1832 |
},
|
| 1833 |
-
"total_flos":
|
| 1834 |
"train_batch_size": 2,
|
| 1835 |
"trial_name": null,
|
| 1836 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": null,
|
| 3 |
+
"best_metric": 0.6825479865074158,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.1763625229638701,
|
| 6 |
"eval_steps": 50,
|
| 7 |
+
"global_step": 1200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1802 |
"eval_samples_per_second": 125.864,
|
| 1803 |
"eval_steps_per_second": 15.741,
|
| 1804 |
"step": 1150
|
| 1805 |
+
},
|
| 1806 |
+
{
|
| 1807 |
+
"epoch": 1.1322718922229027,
|
| 1808 |
+
"grad_norm": 0.22210359573364258,
|
| 1809 |
+
"learning_rate": 9.862231492251444e-05,
|
| 1810 |
+
"loss": 0.6897,
|
| 1811 |
+
"step": 1155
|
| 1812 |
+
},
|
| 1813 |
+
{
|
| 1814 |
+
"epoch": 1.1371708511941212,
|
| 1815 |
+
"grad_norm": 0.1984894573688507,
|
| 1816 |
+
"learning_rate": 9.851101062895398e-05,
|
| 1817 |
+
"loss": 0.7213,
|
| 1818 |
+
"step": 1160
|
| 1819 |
+
},
|
| 1820 |
+
{
|
| 1821 |
+
"epoch": 1.14206981016534,
|
| 1822 |
+
"grad_norm": 0.2018108069896698,
|
| 1823 |
+
"learning_rate": 9.839926328906811e-05,
|
| 1824 |
+
"loss": 0.6896,
|
| 1825 |
+
"step": 1165
|
| 1826 |
+
},
|
| 1827 |
+
{
|
| 1828 |
+
"epoch": 1.1469687691365584,
|
| 1829 |
+
"grad_norm": 0.19112059473991394,
|
| 1830 |
+
"learning_rate": 9.828707405123364e-05,
|
| 1831 |
+
"loss": 0.7003,
|
| 1832 |
+
"step": 1170
|
| 1833 |
+
},
|
| 1834 |
+
{
|
| 1835 |
+
"epoch": 1.1518677281077772,
|
| 1836 |
+
"grad_norm": 0.2068580538034439,
|
| 1837 |
+
"learning_rate": 9.817444406836856e-05,
|
| 1838 |
+
"loss": 0.716,
|
| 1839 |
+
"step": 1175
|
| 1840 |
+
},
|
| 1841 |
+
{
|
| 1842 |
+
"epoch": 1.1567666870789957,
|
| 1843 |
+
"grad_norm": 0.2238154113292694,
|
| 1844 |
+
"learning_rate": 9.80613744979202e-05,
|
| 1845 |
+
"loss": 0.7058,
|
| 1846 |
+
"step": 1180
|
| 1847 |
+
},
|
| 1848 |
+
{
|
| 1849 |
+
"epoch": 1.1616656460502144,
|
| 1850 |
+
"grad_norm": 0.19843433797359467,
|
| 1851 |
+
"learning_rate": 9.794786650185339e-05,
|
| 1852 |
+
"loss": 0.6938,
|
| 1853 |
+
"step": 1185
|
| 1854 |
+
},
|
| 1855 |
+
{
|
| 1856 |
+
"epoch": 1.1665646050214329,
|
| 1857 |
+
"grad_norm": 0.23146703839302063,
|
| 1858 |
+
"learning_rate": 9.783392124663834e-05,
|
| 1859 |
+
"loss": 0.6892,
|
| 1860 |
+
"step": 1190
|
| 1861 |
+
},
|
| 1862 |
+
{
|
| 1863 |
+
"epoch": 1.1714635639926516,
|
| 1864 |
+
"grad_norm": 0.22127410769462585,
|
| 1865 |
+
"learning_rate": 9.77195399032389e-05,
|
| 1866 |
+
"loss": 0.6976,
|
| 1867 |
+
"step": 1195
|
| 1868 |
+
},
|
| 1869 |
+
{
|
| 1870 |
+
"epoch": 1.1763625229638701,
|
| 1871 |
+
"grad_norm": 0.20067089796066284,
|
| 1872 |
+
"learning_rate": 9.760472364710031e-05,
|
| 1873 |
+
"loss": 0.7033,
|
| 1874 |
+
"step": 1200
|
| 1875 |
+
},
|
| 1876 |
+
{
|
| 1877 |
+
"epoch": 1.1763625229638701,
|
| 1878 |
+
"eval_loss": 0.6825479865074158,
|
| 1879 |
+
"eval_runtime": 15.459,
|
| 1880 |
+
"eval_samples_per_second": 126.722,
|
| 1881 |
+
"eval_steps_per_second": 15.848,
|
| 1882 |
+
"step": 1200
|
| 1883 |
}
|
| 1884 |
],
|
| 1885 |
"logging_steps": 5,
|
|
|
|
| 1908 |
"attributes": {}
|
| 1909 |
}
|
| 1910 |
},
|
| 1911 |
+
"total_flos": 6.194548673033011e+17,
|
| 1912 |
"train_batch_size": 2,
|
| 1913 |
"trial_name": null,
|
| 1914 |
"trial_params": null
|