FormlessAI commited on
Commit
ee4ebc0
·
verified ·
1 Parent(s): 41bc65c

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0762ec721b93d1a0e10ada578c7538ccb87f010928b297b4505a645b3aec697
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01f2d5d0f7a2b7fede001e37991bd6985fc274f063f73ff62dc59d392b4e63a6
3
  size 98088784
last-checkpoint/global_step1200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fab8ffa94bdc27c1cc20ad5cc46550ded319ddd7deec8f5a4a8a5fe810936ac4
3
+ size 73939813
last-checkpoint/global_step1200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0b05b9eadac0276f908244e2a084bbb3b6806cbe8f7998440b46725b88d99b2
3
+ size 73939813
last-checkpoint/global_step1200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b84ae5befcf9368f35047b2404051cfe9aa23d0602a2b9b5e44ad1dc94e35bbf
3
+ size 73939877
last-checkpoint/global_step1200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4d9cfbca81ff0d2f09b172c91877eee8aef7d78f584e23310e2f9d6aba5d9d0
3
+ size 73939877
last-checkpoint/global_step1200/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab095ce3b82f509fc00d1719d519d97ecfb3a34cd304cff3cee56d691d7ae983
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1150
 
1
+ global_step1200
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e2c783f044e208693875b6618820b4692ab8369227ed5fcfe75de8c98cb2f5
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb8226f3cefe922b522e2875b7ca4cafd422d0b379b34caed43be50f8a6af00c
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9a009ec584589b323bfde6fb332132397a948a68665dbf47ae6b13108a76ac8
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27e5beba2802aecc2c31190f0e1445fda449914542cb3a995952912264b92bf2
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9eba47f1f3f2aaeb1ee30212c3d28966395e9b15ce04d718f220251a1b885544
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7ccf030e1a7531894174f97468eb482cc1210a67efd80cadbf1d6b45c1e05c6
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:969e35a2eee24aa5d0640e276157b14ed3586e426e68f6139c80b9bdb3012f62
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b14d6df95725c0e3824b9ffbf675c3cdedc21103310c246d38cae48315d53791
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f599b3f2fdaee9f298de483bc342667a86479cffdd08dfb05aebfb998561b471
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8dd9ccd3b73af1b44ab373f6253ca88811f20b0e9b7b73611705899de6d0dbb
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6847204566001892,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.127372933251684,
6
  "eval_steps": 50,
7
- "global_step": 1150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1802,6 +1802,84 @@
1802
  "eval_samples_per_second": 125.864,
1803
  "eval_steps_per_second": 15.741,
1804
  "step": 1150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1805
  }
1806
  ],
1807
  "logging_steps": 5,
@@ -1830,7 +1908,7 @@
1830
  "attributes": {}
1831
  }
1832
  },
1833
- "total_flos": 5.936913961881436e+17,
1834
  "train_batch_size": 2,
1835
  "trial_name": null,
1836
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6825479865074158,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.1763625229638701,
6
  "eval_steps": 50,
7
+ "global_step": 1200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1802
  "eval_samples_per_second": 125.864,
1803
  "eval_steps_per_second": 15.741,
1804
  "step": 1150
1805
+ },
1806
+ {
1807
+ "epoch": 1.1322718922229027,
1808
+ "grad_norm": 0.22210359573364258,
1809
+ "learning_rate": 9.862231492251444e-05,
1810
+ "loss": 0.6897,
1811
+ "step": 1155
1812
+ },
1813
+ {
1814
+ "epoch": 1.1371708511941212,
1815
+ "grad_norm": 0.1984894573688507,
1816
+ "learning_rate": 9.851101062895398e-05,
1817
+ "loss": 0.7213,
1818
+ "step": 1160
1819
+ },
1820
+ {
1821
+ "epoch": 1.14206981016534,
1822
+ "grad_norm": 0.2018108069896698,
1823
+ "learning_rate": 9.839926328906811e-05,
1824
+ "loss": 0.6896,
1825
+ "step": 1165
1826
+ },
1827
+ {
1828
+ "epoch": 1.1469687691365584,
1829
+ "grad_norm": 0.19112059473991394,
1830
+ "learning_rate": 9.828707405123364e-05,
1831
+ "loss": 0.7003,
1832
+ "step": 1170
1833
+ },
1834
+ {
1835
+ "epoch": 1.1518677281077772,
1836
+ "grad_norm": 0.2068580538034439,
1837
+ "learning_rate": 9.817444406836856e-05,
1838
+ "loss": 0.716,
1839
+ "step": 1175
1840
+ },
1841
+ {
1842
+ "epoch": 1.1567666870789957,
1843
+ "grad_norm": 0.2238154113292694,
1844
+ "learning_rate": 9.80613744979202e-05,
1845
+ "loss": 0.7058,
1846
+ "step": 1180
1847
+ },
1848
+ {
1849
+ "epoch": 1.1616656460502144,
1850
+ "grad_norm": 0.19843433797359467,
1851
+ "learning_rate": 9.794786650185339e-05,
1852
+ "loss": 0.6938,
1853
+ "step": 1185
1854
+ },
1855
+ {
1856
+ "epoch": 1.1665646050214329,
1857
+ "grad_norm": 0.23146703839302063,
1858
+ "learning_rate": 9.783392124663834e-05,
1859
+ "loss": 0.6892,
1860
+ "step": 1190
1861
+ },
1862
+ {
1863
+ "epoch": 1.1714635639926516,
1864
+ "grad_norm": 0.22127410769462585,
1865
+ "learning_rate": 9.77195399032389e-05,
1866
+ "loss": 0.6976,
1867
+ "step": 1195
1868
+ },
1869
+ {
1870
+ "epoch": 1.1763625229638701,
1871
+ "grad_norm": 0.20067089796066284,
1872
+ "learning_rate": 9.760472364710031e-05,
1873
+ "loss": 0.7033,
1874
+ "step": 1200
1875
+ },
1876
+ {
1877
+ "epoch": 1.1763625229638701,
1878
+ "eval_loss": 0.6825479865074158,
1879
+ "eval_runtime": 15.459,
1880
+ "eval_samples_per_second": 126.722,
1881
+ "eval_steps_per_second": 15.848,
1882
+ "step": 1200
1883
  }
1884
  ],
1885
  "logging_steps": 5,
 
1908
  "attributes": {}
1909
  }
1910
  },
1911
+ "total_flos": 6.194548673033011e+17,
1912
  "train_batch_size": 2,
1913
  "trial_name": null,
1914
  "trial_params": null