Nadav committed on
Commit
6c4551a
·
1 Parent(s): 4800f91

Training in progress, step 95000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14d8d23556dde86498d9bec72b8d91249dd67487020ee43ffaba2d4a9670c816
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87faff5dd87328fcefec427ea0cb702408ad590c1646c0f7d82e8de25452a283
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:280e3b8af6c535b6231490cffafeefbe7d58fc21536c2088588347c8f50e536d
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0479c3f6649af96b0cacdd70e8babe32246fd880992e4fd5742192a6d7c00e5d
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac83572cac812d05eb9798aaa4eeb4ec6a277964f63a5c6414ba65bbee7b2879
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ac2097dd35c516db233db35554b7dda10d7d27ccd52d7408994cce86d7c83aa
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:faa0eac9b0ca6f76b1a38ea18d17cb1c00d76f583a4c4c949f12c073b7c172f0
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74798da0c97f6f81c79c3a97ed18dc37829dc36f2e8346e520721378fc2c4935
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb667bcf48468e7c33d6bd49b2f45e30dd3d0f2a562e310780db60a92c6b3333
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef3a8846658935c38489fb3bdfd532e16ae98f8d1a9e1c24218ae3f20a78e624
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.655665192242259,
5
- "global_step": 90000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1806,11 +1806,111 @@
1806
  "eval_samples_per_second": 30.652,
1807
  "eval_steps_per_second": 0.981,
1808
  "step": 90000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1809
  }
1810
  ],
1811
  "max_steps": 1000000,
1812
  "num_train_epochs": 86,
1813
- "total_flos": 4.140183627812062e+21,
1814
  "trial_name": null,
1815
  "trial_params": null
1816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.080979925144607,
5
+ "global_step": 95000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1806
  "eval_samples_per_second": 30.652,
1807
  "eval_steps_per_second": 0.981,
1808
  "step": 90000
1809
+ },
1810
+ {
1811
+ "epoch": 7.7,
1812
+ "learning_rate": 9.999999999999999e-06,
1813
+ "loss": 0.4017,
1814
+ "step": 90500
1815
+ },
1816
+ {
1817
+ "epoch": 7.74,
1818
+ "learning_rate": 9.999999999999999e-06,
1819
+ "loss": 0.4016,
1820
+ "step": 91000
1821
+ },
1822
+ {
1823
+ "epoch": 7.74,
1824
+ "eval_loss": 0.3734327256679535,
1825
+ "eval_runtime": 17.5409,
1826
+ "eval_samples_per_second": 28.505,
1827
+ "eval_steps_per_second": 0.912,
1828
+ "step": 91000
1829
+ },
1830
+ {
1831
+ "epoch": 7.78,
1832
+ "learning_rate": 9.999999999999999e-06,
1833
+ "loss": 0.3998,
1834
+ "step": 91500
1835
+ },
1836
+ {
1837
+ "epoch": 7.83,
1838
+ "learning_rate": 9.999999999999999e-06,
1839
+ "loss": 0.4006,
1840
+ "step": 92000
1841
+ },
1842
+ {
1843
+ "epoch": 7.83,
1844
+ "eval_loss": 0.3747243583202362,
1845
+ "eval_runtime": 17.4755,
1846
+ "eval_samples_per_second": 28.611,
1847
+ "eval_steps_per_second": 0.916,
1848
+ "step": 92000
1849
+ },
1850
+ {
1851
+ "epoch": 7.87,
1852
+ "learning_rate": 9.999999999999999e-06,
1853
+ "loss": 0.4013,
1854
+ "step": 92500
1855
+ },
1856
+ {
1857
+ "epoch": 7.91,
1858
+ "learning_rate": 9.999999999999999e-06,
1859
+ "loss": 0.4008,
1860
+ "step": 93000
1861
+ },
1862
+ {
1863
+ "epoch": 7.91,
1864
+ "eval_loss": 0.37303251028060913,
1865
+ "eval_runtime": 17.0856,
1866
+ "eval_samples_per_second": 29.264,
1867
+ "eval_steps_per_second": 0.936,
1868
+ "step": 93000
1869
+ },
1870
+ {
1871
+ "epoch": 7.95,
1872
+ "learning_rate": 9.999999999999999e-06,
1873
+ "loss": 0.4008,
1874
+ "step": 93500
1875
+ },
1876
+ {
1877
+ "epoch": 8.0,
1878
+ "learning_rate": 9.999999999999999e-06,
1879
+ "loss": 0.402,
1880
+ "step": 94000
1881
+ },
1882
+ {
1883
+ "epoch": 8.0,
1884
+ "eval_loss": 0.37281692028045654,
1885
+ "eval_runtime": 17.9894,
1886
+ "eval_samples_per_second": 27.794,
1887
+ "eval_steps_per_second": 0.889,
1888
+ "step": 94000
1889
+ },
1890
+ {
1891
+ "epoch": 8.04,
1892
+ "learning_rate": 9.999999999999999e-06,
1893
+ "loss": 0.4005,
1894
+ "step": 94500
1895
+ },
1896
+ {
1897
+ "epoch": 8.08,
1898
+ "learning_rate": 9.999999999999999e-06,
1899
+ "loss": 0.4008,
1900
+ "step": 95000
1901
+ },
1902
+ {
1903
+ "epoch": 8.08,
1904
+ "eval_loss": 0.37092164158821106,
1905
+ "eval_runtime": 17.2285,
1906
+ "eval_samples_per_second": 29.022,
1907
+ "eval_steps_per_second": 0.929,
1908
+ "step": 95000
1909
  }
1910
  ],
1911
  "max_steps": 1000000,
1912
  "num_train_epochs": 86,
1913
+ "total_flos": 4.3701839562951146e+21,
1914
  "trial_name": null,
1915
  "trial_params": null
1916
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:280e3b8af6c535b6231490cffafeefbe7d58fc21536c2088588347c8f50e536d
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0479c3f6649af96b0cacdd70e8babe32246fd880992e4fd5742192a6d7c00e5d
3
  size 449471589