Text Classification
Transformers
PyTorch
distilbert
digital forensics
text-embeddings-inference
johannes-garstenauer committed
Commit d0f88eb · 1 Parent(s): 164a819

Training in progress, step 64080, checkpoint

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4d86e4fe2de30a69460ddbd5a942e2bdeaafdcced717f26aaf0f853b53ef615a
+ oid sha256:b86a84182ff88a7c986add22427bacc5f8a51e9b3e4bef6daf556d62b008c732
  size 133863493
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d8dcb8b35abdc0e9950d3a2e2d25f8ebb8d2ffa2dc1520ac6dbd6a23b6e065b0
+ oid sha256:15f196a437e4c6c59c2a4538f2c12c86af84e2522719c72b5cd92e246b7260d0
  size 266276525
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:36050cdd6c371ff514eeea269cdeb435f87d40ac6c2717d3000dadc8e8f763bd
+ oid sha256:c5e339ec1c96e2c63049ee0fe678a8a0a0e5482e752b4392d3980403c0937608
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:87f5e4bd8575c3017fbf0da73fe3a93ea1c9f2eea5656020bfcacccea1efc8a1
+ oid sha256:437c711e9d81dff0bcf10e9282fa03116f3762fe3ad325416b5a0d33bd998163
  size 627
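
Each of the four checkpoint files above is stored through Git LFS, so the commit only rewrites the pointer file: the spec version line, the sha256 oid of the new blob, and its byte size. A minimal Python sketch for checking a downloaded blob against its pointer; the file path is just an example from this diff, and the helper name is hypothetical:

import hashlib
import os

def verify_lfs_blob(blob_path, expected_oid, expected_size):
    """Compare a downloaded file against the oid/size recorded in its LFS pointer."""
    if os.path.getsize(blob_path) != expected_size:
        return False
    sha = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Values taken from the last-checkpoint/optimizer.pt pointer after this commit.
ok = verify_lfs_blob(
    "last-checkpoint/optimizer.pt",
    "b86a84182ff88a7c986add22427bacc5f8a51e9b3e4bef6daf556d62b008c732",
    133863493,
)
print("optimizer.pt matches its LFS pointer:", ok)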
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 2.4000337083386003,
+ "epoch": 2.7000379218809254,
  "eval_steps": 500,
- "global_step": 56960,
+ "global_step": 64080,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1739,13 +1739,229 @@
  "learning_rate": 1.4226876951923482e-07,
  "loss": 0.0107,
  "step": 56800
+ },
+ {
+ "epoch": 2.4,
+ "learning_rate": 1.4029267715838705e-07,
+ "loss": 0.0067,
+ "step": 57000
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 1.3831658479753928e-07,
+ "loss": 0.0197,
+ "step": 57200
+ },
+ {
+ "epoch": 2.42,
+ "learning_rate": 1.3634049243669152e-07,
+ "loss": 0.0094,
+ "step": 57400
+ },
+ {
+ "epoch": 2.43,
+ "learning_rate": 1.3436440007584375e-07,
+ "loss": 0.0125,
+ "step": 57600
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 1.3238830771499598e-07,
+ "loss": 0.0099,
+ "step": 57800
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 1.3041221535414824e-07,
+ "loss": 0.0128,
+ "step": 58000
+ },
+ {
+ "epoch": 2.45,
+ "learning_rate": 1.2843612299330047e-07,
+ "loss": 0.0129,
+ "step": 58200
+ },
+ {
+ "epoch": 2.46,
+ "learning_rate": 1.264600306324527e-07,
+ "loss": 0.0071,
+ "step": 58400
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 1.2448393827160493e-07,
+ "loss": 0.0184,
+ "step": 58600
+ },
+ {
+ "epoch": 2.48,
+ "learning_rate": 1.2250784591075716e-07,
+ "loss": 0.0127,
+ "step": 58800
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 1.205317535499094e-07,
+ "loss": 0.0117,
+ "step": 59000
+ },
+ {
+ "epoch": 2.49,
+ "learning_rate": 1.1855566118906164e-07,
+ "loss": 0.0133,
+ "step": 59200
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 1.1657956882821387e-07,
+ "loss": 0.0073,
+ "step": 59400
+ },
+ {
+ "epoch": 2.51,
+ "learning_rate": 1.1460347646736612e-07,
+ "loss": 0.0115,
+ "step": 59600
+ },
+ {
+ "epoch": 2.52,
+ "learning_rate": 1.1262738410651835e-07,
+ "loss": 0.0059,
+ "step": 59800
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 1.1065129174567058e-07,
+ "loss": 0.0073,
+ "step": 60000
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 1.0867519938482282e-07,
+ "loss": 0.01,
+ "step": 60200
+ },
+ {
+ "epoch": 2.54,
+ "learning_rate": 1.0669910702397505e-07,
+ "loss": 0.0071,
+ "step": 60400
+ },
+ {
+ "epoch": 2.55,
+ "learning_rate": 1.0472301466312729e-07,
+ "loss": 0.0063,
+ "step": 60600
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 1.0274692230227953e-07,
+ "loss": 0.0054,
+ "step": 60800
+ },
+ {
+ "epoch": 2.57,
+ "learning_rate": 1.0077082994143176e-07,
+ "loss": 0.0048,
+ "step": 61000
+ },
+ {
+ "epoch": 2.58,
+ "learning_rate": 9.8794737580584e-08,
+ "loss": 0.0047,
+ "step": 61200
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 9.681864521973623e-08,
+ "loss": 0.008,
+ "step": 61400
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 9.484255285888847e-08,
+ "loss": 0.0086,
+ "step": 61600
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 9.28664604980407e-08,
+ "loss": 0.008,
+ "step": 61800
+ },
+ {
+ "epoch": 2.61,
+ "learning_rate": 9.089036813719293e-08,
+ "loss": 0.0067,
+ "step": 62000
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 8.891427577634518e-08,
+ "loss": 0.006,
+ "step": 62200
+ },
+ {
+ "epoch": 2.63,
+ "learning_rate": 8.69381834154974e-08,
+ "loss": 0.0075,
+ "step": 62400
+ },
+ {
+ "epoch": 2.64,
+ "learning_rate": 8.496209105464964e-08,
+ "loss": 0.005,
+ "step": 62600
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 8.298599869380187e-08,
+ "loss": 0.0012,
+ "step": 62800
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 8.10099063329541e-08,
+ "loss": 0.0038,
+ "step": 63000
+ },
+ {
+ "epoch": 2.66,
+ "learning_rate": 7.903381397210635e-08,
+ "loss": 0.0077,
+ "step": 63200
+ },
+ {
+ "epoch": 2.67,
+ "learning_rate": 7.705772161125858e-08,
+ "loss": 0.0092,
+ "step": 63400
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 7.508162925041081e-08,
+ "loss": 0.0075,
+ "step": 63600
+ },
+ {
+ "epoch": 2.69,
+ "learning_rate": 7.310553688956304e-08,
+ "loss": 0.0047,
+ "step": 63800
+ },
+ {
+ "epoch": 2.7,
+ "learning_rate": 7.112944452871529e-08,
+ "loss": 0.0057,
+ "step": 64000
  }
  ],
  "logging_steps": 200,
  "max_steps": 71199,
  "num_train_epochs": 3,
  "save_steps": 7120,
- "total_flos": 4.8291268727755776e+17,
+ "total_flos": 5.4327866094867456e+17,
  "trial_name": null,
  "trial_params": null
  }
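
The five files touched by this commit are the core of a Hugging Face Trainer checkpoint: model weights, optimizer and LR-scheduler state, RNG state, and the trainer bookkeeping. The jump in global_step from 56960 to 64080 is exactly one save_steps interval of 7120, and 64080 / (71199 / 3) ≈ 2.70004, matching the recorded epoch. A minimal sketch of resuming training from this directory; the base model name, dataset, and label count below are placeholders, not taken from this repo:

# Hypothetical resume sketch; model name, dataset and num_labels are assumptions.
from datasets import load_dataset
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)

model_name = "distilbert-base-uncased"  # assumed base checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Placeholder corpus, only to make the example self-contained.
dataset = load_dataset("imdb", split="train[:1%]")
dataset = dataset.map(
    lambda batch: tokenizer(batch["text"], truncation=True,
                            padding="max_length", max_length=128),
    batched=True,
)

args = TrainingArguments(
    output_dir="out",
    num_train_epochs=3,   # matches "num_train_epochs": 3
    save_steps=7120,      # matches "save_steps": 7120
    logging_steps=200,    # matches "logging_steps": 200
)
trainer = Trainer(model=model, args=args, train_dataset=dataset)

# Restores pytorch_model.bin, optimizer.pt, scheduler.pt, rng_state.pth and
# trainer_state.json, then continues training from global_step 64080.
trainer.train(resume_from_checkpoint="last-checkpoint")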