azherali commited on
Commit
ed0eaec
·
verified ·
1 Parent(s): d1fe10f

Training in progress, step 28000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63d8709297afd894af99a163299f58c25d0ecf4c1109fb33c6ca599b695c1163
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:975b3db65f8ee4e6188b6e86efc13e7b0764db49eec13babdd30b47f2f1608b9
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c62a5ee613bedcbe24f1782d277ddaa2c1353f7999a9b5d9c7e67060088462a
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b11ba4eb38c9365e25a19aa1849379c94eb8123f5f95fdedf2db233422df02e9
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:732f3f8948a77bf9ef00b9a71ed21980be5bb4dfbe32b4f3c8a750669102d82c
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c9c0d47048189414b4e2674d62285eb69167766265a633b317b379e2de5cad7
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:809d9e108b4a097a87eb7ca47b7d07656e83c40d06f4eb5e0ec99a3edb4019d0
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a918540d48e856372c74bc697e7d28a86a7fdf24471d9a4adf2ffeeae4217207
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23fadf64bbbf065d62df9416650b6887ec1c2b6763b291ed80fa5ee507341cf0
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4ab9378d6970094f99c33aa1f077f43da045d25f84f4484908cc97d7399e61f
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 20000,
3
- "best_metric": 0.9846426496660109,
4
- "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-20000",
5
- "epoch": 0.768,
6
  "eval_steps": 4000,
7
- "global_step": 24000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1760,6 +1760,298 @@
1760
  "eval_samples_per_second": 129.122,
1761
  "eval_steps_per_second": 8.07,
1762
  "step": 24000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1763
  }
1764
  ],
1765
  "logging_steps": 100,
@@ -1774,7 +2066,7 @@
1774
  "early_stopping_threshold": 0.0
1775
  },
1776
  "attributes": {
1777
- "early_stopping_patience_counter": 1
1778
  }
1779
  },
1780
  "TrainerControl": {
@@ -1788,7 +2080,7 @@
1788
  "attributes": {}
1789
  }
1790
  },
1791
- "total_flos": 1.0191521072952346e+17,
1792
  "train_batch_size": 16,
1793
  "trial_name": null,
1794
  "trial_params": null
 
1
  {
2
+ "best_global_step": 28000,
3
+ "best_metric": 0.9878109998616666,
4
+ "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-28000",
5
+ "epoch": 0.896,
6
  "eval_steps": 4000,
7
+ "global_step": 28000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1760
  "eval_samples_per_second": 129.122,
1761
  "eval_steps_per_second": 8.07,
1762
  "step": 24000
1763
+ },
1764
+ {
1765
+ "epoch": 0.7712,
1766
+ "grad_norm": 0.025170153006911278,
1767
+ "learning_rate": 1.6969630818619586e-05,
1768
+ "loss": 0.0476,
1769
+ "step": 24100
1770
+ },
1771
+ {
1772
+ "epoch": 0.7744,
1773
+ "grad_norm": 0.005416714586317539,
1774
+ "learning_rate": 1.6956789727126807e-05,
1775
+ "loss": 0.0625,
1776
+ "step": 24200
1777
+ },
1778
+ {
1779
+ "epoch": 0.7776,
1780
+ "grad_norm": 35.962677001953125,
1781
+ "learning_rate": 1.6943948635634032e-05,
1782
+ "loss": 0.0492,
1783
+ "step": 24300
1784
+ },
1785
+ {
1786
+ "epoch": 0.7808,
1787
+ "grad_norm": 0.019019972532987595,
1788
+ "learning_rate": 1.6931107544141254e-05,
1789
+ "loss": 0.0567,
1790
+ "step": 24400
1791
+ },
1792
+ {
1793
+ "epoch": 0.784,
1794
+ "grad_norm": 4.536252021789551,
1795
+ "learning_rate": 1.691826645264848e-05,
1796
+ "loss": 0.0564,
1797
+ "step": 24500
1798
+ },
1799
+ {
1800
+ "epoch": 0.7872,
1801
+ "grad_norm": 20.884357452392578,
1802
+ "learning_rate": 1.69054253611557e-05,
1803
+ "loss": 0.059,
1804
+ "step": 24600
1805
+ },
1806
+ {
1807
+ "epoch": 0.7904,
1808
+ "grad_norm": 4.154411792755127,
1809
+ "learning_rate": 1.689258426966292e-05,
1810
+ "loss": 0.0567,
1811
+ "step": 24700
1812
+ },
1813
+ {
1814
+ "epoch": 0.7936,
1815
+ "grad_norm": 0.23815973103046417,
1816
+ "learning_rate": 1.6879743178170146e-05,
1817
+ "loss": 0.0593,
1818
+ "step": 24800
1819
+ },
1820
+ {
1821
+ "epoch": 0.7968,
1822
+ "grad_norm": 0.027695050463080406,
1823
+ "learning_rate": 1.6866902086677368e-05,
1824
+ "loss": 0.0545,
1825
+ "step": 24900
1826
+ },
1827
+ {
1828
+ "epoch": 0.8,
1829
+ "grad_norm": 0.052229318767786026,
1830
+ "learning_rate": 1.6854060995184592e-05,
1831
+ "loss": 0.0584,
1832
+ "step": 25000
1833
+ },
1834
+ {
1835
+ "epoch": 0.8032,
1836
+ "grad_norm": 0.007615574169903994,
1837
+ "learning_rate": 1.6841219903691814e-05,
1838
+ "loss": 0.0492,
1839
+ "step": 25100
1840
+ },
1841
+ {
1842
+ "epoch": 0.8064,
1843
+ "grad_norm": 0.34495148062705994,
1844
+ "learning_rate": 1.682837881219904e-05,
1845
+ "loss": 0.0512,
1846
+ "step": 25200
1847
+ },
1848
+ {
1849
+ "epoch": 0.8096,
1850
+ "grad_norm": 0.4945124685764313,
1851
+ "learning_rate": 1.681553772070626e-05,
1852
+ "loss": 0.0659,
1853
+ "step": 25300
1854
+ },
1855
+ {
1856
+ "epoch": 0.8128,
1857
+ "grad_norm": 3.508509397506714,
1858
+ "learning_rate": 1.6802696629213485e-05,
1859
+ "loss": 0.0431,
1860
+ "step": 25400
1861
+ },
1862
+ {
1863
+ "epoch": 0.816,
1864
+ "grad_norm": 9.751060485839844,
1865
+ "learning_rate": 1.6789855537720706e-05,
1866
+ "loss": 0.0609,
1867
+ "step": 25500
1868
+ },
1869
+ {
1870
+ "epoch": 0.8192,
1871
+ "grad_norm": 13.404821395874023,
1872
+ "learning_rate": 1.677701444622793e-05,
1873
+ "loss": 0.0451,
1874
+ "step": 25600
1875
+ },
1876
+ {
1877
+ "epoch": 0.8224,
1878
+ "grad_norm": 0.09565193206071854,
1879
+ "learning_rate": 1.6764173354735153e-05,
1880
+ "loss": 0.0545,
1881
+ "step": 25700
1882
+ },
1883
+ {
1884
+ "epoch": 0.8256,
1885
+ "grad_norm": 0.010966203175485134,
1886
+ "learning_rate": 1.6751332263242377e-05,
1887
+ "loss": 0.0703,
1888
+ "step": 25800
1889
+ },
1890
+ {
1891
+ "epoch": 0.8288,
1892
+ "grad_norm": 0.18196341395378113,
1893
+ "learning_rate": 1.67384911717496e-05,
1894
+ "loss": 0.0392,
1895
+ "step": 25900
1896
+ },
1897
+ {
1898
+ "epoch": 0.832,
1899
+ "grad_norm": 15.195699691772461,
1900
+ "learning_rate": 1.6725650080256824e-05,
1901
+ "loss": 0.0429,
1902
+ "step": 26000
1903
+ },
1904
+ {
1905
+ "epoch": 0.8352,
1906
+ "grad_norm": 0.2465728521347046,
1907
+ "learning_rate": 1.6712808988764045e-05,
1908
+ "loss": 0.0546,
1909
+ "step": 26100
1910
+ },
1911
+ {
1912
+ "epoch": 0.8384,
1913
+ "grad_norm": 15.004085540771484,
1914
+ "learning_rate": 1.669996789727127e-05,
1915
+ "loss": 0.0716,
1916
+ "step": 26200
1917
+ },
1918
+ {
1919
+ "epoch": 0.8416,
1920
+ "grad_norm": 0.17839568853378296,
1921
+ "learning_rate": 1.668712680577849e-05,
1922
+ "loss": 0.0413,
1923
+ "step": 26300
1924
+ },
1925
+ {
1926
+ "epoch": 0.8448,
1927
+ "grad_norm": 30.638254165649414,
1928
+ "learning_rate": 1.6674285714285716e-05,
1929
+ "loss": 0.0391,
1930
+ "step": 26400
1931
+ },
1932
+ {
1933
+ "epoch": 0.848,
1934
+ "grad_norm": 8.340238571166992,
1935
+ "learning_rate": 1.6661444622792938e-05,
1936
+ "loss": 0.0579,
1937
+ "step": 26500
1938
+ },
1939
+ {
1940
+ "epoch": 0.8512,
1941
+ "grad_norm": 0.026733344420790672,
1942
+ "learning_rate": 1.6648603531300163e-05,
1943
+ "loss": 0.0647,
1944
+ "step": 26600
1945
+ },
1946
+ {
1947
+ "epoch": 0.8544,
1948
+ "grad_norm": 0.013905318453907967,
1949
+ "learning_rate": 1.6635762439807387e-05,
1950
+ "loss": 0.0258,
1951
+ "step": 26700
1952
+ },
1953
+ {
1954
+ "epoch": 0.8576,
1955
+ "grad_norm": 0.011157176457345486,
1956
+ "learning_rate": 1.662292134831461e-05,
1957
+ "loss": 0.0497,
1958
+ "step": 26800
1959
+ },
1960
+ {
1961
+ "epoch": 0.8608,
1962
+ "grad_norm": 0.029731517657637596,
1963
+ "learning_rate": 1.6610080256821834e-05,
1964
+ "loss": 0.0567,
1965
+ "step": 26900
1966
+ },
1967
+ {
1968
+ "epoch": 0.864,
1969
+ "grad_norm": 0.3443889319896698,
1970
+ "learning_rate": 1.6597239165329055e-05,
1971
+ "loss": 0.0533,
1972
+ "step": 27000
1973
+ },
1974
+ {
1975
+ "epoch": 0.8672,
1976
+ "grad_norm": 0.5098379254341125,
1977
+ "learning_rate": 1.6584398073836277e-05,
1978
+ "loss": 0.0532,
1979
+ "step": 27100
1980
+ },
1981
+ {
1982
+ "epoch": 0.8704,
1983
+ "grad_norm": 0.045412395149469376,
1984
+ "learning_rate": 1.6571556982343498e-05,
1985
+ "loss": 0.0533,
1986
+ "step": 27200
1987
+ },
1988
+ {
1989
+ "epoch": 0.8736,
1990
+ "grad_norm": 3.6077659130096436,
1991
+ "learning_rate": 1.6558715890850723e-05,
1992
+ "loss": 0.0432,
1993
+ "step": 27300
1994
+ },
1995
+ {
1996
+ "epoch": 0.8768,
1997
+ "grad_norm": 27.717470169067383,
1998
+ "learning_rate": 1.6545874799357948e-05,
1999
+ "loss": 0.0599,
2000
+ "step": 27400
2001
+ },
2002
+ {
2003
+ "epoch": 0.88,
2004
+ "grad_norm": 8.342029571533203,
2005
+ "learning_rate": 1.653303370786517e-05,
2006
+ "loss": 0.066,
2007
+ "step": 27500
2008
+ },
2009
+ {
2010
+ "epoch": 0.8832,
2011
+ "grad_norm": 0.09634260088205338,
2012
+ "learning_rate": 1.6520192616372394e-05,
2013
+ "loss": 0.0516,
2014
+ "step": 27600
2015
+ },
2016
+ {
2017
+ "epoch": 0.8864,
2018
+ "grad_norm": 0.13491930067539215,
2019
+ "learning_rate": 1.6507351524879615e-05,
2020
+ "loss": 0.0539,
2021
+ "step": 27700
2022
+ },
2023
+ {
2024
+ "epoch": 0.8896,
2025
+ "grad_norm": 0.23464186489582062,
2026
+ "learning_rate": 1.649451043338684e-05,
2027
+ "loss": 0.0415,
2028
+ "step": 27800
2029
+ },
2030
+ {
2031
+ "epoch": 0.8928,
2032
+ "grad_norm": 0.46864306926727295,
2033
+ "learning_rate": 1.648166934189406e-05,
2034
+ "loss": 0.0535,
2035
+ "step": 27900
2036
+ },
2037
+ {
2038
+ "epoch": 0.896,
2039
+ "grad_norm": 6.844827175140381,
2040
+ "learning_rate": 1.6468828250401286e-05,
2041
+ "loss": 0.064,
2042
+ "step": 28000
2043
+ },
2044
+ {
2045
+ "epoch": 0.896,
2046
+ "eval_accuracy": 0.98781,
2047
+ "eval_f1": 0.9878109998616666,
2048
+ "eval_loss": 0.04953546077013016,
2049
+ "eval_precision": 0.9878187731391986,
2050
+ "eval_recall": 0.98781,
2051
+ "eval_runtime": 778.6432,
2052
+ "eval_samples_per_second": 128.429,
2053
+ "eval_steps_per_second": 8.027,
2054
+ "step": 28000
2055
  }
2056
  ],
2057
  "logging_steps": 100,
 
2066
  "early_stopping_threshold": 0.0
2067
  },
2068
  "attributes": {
2069
+ "early_stopping_patience_counter": 0
2070
  }
2071
  },
2072
  "TrainerControl": {
 
2080
  "attributes": {}
2081
  }
2082
  },
2083
+ "total_flos": 1.1889930140587046e+17,
2084
  "train_batch_size": 16,
2085
  "trial_name": null,
2086
  "trial_params": null