CocoRoF commited on
Commit
8a6d1cb
·
verified ·
1 Parent(s): 62f19e5

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d26418e85327bf4072ba76aa01dde18a1716eaa86660c26fb299ceaf6d71c5cd
3
  size 737580392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec89bf2039662963370b7b120f954195514e1fceebb4b4ea6003a3e21329206
3
  size 737580392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6df5d87deb87ff09bd2a9c94e587bd25dd1604f3de6ca09e11fd8c4b22cb9b3a
3
  size 1475248442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17b4021c7c06c88903f5c45803a9ae6b47edbd2a52a39dfd2ae2ba28dcdce07c
3
  size 1475248442
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05888e792c6fd0be576ebc92f377eec054b707510467523fa4c87d94e18c0540
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e26a12cbe9066670dcd780a67f036cdb9a7df96f07de60a4ccff4560738ea392
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1715089034676662,
5
  "eval_steps": 2.0,
6
- "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1757,6 +1757,356 @@
1757
  "learning_rate": 2.8535613870665418e-05,
1758
  "loss": 0.2286,
1759
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1760
  }
1761
  ],
1762
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4058106841611997,
5
  "eval_steps": 2.0,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1757
  "learning_rate": 2.8535613870665418e-05,
1758
  "loss": 0.2286,
1759
  "step": 2500
1760
+ },
1761
+ {
1762
+ "epoch": 1.176194939081537,
1763
+ "grad_norm": 1.6267486810684204,
1764
+ "learning_rate": 2.852975632614808e-05,
1765
+ "loss": 0.209,
1766
+ "step": 2510
1767
+ },
1768
+ {
1769
+ "epoch": 1.1808809746954076,
1770
+ "grad_norm": 1.575627326965332,
1771
+ "learning_rate": 2.852389878163074e-05,
1772
+ "loss": 0.2097,
1773
+ "step": 2520
1774
+ },
1775
+ {
1776
+ "epoch": 1.1855670103092784,
1777
+ "grad_norm": 1.7547259330749512,
1778
+ "learning_rate": 2.85180412371134e-05,
1779
+ "loss": 0.2064,
1780
+ "step": 2530
1781
+ },
1782
+ {
1783
+ "epoch": 1.190253045923149,
1784
+ "grad_norm": 1.4216328859329224,
1785
+ "learning_rate": 2.8512183692596064e-05,
1786
+ "loss": 0.1916,
1787
+ "step": 2540
1788
+ },
1789
+ {
1790
+ "epoch": 1.1949390815370198,
1791
+ "grad_norm": 1.8073385953903198,
1792
+ "learning_rate": 2.8506326148078726e-05,
1793
+ "loss": 0.209,
1794
+ "step": 2550
1795
+ },
1796
+ {
1797
+ "epoch": 1.1996251171508903,
1798
+ "grad_norm": 2.0226638317108154,
1799
+ "learning_rate": 2.850046860356139e-05,
1800
+ "loss": 0.2173,
1801
+ "step": 2560
1802
+ },
1803
+ {
1804
+ "epoch": 1.204311152764761,
1805
+ "grad_norm": 1.981974720954895,
1806
+ "learning_rate": 2.8494611059044048e-05,
1807
+ "loss": 0.2287,
1808
+ "step": 2570
1809
+ },
1810
+ {
1811
+ "epoch": 1.2089971883786317,
1812
+ "grad_norm": 1.7734295129776,
1813
+ "learning_rate": 2.8488753514526713e-05,
1814
+ "loss": 0.1927,
1815
+ "step": 2580
1816
+ },
1817
+ {
1818
+ "epoch": 1.2136832239925024,
1819
+ "grad_norm": 1.7739965915679932,
1820
+ "learning_rate": 2.8482895970009372e-05,
1821
+ "loss": 0.2214,
1822
+ "step": 2590
1823
+ },
1824
+ {
1825
+ "epoch": 1.218369259606373,
1826
+ "grad_norm": 2.006999969482422,
1827
+ "learning_rate": 2.8477038425492035e-05,
1828
+ "loss": 0.2059,
1829
+ "step": 2600
1830
+ },
1831
+ {
1832
+ "epoch": 1.2230552952202436,
1833
+ "grad_norm": 2.000728130340576,
1834
+ "learning_rate": 2.8471180880974697e-05,
1835
+ "loss": 0.2521,
1836
+ "step": 2610
1837
+ },
1838
+ {
1839
+ "epoch": 1.2277413308341143,
1840
+ "grad_norm": 1.466030478477478,
1841
+ "learning_rate": 2.846532333645736e-05,
1842
+ "loss": 0.2231,
1843
+ "step": 2620
1844
+ },
1845
+ {
1846
+ "epoch": 1.232427366447985,
1847
+ "grad_norm": 1.846488356590271,
1848
+ "learning_rate": 2.845946579194002e-05,
1849
+ "loss": 0.2119,
1850
+ "step": 2630
1851
+ },
1852
+ {
1853
+ "epoch": 1.2371134020618557,
1854
+ "grad_norm": 1.8259059190750122,
1855
+ "learning_rate": 2.845360824742268e-05,
1856
+ "loss": 0.2614,
1857
+ "step": 2640
1858
+ },
1859
+ {
1860
+ "epoch": 1.2417994376757264,
1861
+ "grad_norm": 1.2573256492614746,
1862
+ "learning_rate": 2.8447750702905343e-05,
1863
+ "loss": 0.2017,
1864
+ "step": 2650
1865
+ },
1866
+ {
1867
+ "epoch": 1.246485473289597,
1868
+ "grad_norm": 1.8557478189468384,
1869
+ "learning_rate": 2.8441893158388006e-05,
1870
+ "loss": 0.2138,
1871
+ "step": 2660
1872
+ },
1873
+ {
1874
+ "epoch": 1.2511715089034676,
1875
+ "grad_norm": 1.8336118459701538,
1876
+ "learning_rate": 2.8436035613870665e-05,
1877
+ "loss": 0.2029,
1878
+ "step": 2670
1879
+ },
1880
+ {
1881
+ "epoch": 1.2558575445173383,
1882
+ "grad_norm": 1.7261340618133545,
1883
+ "learning_rate": 2.8430178069353327e-05,
1884
+ "loss": 0.2217,
1885
+ "step": 2680
1886
+ },
1887
+ {
1888
+ "epoch": 1.260543580131209,
1889
+ "grad_norm": 1.7685104608535767,
1890
+ "learning_rate": 2.842432052483599e-05,
1891
+ "loss": 0.2102,
1892
+ "step": 2690
1893
+ },
1894
+ {
1895
+ "epoch": 1.2652296157450795,
1896
+ "grad_norm": 1.707825779914856,
1897
+ "learning_rate": 2.8418462980318652e-05,
1898
+ "loss": 0.24,
1899
+ "step": 2700
1900
+ },
1901
+ {
1902
+ "epoch": 1.2699156513589505,
1903
+ "grad_norm": 1.5372973680496216,
1904
+ "learning_rate": 2.841260543580131e-05,
1905
+ "loss": 0.2111,
1906
+ "step": 2710
1907
+ },
1908
+ {
1909
+ "epoch": 1.274601686972821,
1910
+ "grad_norm": 1.1972296237945557,
1911
+ "learning_rate": 2.8406747891283973e-05,
1912
+ "loss": 0.1747,
1913
+ "step": 2720
1914
+ },
1915
+ {
1916
+ "epoch": 1.2792877225866917,
1917
+ "grad_norm": 1.636772632598877,
1918
+ "learning_rate": 2.8400890346766636e-05,
1919
+ "loss": 0.2217,
1920
+ "step": 2730
1921
+ },
1922
+ {
1923
+ "epoch": 1.2839737582005624,
1924
+ "grad_norm": 1.6649293899536133,
1925
+ "learning_rate": 2.8395032802249298e-05,
1926
+ "loss": 0.2181,
1927
+ "step": 2740
1928
+ },
1929
+ {
1930
+ "epoch": 1.2886597938144329,
1931
+ "grad_norm": 2.4171957969665527,
1932
+ "learning_rate": 2.8389175257731957e-05,
1933
+ "loss": 0.2494,
1934
+ "step": 2750
1935
+ },
1936
+ {
1937
+ "epoch": 1.2933458294283038,
1938
+ "grad_norm": 1.6188627481460571,
1939
+ "learning_rate": 2.8383317713214623e-05,
1940
+ "loss": 0.2465,
1941
+ "step": 2760
1942
+ },
1943
+ {
1944
+ "epoch": 1.2980318650421743,
1945
+ "grad_norm": 1.4358820915222168,
1946
+ "learning_rate": 2.8377460168697282e-05,
1947
+ "loss": 0.1961,
1948
+ "step": 2770
1949
+ },
1950
+ {
1951
+ "epoch": 1.302717900656045,
1952
+ "grad_norm": 1.7824815511703491,
1953
+ "learning_rate": 2.8371602624179944e-05,
1954
+ "loss": 0.2143,
1955
+ "step": 2780
1956
+ },
1957
+ {
1958
+ "epoch": 1.3074039362699157,
1959
+ "grad_norm": 1.7759006023406982,
1960
+ "learning_rate": 2.8365745079662607e-05,
1961
+ "loss": 0.203,
1962
+ "step": 2790
1963
+ },
1964
+ {
1965
+ "epoch": 1.3120899718837864,
1966
+ "grad_norm": 1.5349335670471191,
1967
+ "learning_rate": 2.835988753514527e-05,
1968
+ "loss": 0.2215,
1969
+ "step": 2800
1970
+ },
1971
+ {
1972
+ "epoch": 1.316776007497657,
1973
+ "grad_norm": 1.7191510200500488,
1974
+ "learning_rate": 2.835402999062793e-05,
1975
+ "loss": 0.2107,
1976
+ "step": 2810
1977
+ },
1978
+ {
1979
+ "epoch": 1.3214620431115276,
1980
+ "grad_norm": 1.4442410469055176,
1981
+ "learning_rate": 2.834817244611059e-05,
1982
+ "loss": 0.2025,
1983
+ "step": 2820
1984
+ },
1985
+ {
1986
+ "epoch": 1.3261480787253983,
1987
+ "grad_norm": 1.820534586906433,
1988
+ "learning_rate": 2.8342314901593253e-05,
1989
+ "loss": 0.2038,
1990
+ "step": 2830
1991
+ },
1992
+ {
1993
+ "epoch": 1.330834114339269,
1994
+ "grad_norm": 1.6275863647460938,
1995
+ "learning_rate": 2.8336457357075915e-05,
1996
+ "loss": 0.2099,
1997
+ "step": 2840
1998
+ },
1999
+ {
2000
+ "epoch": 1.3355201499531397,
2001
+ "grad_norm": 2.054147958755493,
2002
+ "learning_rate": 2.8330599812558578e-05,
2003
+ "loss": 0.1928,
2004
+ "step": 2850
2005
+ },
2006
+ {
2007
+ "epoch": 1.3402061855670104,
2008
+ "grad_norm": 1.7183698415756226,
2009
+ "learning_rate": 2.8324742268041237e-05,
2010
+ "loss": 0.2058,
2011
+ "step": 2860
2012
+ },
2013
+ {
2014
+ "epoch": 1.344892221180881,
2015
+ "grad_norm": 1.5442510843276978,
2016
+ "learning_rate": 2.83188847235239e-05,
2017
+ "loss": 0.2018,
2018
+ "step": 2870
2019
+ },
2020
+ {
2021
+ "epoch": 1.3495782567947516,
2022
+ "grad_norm": 1.6647236347198486,
2023
+ "learning_rate": 2.831302717900656e-05,
2024
+ "loss": 0.219,
2025
+ "step": 2880
2026
+ },
2027
+ {
2028
+ "epoch": 1.3542642924086223,
2029
+ "grad_norm": 1.468214750289917,
2030
+ "learning_rate": 2.8307169634489224e-05,
2031
+ "loss": 0.1845,
2032
+ "step": 2890
2033
+ },
2034
+ {
2035
+ "epoch": 1.358950328022493,
2036
+ "grad_norm": 1.7281159162521362,
2037
+ "learning_rate": 2.8301312089971883e-05,
2038
+ "loss": 0.2005,
2039
+ "step": 2900
2040
+ },
2041
+ {
2042
+ "epoch": 1.3636363636363638,
2043
+ "grad_norm": 1.5063496828079224,
2044
+ "learning_rate": 2.829545454545455e-05,
2045
+ "loss": 0.232,
2046
+ "step": 2910
2047
+ },
2048
+ {
2049
+ "epoch": 1.3683223992502342,
2050
+ "grad_norm": 1.8239761590957642,
2051
+ "learning_rate": 2.8289597000937208e-05,
2052
+ "loss": 0.1877,
2053
+ "step": 2920
2054
+ },
2055
+ {
2056
+ "epoch": 1.373008434864105,
2057
+ "grad_norm": 2.1553969383239746,
2058
+ "learning_rate": 2.828373945641987e-05,
2059
+ "loss": 0.2486,
2060
+ "step": 2930
2061
+ },
2062
+ {
2063
+ "epoch": 1.3776944704779757,
2064
+ "grad_norm": 1.4251559972763062,
2065
+ "learning_rate": 2.827788191190253e-05,
2066
+ "loss": 0.2043,
2067
+ "step": 2940
2068
+ },
2069
+ {
2070
+ "epoch": 1.3823805060918464,
2071
+ "grad_norm": 1.5066052675247192,
2072
+ "learning_rate": 2.8272024367385195e-05,
2073
+ "loss": 0.2319,
2074
+ "step": 2950
2075
+ },
2076
+ {
2077
+ "epoch": 1.387066541705717,
2078
+ "grad_norm": 1.7995860576629639,
2079
+ "learning_rate": 2.8266166822867854e-05,
2080
+ "loss": 0.2079,
2081
+ "step": 2960
2082
+ },
2083
+ {
2084
+ "epoch": 1.3917525773195876,
2085
+ "grad_norm": 1.6791844367980957,
2086
+ "learning_rate": 2.8260309278350516e-05,
2087
+ "loss": 0.1996,
2088
+ "step": 2970
2089
+ },
2090
+ {
2091
+ "epoch": 1.3964386129334583,
2092
+ "grad_norm": 1.1830016374588013,
2093
+ "learning_rate": 2.8254451733833175e-05,
2094
+ "loss": 0.2317,
2095
+ "step": 2980
2096
+ },
2097
+ {
2098
+ "epoch": 1.401124648547329,
2099
+ "grad_norm": 1.872058629989624,
2100
+ "learning_rate": 2.824859418931584e-05,
2101
+ "loss": 0.2036,
2102
+ "step": 2990
2103
+ },
2104
+ {
2105
+ "epoch": 1.4058106841611997,
2106
+ "grad_norm": 1.6045550107955933,
2107
+ "learning_rate": 2.82427366447985e-05,
2108
+ "loss": 0.2173,
2109
+ "step": 3000
2110
  }
2111
  ],
2112
  "logging_steps": 10,