YeBhoneLin10 committed on
Commit 7169c0f · verified · 1 Parent(s): e354563

Training in progress, step 8208, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:db6b483591e6416e7d4f8a18d61cf489a09f100cc908f3414c25862b88d459d4
+ oid sha256:51c63f2ddd7a6e10a97d28920b370b15420c9cbf7029f8fda3d06ba1218f04c1
  size 290403936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:27a2b3e1b42c3ecd4d5740c17317387c21bbddd315b4cc62823fbbe564820876
+ oid sha256:57d4591400ba60c7024d3e4f2557c0eec73519bb30ceefe230c80e60c2fcbe08
  size 574811979
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3f4c1a6b0277c8d16286a636e685faba7be3459f584860fa89ca0ff0a59d3423
+ oid sha256:c80acdc2532ec7797b634e601103d59024528d38f5012f3d98746fee880405d0
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9655e30afb6b6d38806552ed057c2e103c2ca5366b1829ec9981ece6a04acc9e
+ oid sha256:9fac151c2fc0bed5786af9225b2875725cef39eda30f3cfd01728ae1cd6a89cd
  size 1465
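
Each of the four files above is stored with Git LFS, so the diff only touches the three-line pointer file: the spec version, the sha256 object id of the blob, and its size in bytes; each change just swaps one oid for another. A minimal Python sketch of checking a downloaded file against its pointer (hypothetical helper, assuming the blob has been fetched locally, e.g. with `git lfs pull`):

import hashlib
from pathlib import Path

def verify_lfs_pointer(pointer_text: str, blob_path: str) -> bool:
    """Check a local blob against the oid/size recorded in its LFS pointer."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].removeprefix("sha256:")  # pointer stores "sha256:<hex>"
    expected_size = int(fields["size"])

    blob = Path(blob_path)
    if blob.stat().st_size != expected_size:  # cheap size check before hashing
        return False

    digest = hashlib.sha256()
    with blob.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:51c63f2ddd7a6e10a97d28920b370b15420c9cbf7029f8fda3d06ba1218f04c1
size 290403936"""
print(verify_lfs_pointer(pointer, "last-checkpoint/model.safetensors"))
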
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 16.973751274209988,
- "best_model_checkpoint": "./whissm-plus/checkpoint-6156",
- "epoch": 0.75,
+ "best_metric": 14.794215086646279,
+ "best_model_checkpoint": "./whissm-plus/checkpoint-8208",
+ "epoch": 1.0,
  "eval_steps": 2052,
- "global_step": 6156,
+ "global_step": 8208,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1756,6 +1756,589 @@
  "eval_steps_per_second": 0.338,
  "eval_wer": 16.973751274209988,
  "step": 6156
+ },
+ {
+ "epoch": 0.7523148148148148,
+ "grad_norm": 6.905988693237305,
+ "learning_rate": 2.64270887389725e-06,
+ "loss": 0.0611,
+ "step": 6175
+ },
+ {
+ "epoch": 0.7553606237816765,
+ "grad_norm": 5.302619457244873,
+ "learning_rate": 2.610275038920602e-06,
+ "loss": 0.0487,
+ "step": 6200
+ },
+ {
+ "epoch": 0.758406432748538,
+ "grad_norm": 2.7596232891082764,
+ "learning_rate": 2.5778412039439545e-06,
+ "loss": 0.0431,
+ "step": 6225
+ },
+ {
+ "epoch": 0.7614522417153996,
+ "grad_norm": 2.1329431533813477,
+ "learning_rate": 2.545407368967307e-06,
+ "loss": 0.0511,
+ "step": 6250
+ },
+ {
+ "epoch": 0.7644980506822612,
+ "grad_norm": 5.41237211227417,
+ "learning_rate": 2.512973533990659e-06,
+ "loss": 0.059,
+ "step": 6275
+ },
+ {
+ "epoch": 0.7675438596491229,
+ "grad_norm": 4.981463432312012,
+ "learning_rate": 2.4805396990140115e-06,
+ "loss": 0.0545,
+ "step": 6300
+ },
+ {
+ "epoch": 0.7705896686159844,
+ "grad_norm": 2.5017101764678955,
+ "learning_rate": 2.448105864037364e-06,
+ "loss": 0.0515,
+ "step": 6325
+ },
+ {
+ "epoch": 0.773635477582846,
+ "grad_norm": 2.7259082794189453,
+ "learning_rate": 2.4156720290607163e-06,
+ "loss": 0.0458,
+ "step": 6350
+ },
+ {
+ "epoch": 0.7766812865497076,
+ "grad_norm": 5.928169250488281,
+ "learning_rate": 2.3832381940840686e-06,
+ "loss": 0.0556,
+ "step": 6375
+ },
+ {
+ "epoch": 0.7797270955165692,
+ "grad_norm": 4.780055999755859,
+ "learning_rate": 2.350804359107421e-06,
+ "loss": 0.0474,
+ "step": 6400
+ },
+ {
+ "epoch": 0.7827729044834308,
+ "grad_norm": 6.110754013061523,
+ "learning_rate": 2.3183705241307733e-06,
+ "loss": 0.0495,
+ "step": 6425
+ },
+ {
+ "epoch": 0.7858187134502924,
+ "grad_norm": 5.388917922973633,
+ "learning_rate": 2.2859366891541257e-06,
+ "loss": 0.0483,
+ "step": 6450
+ },
+ {
+ "epoch": 0.788864522417154,
+ "grad_norm": 3.1466193199157715,
+ "learning_rate": 2.253502854177478e-06,
+ "loss": 0.0435,
+ "step": 6475
+ },
+ {
+ "epoch": 0.7919103313840156,
+ "grad_norm": 2.081556797027588,
+ "learning_rate": 2.2210690192008304e-06,
+ "loss": 0.0644,
+ "step": 6500
+ },
+ {
+ "epoch": 0.7949561403508771,
+ "grad_norm": 4.22133731842041,
+ "learning_rate": 2.1886351842241828e-06,
+ "loss": 0.0466,
+ "step": 6525
+ },
+ {
+ "epoch": 0.7980019493177388,
+ "grad_norm": 2.9023261070251465,
+ "learning_rate": 2.156201349247535e-06,
+ "loss": 0.0486,
+ "step": 6550
+ },
+ {
+ "epoch": 0.8010477582846004,
+ "grad_norm": 5.409677982330322,
+ "learning_rate": 2.1237675142708875e-06,
+ "loss": 0.0402,
+ "step": 6575
+ },
+ {
+ "epoch": 0.804093567251462,
+ "grad_norm": 4.244589328765869,
+ "learning_rate": 2.09133367929424e-06,
+ "loss": 0.0557,
+ "step": 6600
+ },
+ {
+ "epoch": 0.8071393762183235,
+ "grad_norm": 8.207276344299316,
+ "learning_rate": 2.058899844317592e-06,
+ "loss": 0.0537,
+ "step": 6625
+ },
+ {
+ "epoch": 0.8101851851851852,
+ "grad_norm": 3.6732988357543945,
+ "learning_rate": 2.0264660093409446e-06,
+ "loss": 0.0452,
+ "step": 6650
+ },
+ {
+ "epoch": 0.8132309941520468,
+ "grad_norm": 3.1428933143615723,
+ "learning_rate": 1.994032174364297e-06,
+ "loss": 0.0465,
+ "step": 6675
+ },
+ {
+ "epoch": 0.8162768031189084,
+ "grad_norm": 3.1963727474212646,
+ "learning_rate": 1.9615983393876493e-06,
+ "loss": 0.0479,
+ "step": 6700
+ },
+ {
+ "epoch": 0.81932261208577,
+ "grad_norm": 7.395984649658203,
+ "learning_rate": 1.9291645044110016e-06,
+ "loss": 0.0489,
+ "step": 6725
+ },
+ {
+ "epoch": 0.8223684210526315,
+ "grad_norm": 2.5216851234436035,
+ "learning_rate": 1.896730669434354e-06,
+ "loss": 0.044,
+ "step": 6750
+ },
+ {
+ "epoch": 0.8254142300194932,
+ "grad_norm": 2.955092668533325,
+ "learning_rate": 1.8642968344577064e-06,
+ "loss": 0.0483,
+ "step": 6775
+ },
+ {
+ "epoch": 0.8284600389863548,
+ "grad_norm": 6.115810871124268,
+ "learning_rate": 1.831862999481059e-06,
+ "loss": 0.0512,
+ "step": 6800
+ },
+ {
+ "epoch": 0.8315058479532164,
+ "grad_norm": 3.0200977325439453,
+ "learning_rate": 1.7994291645044113e-06,
+ "loss": 0.0509,
+ "step": 6825
+ },
+ {
+ "epoch": 0.8345516569200779,
+ "grad_norm": 3.954040288925171,
+ "learning_rate": 1.7669953295277634e-06,
+ "loss": 0.0474,
+ "step": 6850
+ },
+ {
+ "epoch": 0.8375974658869396,
+ "grad_norm": 4.320163249969482,
+ "learning_rate": 1.7345614945511158e-06,
+ "loss": 0.0429,
+ "step": 6875
+ },
+ {
+ "epoch": 0.8406432748538012,
+ "grad_norm": 2.413540840148926,
+ "learning_rate": 1.7021276595744682e-06,
+ "loss": 0.0428,
+ "step": 6900
+ },
+ {
+ "epoch": 0.8436890838206628,
+ "grad_norm": 4.413974761962891,
+ "learning_rate": 1.6696938245978205e-06,
+ "loss": 0.0439,
+ "step": 6925
+ },
+ {
+ "epoch": 0.8467348927875243,
+ "grad_norm": 6.30503511428833,
+ "learning_rate": 1.637259989621173e-06,
+ "loss": 0.043,
+ "step": 6950
+ },
+ {
+ "epoch": 0.8497807017543859,
+ "grad_norm": 3.963675022125244,
+ "learning_rate": 1.6048261546445255e-06,
+ "loss": 0.0413,
+ "step": 6975
+ },
+ {
+ "epoch": 0.8528265107212476,
+ "grad_norm": 3.7029097080230713,
+ "learning_rate": 1.5723923196678776e-06,
+ "loss": 0.0387,
+ "step": 7000
+ },
+ {
+ "epoch": 0.8558723196881092,
+ "grad_norm": 4.904066562652588,
+ "learning_rate": 1.53995848469123e-06,
+ "loss": 0.046,
+ "step": 7025
+ },
+ {
+ "epoch": 0.8589181286549707,
+ "grad_norm": 2.4294283390045166,
+ "learning_rate": 1.5075246497145823e-06,
+ "loss": 0.0542,
+ "step": 7050
+ },
+ {
+ "epoch": 0.8619639376218323,
+ "grad_norm": 3.33664870262146,
+ "learning_rate": 1.4750908147379347e-06,
+ "loss": 0.0453,
+ "step": 7075
+ },
+ {
+ "epoch": 0.865009746588694,
+ "grad_norm": 4.458865165710449,
+ "learning_rate": 1.4426569797612872e-06,
+ "loss": 0.0492,
+ "step": 7100
+ },
+ {
+ "epoch": 0.8680555555555556,
+ "grad_norm": 6.270244121551514,
+ "learning_rate": 1.4102231447846396e-06,
+ "loss": 0.0455,
+ "step": 7125
+ },
+ {
+ "epoch": 0.8711013645224172,
+ "grad_norm": 4.222137451171875,
+ "learning_rate": 1.3777893098079918e-06,
+ "loss": 0.0445,
+ "step": 7150
+ },
+ {
+ "epoch": 0.8741471734892787,
+ "grad_norm": 3.1992828845977783,
+ "learning_rate": 1.3453554748313441e-06,
+ "loss": 0.0471,
+ "step": 7175
+ },
+ {
+ "epoch": 0.8771929824561403,
+ "grad_norm": 4.043613433837891,
+ "learning_rate": 1.3129216398546965e-06,
+ "loss": 0.0386,
+ "step": 7200
+ },
+ {
+ "epoch": 0.880238791423002,
+ "grad_norm": 2.8727216720581055,
+ "learning_rate": 1.2804878048780488e-06,
+ "loss": 0.0417,
+ "step": 7225
+ },
+ {
+ "epoch": 0.8832846003898636,
+ "grad_norm": 4.605580806732178,
+ "learning_rate": 1.2480539699014012e-06,
+ "loss": 0.0485,
+ "step": 7250
+ },
+ {
+ "epoch": 0.8863304093567251,
+ "grad_norm": 6.22200345993042,
+ "learning_rate": 1.2156201349247536e-06,
+ "loss": 0.0424,
+ "step": 7275
+ },
+ {
+ "epoch": 0.8893762183235867,
+ "grad_norm": 3.0598998069763184,
+ "learning_rate": 1.183186299948106e-06,
+ "loss": 0.0534,
+ "step": 7300
+ },
+ {
+ "epoch": 0.8924220272904484,
+ "grad_norm": 4.829048156738281,
+ "learning_rate": 1.1507524649714583e-06,
+ "loss": 0.0456,
+ "step": 7325
+ },
+ {
+ "epoch": 0.89546783625731,
+ "grad_norm": 5.607174873352051,
+ "learning_rate": 1.1183186299948106e-06,
+ "loss": 0.0394,
+ "step": 7350
+ },
+ {
+ "epoch": 0.8985136452241715,
+ "grad_norm": 2.056725025177002,
+ "learning_rate": 1.085884795018163e-06,
+ "loss": 0.0423,
+ "step": 7375
+ },
+ {
+ "epoch": 0.9015594541910331,
+ "grad_norm": 1.4574615955352783,
+ "learning_rate": 1.0534509600415153e-06,
+ "loss": 0.0432,
+ "step": 7400
+ },
+ {
+ "epoch": 0.9046052631578947,
+ "grad_norm": 2.392552137374878,
+ "learning_rate": 1.0210171250648677e-06,
+ "loss": 0.0331,
+ "step": 7425
+ },
+ {
+ "epoch": 0.9076510721247564,
+ "grad_norm": 3.653775215148926,
+ "learning_rate": 9.8858329008822e-07,
+ "loss": 0.0469,
+ "step": 7450
+ },
+ {
+ "epoch": 0.910696881091618,
+ "grad_norm": 3.350886344909668,
+ "learning_rate": 9.561494551115724e-07,
+ "loss": 0.044,
+ "step": 7475
+ },
+ {
+ "epoch": 0.9137426900584795,
+ "grad_norm": 6.157811164855957,
+ "learning_rate": 9.237156201349248e-07,
+ "loss": 0.044,
+ "step": 7500
+ },
+ {
+ "epoch": 0.9167884990253411,
+ "grad_norm": 4.67850399017334,
+ "learning_rate": 8.912817851582773e-07,
+ "loss": 0.0545,
+ "step": 7525
+ },
+ {
+ "epoch": 0.9198343079922028,
+ "grad_norm": 2.9768331050872803,
+ "learning_rate": 8.588479501816295e-07,
+ "loss": 0.0469,
+ "step": 7550
+ },
+ {
+ "epoch": 0.9228801169590644,
+ "grad_norm": 2.5755043029785156,
+ "learning_rate": 8.264141152049819e-07,
+ "loss": 0.0436,
+ "step": 7575
+ },
+ {
+ "epoch": 0.9259259259259259,
+ "grad_norm": 5.110294342041016,
+ "learning_rate": 7.939802802283343e-07,
+ "loss": 0.0461,
+ "step": 7600
+ },
+ {
+ "epoch": 0.9289717348927875,
+ "grad_norm": 3.600740432739258,
+ "learning_rate": 7.615464452516866e-07,
+ "loss": 0.0378,
+ "step": 7625
+ },
+ {
+ "epoch": 0.9320175438596491,
+ "grad_norm": 6.710562705993652,
+ "learning_rate": 7.291126102750389e-07,
+ "loss": 0.0461,
+ "step": 7650
+ },
+ {
+ "epoch": 0.9350633528265108,
+ "grad_norm": 6.342244625091553,
+ "learning_rate": 6.966787752983914e-07,
+ "loss": 0.0478,
+ "step": 7675
+ },
+ {
+ "epoch": 0.9381091617933723,
+ "grad_norm": 5.005722999572754,
+ "learning_rate": 6.642449403217437e-07,
+ "loss": 0.0455,
+ "step": 7700
+ },
+ {
+ "epoch": 0.9411549707602339,
+ "grad_norm": 4.427570343017578,
+ "learning_rate": 6.31811105345096e-07,
+ "loss": 0.0388,
+ "step": 7725
+ },
+ {
+ "epoch": 0.9442007797270955,
+ "grad_norm": 3.7985281944274902,
+ "learning_rate": 5.993772703684484e-07,
+ "loss": 0.0488,
+ "step": 7750
+ },
+ {
+ "epoch": 0.9472465886939572,
+ "grad_norm": 6.278728008270264,
+ "learning_rate": 5.669434353918007e-07,
+ "loss": 0.0414,
+ "step": 7775
+ },
+ {
+ "epoch": 0.9502923976608187,
+ "grad_norm": 4.945337772369385,
+ "learning_rate": 5.345096004151531e-07,
+ "loss": 0.0425,
+ "step": 7800
+ },
+ {
+ "epoch": 0.9533382066276803,
+ "grad_norm": 3.97603440284729,
+ "learning_rate": 5.020757654385055e-07,
+ "loss": 0.0443,
+ "step": 7825
+ },
+ {
+ "epoch": 0.9563840155945419,
+ "grad_norm": 5.937435150146484,
+ "learning_rate": 4.696419304618578e-07,
+ "loss": 0.0546,
+ "step": 7850
+ },
+ {
+ "epoch": 0.9594298245614035,
+ "grad_norm": 2.2830593585968018,
+ "learning_rate": 4.3720809548521023e-07,
+ "loss": 0.0461,
+ "step": 7875
+ },
+ {
+ "epoch": 0.9624756335282652,
+ "grad_norm": 1.9695100784301758,
+ "learning_rate": 4.047742605085626e-07,
+ "loss": 0.0398,
+ "step": 7900
+ },
+ {
+ "epoch": 0.9655214424951267,
+ "grad_norm": 3.1578965187072754,
+ "learning_rate": 3.723404255319149e-07,
+ "loss": 0.043,
+ "step": 7925
+ },
+ {
+ "epoch": 0.9685672514619883,
+ "grad_norm": 3.1749889850616455,
+ "learning_rate": 3.399065905552673e-07,
+ "loss": 0.0496,
+ "step": 7950
+ },
+ {
+ "epoch": 0.9716130604288499,
+ "grad_norm": 3.351229190826416,
+ "learning_rate": 3.0747275557861967e-07,
+ "loss": 0.0433,
+ "step": 7975
+ },
+ {
+ "epoch": 0.9746588693957114,
+ "grad_norm": 3.811175584793091,
+ "learning_rate": 2.75038920601972e-07,
+ "loss": 0.0348,
+ "step": 8000
+ },
+ {
+ "epoch": 0.9777046783625731,
+ "grad_norm": 1.274682879447937,
+ "learning_rate": 2.4260508562532433e-07,
+ "loss": 0.046,
+ "step": 8025
+ },
+ {
+ "epoch": 0.9807504873294347,
+ "grad_norm": 3.756114959716797,
+ "learning_rate": 2.1017125064867672e-07,
+ "loss": 0.043,
+ "step": 8050
+ },
+ {
+ "epoch": 0.9837962962962963,
+ "grad_norm": 4.1706037521362305,
+ "learning_rate": 1.7773741567202908e-07,
+ "loss": 0.0359,
+ "step": 8075
+ },
+ {
+ "epoch": 0.9868421052631579,
+ "grad_norm": 4.425180435180664,
+ "learning_rate": 1.4530358069538144e-07,
+ "loss": 0.039,
+ "step": 8100
+ },
+ {
+ "epoch": 0.9898879142300195,
+ "grad_norm": 3.7593772411346436,
+ "learning_rate": 1.128697457187338e-07,
+ "loss": 0.0458,
+ "step": 8125
+ },
+ {
+ "epoch": 0.9929337231968811,
+ "grad_norm": 4.999473571777344,
+ "learning_rate": 8.043591074208614e-08,
+ "loss": 0.0493,
+ "step": 8150
+ },
+ {
+ "epoch": 0.9959795321637427,
+ "grad_norm": 1.8714449405670166,
+ "learning_rate": 4.800207576543851e-08,
+ "loss": 0.0433,
+ "step": 8175
+ },
+ {
+ "epoch": 0.9990253411306043,
+ "grad_norm": 1.6236313581466675,
+ "learning_rate": 1.5568240788790867e-08,
+ "loss": 0.0387,
+ "step": 8200
+ },
+ {
+ "epoch": 1.0,
+ "eval_loss": 0.04804893210530281,
+ "eval_runtime": 6129.2535,
+ "eval_samples_per_second": 1.34,
+ "eval_steps_per_second": 0.335,
+ "eval_wer": 14.794215086646279,
+ "step": 8208
  }
  ],
  "logging_steps": 25,
 
@@ -1770,12 +2353,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 1.59711254544384e+18,
+ "total_flos": 2.12948339392512e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null