pylu5229 commited on
Commit
7ecfdf5
·
verified ·
1 Parent(s): 64045c7

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25ef5980a866c075243cbca6296c7973e976c78a14321fe2714e84fa4887656f
3
  size 110385904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:116bc00da50d25dcc272533129d200b856bacec241e236b6db2a9ecf18c05922
3
  size 110385904
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89d5648ac686d3b7617b3067c5ba0c213c7a760c4922f82dd2c2c53e72d473aa
3
  size 220436730
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7df46f260a7759a7c30b2145c459383d33624e387eb96c8208346b53716d81a4
3
  size 220436730
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4928b39aef16b4ccfdae8117738f87968b9461ea10d0859ffd4b43ce42030e6e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fea20947238c1f9d5daa925821c4146833bd2c976bc44d7f6f2f755645070f2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d1e3c127b2fc073a228b2fd82f0458974e79c45f15454c1c54a51df347dead6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64e1e8324d63cee4f88909dbf92a51081cdf1093a4acda403ce37f08bc679a7e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.0,
3
  "best_model_checkpoint": "/content/drive/MyDrive/Colab Notebooks/16_label_check_point/checkpoint-563",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
- "global_step": 2249,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1611,6 +1611,405 @@
1611
  "eval_samples_per_second": 192.275,
1612
  "eval_steps_per_second": 6.009,
1613
  "step": 2249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1614
  }
1615
  ],
1616
  "logging_steps": 10,
@@ -1625,12 +2024,12 @@
1625
  "should_evaluate": false,
1626
  "should_log": false,
1627
  "should_save": true,
1628
- "should_training_stop": false
1629
  },
1630
  "attributes": {}
1631
  }
1632
  },
1633
- "total_flos": 7.154959575454581e+18,
1634
  "train_batch_size": 32,
1635
  "trial_name": null,
1636
  "trial_params": null
 
1
  {
2
  "best_metric": 1.0,
3
  "best_model_checkpoint": "/content/drive/MyDrive/Colab Notebooks/16_label_check_point/checkpoint-563",
4
+ "epoch": 4.997333333333334,
5
  "eval_steps": 500,
6
+ "global_step": 2810,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1611
  "eval_samples_per_second": 192.275,
1612
  "eval_steps_per_second": 6.009,
1613
  "step": 2249
1614
+ },
1615
+ {
1616
+ "epoch": 4.001777777777778,
1617
+ "grad_norm": 0.0036902178544551134,
1618
+ "learning_rate": 1.1071569790431002e-05,
1619
+ "loss": 0.0003,
1620
+ "step": 2250
1621
+ },
1622
+ {
1623
+ "epoch": 4.019555555555556,
1624
+ "grad_norm": 0.00998405460268259,
1625
+ "learning_rate": 1.0873863187030448e-05,
1626
+ "loss": 0.002,
1627
+ "step": 2260
1628
+ },
1629
+ {
1630
+ "epoch": 4.037333333333334,
1631
+ "grad_norm": 23.953229904174805,
1632
+ "learning_rate": 1.0676156583629894e-05,
1633
+ "loss": 0.0187,
1634
+ "step": 2270
1635
+ },
1636
+ {
1637
+ "epoch": 4.0551111111111116,
1638
+ "grad_norm": 0.008150537498295307,
1639
+ "learning_rate": 1.0478449980229341e-05,
1640
+ "loss": 0.0153,
1641
+ "step": 2280
1642
+ },
1643
+ {
1644
+ "epoch": 4.072888888888889,
1645
+ "grad_norm": 0.5894471406936646,
1646
+ "learning_rate": 1.0280743376828787e-05,
1647
+ "loss": 0.0002,
1648
+ "step": 2290
1649
+ },
1650
+ {
1651
+ "epoch": 4.0906666666666665,
1652
+ "grad_norm": 0.07007890194654465,
1653
+ "learning_rate": 1.0083036773428232e-05,
1654
+ "loss": 0.0367,
1655
+ "step": 2300
1656
+ },
1657
+ {
1658
+ "epoch": 4.108444444444444,
1659
+ "grad_norm": 0.07020383328199387,
1660
+ "learning_rate": 9.885330170027678e-06,
1661
+ "loss": 0.0017,
1662
+ "step": 2310
1663
+ },
1664
+ {
1665
+ "epoch": 4.126222222222222,
1666
+ "grad_norm": 0.0013433824060484767,
1667
+ "learning_rate": 9.687623566627125e-06,
1668
+ "loss": 0.0176,
1669
+ "step": 2320
1670
+ },
1671
+ {
1672
+ "epoch": 4.144,
1673
+ "grad_norm": 0.0036678831093013287,
1674
+ "learning_rate": 9.489916963226571e-06,
1675
+ "loss": 0.001,
1676
+ "step": 2330
1677
+ },
1678
+ {
1679
+ "epoch": 4.161777777777778,
1680
+ "grad_norm": 20.646207809448242,
1681
+ "learning_rate": 9.29221035982602e-06,
1682
+ "loss": 0.0071,
1683
+ "step": 2340
1684
+ },
1685
+ {
1686
+ "epoch": 4.179555555555556,
1687
+ "grad_norm": 0.004499041475355625,
1688
+ "learning_rate": 9.094503756425466e-06,
1689
+ "loss": 0.0004,
1690
+ "step": 2350
1691
+ },
1692
+ {
1693
+ "epoch": 4.197333333333333,
1694
+ "grad_norm": 0.0007168107549659908,
1695
+ "learning_rate": 8.896797153024912e-06,
1696
+ "loss": 0.0025,
1697
+ "step": 2360
1698
+ },
1699
+ {
1700
+ "epoch": 4.215111111111111,
1701
+ "grad_norm": 0.015021364204585552,
1702
+ "learning_rate": 8.699090549624359e-06,
1703
+ "loss": 0.0277,
1704
+ "step": 2370
1705
+ },
1706
+ {
1707
+ "epoch": 4.232888888888889,
1708
+ "grad_norm": 0.006119410507380962,
1709
+ "learning_rate": 8.501383946223804e-06,
1710
+ "loss": 0.0009,
1711
+ "step": 2380
1712
+ },
1713
+ {
1714
+ "epoch": 4.250666666666667,
1715
+ "grad_norm": 0.0018322835676372051,
1716
+ "learning_rate": 8.30367734282325e-06,
1717
+ "loss": 0.0009,
1718
+ "step": 2390
1719
+ },
1720
+ {
1721
+ "epoch": 4.2684444444444445,
1722
+ "grad_norm": 0.0025883447378873825,
1723
+ "learning_rate": 8.105970739422696e-06,
1724
+ "loss": 0.0179,
1725
+ "step": 2400
1726
+ },
1727
+ {
1728
+ "epoch": 4.286222222222222,
1729
+ "grad_norm": 0.010295086540281773,
1730
+ "learning_rate": 7.908264136022143e-06,
1731
+ "loss": 0.0002,
1732
+ "step": 2410
1733
+ },
1734
+ {
1735
+ "epoch": 4.304,
1736
+ "grad_norm": 0.27159199118614197,
1737
+ "learning_rate": 7.710557532621591e-06,
1738
+ "loss": 0.0003,
1739
+ "step": 2420
1740
+ },
1741
+ {
1742
+ "epoch": 4.321777777777778,
1743
+ "grad_norm": 0.014537914656102657,
1744
+ "learning_rate": 7.5128509292210365e-06,
1745
+ "loss": 0.0108,
1746
+ "step": 2430
1747
+ },
1748
+ {
1749
+ "epoch": 4.339555555555555,
1750
+ "grad_norm": 0.001482433988712728,
1751
+ "learning_rate": 7.315144325820483e-06,
1752
+ "loss": 0.0003,
1753
+ "step": 2440
1754
+ },
1755
+ {
1756
+ "epoch": 4.357333333333333,
1757
+ "grad_norm": 0.0015277402708306909,
1758
+ "learning_rate": 7.11743772241993e-06,
1759
+ "loss": 0.0496,
1760
+ "step": 2450
1761
+ },
1762
+ {
1763
+ "epoch": 4.375111111111111,
1764
+ "grad_norm": 0.005141290370374918,
1765
+ "learning_rate": 6.919731119019375e-06,
1766
+ "loss": 0.0001,
1767
+ "step": 2460
1768
+ },
1769
+ {
1770
+ "epoch": 4.392888888888889,
1771
+ "grad_norm": 27.9423770904541,
1772
+ "learning_rate": 6.722024515618821e-06,
1773
+ "loss": 0.0393,
1774
+ "step": 2470
1775
+ },
1776
+ {
1777
+ "epoch": 4.410666666666667,
1778
+ "grad_norm": 0.010187560692429543,
1779
+ "learning_rate": 6.524317912218268e-06,
1780
+ "loss": 0.0001,
1781
+ "step": 2480
1782
+ },
1783
+ {
1784
+ "epoch": 4.428444444444445,
1785
+ "grad_norm": 0.002554529346525669,
1786
+ "learning_rate": 6.326611308817715e-06,
1787
+ "loss": 0.0031,
1788
+ "step": 2490
1789
+ },
1790
+ {
1791
+ "epoch": 4.4462222222222225,
1792
+ "grad_norm": 4.20240592956543,
1793
+ "learning_rate": 6.1289047054171615e-06,
1794
+ "loss": 0.0016,
1795
+ "step": 2500
1796
+ },
1797
+ {
1798
+ "epoch": 4.464,
1799
+ "grad_norm": 0.013741197995841503,
1800
+ "learning_rate": 5.931198102016608e-06,
1801
+ "loss": 0.0003,
1802
+ "step": 2510
1803
+ },
1804
+ {
1805
+ "epoch": 4.481777777777777,
1806
+ "grad_norm": 0.043951794505119324,
1807
+ "learning_rate": 5.7334914986160535e-06,
1808
+ "loss": 0.0125,
1809
+ "step": 2520
1810
+ },
1811
+ {
1812
+ "epoch": 4.499555555555555,
1813
+ "grad_norm": 0.11376281827688217,
1814
+ "learning_rate": 5.535784895215501e-06,
1815
+ "loss": 0.003,
1816
+ "step": 2530
1817
+ },
1818
+ {
1819
+ "epoch": 4.517333333333333,
1820
+ "grad_norm": 0.05734412372112274,
1821
+ "learning_rate": 5.338078291814947e-06,
1822
+ "loss": 0.0009,
1823
+ "step": 2540
1824
+ },
1825
+ {
1826
+ "epoch": 4.535111111111111,
1827
+ "grad_norm": 0.0010632964549586177,
1828
+ "learning_rate": 5.140371688414394e-06,
1829
+ "loss": 0.0196,
1830
+ "step": 2550
1831
+ },
1832
+ {
1833
+ "epoch": 4.552888888888889,
1834
+ "grad_norm": 0.0036729658022522926,
1835
+ "learning_rate": 4.942665085013839e-06,
1836
+ "loss": 0.0071,
1837
+ "step": 2560
1838
+ },
1839
+ {
1840
+ "epoch": 4.570666666666667,
1841
+ "grad_norm": 5.985267162322998,
1842
+ "learning_rate": 4.744958481613286e-06,
1843
+ "loss": 0.0311,
1844
+ "step": 2570
1845
+ },
1846
+ {
1847
+ "epoch": 4.588444444444445,
1848
+ "grad_norm": 0.996809720993042,
1849
+ "learning_rate": 4.547251878212733e-06,
1850
+ "loss": 0.0012,
1851
+ "step": 2580
1852
+ },
1853
+ {
1854
+ "epoch": 4.606222222222222,
1855
+ "grad_norm": 0.11869648844003677,
1856
+ "learning_rate": 4.349545274812179e-06,
1857
+ "loss": 0.0003,
1858
+ "step": 2590
1859
+ },
1860
+ {
1861
+ "epoch": 4.624,
1862
+ "grad_norm": 0.006143218372017145,
1863
+ "learning_rate": 4.151838671411625e-06,
1864
+ "loss": 0.0013,
1865
+ "step": 2600
1866
+ },
1867
+ {
1868
+ "epoch": 4.641777777777778,
1869
+ "grad_norm": 0.024631284177303314,
1870
+ "learning_rate": 3.954132068011071e-06,
1871
+ "loss": 0.0388,
1872
+ "step": 2610
1873
+ },
1874
+ {
1875
+ "epoch": 4.6595555555555555,
1876
+ "grad_norm": 0.0017836794722825289,
1877
+ "learning_rate": 3.7564254646105183e-06,
1878
+ "loss": 0.0015,
1879
+ "step": 2620
1880
+ },
1881
+ {
1882
+ "epoch": 4.677333333333333,
1883
+ "grad_norm": 0.003801500890403986,
1884
+ "learning_rate": 3.558718861209965e-06,
1885
+ "loss": 0.0003,
1886
+ "step": 2630
1887
+ },
1888
+ {
1889
+ "epoch": 4.695111111111111,
1890
+ "grad_norm": 0.004178278613835573,
1891
+ "learning_rate": 3.3610122578094107e-06,
1892
+ "loss": 0.001,
1893
+ "step": 2640
1894
+ },
1895
+ {
1896
+ "epoch": 4.712888888888889,
1897
+ "grad_norm": 0.0044832993298769,
1898
+ "learning_rate": 3.1633056544088575e-06,
1899
+ "loss": 0.0194,
1900
+ "step": 2650
1901
+ },
1902
+ {
1903
+ "epoch": 4.730666666666667,
1904
+ "grad_norm": 0.0029173328075557947,
1905
+ "learning_rate": 2.965599051008304e-06,
1906
+ "loss": 0.0302,
1907
+ "step": 2660
1908
+ },
1909
+ {
1910
+ "epoch": 4.748444444444445,
1911
+ "grad_norm": 0.0005038917297497392,
1912
+ "learning_rate": 2.7678924476077504e-06,
1913
+ "loss": 0.011,
1914
+ "step": 2670
1915
+ },
1916
+ {
1917
+ "epoch": 4.766222222222222,
1918
+ "grad_norm": 0.01968969963490963,
1919
+ "learning_rate": 2.570185844207197e-06,
1920
+ "loss": 0.0002,
1921
+ "step": 2680
1922
+ },
1923
+ {
1924
+ "epoch": 4.784,
1925
+ "grad_norm": 0.02507755346596241,
1926
+ "learning_rate": 2.372479240806643e-06,
1927
+ "loss": 0.0012,
1928
+ "step": 2690
1929
+ },
1930
+ {
1931
+ "epoch": 4.801777777777778,
1932
+ "grad_norm": 6.288967609405518,
1933
+ "learning_rate": 2.1747726374060897e-06,
1934
+ "loss": 0.0195,
1935
+ "step": 2700
1936
+ },
1937
+ {
1938
+ "epoch": 4.819555555555556,
1939
+ "grad_norm": 0.19547367095947266,
1940
+ "learning_rate": 1.9770660340055357e-06,
1941
+ "loss": 0.0034,
1942
+ "step": 2710
1943
+ },
1944
+ {
1945
+ "epoch": 4.8373333333333335,
1946
+ "grad_norm": 0.007160472217947245,
1947
+ "learning_rate": 1.7793594306049826e-06,
1948
+ "loss": 0.0008,
1949
+ "step": 2720
1950
+ },
1951
+ {
1952
+ "epoch": 4.855111111111111,
1953
+ "grad_norm": 0.1027381643652916,
1954
+ "learning_rate": 1.5816528272044288e-06,
1955
+ "loss": 0.0006,
1956
+ "step": 2730
1957
+ },
1958
+ {
1959
+ "epoch": 4.872888888888889,
1960
+ "grad_norm": 0.6163949966430664,
1961
+ "learning_rate": 1.3839462238038752e-06,
1962
+ "loss": 0.0001,
1963
+ "step": 2740
1964
+ },
1965
+ {
1966
+ "epoch": 4.890666666666666,
1967
+ "grad_norm": 0.0065285759046673775,
1968
+ "learning_rate": 1.1862396204033214e-06,
1969
+ "loss": 0.0,
1970
+ "step": 2750
1971
+ },
1972
+ {
1973
+ "epoch": 4.908444444444444,
1974
+ "grad_norm": 2.0664429664611816,
1975
+ "learning_rate": 9.885330170027678e-07,
1976
+ "loss": 0.0033,
1977
+ "step": 2760
1978
+ },
1979
+ {
1980
+ "epoch": 4.926222222222222,
1981
+ "grad_norm": 0.04097575694322586,
1982
+ "learning_rate": 7.908264136022144e-07,
1983
+ "loss": 0.001,
1984
+ "step": 2770
1985
+ },
1986
+ {
1987
+ "epoch": 4.944,
1988
+ "grad_norm": 0.0015862607397139072,
1989
+ "learning_rate": 5.931198102016607e-07,
1990
+ "loss": 0.0001,
1991
+ "step": 2780
1992
+ },
1993
+ {
1994
+ "epoch": 4.961777777777778,
1995
+ "grad_norm": 0.0021847274620085955,
1996
+ "learning_rate": 3.954132068011072e-07,
1997
+ "loss": 0.0006,
1998
+ "step": 2790
1999
+ },
2000
+ {
2001
+ "epoch": 4.979555555555556,
2002
+ "grad_norm": 0.00527564063668251,
2003
+ "learning_rate": 1.977066034005536e-07,
2004
+ "loss": 0.0243,
2005
+ "step": 2800
2006
+ },
2007
+ {
2008
+ "epoch": 4.997333333333334,
2009
+ "grad_norm": 0.0007434898870997131,
2010
+ "learning_rate": 0.0,
2011
+ "loss": 0.0525,
2012
+ "step": 2810
2013
  }
2014
  ],
2015
  "logging_steps": 10,
 
2024
  "should_evaluate": false,
2025
  "should_log": false,
2026
  "should_save": true,
2027
+ "should_training_stop": true
2028
  },
2029
  "attributes": {}
2030
  }
2031
  },
2032
+ "total_flos": 8.94051665811918e+18,
2033
  "train_batch_size": 32,
2034
  "trial_name": null,
2035
  "trial_params": null