Training in progress, epoch 7
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +288 -3
- pytorch_model.bin +1 -1
- runs/Feb20_18-27-58_ubuntu-2004/events.out.tfevents.1676885321.ubuntu-2004.886785.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b2266edb0b6b0e74ee02fbe5aa2f5218baeeafe1a239137bf990ae0aeab9a119
 size 236470789
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c433f5760508a1599460a99e03b20a531880dfdad44d71ab00bd1c682027dadd
 size 118243218
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cb85f55f522538f02d3eea0fa023913981174e2e0027d28652cb76e91ebd4d8d
 size 15597
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a1a10f0e22563a2ad91f9f31ad1fc6a7a42e9711d892d03058453301106a5f72
 size 557
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a8f3332503ed7c858b6a78cb5232c8214dfa941a5425ab04fab1ad9da09e728b
 size 627
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
-  "global_step":
+  "epoch": 7.0,
+  "global_step": 160580,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1710,11 +1710,296 @@
       "eval_samples_per_second": 496.625,
       "eval_steps_per_second": 31.039,
       "step": 137640
+    },
+    {
+      "epoch": 6.02,
+      "learning_rate": 5.762640431768718e-05,
+      "loss": 1.9502,
+      "step": 138000
+    },
+    {
+      "epoch": 6.04,
+      "learning_rate": 5.746914589807138e-05,
+      "loss": 1.9521,
+      "step": 138500
+    },
+    {
+      "epoch": 6.06,
+      "learning_rate": 5.7311887478455593e-05,
+      "loss": 1.9533,
+      "step": 139000
+    },
+    {
+      "epoch": 6.08,
+      "learning_rate": 5.715462905883982e-05,
+      "loss": 1.9533,
+      "step": 139500
+    },
+    {
+      "epoch": 6.1,
+      "learning_rate": 5.699768515606326e-05,
+      "loss": 1.9513,
+      "step": 140000
+    },
+    {
+      "epoch": 6.12,
+      "learning_rate": 5.684042673644747e-05,
+      "loss": 1.9491,
+      "step": 140500
+    },
+    {
+      "epoch": 6.15,
+      "learning_rate": 5.668316831683168e-05,
+      "loss": 1.9514,
+      "step": 141000
+    },
+    {
+      "epoch": 6.17,
+      "learning_rate": 5.65259098972159e-05,
+      "loss": 1.9492,
+      "step": 141500
+    },
+    {
+      "epoch": 6.19,
+      "learning_rate": 5.6368965994439346e-05,
+      "loss": 1.947,
+      "step": 142000
+    },
+    {
+      "epoch": 6.21,
+      "learning_rate": 5.6211707574823556e-05,
+      "loss": 1.945,
+      "step": 142500
+    },
+    {
+      "epoch": 6.23,
+      "learning_rate": 5.605444915520778e-05,
+      "loss": 1.9505,
+      "step": 143000
+    },
+    {
+      "epoch": 6.26,
+      "learning_rate": 5.5897190735591984e-05,
+      "loss": 1.9488,
+      "step": 143500
+    },
+    {
+      "epoch": 6.28,
+      "learning_rate": 5.5740246832815436e-05,
+      "loss": 1.9451,
+      "step": 144000
+    },
+    {
+      "epoch": 6.3,
+      "learning_rate": 5.5582988413199646e-05,
+      "loss": 1.9416,
+      "step": 144500
+    },
+    {
+      "epoch": 6.32,
+      "learning_rate": 5.542572999358386e-05,
+      "loss": 1.9487,
+      "step": 145000
+    },
+    {
+      "epoch": 6.34,
+      "learning_rate": 5.5268471573968074e-05,
+      "loss": 1.9394,
+      "step": 145500
+    },
+    {
+      "epoch": 6.36,
+      "learning_rate": 5.511152767119152e-05,
+      "loss": 1.9463,
+      "step": 146000
+    },
+    {
+      "epoch": 6.39,
+      "learning_rate": 5.495426925157573e-05,
+      "loss": 1.947,
+      "step": 146500
+    },
+    {
+      "epoch": 6.41,
+      "learning_rate": 5.4797010831959947e-05,
+      "loss": 1.9424,
+      "step": 147000
+    },
+    {
+      "epoch": 6.43,
+      "learning_rate": 5.463975241234416e-05,
+      "loss": 1.9433,
+      "step": 147500
+    },
+    {
+      "epoch": 6.45,
+      "learning_rate": 5.448280850956761e-05,
+      "loss": 1.9416,
+      "step": 148000
+    },
+    {
+      "epoch": 6.47,
+      "learning_rate": 5.432555008995181e-05,
+      "loss": 1.9443,
+      "step": 148500
+    },
+    {
+      "epoch": 6.5,
+      "learning_rate": 5.4168291670336036e-05,
+      "loss": 1.9422,
+      "step": 149000
+    },
+    {
+      "epoch": 6.52,
+      "learning_rate": 5.401103325072025e-05,
+      "loss": 1.9421,
+      "step": 149500
+    },
+    {
+      "epoch": 6.54,
+      "learning_rate": 5.385408934794369e-05,
+      "loss": 1.9412,
+      "step": 150000
+    },
+    {
+      "epoch": 6.56,
+      "learning_rate": 5.36968309283279e-05,
+      "loss": 1.9411,
+      "step": 150500
+    },
+    {
+      "epoch": 6.58,
+      "learning_rate": 5.353957250871212e-05,
+      "loss": 1.9375,
+      "step": 151000
+    },
+    {
+      "epoch": 6.6,
+      "learning_rate": 5.338231408909633e-05,
+      "loss": 1.9399,
+      "step": 151500
+    },
+    {
+      "epoch": 6.63,
+      "learning_rate": 5.3225370186319776e-05,
+      "loss": 1.9344,
+      "step": 152000
+    },
+    {
+      "epoch": 6.65,
+      "learning_rate": 5.3068111766703986e-05,
+      "loss": 1.9419,
+      "step": 152500
+    },
+    {
+      "epoch": 6.67,
+      "learning_rate": 5.291085334708821e-05,
+      "loss": 1.9353,
+      "step": 153000
+    },
+    {
+      "epoch": 6.69,
+      "learning_rate": 5.275359492747241e-05,
+      "loss": 1.9386,
+      "step": 153500
+    },
+    {
+      "epoch": 6.71,
+      "learning_rate": 5.259633650785664e-05,
+      "loss": 1.9403,
+      "step": 154000
+    },
+    {
+      "epoch": 6.73,
+      "learning_rate": 5.2439392605080076e-05,
+      "loss": 1.9336,
+      "step": 154500
+    },
+    {
+      "epoch": 6.76,
+      "learning_rate": 5.228213418546429e-05,
+      "loss": 1.934,
+      "step": 155000
+    },
+    {
+      "epoch": 6.78,
+      "learning_rate": 5.21248757658485e-05,
+      "loss": 1.9322,
+      "step": 155500
+    },
+    {
+      "epoch": 6.8,
+      "learning_rate": 5.196761734623272e-05,
+      "loss": 1.9316,
+      "step": 156000
+    },
+    {
+      "epoch": 6.82,
+      "learning_rate": 5.181067344345617e-05,
+      "loss": 1.9319,
+      "step": 156500
+    },
+    {
+      "epoch": 6.84,
+      "learning_rate": 5.1653415023840376e-05,
+      "loss": 1.937,
+      "step": 157000
+    },
+    {
+      "epoch": 6.87,
+      "learning_rate": 5.1496156604224586e-05,
+      "loss": 1.9324,
+      "step": 157500
+    },
+    {
+      "epoch": 6.89,
+      "learning_rate": 5.133889818460881e-05,
+      "loss": 1.9305,
+      "step": 158000
+    },
+    {
+      "epoch": 6.91,
+      "learning_rate": 5.1181954281832256e-05,
+      "loss": 1.932,
+      "step": 158500
+    },
+    {
+      "epoch": 6.93,
+      "learning_rate": 5.1024695862216466e-05,
+      "loss": 1.9298,
+      "step": 159000
+    },
+    {
+      "epoch": 6.95,
+      "learning_rate": 5.0867437442600676e-05,
+      "loss": 1.9289,
+      "step": 159500
+    },
+    {
+      "epoch": 6.97,
+      "learning_rate": 5.071017902298489e-05,
+      "loss": 1.9263,
+      "step": 160000
+    },
+    {
+      "epoch": 7.0,
+      "learning_rate": 5.055323512020834e-05,
+      "loss": 1.9313,
+      "step": 160500
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.631738439030596,
+      "eval_loss": 1.8091248273849487,
+      "eval_runtime": 359.593,
+      "eval_samples_per_second": 494.871,
+      "eval_steps_per_second": 30.929,
+      "step": 160580
     }
   ],
   "max_steps": 321160,
   "num_train_epochs": 14,
-  "total_flos": 1.
+  "total_flos": 1.2150058886378496e+18,
   "trial_name": null,
   "trial_params": null
 }
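The new log_history entries above follow the layout the Hugging Face Trainer writes into trainer_state.json: one record per logging step with "epoch", "learning_rate", "loss" and "step", plus a per-epoch record carrying the "eval_*" metrics. A minimal Python sketch for reading that history back out of the checkpoint, assuming the file has been fetched locally as last-checkpoint/trainer_state.json (the path is an assumption, not part of this commit):

    import json

    # Hypothetical local path; adjust to wherever the checkpoint was downloaded.
    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    # Top-level progress fields, 7.0 and 160580 in this commit.
    print(state["epoch"], state["global_step"])

    # Training records carry a "loss" key; evaluation records carry "eval_loss".
    train_log = [e for e in state["log_history"] if "loss" in e]
    eval_log = [e for e in state["log_history"] if "eval_loss" in e]

    # Last training record (step 160500) and the epoch-7 evaluation metrics.
    print(train_log[-1])
    print(eval_log[-1]["eval_accuracy"], eval_log[-1]["eval_loss"])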
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c433f5760508a1599460a99e03b20a531880dfdad44d71ab00bd1c682027dadd
 size 118243218
runs/Feb20_18-27-58_ubuntu-2004/events.out.tfevents.1676885321.ubuntu-2004.886785.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:76ccf92c99516f4744f2ccb27d9c0dd34d687200b637d2a59ebecfb67ba42c1f
+size 57319
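Every binary in this commit is tracked with Git LFS, so the diffs above only touch the small pointer files (spec version, oid sha256, size) while the payloads live in LFS storage. A short sketch, under the assumption that the real files have been pulled locally (for example with git lfs pull), for checking that a downloaded copy matches the sha256 recorded in its pointer:

    import hashlib

    def sha256_of(path):
        # Stream the file in 1 MiB chunks so large checkpoints fit in memory.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        return h.hexdigest()

    # Expected oid copied from the pytorch_model.bin pointer in this commit.
    expected = "c433f5760508a1599460a99e03b20a531880dfdad44d71ab00bd1c682027dadd"
    print(sha256_of("pytorch_model.bin") == expected)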