Training in progress, epoch 7
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +288 -3
- pytorch_model.bin +1 -1
- runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 236491269
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a49ec4140754981eb351649fecf5d3e3d44b0e29fa9e01bf8460a2dcc5b91392
|
| 3 |
size 236491269
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118253458
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acc77a725d52d6e34c7f61d7c679e4c1b46be2370324f266e5a1ff1d1bebc2bf
|
| 3 |
size 118253458
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15597
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2593a21d27b6d3490c2b6104d1f46ccef142af342ac4030549c5bf8e21edca72
|
| 3 |
size 15597
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 557
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74546aa0cb21fe7508cf9d0a3ed65e894eded209c32829312f983360c4339967
|
| 3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1981182cf21e486b0f1de0f86d848f914d636f6e137316378a492b50ad1a4d9c
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -1710,11 +1710,296 @@
|
|
| 1710 |
"eval_samples_per_second": 603.291,
|
| 1711 |
"eval_steps_per_second": 37.706,
|
| 1712 |
"step": 137640
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1713 |
}
|
| 1714 |
],
|
| 1715 |
"max_steps": 321160,
|
| 1716 |
"num_train_epochs": 14,
|
| 1717 |
-
"total_flos": 1.
|
| 1718 |
"trial_name": null,
|
| 1719 |
"trial_params": null
|
| 1720 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 7.0,
|
| 5 |
+
"global_step": 160580,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 1710 |
"eval_samples_per_second": 603.291,
|
| 1711 |
"eval_steps_per_second": 37.706,
|
| 1712 |
"step": 137640
|
| 1713 |
+
},
|
| 1714 |
+
{
|
| 1715 |
+
"epoch": 6.02,
|
| 1716 |
+
"learning_rate": 5.762640431768718e-05,
|
| 1717 |
+
"loss": 1.6789,
|
| 1718 |
+
"step": 138000
|
| 1719 |
+
},
|
| 1720 |
+
{
|
| 1721 |
+
"epoch": 6.04,
|
| 1722 |
+
"learning_rate": 5.746946041491061e-05,
|
| 1723 |
+
"loss": 1.6869,
|
| 1724 |
+
"step": 138500
|
| 1725 |
+
},
|
| 1726 |
+
{
|
| 1727 |
+
"epoch": 6.06,
|
| 1728 |
+
"learning_rate": 5.7312201995294836e-05,
|
| 1729 |
+
"loss": 1.6766,
|
| 1730 |
+
"step": 139000
|
| 1731 |
+
},
|
| 1732 |
+
{
|
| 1733 |
+
"epoch": 6.08,
|
| 1734 |
+
"learning_rate": 5.7154943575679046e-05,
|
| 1735 |
+
"loss": 1.6819,
|
| 1736 |
+
"step": 139500
|
| 1737 |
+
},
|
| 1738 |
+
{
|
| 1739 |
+
"epoch": 6.1,
|
| 1740 |
+
"learning_rate": 5.699768515606326e-05,
|
| 1741 |
+
"loss": 1.6812,
|
| 1742 |
+
"step": 140000
|
| 1743 |
+
},
|
| 1744 |
+
{
|
| 1745 |
+
"epoch": 6.12,
|
| 1746 |
+
"learning_rate": 5.68407412532867e-05,
|
| 1747 |
+
"loss": 1.6802,
|
| 1748 |
+
"step": 140500
|
| 1749 |
+
},
|
| 1750 |
+
{
|
| 1751 |
+
"epoch": 6.15,
|
| 1752 |
+
"learning_rate": 5.668348283367092e-05,
|
| 1753 |
+
"loss": 1.6788,
|
| 1754 |
+
"step": 141000
|
| 1755 |
+
},
|
| 1756 |
+
{
|
| 1757 |
+
"epoch": 6.17,
|
| 1758 |
+
"learning_rate": 5.652622441405513e-05,
|
| 1759 |
+
"loss": 1.6786,
|
| 1760 |
+
"step": 141500
|
| 1761 |
+
},
|
| 1762 |
+
{
|
| 1763 |
+
"epoch": 6.19,
|
| 1764 |
+
"learning_rate": 5.6368965994439346e-05,
|
| 1765 |
+
"loss": 1.6798,
|
| 1766 |
+
"step": 142000
|
| 1767 |
+
},
|
| 1768 |
+
{
|
| 1769 |
+
"epoch": 6.21,
|
| 1770 |
+
"learning_rate": 5.6212022091662785e-05,
|
| 1771 |
+
"loss": 1.6758,
|
| 1772 |
+
"step": 142500
|
| 1773 |
+
},
|
| 1774 |
+
{
|
| 1775 |
+
"epoch": 6.23,
|
| 1776 |
+
"learning_rate": 5.605476367204701e-05,
|
| 1777 |
+
"loss": 1.6775,
|
| 1778 |
+
"step": 143000
|
| 1779 |
+
},
|
| 1780 |
+
{
|
| 1781 |
+
"epoch": 6.26,
|
| 1782 |
+
"learning_rate": 5.589750525243121e-05,
|
| 1783 |
+
"loss": 1.6764,
|
| 1784 |
+
"step": 143500
|
| 1785 |
+
},
|
| 1786 |
+
{
|
| 1787 |
+
"epoch": 6.28,
|
| 1788 |
+
"learning_rate": 5.5740246832815436e-05,
|
| 1789 |
+
"loss": 1.6735,
|
| 1790 |
+
"step": 144000
|
| 1791 |
+
},
|
| 1792 |
+
{
|
| 1793 |
+
"epoch": 6.3,
|
| 1794 |
+
"learning_rate": 5.5583302930038875e-05,
|
| 1795 |
+
"loss": 1.6758,
|
| 1796 |
+
"step": 144500
|
| 1797 |
+
},
|
| 1798 |
+
{
|
| 1799 |
+
"epoch": 6.32,
|
| 1800 |
+
"learning_rate": 5.542604451042309e-05,
|
| 1801 |
+
"loss": 1.6746,
|
| 1802 |
+
"step": 145000
|
| 1803 |
+
},
|
| 1804 |
+
{
|
| 1805 |
+
"epoch": 6.34,
|
| 1806 |
+
"learning_rate": 5.52687860908073e-05,
|
| 1807 |
+
"loss": 1.678,
|
| 1808 |
+
"step": 145500
|
| 1809 |
+
},
|
| 1810 |
+
{
|
| 1811 |
+
"epoch": 6.36,
|
| 1812 |
+
"learning_rate": 5.511152767119152e-05,
|
| 1813 |
+
"loss": 1.6724,
|
| 1814 |
+
"step": 146000
|
| 1815 |
+
},
|
| 1816 |
+
{
|
| 1817 |
+
"epoch": 6.39,
|
| 1818 |
+
"learning_rate": 5.495458376841497e-05,
|
| 1819 |
+
"loss": 1.6727,
|
| 1820 |
+
"step": 146500
|
| 1821 |
+
},
|
| 1822 |
+
{
|
| 1823 |
+
"epoch": 6.41,
|
| 1824 |
+
"learning_rate": 5.4797325348799175e-05,
|
| 1825 |
+
"loss": 1.6726,
|
| 1826 |
+
"step": 147000
|
| 1827 |
+
},
|
| 1828 |
+
{
|
| 1829 |
+
"epoch": 6.43,
|
| 1830 |
+
"learning_rate": 5.4640066929183386e-05,
|
| 1831 |
+
"loss": 1.6726,
|
| 1832 |
+
"step": 147500
|
| 1833 |
+
},
|
| 1834 |
+
{
|
| 1835 |
+
"epoch": 6.45,
|
| 1836 |
+
"learning_rate": 5.448280850956761e-05,
|
| 1837 |
+
"loss": 1.6735,
|
| 1838 |
+
"step": 148000
|
| 1839 |
+
},
|
| 1840 |
+
{
|
| 1841 |
+
"epoch": 6.47,
|
| 1842 |
+
"learning_rate": 5.4325864606791055e-05,
|
| 1843 |
+
"loss": 1.6722,
|
| 1844 |
+
"step": 148500
|
| 1845 |
+
},
|
| 1846 |
+
{
|
| 1847 |
+
"epoch": 6.5,
|
| 1848 |
+
"learning_rate": 5.4168606187175265e-05,
|
| 1849 |
+
"loss": 1.6672,
|
| 1850 |
+
"step": 149000
|
| 1851 |
+
},
|
| 1852 |
+
{
|
| 1853 |
+
"epoch": 6.52,
|
| 1854 |
+
"learning_rate": 5.4011347767559475e-05,
|
| 1855 |
+
"loss": 1.6716,
|
| 1856 |
+
"step": 149500
|
| 1857 |
+
},
|
| 1858 |
+
{
|
| 1859 |
+
"epoch": 6.54,
|
| 1860 |
+
"learning_rate": 5.385408934794369e-05,
|
| 1861 |
+
"loss": 1.6746,
|
| 1862 |
+
"step": 150000
|
| 1863 |
+
},
|
| 1864 |
+
{
|
| 1865 |
+
"epoch": 6.56,
|
| 1866 |
+
"learning_rate": 5.369714544516714e-05,
|
| 1867 |
+
"loss": 1.6714,
|
| 1868 |
+
"step": 150500
|
| 1869 |
+
},
|
| 1870 |
+
{
|
| 1871 |
+
"epoch": 6.58,
|
| 1872 |
+
"learning_rate": 5.353988702555135e-05,
|
| 1873 |
+
"loss": 1.6631,
|
| 1874 |
+
"step": 151000
|
| 1875 |
+
},
|
| 1876 |
+
{
|
| 1877 |
+
"epoch": 6.6,
|
| 1878 |
+
"learning_rate": 5.338262860593557e-05,
|
| 1879 |
+
"loss": 1.667,
|
| 1880 |
+
"step": 151500
|
| 1881 |
+
},
|
| 1882 |
+
{
|
| 1883 |
+
"epoch": 6.63,
|
| 1884 |
+
"learning_rate": 5.3225370186319776e-05,
|
| 1885 |
+
"loss": 1.6716,
|
| 1886 |
+
"step": 152000
|
| 1887 |
+
},
|
| 1888 |
+
{
|
| 1889 |
+
"epoch": 6.65,
|
| 1890 |
+
"learning_rate": 5.306842628354323e-05,
|
| 1891 |
+
"loss": 1.6653,
|
| 1892 |
+
"step": 152500
|
| 1893 |
+
},
|
| 1894 |
+
{
|
| 1895 |
+
"epoch": 6.67,
|
| 1896 |
+
"learning_rate": 5.291116786392744e-05,
|
| 1897 |
+
"loss": 1.6648,
|
| 1898 |
+
"step": 153000
|
| 1899 |
+
},
|
| 1900 |
+
{
|
| 1901 |
+
"epoch": 6.69,
|
| 1902 |
+
"learning_rate": 5.2753909444311655e-05,
|
| 1903 |
+
"loss": 1.6645,
|
| 1904 |
+
"step": 153500
|
| 1905 |
+
},
|
| 1906 |
+
{
|
| 1907 |
+
"epoch": 6.71,
|
| 1908 |
+
"learning_rate": 5.2596651024695866e-05,
|
| 1909 |
+
"loss": 1.6682,
|
| 1910 |
+
"step": 154000
|
| 1911 |
+
},
|
| 1912 |
+
{
|
| 1913 |
+
"epoch": 6.73,
|
| 1914 |
+
"learning_rate": 5.243970712191931e-05,
|
| 1915 |
+
"loss": 1.6631,
|
| 1916 |
+
"step": 154500
|
| 1917 |
+
},
|
| 1918 |
+
{
|
| 1919 |
+
"epoch": 6.76,
|
| 1920 |
+
"learning_rate": 5.228244870230352e-05,
|
| 1921 |
+
"loss": 1.6637,
|
| 1922 |
+
"step": 155000
|
| 1923 |
+
},
|
| 1924 |
+
{
|
| 1925 |
+
"epoch": 6.78,
|
| 1926 |
+
"learning_rate": 5.212519028268774e-05,
|
| 1927 |
+
"loss": 1.664,
|
| 1928 |
+
"step": 155500
|
| 1929 |
+
},
|
| 1930 |
+
{
|
| 1931 |
+
"epoch": 6.8,
|
| 1932 |
+
"learning_rate": 5.196793186307195e-05,
|
| 1933 |
+
"loss": 1.6665,
|
| 1934 |
+
"step": 156000
|
| 1935 |
+
},
|
| 1936 |
+
{
|
| 1937 |
+
"epoch": 6.82,
|
| 1938 |
+
"learning_rate": 5.18109879602954e-05,
|
| 1939 |
+
"loss": 1.6624,
|
| 1940 |
+
"step": 156500
|
| 1941 |
+
},
|
| 1942 |
+
{
|
| 1943 |
+
"epoch": 6.84,
|
| 1944 |
+
"learning_rate": 5.1653729540679605e-05,
|
| 1945 |
+
"loss": 1.6611,
|
| 1946 |
+
"step": 157000
|
| 1947 |
+
},
|
| 1948 |
+
{
|
| 1949 |
+
"epoch": 6.87,
|
| 1950 |
+
"learning_rate": 5.149647112106383e-05,
|
| 1951 |
+
"loss": 1.6642,
|
| 1952 |
+
"step": 157500
|
| 1953 |
+
},
|
| 1954 |
+
{
|
| 1955 |
+
"epoch": 6.89,
|
| 1956 |
+
"learning_rate": 5.133921270144804e-05,
|
| 1957 |
+
"loss": 1.6595,
|
| 1958 |
+
"step": 158000
|
| 1959 |
+
},
|
| 1960 |
+
{
|
| 1961 |
+
"epoch": 6.91,
|
| 1962 |
+
"learning_rate": 5.1182268798671485e-05,
|
| 1963 |
+
"loss": 1.6607,
|
| 1964 |
+
"step": 158500
|
| 1965 |
+
},
|
| 1966 |
+
{
|
| 1967 |
+
"epoch": 6.93,
|
| 1968 |
+
"learning_rate": 5.1025010379055695e-05,
|
| 1969 |
+
"loss": 1.663,
|
| 1970 |
+
"step": 159000
|
| 1971 |
+
},
|
| 1972 |
+
{
|
| 1973 |
+
"epoch": 6.95,
|
| 1974 |
+
"learning_rate": 5.086775195943991e-05,
|
| 1975 |
+
"loss": 1.6598,
|
| 1976 |
+
"step": 159500
|
| 1977 |
+
},
|
| 1978 |
+
{
|
| 1979 |
+
"epoch": 6.97,
|
| 1980 |
+
"learning_rate": 5.071049353982412e-05,
|
| 1981 |
+
"loss": 1.662,
|
| 1982 |
+
"step": 160000
|
| 1983 |
+
},
|
| 1984 |
+
{
|
| 1985 |
+
"epoch": 7.0,
|
| 1986 |
+
"learning_rate": 5.055354963704757e-05,
|
| 1987 |
+
"loss": 1.658,
|
| 1988 |
+
"step": 160500
|
| 1989 |
+
},
|
| 1990 |
+
{
|
| 1991 |
+
"epoch": 7.0,
|
| 1992 |
+
"eval_accuracy": 0.681177174568378,
|
| 1993 |
+
"eval_loss": 1.5331339836120605,
|
| 1994 |
+
"eval_runtime": 342.8004,
|
| 1995 |
+
"eval_samples_per_second": 519.113,
|
| 1996 |
+
"eval_steps_per_second": 32.445,
|
| 1997 |
+
"step": 160580
|
| 1998 |
}
|
| 1999 |
],
|
| 2000 |
"max_steps": 321160,
|
| 2001 |
"num_train_epochs": 14,
|
| 2002 |
+
"total_flos": 1.2209738648603072e+18,
|
| 2003 |
"trial_name": null,
|
| 2004 |
"trial_params": null
|
| 2005 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 118253458
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acc77a725d52d6e34c7f61d7c679e4c1b46be2370324f266e5a1ff1d1bebc2bf
|
| 3 |
size 118253458
|
runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be416dbc50f0cb836aa31ee963aba13302069cd1befb9ceaf72286a0cd5d4676
|
| 3 |
+
size 57335
|