robertou2 commited on
Commit
176febd
·
verified ·
1 Parent(s): a9f25ff

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. adapter_model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +6 -1206
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5529bbd8b02900353e5a9edb1b0cc3a12d5828ce3583f1e939e8a5cd7869147d
3
  size 92309112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91deaa0df19cb3a4603aed93b3ea53b071dfc7c0b2e4fdaaec06eb2240d2d81c
3
  size 92309112
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abc81f39457613379a6b53d39a8e9a20485a39bc6441c72daab7e852c4611bd3
3
  size 184765003
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18d99d58786c67973fe2041dabca67551378777424f0b36426db8c429e7ae955
3
  size 184765003
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96d2f22d26bc65f3aeedce5509461616d5bf62bde9362cbb9270a9fe00a8d63a
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dbc0a5b32ad7d5de753e64fe048720f783b76e89c603c1c55e1c06734520c91
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1217dd157c01a1c43f8d1f2eafc858dc7730cb63e7c08068881fa71d637b5c4a
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:904ce4dc1d5cd57472f50779861a9053d20471c1dc9e146ec99c9316f40e0da7
3
  size 1465
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 180,
3
- "best_metric": 0.037015657871961594,
4
- "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-180",
5
- "epoch": 9.473684210526315,
6
  "eval_steps": 1,
7
- "global_step": 180,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1508,1206 +1508,6 @@
1508
  "eval_samples_per_second": 8.824,
1509
  "eval_steps_per_second": 1.177,
1510
  "step": 100
1511
- },
1512
- {
1513
- "epoch": 5.315789473684211,
1514
- "grad_norm": 0.8746724128723145,
1515
- "learning_rate": 0.0003552336906070838,
1516
- "loss": 0.4326,
1517
- "step": 101
1518
- },
1519
- {
1520
- "epoch": 5.315789473684211,
1521
- "eval_loss": 0.4299829304218292,
1522
- "eval_runtime": 3.3922,
1523
- "eval_samples_per_second": 8.844,
1524
- "eval_steps_per_second": 1.179,
1525
- "step": 101
1526
- },
1527
- {
1528
- "epoch": 5.368421052631579,
1529
- "grad_norm": 0.8440446257591248,
1530
- "learning_rate": 0.000352192771017753,
1531
- "loss": 0.4344,
1532
- "step": 102
1533
- },
1534
- {
1535
- "epoch": 5.368421052631579,
1536
- "eval_loss": 0.4243197739124298,
1537
- "eval_runtime": 3.3935,
1538
- "eval_samples_per_second": 8.84,
1539
- "eval_steps_per_second": 1.179,
1540
- "step": 102
1541
- },
1542
- {
1543
- "epoch": 5.421052631578947,
1544
- "grad_norm": 0.7834837436676025,
1545
- "learning_rate": 0.0003491335881925407,
1546
- "loss": 0.4662,
1547
- "step": 103
1548
- },
1549
- {
1550
- "epoch": 5.421052631578947,
1551
- "eval_loss": 0.42057812213897705,
1552
- "eval_runtime": 3.3965,
1553
- "eval_samples_per_second": 8.833,
1554
- "eval_steps_per_second": 1.178,
1555
- "step": 103
1556
- },
1557
- {
1558
- "epoch": 5.473684210526316,
1559
- "grad_norm": 0.7472103834152222,
1560
- "learning_rate": 0.0003460566888489593,
1561
- "loss": 0.418,
1562
- "step": 104
1563
- },
1564
- {
1565
- "epoch": 5.473684210526316,
1566
- "eval_loss": 0.4140828549861908,
1567
- "eval_runtime": 3.3928,
1568
- "eval_samples_per_second": 8.842,
1569
- "eval_steps_per_second": 1.179,
1570
- "step": 104
1571
- },
1572
- {
1573
- "epoch": 5.526315789473684,
1574
- "grad_norm": 0.8624552488327026,
1575
- "learning_rate": 0.00034296262287070335,
1576
- "loss": 0.3972,
1577
- "step": 105
1578
- },
1579
- {
1580
- "epoch": 5.526315789473684,
1581
- "eval_loss": 0.4123520851135254,
1582
- "eval_runtime": 3.3971,
1583
- "eval_samples_per_second": 8.831,
1584
- "eval_steps_per_second": 1.177,
1585
- "step": 105
1586
- },
1587
- {
1588
- "epoch": 5.578947368421053,
1589
- "grad_norm": 0.8890901207923889,
1590
- "learning_rate": 0.0003398519432093782,
1591
- "loss": 0.4657,
1592
- "step": 106
1593
- },
1594
- {
1595
- "epoch": 5.578947368421053,
1596
- "eval_loss": 0.40413472056388855,
1597
- "eval_runtime": 3.3959,
1598
- "eval_samples_per_second": 8.834,
1599
- "eval_steps_per_second": 1.178,
1600
- "step": 106
1601
- },
1602
- {
1603
- "epoch": 5.631578947368421,
1604
- "grad_norm": 0.7559741139411926,
1605
- "learning_rate": 0.0003367252057856802,
1606
- "loss": 0.4583,
1607
- "step": 107
1608
- },
1609
- {
1610
- "epoch": 5.631578947368421,
1611
- "eval_loss": 0.3957214951515198,
1612
- "eval_runtime": 3.3997,
1613
- "eval_samples_per_second": 8.824,
1614
- "eval_steps_per_second": 1.177,
1615
- "step": 107
1616
- },
1617
- {
1618
- "epoch": 5.684210526315789,
1619
- "grad_norm": 0.7100098729133606,
1620
- "learning_rate": 0.00033358296939004547,
1621
- "loss": 0.4226,
1622
- "step": 108
1623
- },
1624
- {
1625
- "epoch": 5.684210526315789,
1626
- "eval_loss": 0.3925686478614807,
1627
- "eval_runtime": 3.3977,
1628
- "eval_samples_per_second": 8.83,
1629
- "eval_steps_per_second": 1.177,
1630
- "step": 108
1631
- },
1632
- {
1633
- "epoch": 5.7368421052631575,
1634
- "grad_norm": 0.693897545337677,
1635
- "learning_rate": 0.00033042579558278717,
1636
- "loss": 0.4317,
1637
- "step": 109
1638
- },
1639
- {
1640
- "epoch": 5.7368421052631575,
1641
- "eval_loss": 0.38951781392097473,
1642
- "eval_runtime": 3.3988,
1643
- "eval_samples_per_second": 8.827,
1644
- "eval_steps_per_second": 1.177,
1645
- "step": 109
1646
- },
1647
- {
1648
- "epoch": 5.7894736842105265,
1649
- "grad_norm": 0.8033037781715393,
1650
- "learning_rate": 0.00032725424859373687,
1651
- "loss": 0.4543,
1652
- "step": 110
1653
- },
1654
- {
1655
- "epoch": 5.7894736842105265,
1656
- "eval_loss": 0.38237908482551575,
1657
- "eval_runtime": 3.3962,
1658
- "eval_samples_per_second": 8.833,
1659
- "eval_steps_per_second": 1.178,
1660
- "step": 110
1661
- },
1662
- {
1663
- "epoch": 5.842105263157895,
1664
- "grad_norm": 0.8621124625205994,
1665
- "learning_rate": 0.0003240688952214085,
1666
- "loss": 0.4746,
1667
- "step": 111
1668
- },
1669
- {
1670
- "epoch": 5.842105263157895,
1671
- "eval_loss": 0.373757928609848,
1672
- "eval_runtime": 3.3971,
1673
- "eval_samples_per_second": 8.831,
1674
- "eval_steps_per_second": 1.177,
1675
- "step": 111
1676
- },
1677
- {
1678
- "epoch": 5.894736842105263,
1679
- "grad_norm": 0.8101131319999695,
1680
- "learning_rate": 0.00032087030473170445,
1681
- "loss": 0.3917,
1682
- "step": 112
1683
- },
1684
- {
1685
- "epoch": 5.894736842105263,
1686
- "eval_loss": 0.3614272177219391,
1687
- "eval_runtime": 3.39,
1688
- "eval_samples_per_second": 8.85,
1689
- "eval_steps_per_second": 1.18,
1690
- "step": 112
1691
- },
1692
- {
1693
- "epoch": 5.947368421052632,
1694
- "grad_norm": 0.6941331624984741,
1695
- "learning_rate": 0.00031765904875617973,
1696
- "loss": 0.3344,
1697
- "step": 113
1698
- },
1699
- {
1700
- "epoch": 5.947368421052632,
1701
- "eval_loss": 0.35045164823532104,
1702
- "eval_runtime": 3.3993,
1703
- "eval_samples_per_second": 8.825,
1704
- "eval_steps_per_second": 1.177,
1705
- "step": 113
1706
- },
1707
- {
1708
- "epoch": 6.0,
1709
- "grad_norm": 0.6586763262748718,
1710
- "learning_rate": 0.00031443570118988356,
1711
- "loss": 0.3539,
1712
- "step": 114
1713
- },
1714
- {
1715
- "epoch": 6.0,
1716
- "eval_loss": 0.34484100341796875,
1717
- "eval_runtime": 3.3948,
1718
- "eval_samples_per_second": 8.837,
1719
- "eval_steps_per_second": 1.178,
1720
- "step": 114
1721
- },
1722
- {
1723
- "epoch": 6.052631578947368,
1724
- "grad_norm": 0.7052369713783264,
1725
- "learning_rate": 0.00031120083808879663,
1726
- "loss": 0.3257,
1727
- "step": 115
1728
- },
1729
- {
1730
- "epoch": 6.052631578947368,
1731
- "eval_loss": 0.3385400176048279,
1732
- "eval_runtime": 3.4041,
1733
- "eval_samples_per_second": 8.813,
1734
- "eval_steps_per_second": 1.175,
1735
- "step": 115
1736
- },
1737
- {
1738
- "epoch": 6.105263157894737,
1739
- "grad_norm": 0.8040263056755066,
1740
- "learning_rate": 0.0003079550375668821,
1741
- "loss": 0.335,
1742
- "step": 116
1743
- },
1744
- {
1745
- "epoch": 6.105263157894737,
1746
- "eval_loss": 0.3320732116699219,
1747
- "eval_runtime": 3.4012,
1748
- "eval_samples_per_second": 8.82,
1749
- "eval_steps_per_second": 1.176,
1750
- "step": 116
1751
- },
1752
- {
1753
- "epoch": 6.157894736842105,
1754
- "grad_norm": 0.9117230772972107,
1755
- "learning_rate": 0.00030469887969276877,
1756
- "loss": 0.3133,
1757
- "step": 117
1758
- },
1759
- {
1760
- "epoch": 6.157894736842105,
1761
- "eval_loss": 0.328256756067276,
1762
- "eval_runtime": 3.4045,
1763
- "eval_samples_per_second": 8.812,
1764
- "eval_steps_per_second": 1.175,
1765
- "step": 117
1766
- },
1767
- {
1768
- "epoch": 6.2105263157894735,
1769
- "grad_norm": 0.8745028972625732,
1770
- "learning_rate": 0.00030143294638608487,
1771
- "loss": 0.2972,
1772
- "step": 118
1773
- },
1774
- {
1775
- "epoch": 6.2105263157894735,
1776
- "eval_loss": 0.3161332905292511,
1777
- "eval_runtime": 3.3982,
1778
- "eval_samples_per_second": 8.828,
1779
- "eval_steps_per_second": 1.177,
1780
- "step": 118
1781
- },
1782
- {
1783
- "epoch": 6.2631578947368425,
1784
- "grad_norm": 0.893980085849762,
1785
- "learning_rate": 0.00029815782131346137,
1786
- "loss": 0.3135,
1787
- "step": 119
1788
- },
1789
- {
1790
- "epoch": 6.2631578947368425,
1791
- "eval_loss": 0.3072938621044159,
1792
- "eval_runtime": 3.3977,
1793
- "eval_samples_per_second": 8.83,
1794
- "eval_steps_per_second": 1.177,
1795
- "step": 119
1796
- },
1797
- {
1798
- "epoch": 6.315789473684211,
1799
- "grad_norm": 0.8993279337882996,
1800
- "learning_rate": 0.0002948740897842223,
1801
- "loss": 0.2965,
1802
- "step": 120
1803
- },
1804
- {
1805
- "epoch": 6.315789473684211,
1806
- "eval_loss": 0.3108386695384979,
1807
- "eval_runtime": 3.3904,
1808
- "eval_samples_per_second": 8.849,
1809
- "eval_steps_per_second": 1.18,
1810
- "step": 120
1811
- },
1812
- {
1813
- "epoch": 6.368421052631579,
1814
- "grad_norm": 0.8741037845611572,
1815
- "learning_rate": 0.00029158233864578256,
1816
- "loss": 0.2753,
1817
- "step": 121
1818
- },
1819
- {
1820
- "epoch": 6.368421052631579,
1821
- "eval_loss": 0.31585294008255005,
1822
- "eval_runtime": 3.3843,
1823
- "eval_samples_per_second": 8.865,
1824
- "eval_steps_per_second": 1.182,
1825
- "step": 121
1826
- },
1827
- {
1828
- "epoch": 6.421052631578947,
1829
- "grad_norm": 0.8745630979537964,
1830
- "learning_rate": 0.00028828315617877,
1831
- "loss": 0.305,
1832
- "step": 122
1833
- },
1834
- {
1835
- "epoch": 6.421052631578947,
1836
- "eval_loss": 0.31079187989234924,
1837
- "eval_runtime": 3.3933,
1838
- "eval_samples_per_second": 8.841,
1839
- "eval_steps_per_second": 1.179,
1840
- "step": 122
1841
- },
1842
- {
1843
- "epoch": 6.473684210526316,
1844
- "grad_norm": 0.8834717869758606,
1845
- "learning_rate": 0.0002849771319918922,
1846
- "loss": 0.3354,
1847
- "step": 123
1848
- },
1849
- {
1850
- "epoch": 6.473684210526316,
1851
- "eval_loss": 0.30564117431640625,
1852
- "eval_runtime": 3.3954,
1853
- "eval_samples_per_second": 8.836,
1854
- "eval_steps_per_second": 1.178,
1855
- "step": 123
1856
- },
1857
- {
1858
- "epoch": 6.526315789473684,
1859
- "grad_norm": 0.8826112151145935,
1860
- "learning_rate": 0.00028166485691656423,
1861
- "loss": 0.301,
1862
- "step": 124
1863
- },
1864
- {
1865
- "epoch": 6.526315789473684,
1866
- "eval_loss": 0.2981402277946472,
1867
- "eval_runtime": 3.3997,
1868
- "eval_samples_per_second": 8.824,
1869
- "eval_steps_per_second": 1.177,
1870
- "step": 124
1871
- },
1872
- {
1873
- "epoch": 6.578947368421053,
1874
- "grad_norm": 0.7558391094207764,
1875
- "learning_rate": 0.00027834692290132053,
1876
- "loss": 0.2935,
1877
- "step": 125
1878
- },
1879
- {
1880
- "epoch": 6.578947368421053,
1881
- "eval_loss": 0.29539814591407776,
1882
- "eval_runtime": 3.3982,
1883
- "eval_samples_per_second": 8.828,
1884
- "eval_steps_per_second": 1.177,
1885
- "step": 125
1886
- },
1887
- {
1888
- "epoch": 6.631578947368421,
1889
- "grad_norm": 1.2316842079162598,
1890
- "learning_rate": 0.0002750239229060246,
1891
- "loss": 0.2295,
1892
- "step": 126
1893
- },
1894
- {
1895
- "epoch": 6.631578947368421,
1896
- "eval_loss": 0.29493311047554016,
1897
- "eval_runtime": 3.3983,
1898
- "eval_samples_per_second": 8.828,
1899
- "eval_steps_per_second": 1.177,
1900
- "step": 126
1901
- },
1902
- {
1903
- "epoch": 6.684210526315789,
1904
- "grad_norm": 0.872908353805542,
1905
- "learning_rate": 0.0002716964507958994,
1906
- "loss": 0.3214,
1907
- "step": 127
1908
- },
1909
- {
1910
- "epoch": 6.684210526315789,
1911
- "eval_loss": 0.2816743552684784,
1912
- "eval_runtime": 3.3969,
1913
- "eval_samples_per_second": 8.831,
1914
- "eval_steps_per_second": 1.178,
1915
- "step": 127
1916
- },
1917
- {
1918
- "epoch": 6.7368421052631575,
1919
- "grad_norm": 1.1845930814743042,
1920
- "learning_rate": 0.0002683651012353955,
1921
- "loss": 0.3108,
1922
- "step": 128
1923
- },
1924
- {
1925
- "epoch": 6.7368421052631575,
1926
- "eval_loss": 0.27218949794769287,
1927
- "eval_runtime": 3.4046,
1928
- "eval_samples_per_second": 8.812,
1929
- "eval_steps_per_second": 1.175,
1930
- "step": 128
1931
- },
1932
- {
1933
- "epoch": 6.7894736842105265,
1934
- "grad_norm": 0.8063351511955261,
1935
- "learning_rate": 0.0002650304695819168,
1936
- "loss": 0.2863,
1937
- "step": 129
1938
- },
1939
- {
1940
- "epoch": 6.7894736842105265,
1941
- "eval_loss": 0.26498475670814514,
1942
- "eval_runtime": 3.4002,
1943
- "eval_samples_per_second": 8.823,
1944
- "eval_steps_per_second": 1.176,
1945
- "step": 129
1946
- },
1947
- {
1948
- "epoch": 6.842105263157895,
1949
- "grad_norm": 0.8428151607513428,
1950
- "learning_rate": 0.00026169315177942135,
1951
- "loss": 0.3621,
1952
- "step": 130
1953
- },
1954
- {
1955
- "epoch": 6.842105263157895,
1956
- "eval_loss": 0.26111218333244324,
1957
- "eval_runtime": 3.4007,
1958
- "eval_samples_per_second": 8.822,
1959
- "eval_steps_per_second": 1.176,
1960
- "step": 130
1961
- },
1962
- {
1963
- "epoch": 6.894736842105263,
1964
- "grad_norm": 0.7834460139274597,
1965
- "learning_rate": 0.0002583537442519187,
1966
- "loss": 0.3314,
1967
- "step": 131
1968
- },
1969
- {
1970
- "epoch": 6.894736842105263,
1971
- "eval_loss": 0.2560313940048218,
1972
- "eval_runtime": 3.3993,
1973
- "eval_samples_per_second": 8.825,
1974
- "eval_steps_per_second": 1.177,
1975
- "step": 131
1976
- },
1977
- {
1978
- "epoch": 6.947368421052632,
1979
- "grad_norm": 0.7809928059577942,
1980
- "learning_rate": 0.00025501284379688067,
1981
- "loss": 0.3259,
1982
- "step": 132
1983
- },
1984
- {
1985
- "epoch": 6.947368421052632,
1986
- "eval_loss": 0.24695177376270294,
1987
- "eval_runtime": 3.3903,
1988
- "eval_samples_per_second": 8.849,
1989
- "eval_steps_per_second": 1.18,
1990
- "step": 132
1991
- },
1992
- {
1993
- "epoch": 7.0,
1994
- "grad_norm": 0.679124116897583,
1995
- "learning_rate": 0.0002516710474785856,
1996
- "loss": 0.2897,
1997
- "step": 133
1998
- },
1999
- {
2000
- "epoch": 7.0,
2001
- "eval_loss": 0.23945002257823944,
2002
- "eval_runtime": 3.393,
2003
- "eval_samples_per_second": 8.842,
2004
- "eval_steps_per_second": 1.179,
2005
- "step": 133
2006
- },
2007
- {
2008
- "epoch": 7.052631578947368,
2009
- "grad_norm": 0.7083767056465149,
2010
- "learning_rate": 0.0002483289525214145,
2011
- "loss": 0.2112,
2012
- "step": 134
2013
- },
2014
- {
2015
- "epoch": 7.052631578947368,
2016
- "eval_loss": 0.22805434465408325,
2017
- "eval_runtime": 3.3927,
2018
- "eval_samples_per_second": 8.843,
2019
- "eval_steps_per_second": 1.179,
2020
- "step": 134
2021
- },
2022
- {
2023
- "epoch": 7.105263157894737,
2024
- "grad_norm": 0.7295684218406677,
2025
- "learning_rate": 0.00024498715620311935,
2026
- "loss": 0.1686,
2027
- "step": 135
2028
- },
2029
- {
2030
- "epoch": 7.105263157894737,
2031
- "eval_loss": 0.22213517129421234,
2032
- "eval_runtime": 3.3986,
2033
- "eval_samples_per_second": 8.827,
2034
- "eval_steps_per_second": 1.177,
2035
- "step": 135
2036
- },
2037
- {
2038
- "epoch": 7.157894736842105,
2039
- "grad_norm": 0.9701097011566162,
2040
- "learning_rate": 0.00024164625574808144,
2041
- "loss": 0.192,
2042
- "step": 136
2043
- },
2044
- {
2045
- "epoch": 7.157894736842105,
2046
- "eval_loss": 0.21580030024051666,
2047
- "eval_runtime": 3.3985,
2048
- "eval_samples_per_second": 8.827,
2049
- "eval_steps_per_second": 1.177,
2050
- "step": 136
2051
- },
2052
- {
2053
- "epoch": 7.2105263157894735,
2054
- "grad_norm": 0.9494478702545166,
2055
- "learning_rate": 0.00023830684822057877,
2056
- "loss": 0.205,
2057
- "step": 137
2058
- },
2059
- {
2060
- "epoch": 7.2105263157894735,
2061
- "eval_loss": 0.21216638386249542,
2062
- "eval_runtime": 3.4006,
2063
- "eval_samples_per_second": 8.822,
2064
- "eval_steps_per_second": 1.176,
2065
- "step": 137
2066
- },
2067
- {
2068
- "epoch": 7.2631578947368425,
2069
- "grad_norm": 0.6897704005241394,
2070
- "learning_rate": 0.00023496953041808325,
2071
- "loss": 0.1542,
2072
- "step": 138
2073
- },
2074
- {
2075
- "epoch": 7.2631578947368425,
2076
- "eval_loss": 0.21432656049728394,
2077
- "eval_runtime": 3.3968,
2078
- "eval_samples_per_second": 8.832,
2079
- "eval_steps_per_second": 1.178,
2080
- "step": 138
2081
- },
2082
- {
2083
- "epoch": 7.315789473684211,
2084
- "grad_norm": 0.7690937519073486,
2085
- "learning_rate": 0.0002316348987646045,
2086
- "loss": 0.213,
2087
- "step": 139
2088
- },
2089
- {
2090
- "epoch": 7.315789473684211,
2091
- "eval_loss": 0.21909914910793304,
2092
- "eval_runtime": 3.3981,
2093
- "eval_samples_per_second": 8.829,
2094
- "eval_steps_per_second": 1.177,
2095
- "step": 139
2096
- },
2097
- {
2098
- "epoch": 7.368421052631579,
2099
- "grad_norm": 0.9047114253044128,
2100
- "learning_rate": 0.00022830354920410064,
2101
- "loss": 0.2302,
2102
- "step": 140
2103
- },
2104
- {
2105
- "epoch": 7.368421052631579,
2106
- "eval_loss": 0.2153581976890564,
2107
- "eval_runtime": 3.399,
2108
- "eval_samples_per_second": 8.826,
2109
- "eval_steps_per_second": 1.177,
2110
- "step": 140
2111
- },
2112
- {
2113
- "epoch": 7.421052631578947,
2114
- "grad_norm": 0.7724714279174805,
2115
- "learning_rate": 0.0002249760770939754,
2116
- "loss": 0.1825,
2117
- "step": 141
2118
- },
2119
- {
2120
- "epoch": 7.421052631578947,
2121
- "eval_loss": 0.20969410240650177,
2122
- "eval_runtime": 3.3776,
2123
- "eval_samples_per_second": 8.882,
2124
- "eval_steps_per_second": 1.184,
2125
- "step": 141
2126
- },
2127
- {
2128
- "epoch": 7.473684210526316,
2129
- "grad_norm": 0.7683383822441101,
2130
- "learning_rate": 0.0002216530770986795,
2131
- "loss": 0.1793,
2132
- "step": 142
2133
- },
2134
- {
2135
- "epoch": 7.473684210526316,
2136
- "eval_loss": 0.204229936003685,
2137
- "eval_runtime": 3.398,
2138
- "eval_samples_per_second": 8.829,
2139
- "eval_steps_per_second": 1.177,
2140
- "step": 142
2141
- },
2142
- {
2143
- "epoch": 7.526315789473684,
2144
- "grad_norm": 0.8928307890892029,
2145
- "learning_rate": 0.0002183351430834358,
2146
- "loss": 0.2218,
2147
- "step": 143
2148
- },
2149
- {
2150
- "epoch": 7.526315789473684,
2151
- "eval_loss": 0.197996586561203,
2152
- "eval_runtime": 3.3992,
2153
- "eval_samples_per_second": 8.826,
2154
- "eval_steps_per_second": 1.177,
2155
- "step": 143
2156
- },
2157
- {
2158
- "epoch": 7.578947368421053,
2159
- "grad_norm": 0.683783769607544,
2160
- "learning_rate": 0.0002150228680081079,
2161
- "loss": 0.1496,
2162
- "step": 144
2163
- },
2164
- {
2165
- "epoch": 7.578947368421053,
2166
- "eval_loss": 0.19135157763957977,
2167
- "eval_runtime": 3.4077,
2168
- "eval_samples_per_second": 8.804,
2169
- "eval_steps_per_second": 1.174,
2170
- "step": 144
2171
- },
2172
- {
2173
- "epoch": 7.631578947368421,
2174
- "grad_norm": 0.7701078653335571,
2175
- "learning_rate": 0.00021171684382123,
2176
- "loss": 0.2014,
2177
- "step": 145
2178
- },
2179
- {
2180
- "epoch": 7.631578947368421,
2181
- "eval_loss": 0.1854608803987503,
2182
- "eval_runtime": 3.4029,
2183
- "eval_samples_per_second": 8.816,
2184
- "eval_steps_per_second": 1.175,
2185
- "step": 145
2186
- },
2187
- {
2188
- "epoch": 7.684210526315789,
2189
- "grad_norm": 0.9109010696411133,
2190
- "learning_rate": 0.0002084176613542175,
2191
- "loss": 0.191,
2192
- "step": 146
2193
- },
2194
- {
2195
- "epoch": 7.684210526315789,
2196
- "eval_loss": 0.1755831390619278,
2197
- "eval_runtime": 3.4062,
2198
- "eval_samples_per_second": 8.807,
2199
- "eval_steps_per_second": 1.174,
2200
- "step": 146
2201
- },
2202
- {
2203
- "epoch": 7.7368421052631575,
2204
- "grad_norm": 0.9243440628051758,
2205
- "learning_rate": 0.00020512591021577773,
2206
- "loss": 0.1477,
2207
- "step": 147
2208
- },
2209
- {
2210
- "epoch": 7.7368421052631575,
2211
- "eval_loss": 0.17130498588085175,
2212
- "eval_runtime": 3.3849,
2213
- "eval_samples_per_second": 8.863,
2214
- "eval_steps_per_second": 1.182,
2215
- "step": 147
2216
- },
2217
- {
2218
- "epoch": 7.7894736842105265,
2219
- "grad_norm": 0.6701480746269226,
2220
- "learning_rate": 0.00020184217868653867,
2221
- "loss": 0.1978,
2222
- "step": 148
2223
- },
2224
- {
2225
- "epoch": 7.7894736842105265,
2226
- "eval_loss": 0.16958914697170258,
2227
- "eval_runtime": 3.3829,
2228
- "eval_samples_per_second": 8.868,
2229
- "eval_steps_per_second": 1.182,
2230
- "step": 148
2231
- },
2232
- {
2233
- "epoch": 7.842105263157895,
2234
- "grad_norm": 0.6767657399177551,
2235
- "learning_rate": 0.0001985670536139151,
2236
- "loss": 0.2179,
2237
- "step": 149
2238
- },
2239
- {
2240
- "epoch": 7.842105263157895,
2241
- "eval_loss": 0.16378562152385712,
2242
- "eval_runtime": 3.3828,
2243
- "eval_samples_per_second": 8.868,
2244
- "eval_steps_per_second": 1.182,
2245
- "step": 149
2246
- },
2247
- {
2248
- "epoch": 7.894736842105263,
2249
- "grad_norm": 0.6448670625686646,
2250
- "learning_rate": 0.0001953011203072312,
2251
- "loss": 0.2025,
2252
- "step": 150
2253
- },
2254
- {
2255
- "epoch": 7.894736842105263,
2256
- "eval_loss": 0.15805380046367645,
2257
- "eval_runtime": 3.3987,
2258
- "eval_samples_per_second": 8.827,
2259
- "eval_steps_per_second": 1.177,
2260
- "step": 150
2261
- },
2262
- {
2263
- "epoch": 7.947368421052632,
2264
- "grad_norm": 0.87026047706604,
2265
- "learning_rate": 0.00019204496243311792,
2266
- "loss": 0.2653,
2267
- "step": 151
2268
- },
2269
- {
2270
- "epoch": 7.947368421052632,
2271
- "eval_loss": 0.14828962087631226,
2272
- "eval_runtime": 3.3946,
2273
- "eval_samples_per_second": 8.838,
2274
- "eval_steps_per_second": 1.178,
2275
- "step": 151
2276
- },
2277
- {
2278
- "epoch": 8.0,
2279
- "grad_norm": 0.746687114238739,
2280
- "learning_rate": 0.00018879916191120349,
2281
- "loss": 0.2052,
2282
- "step": 152
2283
- },
2284
- {
2285
- "epoch": 8.0,
2286
- "eval_loss": 0.1411527693271637,
2287
- "eval_runtime": 3.3931,
2288
- "eval_samples_per_second": 8.841,
2289
- "eval_steps_per_second": 1.179,
2290
- "step": 152
2291
- },
2292
- {
2293
- "epoch": 8.052631578947368,
2294
- "grad_norm": 0.47239571809768677,
2295
- "learning_rate": 0.00018556429881011656,
2296
- "loss": 0.1007,
2297
- "step": 153
2298
- },
2299
- {
2300
- "epoch": 8.052631578947368,
2301
- "eval_loss": 0.13516879081726074,
2302
- "eval_runtime": 3.3993,
2303
- "eval_samples_per_second": 8.825,
2304
- "eval_steps_per_second": 1.177,
2305
- "step": 153
2306
- },
2307
- {
2308
- "epoch": 8.105263157894736,
2309
- "grad_norm": 0.8439627289772034,
2310
- "learning_rate": 0.0001823409512438203,
2311
- "loss": 0.1246,
2312
- "step": 154
2313
- },
2314
- {
2315
- "epoch": 8.105263157894736,
2316
- "eval_loss": 0.13015992939472198,
2317
- "eval_runtime": 3.3942,
2318
- "eval_samples_per_second": 8.839,
2319
- "eval_steps_per_second": 1.178,
2320
- "step": 154
2321
- },
2322
- {
2323
- "epoch": 8.157894736842104,
2324
- "grad_norm": 0.6233652830123901,
2325
- "learning_rate": 0.00017912969526829559,
2326
- "loss": 0.0809,
2327
- "step": 155
2328
- },
2329
- {
2330
- "epoch": 8.157894736842104,
2331
- "eval_loss": 0.12686298787593842,
2332
- "eval_runtime": 3.3971,
2333
- "eval_samples_per_second": 8.831,
2334
- "eval_steps_per_second": 1.177,
2335
- "step": 155
2336
- },
2337
- {
2338
- "epoch": 8.210526315789474,
2339
- "grad_norm": 0.7331376075744629,
2340
- "learning_rate": 0.00017593110477859153,
2341
- "loss": 0.0948,
2342
- "step": 156
2343
- },
2344
- {
2345
- "epoch": 8.210526315789474,
2346
- "eval_loss": 0.12066776305437088,
2347
- "eval_runtime": 3.3964,
2348
- "eval_samples_per_second": 8.833,
2349
- "eval_steps_per_second": 1.178,
2350
- "step": 156
2351
- },
2352
- {
2353
- "epoch": 8.263157894736842,
2354
- "grad_norm": 0.7566715478897095,
2355
- "learning_rate": 0.00017274575140626317,
2356
- "loss": 0.1052,
2357
- "step": 157
2358
- },
2359
- {
2360
- "epoch": 8.263157894736842,
2361
- "eval_loss": 0.1153416633605957,
2362
- "eval_runtime": 3.3936,
2363
- "eval_samples_per_second": 8.84,
2364
- "eval_steps_per_second": 1.179,
2365
- "step": 157
2366
- },
2367
- {
2368
- "epoch": 8.31578947368421,
2369
- "grad_norm": 0.5211192965507507,
2370
- "learning_rate": 0.00016957420441721284,
2371
- "loss": 0.0584,
2372
- "step": 158
2373
- },
2374
- {
2375
- "epoch": 8.31578947368421,
2376
- "eval_loss": 0.10957438498735428,
2377
- "eval_runtime": 3.3951,
2378
- "eval_samples_per_second": 8.836,
2379
- "eval_steps_per_second": 1.178,
2380
- "step": 158
2381
- },
2382
- {
2383
- "epoch": 8.368421052631579,
2384
- "grad_norm": 0.7941140532493591,
2385
- "learning_rate": 0.00016641703060995457,
2386
- "loss": 0.1393,
2387
- "step": 159
2388
- },
2389
- {
2390
- "epoch": 8.368421052631579,
2391
- "eval_loss": 0.1009925901889801,
2392
- "eval_runtime": 3.3908,
2393
- "eval_samples_per_second": 8.847,
2394
- "eval_steps_per_second": 1.18,
2395
- "step": 159
2396
- },
2397
- {
2398
- "epoch": 8.421052631578947,
2399
- "grad_norm": 0.7772736549377441,
2400
- "learning_rate": 0.00016327479421431983,
2401
- "loss": 0.1284,
2402
- "step": 160
2403
- },
2404
- {
2405
- "epoch": 8.421052631578947,
2406
- "eval_loss": 0.094593845307827,
2407
- "eval_runtime": 3.3985,
2408
- "eval_samples_per_second": 8.827,
2409
- "eval_steps_per_second": 1.177,
2410
- "step": 160
2411
- },
2412
- {
2413
- "epoch": 8.473684210526315,
2414
- "grad_norm": 0.8724604845046997,
2415
- "learning_rate": 0.00016014805679062183,
2416
- "loss": 0.1518,
2417
- "step": 161
2418
- },
2419
- {
2420
- "epoch": 8.473684210526315,
2421
- "eval_loss": 0.0894516333937645,
2422
- "eval_runtime": 3.3955,
2423
- "eval_samples_per_second": 8.835,
2424
- "eval_steps_per_second": 1.178,
2425
- "step": 161
2426
- },
2427
- {
2428
- "epoch": 8.526315789473685,
2429
- "grad_norm": 0.7179498672485352,
2430
- "learning_rate": 0.0001570373771292967,
2431
- "loss": 0.1107,
2432
- "step": 162
2433
- },
2434
- {
2435
- "epoch": 8.526315789473685,
2436
- "eval_loss": 0.0845918357372284,
2437
- "eval_runtime": 3.4033,
2438
- "eval_samples_per_second": 8.815,
2439
- "eval_steps_per_second": 1.175,
2440
- "step": 162
2441
- },
2442
- {
2443
- "epoch": 8.578947368421053,
2444
- "grad_norm": 0.6780802607536316,
2445
- "learning_rate": 0.00015394331115104075,
2446
- "loss": 0.0997,
2447
- "step": 163
2448
- },
2449
- {
2450
- "epoch": 8.578947368421053,
2451
- "eval_loss": 0.08240295946598053,
2452
- "eval_runtime": 3.402,
2453
- "eval_samples_per_second": 8.818,
2454
- "eval_steps_per_second": 1.176,
2455
- "step": 163
2456
- },
2457
- {
2458
- "epoch": 8.631578947368421,
2459
- "grad_norm": 0.6817135810852051,
2460
- "learning_rate": 0.00015086641180745932,
2461
- "loss": 0.1156,
2462
- "step": 164
2463
- },
2464
- {
2465
- "epoch": 8.631578947368421,
2466
- "eval_loss": 0.07952894270420074,
2467
- "eval_runtime": 3.4067,
2468
- "eval_samples_per_second": 8.806,
2469
- "eval_steps_per_second": 1.174,
2470
- "step": 164
2471
- },
2472
- {
2473
- "epoch": 8.68421052631579,
2474
- "grad_norm": 0.7739869356155396,
2475
- "learning_rate": 0.00014780722898224708,
2476
- "loss": 0.1247,
2477
- "step": 165
2478
- },
2479
- {
2480
- "epoch": 8.68421052631579,
2481
- "eval_loss": 0.07561580091714859,
2482
- "eval_runtime": 3.4045,
2483
- "eval_samples_per_second": 8.812,
2484
- "eval_steps_per_second": 1.175,
2485
- "step": 165
2486
- },
2487
- {
2488
- "epoch": 8.736842105263158,
2489
- "grad_norm": 0.8046780228614807,
2490
- "learning_rate": 0.0001447663093929163,
2491
- "loss": 0.1085,
2492
- "step": 166
2493
- },
2494
- {
2495
- "epoch": 8.736842105263158,
2496
- "eval_loss": 0.07319317758083344,
2497
- "eval_runtime": 3.3994,
2498
- "eval_samples_per_second": 8.825,
2499
- "eval_steps_per_second": 1.177,
2500
- "step": 166
2501
- },
2502
- {
2503
- "epoch": 8.789473684210526,
2504
- "grad_norm": 0.6103046536445618,
2505
- "learning_rate": 0.00014174419649309089,
2506
- "loss": 0.0832,
2507
- "step": 167
2508
- },
2509
- {
2510
- "epoch": 8.789473684210526,
2511
- "eval_loss": 0.07252493500709534,
2512
- "eval_runtime": 3.4001,
2513
- "eval_samples_per_second": 8.823,
2514
- "eval_steps_per_second": 1.176,
2515
- "step": 167
2516
- },
2517
- {
2518
- "epoch": 8.842105263157894,
2519
- "grad_norm": 0.6907472610473633,
2520
- "learning_rate": 0.00013874143037538418,
2521
- "loss": 0.1031,
2522
- "step": 168
2523
- },
2524
- {
2525
- "epoch": 8.842105263157894,
2526
- "eval_loss": 0.07177206873893738,
2527
- "eval_runtime": 3.4044,
2528
- "eval_samples_per_second": 8.812,
2529
- "eval_steps_per_second": 1.175,
2530
- "step": 168
2531
- },
2532
- {
2533
- "epoch": 8.894736842105264,
2534
- "grad_norm": 0.6837093830108643,
2535
- "learning_rate": 0.0001357585476748766,
2536
- "loss": 0.1074,
2537
- "step": 169
2538
- },
2539
- {
2540
- "epoch": 8.894736842105264,
2541
- "eval_loss": 0.06924725323915482,
2542
- "eval_runtime": 3.4019,
2543
- "eval_samples_per_second": 8.819,
2544
- "eval_steps_per_second": 1.176,
2545
- "step": 169
2546
- },
2547
- {
2548
- "epoch": 8.947368421052632,
2549
- "grad_norm": 0.5226811766624451,
2550
- "learning_rate": 0.00013279608147321223,
2551
- "loss": 0.0467,
2552
- "step": 170
2553
- },
2554
- {
2555
- "epoch": 8.947368421052632,
2556
- "eval_loss": 0.06760647892951965,
2557
- "eval_runtime": 3.3966,
2558
- "eval_samples_per_second": 8.832,
2559
- "eval_steps_per_second": 1.178,
2560
- "step": 170
2561
- },
2562
- {
2563
- "epoch": 9.0,
2564
- "grad_norm": 0.8332634568214417,
2565
- "learning_rate": 0.00012985456120332905,
2566
- "loss": 0.1137,
2567
- "step": 171
2568
- },
2569
- {
2570
- "epoch": 9.0,
2571
- "eval_loss": 0.06686952710151672,
2572
- "eval_runtime": 3.3907,
2573
- "eval_samples_per_second": 8.848,
2574
- "eval_steps_per_second": 1.18,
2575
- "step": 171
2576
- },
2577
- {
2578
- "epoch": 9.052631578947368,
2579
- "grad_norm": 0.4633868932723999,
2580
- "learning_rate": 0.00012693451255484312,
2581
- "loss": 0.0353,
2582
- "step": 172
2583
- },
2584
- {
2585
- "epoch": 9.052631578947368,
2586
- "eval_loss": 0.06244245544075966,
2587
- "eval_runtime": 3.3982,
2588
- "eval_samples_per_second": 8.828,
2589
- "eval_steps_per_second": 1.177,
2590
- "step": 172
2591
- },
2592
- {
2593
- "epoch": 9.105263157894736,
2594
- "grad_norm": 0.7089731693267822,
2595
- "learning_rate": 0.00012403645738009997,
2596
- "loss": 0.059,
2597
- "step": 173
2598
- },
2599
- {
2600
- "epoch": 9.105263157894736,
2601
- "eval_loss": 0.05555792525410652,
2602
- "eval_runtime": 3.3985,
2603
- "eval_samples_per_second": 8.828,
2604
- "eval_steps_per_second": 1.177,
2605
- "step": 173
2606
- },
2607
- {
2608
- "epoch": 9.157894736842104,
2609
- "grad_norm": 0.570846438407898,
2610
- "learning_rate": 0.00012116091360091261,
2611
- "loss": 0.0545,
2612
- "step": 174
2613
- },
2614
- {
2615
- "epoch": 9.157894736842104,
2616
- "eval_loss": 0.052096955478191376,
2617
- "eval_runtime": 3.3964,
2618
- "eval_samples_per_second": 8.833,
2619
- "eval_steps_per_second": 1.178,
2620
- "step": 174
2621
- },
2622
- {
2623
- "epoch": 9.210526315789474,
2624
- "grad_norm": 0.4484975337982178,
2625
- "learning_rate": 0.00011830839511600211,
2626
- "loss": 0.0334,
2627
- "step": 175
2628
- },
2629
- {
2630
- "epoch": 9.210526315789474,
2631
- "eval_loss": 0.05176297202706337,
2632
- "eval_runtime": 3.4033,
2633
- "eval_samples_per_second": 8.815,
2634
- "eval_steps_per_second": 1.175,
2635
- "step": 175
2636
- },
2637
- {
2638
- "epoch": 9.263157894736842,
2639
- "grad_norm": 0.677650511264801,
2640
- "learning_rate": 0.00011547941170915685,
2641
- "loss": 0.0503,
2642
- "step": 176
2643
- },
2644
- {
2645
- "epoch": 9.263157894736842,
2646
- "eval_loss": 0.05027133598923683,
2647
- "eval_runtime": 3.4064,
2648
- "eval_samples_per_second": 8.807,
2649
- "eval_steps_per_second": 1.174,
2650
- "step": 176
2651
- },
2652
- {
2653
- "epoch": 9.31578947368421,
2654
- "grad_norm": 0.5817425847053528,
2655
- "learning_rate": 0.00011267446895812702,
2656
- "loss": 0.0293,
2657
- "step": 177
2658
- },
2659
- {
2660
- "epoch": 9.31578947368421,
2661
- "eval_loss": 0.049430813640356064,
2662
- "eval_runtime": 3.4086,
2663
- "eval_samples_per_second": 8.801,
2664
- "eval_steps_per_second": 1.174,
2665
- "step": 177
2666
- },
2667
- {
2668
- "epoch": 9.368421052631579,
2669
- "grad_norm": 0.970379114151001,
2670
- "learning_rate": 0.0001098940681442713,
2671
- "loss": 0.0679,
2672
- "step": 178
2673
- },
2674
- {
2675
- "epoch": 9.368421052631579,
2676
- "eval_loss": 0.04337286949157715,
2677
- "eval_runtime": 3.4,
2678
- "eval_samples_per_second": 8.824,
2679
- "eval_steps_per_second": 1.176,
2680
- "step": 178
2681
- },
2682
- {
2683
- "epoch": 9.421052631578947,
2684
- "grad_norm": 0.4084687829017639,
2685
- "learning_rate": 0.00010713870616297092,
2686
- "loss": 0.0262,
2687
- "step": 179
2688
- },
2689
- {
2690
- "epoch": 9.421052631578947,
2691
- "eval_loss": 0.03992774710059166,
2692
- "eval_runtime": 3.4026,
2693
- "eval_samples_per_second": 8.817,
2694
- "eval_steps_per_second": 1.176,
2695
- "step": 179
2696
- },
2697
- {
2698
- "epoch": 9.473684210526315,
2699
- "grad_norm": 0.650490939617157,
2700
- "learning_rate": 0.00010440887543482746,
2701
- "loss": 0.0407,
2702
- "step": 180
2703
- },
2704
- {
2705
- "epoch": 9.473684210526315,
2706
- "eval_loss": 0.037015657871961594,
2707
- "eval_runtime": 3.4003,
2708
- "eval_samples_per_second": 8.823,
2709
- "eval_steps_per_second": 1.176,
2710
- "step": 180
2711
  }
2712
  ],
2713
  "logging_steps": 1,
@@ -2727,7 +1527,7 @@
2727
  "attributes": {}
2728
  }
2729
  },
2730
- "total_flos": 7311440876433408.0,
2731
  "train_batch_size": 1,
2732
  "trial_name": null,
2733
  "trial_params": null
 
1
  {
2
+ "best_global_step": 99,
3
+ "best_metric": 0.43201857805252075,
4
+ "best_model_checkpoint": "/content/drive/MyDrive/lora_model/outputs/task15_microsoft/Phi-4-mini-instruct/checkpoint-90",
5
+ "epoch": 5.2631578947368425,
6
  "eval_steps": 1,
7
+ "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1508
  "eval_samples_per_second": 8.824,
1509
  "eval_steps_per_second": 1.177,
1510
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1511
  }
1512
  ],
1513
  "logging_steps": 1,
 
1527
  "attributes": {}
1528
  }
1529
  },
1530
+ "total_flos": 4061879153080320.0,
1531
  "train_batch_size": 1,
1532
  "trial_name": null,
1533
  "trial_params": null