FormlessAI commited on
Commit
2d8ba74
·
verified ·
1 Parent(s): 078d1d9

Training in progress, epoch 5, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9b3d5ef8dbdcdd390a28e8037a9c883e8a9e8cc1df2bfd56b85381a25d34c5d
3
  size 447329696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d36bb55f9ac1df2f0d25ce2eeb81398e77802712a9578fe7f76ff563ea24649
3
  size 447329696
last-checkpoint/global_step696/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4c7162362fba71e07486a810c54ff80de80f30a28d703d942d1b997fc7cb8f8
3
+ size 337225765
last-checkpoint/global_step696/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:443d6b0cb79a60396548ba9571603afc505a2e142118cf17253eb8cca82fab00
3
+ size 337225893
last-checkpoint/global_step696/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6c2721131a9b37b14e647c7842fa21c9aa750d5900edff2f7a8789d446cfd01
3
+ size 337225893
last-checkpoint/global_step696/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef5b68eb08ee3435b3171510705fb9555af98adb337cee17a512add33cdb6165
3
+ size 337225893
last-checkpoint/global_step696/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:019144f749e09b782b7717eb00845b206f2fa622efbcf032ef99a7cdb1d5e08b
3
+ size 2282578149
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step497
 
1
+ global_step696
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f34c4cbedf904399f4af645d3a28b5df479b137f15e45ec6f64752366a021c2a
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9eb01ae7bebb6b2a69fa94159506146b6a17d03c942454e369b782eddbb3636
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b258dea490f3c111fd5c3c5d465fe0d908ad66054e60aeaedacb677d1550c2d
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:298868c6850dcaf75951547384b919258d47c4f21a1ad448e8ee66c8bb469ab8
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a7cca18d789646595df1226a296f57b919871ef7b43fbc9c48167cc9298caf0
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2a2670a19da7fa45f48be9e5fd15b26c73f5d6e0697c656b72c25cc7434cdf5
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:974d503ff1933efc14c242d9a07591e593c3b2292a817c5631a35c3194ee17f5
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de3342513d449cf4f76bef1f9580a412a0169f63933fafeee84531d02a02cbab
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f9cdb2030c3839622af843b226922b0fa7518a706f7fba0868523f0d6419afe
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:272c412581b4c5ddc60c3fc9367c287dd2275d591e95dd9031ee094fa9346284
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.014533035457134247,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.5745062836624775,
6
  "eval_steps": 50,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1668,6 +1668,670 @@
1668
  "eval_samples_per_second": 31.219,
1669
  "eval_steps_per_second": 1.988,
1670
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1671
  }
1672
  ],
1673
  "logging_steps": 5,
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.009149392135441303,
4
  "best_model_checkpoint": null,
5
+ "epoch": 5.0,
6
  "eval_steps": 50,
7
+ "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1668
  "eval_samples_per_second": 31.219,
1669
  "eval_steps_per_second": 1.988,
1670
  "step": 500
1671
+ },
1672
+ {
1673
+ "epoch": 3.6104129263913824,
1674
+ "grad_norm": 0.02227591536939144,
1675
+ "learning_rate": 0.00011329185086309797,
1676
+ "logits/chosen": -5.440625190734863,
1677
+ "logits/rejected": -6.892187595367432,
1678
+ "logps/chosen": -263.04998779296875,
1679
+ "logps/rejected": -438.5,
1680
+ "loss": 0.0284,
1681
+ "rewards/accuracies": 0.9906250238418579,
1682
+ "rewards/chosen": -19.5625,
1683
+ "rewards/margins": 16.524999618530273,
1684
+ "rewards/rejected": -36.099998474121094,
1685
+ "step": 505
1686
+ },
1687
+ {
1688
+ "epoch": 3.646319569120287,
1689
+ "grad_norm": 0.263701468706131,
1690
+ "learning_rate": 0.00011328024147954178,
1691
+ "logits/chosen": -6.275000095367432,
1692
+ "logits/rejected": -7.809374809265137,
1693
+ "logps/chosen": -318.5,
1694
+ "logps/rejected": -494.5,
1695
+ "loss": 0.0307,
1696
+ "rewards/accuracies": 0.987500011920929,
1697
+ "rewards/chosen": -25.049999237060547,
1698
+ "rewards/margins": 17.087499618530273,
1699
+ "rewards/rejected": -42.13750076293945,
1700
+ "step": 510
1701
+ },
1702
+ {
1703
+ "epoch": 3.682226211849192,
1704
+ "grad_norm": 0.08680078387260437,
1705
+ "learning_rate": 0.00011326849010673409,
1706
+ "logits/chosen": -7.175000190734863,
1707
+ "logits/rejected": -8.606249809265137,
1708
+ "logps/chosen": -344.70001220703125,
1709
+ "logps/rejected": -576.4000244140625,
1710
+ "loss": 0.0187,
1711
+ "rewards/accuracies": 0.9937499761581421,
1712
+ "rewards/chosen": -27.662500381469727,
1713
+ "rewards/margins": 22.403125762939453,
1714
+ "rewards/rejected": -50.0625,
1715
+ "step": 515
1716
+ },
1717
+ {
1718
+ "epoch": 3.718132854578097,
1719
+ "grad_norm": 0.3637928068637848,
1720
+ "learning_rate": 0.00011325659677425894,
1721
+ "logits/chosen": -6.360937595367432,
1722
+ "logits/rejected": -8.0078125,
1723
+ "logps/chosen": -321.70001220703125,
1724
+ "logps/rejected": -571.2999877929688,
1725
+ "loss": 0.0563,
1726
+ "rewards/accuracies": 0.987500011920929,
1727
+ "rewards/chosen": -25.375,
1728
+ "rewards/margins": 24.231250762939453,
1729
+ "rewards/rejected": -49.5625,
1730
+ "step": 520
1731
+ },
1732
+ {
1733
+ "epoch": 3.754039497307002,
1734
+ "grad_norm": 0.06884779036045074,
1735
+ "learning_rate": 0.00011324456151205779,
1736
+ "logits/chosen": -5.998437404632568,
1737
+ "logits/rejected": -7.151562690734863,
1738
+ "logps/chosen": -310.5,
1739
+ "logps/rejected": -512.7999877929688,
1740
+ "loss": 0.0412,
1741
+ "rewards/accuracies": 0.984375,
1742
+ "rewards/chosen": -24.200000762939453,
1743
+ "rewards/margins": 19.4375,
1744
+ "rewards/rejected": -43.63750076293945,
1745
+ "step": 525
1746
+ },
1747
+ {
1748
+ "epoch": 3.789946140035907,
1749
+ "grad_norm": 0.19923055171966553,
1750
+ "learning_rate": 0.0001132323843504294,
1751
+ "logits/chosen": -6.621874809265137,
1752
+ "logits/rejected": -7.451562404632568,
1753
+ "logps/chosen": -316.8999938964844,
1754
+ "logps/rejected": -483.79998779296875,
1755
+ "loss": 0.0418,
1756
+ "rewards/accuracies": 0.987500011920929,
1757
+ "rewards/chosen": -24.6875,
1758
+ "rewards/margins": 16.034374237060547,
1759
+ "rewards/rejected": -40.724998474121094,
1760
+ "step": 530
1761
+ },
1762
+ {
1763
+ "epoch": 3.8258527827648114,
1764
+ "grad_norm": 0.025405047461390495,
1765
+ "learning_rate": 0.00011322006532002976,
1766
+ "logits/chosen": -7.5390625,
1767
+ "logits/rejected": -8.484375,
1768
+ "logps/chosen": -325.1000061035156,
1769
+ "logps/rejected": -495.0,
1770
+ "loss": 0.0575,
1771
+ "rewards/accuracies": 0.9781249761581421,
1772
+ "rewards/chosen": -25.862499237060547,
1773
+ "rewards/margins": 16.359375,
1774
+ "rewards/rejected": -42.224998474121094,
1775
+ "step": 535
1776
+ },
1777
+ {
1778
+ "epoch": 3.8617594254937164,
1779
+ "grad_norm": 0.7414963841438293,
1780
+ "learning_rate": 0.00011320760445187202,
1781
+ "logits/chosen": -8.795312881469727,
1782
+ "logits/rejected": -10.037500381469727,
1783
+ "logps/chosen": -355.8999938964844,
1784
+ "logps/rejected": -577.0,
1785
+ "loss": 0.0196,
1786
+ "rewards/accuracies": 0.9906250238418579,
1787
+ "rewards/chosen": -28.868749618530273,
1788
+ "rewards/margins": 21.512500762939453,
1789
+ "rewards/rejected": -50.38750076293945,
1790
+ "step": 540
1791
+ },
1792
+ {
1793
+ "epoch": 3.8976660682226214,
1794
+ "grad_norm": 2.0291175842285156,
1795
+ "learning_rate": 0.00011319500177732639,
1796
+ "logits/chosen": -8.653124809265137,
1797
+ "logits/rejected": -9.693750381469727,
1798
+ "logps/chosen": -344.0,
1799
+ "logps/rejected": -551.2999877929688,
1800
+ "loss": 0.0377,
1801
+ "rewards/accuracies": 0.981249988079071,
1802
+ "rewards/chosen": -27.618749618530273,
1803
+ "rewards/margins": 19.924999237060547,
1804
+ "rewards/rejected": -47.537498474121094,
1805
+ "step": 545
1806
+ },
1807
+ {
1808
+ "epoch": 3.933572710951526,
1809
+ "grad_norm": 0.06643925607204437,
1810
+ "learning_rate": 0.00011318225732812008,
1811
+ "logits/chosen": -6.587500095367432,
1812
+ "logits/rejected": -7.754687309265137,
1813
+ "logps/chosen": -280.04998779296875,
1814
+ "logps/rejected": -456.8999938964844,
1815
+ "loss": 0.033,
1816
+ "rewards/accuracies": 0.9906250238418579,
1817
+ "rewards/chosen": -21.100000381469727,
1818
+ "rewards/margins": 17.134374618530273,
1819
+ "rewards/rejected": -38.25,
1820
+ "step": 550
1821
+ },
1822
+ {
1823
+ "epoch": 3.933572710951526,
1824
+ "eval_logits/chosen": -5.768382549285889,
1825
+ "eval_logits/rejected": -6.928308963775635,
1826
+ "eval_logps/chosen": -264.4117736816406,
1827
+ "eval_logps/rejected": -437.76470947265625,
1828
+ "eval_loss": 0.04973261430859566,
1829
+ "eval_rewards/accuracies": 0.9852941036224365,
1830
+ "eval_rewards/chosen": -19.34558868408203,
1831
+ "eval_rewards/margins": 16.566177368164062,
1832
+ "eval_rewards/rejected": -35.89706039428711,
1833
+ "eval_runtime": 8.691,
1834
+ "eval_samples_per_second": 30.721,
1835
+ "eval_steps_per_second": 1.956,
1836
+ "step": 550
1837
+ },
1838
+ {
1839
+ "epoch": 3.969479353680431,
1840
+ "grad_norm": 0.709564208984375,
1841
+ "learning_rate": 0.00011316937113633724,
1842
+ "logits/chosen": -5.145312309265137,
1843
+ "logits/rejected": -6.15625,
1844
+ "logps/chosen": -254.3000030517578,
1845
+ "logps/rejected": -400.3999938964844,
1846
+ "loss": 0.0689,
1847
+ "rewards/accuracies": 0.965624988079071,
1848
+ "rewards/chosen": -18.596874237060547,
1849
+ "rewards/margins": 13.71875,
1850
+ "rewards/rejected": -32.318748474121094,
1851
+ "step": 555
1852
+ },
1853
+ {
1854
+ "epoch": 4.0,
1855
+ "grad_norm": 0.014319206587970257,
1856
+ "learning_rate": 0.00011315634323441883,
1857
+ "logits/chosen": -4.669117450714111,
1858
+ "logits/rejected": -5.674632549285889,
1859
+ "logps/chosen": -250.88235473632812,
1860
+ "logps/rejected": -388.23529052734375,
1861
+ "loss": 0.0647,
1862
+ "rewards/accuracies": 0.9632353186607361,
1863
+ "rewards/chosen": -18.419116973876953,
1864
+ "rewards/margins": 12.941176414489746,
1865
+ "rewards/rejected": -31.33823585510254,
1866
+ "step": 560
1867
+ },
1868
+ {
1869
+ "epoch": 4.0359066427289045,
1870
+ "grad_norm": 1.3408515453338623,
1871
+ "learning_rate": 0.0001131431736551626,
1872
+ "logits/chosen": -4.814062595367432,
1873
+ "logits/rejected": -6.176562309265137,
1874
+ "logps/chosen": -249.4499969482422,
1875
+ "logps/rejected": -412.79998779296875,
1876
+ "loss": 0.0462,
1877
+ "rewards/accuracies": 0.9750000238418579,
1878
+ "rewards/chosen": -18.274999618530273,
1879
+ "rewards/margins": 15.787500381469727,
1880
+ "rewards/rejected": -34.068748474121094,
1881
+ "step": 565
1882
+ },
1883
+ {
1884
+ "epoch": 4.07181328545781,
1885
+ "grad_norm": 0.39796170592308044,
1886
+ "learning_rate": 0.00011312986243172293,
1887
+ "logits/chosen": -4.244531154632568,
1888
+ "logits/rejected": -6.751562595367432,
1889
+ "logps/chosen": -225.6999969482422,
1890
+ "logps/rejected": -486.6000061035156,
1891
+ "loss": 0.013,
1892
+ "rewards/accuracies": 0.996874988079071,
1893
+ "rewards/chosen": -15.662500381469727,
1894
+ "rewards/margins": 25.412500381469727,
1895
+ "rewards/rejected": -41.087501525878906,
1896
+ "step": 570
1897
+ },
1898
+ {
1899
+ "epoch": 4.1077199281867145,
1900
+ "grad_norm": 2.608715772628784,
1901
+ "learning_rate": 0.00011311640959761081,
1902
+ "logits/chosen": -3.823437452316284,
1903
+ "logits/rejected": -7.4375,
1904
+ "logps/chosen": -240.14999389648438,
1905
+ "logps/rejected": -562.5999755859375,
1906
+ "loss": 0.053,
1907
+ "rewards/accuracies": 0.987500011920929,
1908
+ "rewards/chosen": -17.225000381469727,
1909
+ "rewards/margins": 31.493749618530273,
1910
+ "rewards/rejected": -48.75,
1911
+ "step": 575
1912
+ },
1913
+ {
1914
+ "epoch": 4.143626570915619,
1915
+ "grad_norm": 2.569322347640991,
1916
+ "learning_rate": 0.00011310281518669376,
1917
+ "logits/chosen": -5.467187404632568,
1918
+ "logits/rejected": -8.5078125,
1919
+ "logps/chosen": -370.6000061035156,
1920
+ "logps/rejected": -627.0,
1921
+ "loss": 0.0235,
1922
+ "rewards/accuracies": 0.984375,
1923
+ "rewards/chosen": -30.15625,
1924
+ "rewards/margins": 25.225000381469727,
1925
+ "rewards/rejected": -55.38750076293945,
1926
+ "step": 580
1927
+ },
1928
+ {
1929
+ "epoch": 4.1795332136445245,
1930
+ "grad_norm": 7.271477699279785,
1931
+ "learning_rate": 0.00011308907923319566,
1932
+ "logits/chosen": -4.629687309265137,
1933
+ "logits/rejected": -7.557812690734863,
1934
+ "logps/chosen": -309.3500061035156,
1935
+ "logps/rejected": -587.4000244140625,
1936
+ "loss": 0.0519,
1937
+ "rewards/accuracies": 0.987500011920929,
1938
+ "rewards/chosen": -24.274999618530273,
1939
+ "rewards/margins": 27.075000762939453,
1940
+ "rewards/rejected": -51.337501525878906,
1941
+ "step": 585
1942
+ },
1943
+ {
1944
+ "epoch": 4.215439856373429,
1945
+ "grad_norm": 4.082700729370117,
1946
+ "learning_rate": 0.00011307520177169676,
1947
+ "logits/chosen": -4.9609375,
1948
+ "logits/rejected": -7.151562690734863,
1949
+ "logps/chosen": -320.79998779296875,
1950
+ "logps/rejected": -574.0,
1951
+ "loss": 0.0997,
1952
+ "rewards/accuracies": 0.971875011920929,
1953
+ "rewards/chosen": -25.225000381469727,
1954
+ "rewards/margins": 25.018749237060547,
1955
+ "rewards/rejected": -50.23749923706055,
1956
+ "step": 590
1957
+ },
1958
+ {
1959
+ "epoch": 4.2513464991023335,
1960
+ "grad_norm": 1.2855074405670166,
1961
+ "learning_rate": 0.00011306118283713357,
1962
+ "logits/chosen": -3.370312452316284,
1963
+ "logits/rejected": -4.561718940734863,
1964
+ "logps/chosen": -268.54998779296875,
1965
+ "logps/rejected": -507.8999938964844,
1966
+ "loss": 0.0289,
1967
+ "rewards/accuracies": 0.987500011920929,
1968
+ "rewards/chosen": -20.206249237060547,
1969
+ "rewards/margins": 23.3125,
1970
+ "rewards/rejected": -43.537498474121094,
1971
+ "step": 595
1972
+ },
1973
+ {
1974
+ "epoch": 4.287253141831239,
1975
+ "grad_norm": 0.21659362316131592,
1976
+ "learning_rate": 0.00011304702246479876,
1977
+ "logits/chosen": -3.285937547683716,
1978
+ "logits/rejected": -4.146093845367432,
1979
+ "logps/chosen": -270.3500061035156,
1980
+ "logps/rejected": -488.20001220703125,
1981
+ "loss": 0.0174,
1982
+ "rewards/accuracies": 0.9906250238418579,
1983
+ "rewards/chosen": -19.931249618530273,
1984
+ "rewards/margins": 21.193750381469727,
1985
+ "rewards/rejected": -41.125,
1986
+ "step": 600
1987
+ },
1988
+ {
1989
+ "epoch": 4.287253141831239,
1990
+ "eval_logits/chosen": -3.882352828979492,
1991
+ "eval_logits/rejected": -5.0,
1992
+ "eval_logps/chosen": -272.76470947265625,
1993
+ "eval_logps/rejected": -492.8235168457031,
1994
+ "eval_loss": 0.014545433223247528,
1995
+ "eval_rewards/accuracies": 0.9926470518112183,
1996
+ "eval_rewards/chosen": -20.176469802856445,
1997
+ "eval_rewards/margins": 21.330883026123047,
1998
+ "eval_rewards/rejected": -41.52941131591797,
1999
+ "eval_runtime": 8.5511,
2000
+ "eval_samples_per_second": 31.224,
2001
+ "eval_steps_per_second": 1.988,
2002
+ "step": 600
2003
+ },
2004
+ {
2005
+ "epoch": 4.3231597845601435,
2006
+ "grad_norm": 0.36118730902671814,
2007
+ "learning_rate": 0.00011303272069034098,
2008
+ "logits/chosen": -4.321875095367432,
2009
+ "logits/rejected": -5.332812309265137,
2010
+ "logps/chosen": -282.45001220703125,
2011
+ "logps/rejected": -488.0,
2012
+ "loss": 0.0449,
2013
+ "rewards/accuracies": 0.981249988079071,
2014
+ "rewards/chosen": -21.653125762939453,
2015
+ "rewards/margins": 19.787500381469727,
2016
+ "rewards/rejected": -41.45000076293945,
2017
+ "step": 605
2018
+ },
2019
+ {
2020
+ "epoch": 4.359066427289049,
2021
+ "grad_norm": 0.5386459231376648,
2022
+ "learning_rate": 0.00011301827754976498,
2023
+ "logits/chosen": -5.6015625,
2024
+ "logits/rejected": -6.984375,
2025
+ "logps/chosen": -302.29998779296875,
2026
+ "logps/rejected": -557.7000122070312,
2027
+ "loss": 0.0153,
2028
+ "rewards/accuracies": 0.9937499761581421,
2029
+ "rewards/chosen": -23.518749237060547,
2030
+ "rewards/margins": 24.784374237060547,
2031
+ "rewards/rejected": -48.29999923706055,
2032
+ "step": 610
2033
+ },
2034
+ {
2035
+ "epoch": 4.3949730700179535,
2036
+ "grad_norm": 4.88869571685791,
2037
+ "learning_rate": 0.00011300369307943137,
2038
+ "logits/chosen": -6.215624809265137,
2039
+ "logits/rejected": -7.949999809265137,
2040
+ "logps/chosen": -270.1000061035156,
2041
+ "logps/rejected": -572.2999877929688,
2042
+ "loss": 0.0224,
2043
+ "rewards/accuracies": 0.996874988079071,
2044
+ "rewards/chosen": -19.809375762939453,
2045
+ "rewards/margins": 29.512500762939453,
2046
+ "rewards/rejected": -49.3125,
2047
+ "step": 615
2048
+ },
2049
+ {
2050
+ "epoch": 4.430879712746858,
2051
+ "grad_norm": 2.2598838806152344,
2052
+ "learning_rate": 0.00011298896731605649,
2053
+ "logits/chosen": -3.9453125,
2054
+ "logits/rejected": -5.546875,
2055
+ "logps/chosen": -203.35000610351562,
2056
+ "logps/rejected": -407.79998779296875,
2057
+ "loss": 0.0373,
2058
+ "rewards/accuracies": 0.9906250238418579,
2059
+ "rewards/chosen": -13.240625381469727,
2060
+ "rewards/margins": 20.003124237060547,
2061
+ "rewards/rejected": -33.26250076293945,
2062
+ "step": 620
2063
+ },
2064
+ {
2065
+ "epoch": 4.466786355475763,
2066
+ "grad_norm": 0.47177407145500183,
2067
+ "learning_rate": 0.00011297410029671247,
2068
+ "logits/chosen": -3.74609375,
2069
+ "logits/rejected": -5.087500095367432,
2070
+ "logps/chosen": -281.70001220703125,
2071
+ "logps/rejected": -448.79998779296875,
2072
+ "loss": 0.04,
2073
+ "rewards/accuracies": 0.981249988079071,
2074
+ "rewards/chosen": -21.34375,
2075
+ "rewards/margins": 16.043750762939453,
2076
+ "rewards/rejected": -37.42499923706055,
2077
+ "step": 625
2078
+ },
2079
+ {
2080
+ "epoch": 4.502692998204668,
2081
+ "grad_norm": 0.2046031653881073,
2082
+ "learning_rate": 0.00011295909205882698,
2083
+ "logits/chosen": -1.002783179283142,
2084
+ "logits/rejected": -3.4195313453674316,
2085
+ "logps/chosen": -189.8249969482422,
2086
+ "logps/rejected": -390.1000061035156,
2087
+ "loss": 0.0272,
2088
+ "rewards/accuracies": 0.9937499761581421,
2089
+ "rewards/chosen": -11.790624618530273,
2090
+ "rewards/margins": 19.309375762939453,
2091
+ "rewards/rejected": -31.09375,
2092
+ "step": 630
2093
+ },
2094
+ {
2095
+ "epoch": 4.5385996409335725,
2096
+ "grad_norm": 0.29450154304504395,
2097
+ "learning_rate": 0.00011294394264018326,
2098
+ "logits/chosen": -2.082812547683716,
2099
+ "logits/rejected": -4.514062404632568,
2100
+ "logps/chosen": -224.8000030517578,
2101
+ "logps/rejected": -433.79998779296875,
2102
+ "loss": 0.0158,
2103
+ "rewards/accuracies": 0.9937499761581421,
2104
+ "rewards/chosen": -15.709375381469727,
2105
+ "rewards/margins": 19.975000381469727,
2106
+ "rewards/rejected": -35.70000076293945,
2107
+ "step": 635
2108
+ },
2109
+ {
2110
+ "epoch": 4.574506283662478,
2111
+ "grad_norm": 0.15016689896583557,
2112
+ "learning_rate": 0.00011292865207891994,
2113
+ "logits/chosen": -3.026562452316284,
2114
+ "logits/rejected": -5.426562309265137,
2115
+ "logps/chosen": -247.1999969482422,
2116
+ "logps/rejected": -477.1000061035156,
2117
+ "loss": 0.0044,
2118
+ "rewards/accuracies": 0.996874988079071,
2119
+ "rewards/chosen": -17.771875381469727,
2120
+ "rewards/margins": 22.262500762939453,
2121
+ "rewards/rejected": -40.025001525878906,
2122
+ "step": 640
2123
+ },
2124
+ {
2125
+ "epoch": 4.6104129263913824,
2126
+ "grad_norm": 3.1011385917663574,
2127
+ "learning_rate": 0.00011291322041353101,
2128
+ "logits/chosen": -3.1812500953674316,
2129
+ "logits/rejected": -5.546875,
2130
+ "logps/chosen": -231.9499969482422,
2131
+ "logps/rejected": -464.5,
2132
+ "loss": 0.0109,
2133
+ "rewards/accuracies": 0.996874988079071,
2134
+ "rewards/chosen": -16.149999618530273,
2135
+ "rewards/margins": 22.75,
2136
+ "rewards/rejected": -38.912498474121094,
2137
+ "step": 645
2138
+ },
2139
+ {
2140
+ "epoch": 4.646319569120287,
2141
+ "grad_norm": 0.06376684457063675,
2142
+ "learning_rate": 0.00011289764768286565,
2143
+ "logits/chosen": -4.444531440734863,
2144
+ "logits/rejected": -6.487500190734863,
2145
+ "logps/chosen": -285.45001220703125,
2146
+ "logps/rejected": -519.7999877929688,
2147
+ "loss": 0.0149,
2148
+ "rewards/accuracies": 0.987500011920929,
2149
+ "rewards/chosen": -21.674999237060547,
2150
+ "rewards/margins": 22.8125,
2151
+ "rewards/rejected": -44.525001525878906,
2152
+ "step": 650
2153
+ },
2154
+ {
2155
+ "epoch": 4.646319569120287,
2156
+ "eval_logits/chosen": -4.110294342041016,
2157
+ "eval_logits/rejected": -6.588235378265381,
2158
+ "eval_logps/chosen": -257.6470642089844,
2159
+ "eval_logps/rejected": -516.5882568359375,
2160
+ "eval_loss": 0.029071472585201263,
2161
+ "eval_rewards/accuracies": 0.9889705777168274,
2162
+ "eval_rewards/chosen": -18.647058486938477,
2163
+ "eval_rewards/margins": 25.28676414489746,
2164
+ "eval_rewards/rejected": -43.94117736816406,
2165
+ "eval_runtime": 8.8159,
2166
+ "eval_samples_per_second": 30.286,
2167
+ "eval_steps_per_second": 1.928,
2168
+ "step": 650
2169
+ },
2170
+ {
2171
+ "epoch": 4.682226211849192,
2172
+ "grad_norm": 3.26318621635437,
2173
+ "learning_rate": 0.00011288193392612822,
2174
+ "logits/chosen": -3.928906202316284,
2175
+ "logits/rejected": -6.315625190734863,
2176
+ "logps/chosen": -248.64999389648438,
2177
+ "logps/rejected": -503.79998779296875,
2178
+ "loss": 0.056,
2179
+ "rewards/accuracies": 0.9781249761581421,
2180
+ "rewards/chosen": -18.081249237060547,
2181
+ "rewards/margins": 24.456249237060547,
2182
+ "rewards/rejected": -42.537498474121094,
2183
+ "step": 655
2184
+ },
2185
+ {
2186
+ "epoch": 4.718132854578097,
2187
+ "grad_norm": 0.18917639553546906,
2188
+ "learning_rate": 0.00011286607918287803,
2189
+ "logits/chosen": -2.7222657203674316,
2190
+ "logits/rejected": -5.206250190734863,
2191
+ "logps/chosen": -258.1499938964844,
2192
+ "logps/rejected": -481.79998779296875,
2193
+ "loss": 0.0583,
2194
+ "rewards/accuracies": 0.9750000238418579,
2195
+ "rewards/chosen": -19.225000381469727,
2196
+ "rewards/margins": 21.606250762939453,
2197
+ "rewards/rejected": -40.849998474121094,
2198
+ "step": 660
2199
+ },
2200
+ {
2201
+ "epoch": 4.7540394973070015,
2202
+ "grad_norm": 0.9138699173927307,
2203
+ "learning_rate": 0.00011285008349302943,
2204
+ "logits/chosen": -2.116406202316284,
2205
+ "logits/rejected": -4.259375095367432,
2206
+ "logps/chosen": -294.20001220703125,
2207
+ "logps/rejected": -505.20001220703125,
2208
+ "loss": 0.0192,
2209
+ "rewards/accuracies": 0.9937499761581421,
2210
+ "rewards/chosen": -23.012500762939453,
2211
+ "rewards/margins": 20.296875,
2212
+ "rewards/rejected": -43.337501525878906,
2213
+ "step": 665
2214
+ },
2215
+ {
2216
+ "epoch": 4.789946140035907,
2217
+ "grad_norm": 0.5865360498428345,
2218
+ "learning_rate": 0.00011283394689685153,
2219
+ "logits/chosen": -3.46875,
2220
+ "logits/rejected": -5.800000190734863,
2221
+ "logps/chosen": -292.0,
2222
+ "logps/rejected": -535.2999877929688,
2223
+ "loss": 0.0238,
2224
+ "rewards/accuracies": 0.9937499761581421,
2225
+ "rewards/chosen": -22.268749237060547,
2226
+ "rewards/margins": 23.831249237060547,
2227
+ "rewards/rejected": -46.04999923706055,
2228
+ "step": 670
2229
+ },
2230
+ {
2231
+ "epoch": 4.825852782764811,
2232
+ "grad_norm": 1.3726475238800049,
2233
+ "learning_rate": 0.0001128176694349682,
2234
+ "logits/chosen": -3.3148436546325684,
2235
+ "logits/rejected": -5.324999809265137,
2236
+ "logps/chosen": -286.29998779296875,
2237
+ "logps/rejected": -542.9000244140625,
2238
+ "loss": 0.0127,
2239
+ "rewards/accuracies": 0.9937499761581421,
2240
+ "rewards/chosen": -22.024999618530273,
2241
+ "rewards/margins": 24.868749618530273,
2242
+ "rewards/rejected": -46.912498474121094,
2243
+ "step": 675
2244
+ },
2245
+ {
2246
+ "epoch": 4.861759425493716,
2247
+ "grad_norm": 10.441712379455566,
2248
+ "learning_rate": 0.00011280125114835791,
2249
+ "logits/chosen": -2.067578077316284,
2250
+ "logits/rejected": -4.528124809265137,
2251
+ "logps/chosen": -216.75,
2252
+ "logps/rejected": -488.70001220703125,
2253
+ "loss": 0.0218,
2254
+ "rewards/accuracies": 0.9906250238418579,
2255
+ "rewards/chosen": -15.034375190734863,
2256
+ "rewards/margins": 26.6875,
2257
+ "rewards/rejected": -41.712501525878906,
2258
+ "step": 680
2259
+ },
2260
+ {
2261
+ "epoch": 4.897666068222621,
2262
+ "grad_norm": 0.9471856355667114,
2263
+ "learning_rate": 0.00011278469207835369,
2264
+ "logits/chosen": -0.960888683795929,
2265
+ "logits/rejected": -3.39453125,
2266
+ "logps/chosen": -179.25,
2267
+ "logps/rejected": -433.1000061035156,
2268
+ "loss": 0.0337,
2269
+ "rewards/accuracies": 0.996874988079071,
2270
+ "rewards/chosen": -11.198437690734863,
2271
+ "rewards/margins": 24.59375,
2272
+ "rewards/rejected": -35.787498474121094,
2273
+ "step": 685
2274
+ },
2275
+ {
2276
+ "epoch": 4.933572710951526,
2277
+ "grad_norm": 4.058782577514648,
2278
+ "learning_rate": 0.000112767992266643,
2279
+ "logits/chosen": -2.7562499046325684,
2280
+ "logits/rejected": -4.982031345367432,
2281
+ "logps/chosen": -250.60000610351562,
2282
+ "logps/rejected": -498.20001220703125,
2283
+ "loss": 0.0548,
2284
+ "rewards/accuracies": 0.987500011920929,
2285
+ "rewards/chosen": -18.268749237060547,
2286
+ "rewards/margins": 23.912500381469727,
2287
+ "rewards/rejected": -42.1875,
2288
+ "step": 690
2289
+ },
2290
+ {
2291
+ "epoch": 4.9694793536804305,
2292
+ "grad_norm": 5.3591084480285645,
2293
+ "learning_rate": 0.00011275115175526756,
2294
+ "logits/chosen": -3.2984375953674316,
2295
+ "logits/rejected": -5.337500095367432,
2296
+ "logps/chosen": -273.29998779296875,
2297
+ "logps/rejected": -526.4000244140625,
2298
+ "loss": 0.0574,
2299
+ "rewards/accuracies": 0.984375,
2300
+ "rewards/chosen": -20.40625,
2301
+ "rewards/margins": 24.493749618530273,
2302
+ "rewards/rejected": -44.900001525878906,
2303
+ "step": 695
2304
+ },
2305
+ {
2306
+ "epoch": 5.0,
2307
+ "grad_norm": 3.2741596698760986,
2308
+ "learning_rate": 0.00011273417058662334,
2309
+ "logits/chosen": -1.62890625,
2310
+ "logits/rejected": -3.163602828979492,
2311
+ "logps/chosen": -178.4705810546875,
2312
+ "logps/rejected": -387.76470947265625,
2313
+ "loss": 0.0274,
2314
+ "rewards/accuracies": 0.9852941036224365,
2315
+ "rewards/chosen": -10.939338684082031,
2316
+ "rewards/margins": 20.0,
2317
+ "rewards/rejected": -30.941177368164062,
2318
+ "step": 700
2319
+ },
2320
+ {
2321
+ "epoch": 5.0,
2322
+ "eval_logits/chosen": -1.3389246463775635,
2323
+ "eval_logits/rejected": -2.8189337253570557,
2324
+ "eval_logps/chosen": -160.88235473632812,
2325
+ "eval_logps/rejected": -377.29412841796875,
2326
+ "eval_loss": 0.009149392135441303,
2327
+ "eval_rewards/accuracies": 1.0,
2328
+ "eval_rewards/chosen": -8.79411792755127,
2329
+ "eval_rewards/margins": 20.941177368164062,
2330
+ "eval_rewards/rejected": -29.757352828979492,
2331
+ "eval_runtime": 8.8399,
2332
+ "eval_samples_per_second": 30.204,
2333
+ "eval_steps_per_second": 1.923,
2334
+ "step": 700
2335
  }
2336
  ],
2337
  "logging_steps": 5,