CocoRoF commited on
Commit
7ffc46b
·
verified ·
1 Parent(s): 4e94c86

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e35a58b5ce20a546dab5f4b0c4e7952626f7c02d776f500c9444a2034e601e38
3
  size 737580392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:878397b694b0a8de341b4b8d86e9a615129650afe4a1ebc2f9f3ccfb75d0c0c8
3
  size 737580392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de7fa68514e44d305d8ade79d3385d8b8dc317b2f5506d2952862d523ee3e573
3
  size 1475248442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0acf7430500cf883fa2666bfba6735859811567ffb5b4b4b2939205ee3547014
3
  size 1475248442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4728e4a27ef2a8c58f14bda178d8eac76290169f611c06f23f0154a1ac41085
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59efe62e4ca0647678855566a69eaafc20fb9e01c9af7b6b454bf0717d7bf5f7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:042be25dc4541cbc3f89ffe5029c66c140dff258c3ff65395edeb7f31d123087
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4f48bf96def541f86640977b0dc57c5078e1aaca13e1c80e28041dac90f6386
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9372071227741331,
5
  "eval_steps": 250,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1535,6 +1535,770 @@
1535
  "eval_spearman_manhattan": 0.8162459677504973,
1536
  "eval_steps_per_second": 20.407,
1537
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1538
  }
1539
  ],
1540
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4058106841611997,
5
  "eval_steps": 250,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1535
  "eval_spearman_manhattan": 0.8162459677504973,
1536
  "eval_steps_per_second": 20.407,
1537
  "step": 2000
1538
+ },
1539
+ {
1540
+ "epoch": 0.9418931583880038,
1541
+ "grad_norm": 1.454084873199463,
1542
+ "learning_rate": 1.94113167760075e-05,
1543
+ "loss": 0.274,
1544
+ "step": 2010
1545
+ },
1546
+ {
1547
+ "epoch": 0.9465791940018744,
1548
+ "grad_norm": 2.0930581092834473,
1549
+ "learning_rate": 1.940838800374883e-05,
1550
+ "loss": 0.3101,
1551
+ "step": 2020
1552
+ },
1553
+ {
1554
+ "epoch": 0.9512652296157451,
1555
+ "grad_norm": 2.347327947616577,
1556
+ "learning_rate": 1.940545923149016e-05,
1557
+ "loss": 0.3134,
1558
+ "step": 2030
1559
+ },
1560
+ {
1561
+ "epoch": 0.9559512652296157,
1562
+ "grad_norm": 1.7699518203735352,
1563
+ "learning_rate": 1.9402530459231494e-05,
1564
+ "loss": 0.2954,
1565
+ "step": 2040
1566
+ },
1567
+ {
1568
+ "epoch": 0.9606373008434864,
1569
+ "grad_norm": 1.983152985572815,
1570
+ "learning_rate": 1.9399601686972823e-05,
1571
+ "loss": 0.3267,
1572
+ "step": 2050
1573
+ },
1574
+ {
1575
+ "epoch": 0.9653233364573571,
1576
+ "grad_norm": 2.6133322715759277,
1577
+ "learning_rate": 1.9396672914714153e-05,
1578
+ "loss": 0.3197,
1579
+ "step": 2060
1580
+ },
1581
+ {
1582
+ "epoch": 0.9700093720712277,
1583
+ "grad_norm": 1.6881980895996094,
1584
+ "learning_rate": 1.9393744142455482e-05,
1585
+ "loss": 0.3284,
1586
+ "step": 2070
1587
+ },
1588
+ {
1589
+ "epoch": 0.9746954076850984,
1590
+ "grad_norm": 2.0169336795806885,
1591
+ "learning_rate": 1.9390815370196815e-05,
1592
+ "loss": 0.3245,
1593
+ "step": 2080
1594
+ },
1595
+ {
1596
+ "epoch": 0.979381443298969,
1597
+ "grad_norm": 1.456597924232483,
1598
+ "learning_rate": 1.9387886597938145e-05,
1599
+ "loss": 0.297,
1600
+ "step": 2090
1601
+ },
1602
+ {
1603
+ "epoch": 0.9840674789128397,
1604
+ "grad_norm": 1.7516179084777832,
1605
+ "learning_rate": 1.9384957825679474e-05,
1606
+ "loss": 0.3043,
1607
+ "step": 2100
1608
+ },
1609
+ {
1610
+ "epoch": 0.9887535145267105,
1611
+ "grad_norm": 1.981558084487915,
1612
+ "learning_rate": 1.9382029053420807e-05,
1613
+ "loss": 0.3321,
1614
+ "step": 2110
1615
+ },
1616
+ {
1617
+ "epoch": 0.993439550140581,
1618
+ "grad_norm": 2.1244945526123047,
1619
+ "learning_rate": 1.9379100281162137e-05,
1620
+ "loss": 0.2638,
1621
+ "step": 2120
1622
+ },
1623
+ {
1624
+ "epoch": 0.9981255857544518,
1625
+ "grad_norm": 2.025820016860962,
1626
+ "learning_rate": 1.937617150890347e-05,
1627
+ "loss": 0.3164,
1628
+ "step": 2130
1629
+ },
1630
+ {
1631
+ "epoch": 1.0028116213683225,
1632
+ "grad_norm": 1.7136940956115723,
1633
+ "learning_rate": 1.93732427366448e-05,
1634
+ "loss": 0.311,
1635
+ "step": 2140
1636
+ },
1637
+ {
1638
+ "epoch": 1.007497656982193,
1639
+ "grad_norm": 1.604434609413147,
1640
+ "learning_rate": 1.9370313964386132e-05,
1641
+ "loss": 0.2511,
1642
+ "step": 2150
1643
+ },
1644
+ {
1645
+ "epoch": 1.0121836925960637,
1646
+ "grad_norm": 1.7466777563095093,
1647
+ "learning_rate": 1.936738519212746e-05,
1648
+ "loss": 0.2342,
1649
+ "step": 2160
1650
+ },
1651
+ {
1652
+ "epoch": 1.0168697282099344,
1653
+ "grad_norm": 1.9601348638534546,
1654
+ "learning_rate": 1.9364456419868794e-05,
1655
+ "loss": 0.2419,
1656
+ "step": 2170
1657
+ },
1658
+ {
1659
+ "epoch": 1.021555763823805,
1660
+ "grad_norm": 1.7632412910461426,
1661
+ "learning_rate": 1.9361527647610124e-05,
1662
+ "loss": 0.2474,
1663
+ "step": 2180
1664
+ },
1665
+ {
1666
+ "epoch": 1.0262417994376758,
1667
+ "grad_norm": 1.8551344871520996,
1668
+ "learning_rate": 1.9358598875351457e-05,
1669
+ "loss": 0.2375,
1670
+ "step": 2190
1671
+ },
1672
+ {
1673
+ "epoch": 1.0309278350515463,
1674
+ "grad_norm": 1.8569507598876953,
1675
+ "learning_rate": 1.9355670103092786e-05,
1676
+ "loss": 0.2226,
1677
+ "step": 2200
1678
+ },
1679
+ {
1680
+ "epoch": 1.035613870665417,
1681
+ "grad_norm": 1.5567635297775269,
1682
+ "learning_rate": 1.9352741330834116e-05,
1683
+ "loss": 0.2615,
1684
+ "step": 2210
1685
+ },
1686
+ {
1687
+ "epoch": 1.0402999062792877,
1688
+ "grad_norm": 1.6635299921035767,
1689
+ "learning_rate": 1.934981255857545e-05,
1690
+ "loss": 0.255,
1691
+ "step": 2220
1692
+ },
1693
+ {
1694
+ "epoch": 1.0449859418931584,
1695
+ "grad_norm": 2.080941915512085,
1696
+ "learning_rate": 1.9346883786316778e-05,
1697
+ "loss": 0.2847,
1698
+ "step": 2230
1699
+ },
1700
+ {
1701
+ "epoch": 1.0496719775070291,
1702
+ "grad_norm": 1.8611458539962769,
1703
+ "learning_rate": 1.9343955014058108e-05,
1704
+ "loss": 0.2395,
1705
+ "step": 2240
1706
+ },
1707
+ {
1708
+ "epoch": 1.0543580131208996,
1709
+ "grad_norm": 1.7495652437210083,
1710
+ "learning_rate": 1.9341026241799437e-05,
1711
+ "loss": 0.2473,
1712
+ "step": 2250
1713
+ },
1714
+ {
1715
+ "epoch": 1.0543580131208996,
1716
+ "eval_loss": 0.045334625989198685,
1717
+ "eval_pearson_cosine": 0.8123685468173392,
1718
+ "eval_pearson_dot": 0.7270518178916561,
1719
+ "eval_pearson_euclidean": 0.8020452343241686,
1720
+ "eval_pearson_manhattan": 0.8030795563147421,
1721
+ "eval_runtime": 5.4133,
1722
+ "eval_samples_per_second": 277.096,
1723
+ "eval_spearman_cosine": 0.814265717527882,
1724
+ "eval_spearman_dot": 0.7261387412776578,
1725
+ "eval_spearman_euclidean": 0.809310966263978,
1726
+ "eval_spearman_manhattan": 0.8103368464209432,
1727
+ "eval_steps_per_second": 17.365,
1728
+ "step": 2250
1729
+ },
1730
+ {
1731
+ "epoch": 1.0590440487347703,
1732
+ "grad_norm": 1.861383080482483,
1733
+ "learning_rate": 1.933809746954077e-05,
1734
+ "loss": 0.2635,
1735
+ "step": 2260
1736
+ },
1737
+ {
1738
+ "epoch": 1.063730084348641,
1739
+ "grad_norm": 1.855754017829895,
1740
+ "learning_rate": 1.93351686972821e-05,
1741
+ "loss": 0.2752,
1742
+ "step": 2270
1743
+ },
1744
+ {
1745
+ "epoch": 1.0684161199625117,
1746
+ "grad_norm": 1.7547088861465454,
1747
+ "learning_rate": 1.9332239925023432e-05,
1748
+ "loss": 0.2528,
1749
+ "step": 2280
1750
+ },
1751
+ {
1752
+ "epoch": 1.0731021555763824,
1753
+ "grad_norm": 2.317349672317505,
1754
+ "learning_rate": 1.9329311152764762e-05,
1755
+ "loss": 0.2727,
1756
+ "step": 2290
1757
+ },
1758
+ {
1759
+ "epoch": 1.077788191190253,
1760
+ "grad_norm": 1.69141685962677,
1761
+ "learning_rate": 1.932638238050609e-05,
1762
+ "loss": 0.2576,
1763
+ "step": 2300
1764
+ },
1765
+ {
1766
+ "epoch": 1.0824742268041236,
1767
+ "grad_norm": 1.9386281967163086,
1768
+ "learning_rate": 1.9323453608247424e-05,
1769
+ "loss": 0.2746,
1770
+ "step": 2310
1771
+ },
1772
+ {
1773
+ "epoch": 1.0871602624179943,
1774
+ "grad_norm": 2.079533100128174,
1775
+ "learning_rate": 1.9320524835988754e-05,
1776
+ "loss": 0.2145,
1777
+ "step": 2320
1778
+ },
1779
+ {
1780
+ "epoch": 1.091846298031865,
1781
+ "grad_norm": 2.5439393520355225,
1782
+ "learning_rate": 1.9317596063730087e-05,
1783
+ "loss": 0.2406,
1784
+ "step": 2330
1785
+ },
1786
+ {
1787
+ "epoch": 1.0965323336457358,
1788
+ "grad_norm": 2.132382869720459,
1789
+ "learning_rate": 1.9314667291471416e-05,
1790
+ "loss": 0.2158,
1791
+ "step": 2340
1792
+ },
1793
+ {
1794
+ "epoch": 1.1012183692596063,
1795
+ "grad_norm": 2.181603193283081,
1796
+ "learning_rate": 1.931173851921275e-05,
1797
+ "loss": 0.2358,
1798
+ "step": 2350
1799
+ },
1800
+ {
1801
+ "epoch": 1.105904404873477,
1802
+ "grad_norm": 2.288633108139038,
1803
+ "learning_rate": 1.930880974695408e-05,
1804
+ "loss": 0.2715,
1805
+ "step": 2360
1806
+ },
1807
+ {
1808
+ "epoch": 1.1105904404873477,
1809
+ "grad_norm": 1.882150650024414,
1810
+ "learning_rate": 1.930588097469541e-05,
1811
+ "loss": 0.2111,
1812
+ "step": 2370
1813
+ },
1814
+ {
1815
+ "epoch": 1.1152764761012184,
1816
+ "grad_norm": 1.695289969444275,
1817
+ "learning_rate": 1.930295220243674e-05,
1818
+ "loss": 0.2555,
1819
+ "step": 2380
1820
+ },
1821
+ {
1822
+ "epoch": 1.119962511715089,
1823
+ "grad_norm": 1.945979118347168,
1824
+ "learning_rate": 1.930002343017807e-05,
1825
+ "loss": 0.2415,
1826
+ "step": 2390
1827
+ },
1828
+ {
1829
+ "epoch": 1.1246485473289598,
1830
+ "grad_norm": 2.199965238571167,
1831
+ "learning_rate": 1.9297094657919403e-05,
1832
+ "loss": 0.2561,
1833
+ "step": 2400
1834
+ },
1835
+ {
1836
+ "epoch": 1.1293345829428303,
1837
+ "grad_norm": 2.0236504077911377,
1838
+ "learning_rate": 1.9294165885660733e-05,
1839
+ "loss": 0.2458,
1840
+ "step": 2410
1841
+ },
1842
+ {
1843
+ "epoch": 1.134020618556701,
1844
+ "grad_norm": 1.7911378145217896,
1845
+ "learning_rate": 1.9291237113402062e-05,
1846
+ "loss": 0.2428,
1847
+ "step": 2420
1848
+ },
1849
+ {
1850
+ "epoch": 1.1387066541705717,
1851
+ "grad_norm": 1.8625153303146362,
1852
+ "learning_rate": 1.9288308341143395e-05,
1853
+ "loss": 0.2482,
1854
+ "step": 2430
1855
+ },
1856
+ {
1857
+ "epoch": 1.1433926897844424,
1858
+ "grad_norm": 1.4607598781585693,
1859
+ "learning_rate": 1.9285379568884725e-05,
1860
+ "loss": 0.2383,
1861
+ "step": 2440
1862
+ },
1863
+ {
1864
+ "epoch": 1.148078725398313,
1865
+ "grad_norm": 1.552121877670288,
1866
+ "learning_rate": 1.9282450796626054e-05,
1867
+ "loss": 0.2917,
1868
+ "step": 2450
1869
+ },
1870
+ {
1871
+ "epoch": 1.1527647610121836,
1872
+ "grad_norm": 2.049769163131714,
1873
+ "learning_rate": 1.9279522024367387e-05,
1874
+ "loss": 0.2241,
1875
+ "step": 2460
1876
+ },
1877
+ {
1878
+ "epoch": 1.1574507966260543,
1879
+ "grad_norm": 1.900490164756775,
1880
+ "learning_rate": 1.9276593252108717e-05,
1881
+ "loss": 0.2423,
1882
+ "step": 2470
1883
+ },
1884
+ {
1885
+ "epoch": 1.162136832239925,
1886
+ "grad_norm": 1.1432183980941772,
1887
+ "learning_rate": 1.927366447985005e-05,
1888
+ "loss": 0.2556,
1889
+ "step": 2480
1890
+ },
1891
+ {
1892
+ "epoch": 1.1668228678537957,
1893
+ "grad_norm": 1.8964147567749023,
1894
+ "learning_rate": 1.927073570759138e-05,
1895
+ "loss": 0.2258,
1896
+ "step": 2490
1897
+ },
1898
+ {
1899
+ "epoch": 1.1715089034676662,
1900
+ "grad_norm": 1.8627080917358398,
1901
+ "learning_rate": 1.926780693533271e-05,
1902
+ "loss": 0.2563,
1903
+ "step": 2500
1904
+ },
1905
+ {
1906
+ "epoch": 1.1715089034676662,
1907
+ "eval_loss": 0.04076731204986572,
1908
+ "eval_pearson_cosine": 0.8177640807226254,
1909
+ "eval_pearson_dot": 0.7517947588072431,
1910
+ "eval_pearson_euclidean": 0.8032143754633552,
1911
+ "eval_pearson_manhattan": 0.804324819579108,
1912
+ "eval_runtime": 4.5167,
1913
+ "eval_samples_per_second": 332.103,
1914
+ "eval_spearman_cosine": 0.8194654831329402,
1915
+ "eval_spearman_dot": 0.7504183673719423,
1916
+ "eval_spearman_euclidean": 0.811997591732405,
1917
+ "eval_spearman_manhattan": 0.8131700838529736,
1918
+ "eval_steps_per_second": 20.812,
1919
+ "step": 2500
1920
+ },
1921
+ {
1922
+ "epoch": 1.176194939081537,
1923
+ "grad_norm": 1.7957406044006348,
1924
+ "learning_rate": 1.926487816307404e-05,
1925
+ "loss": 0.2381,
1926
+ "step": 2510
1927
+ },
1928
+ {
1929
+ "epoch": 1.1808809746954076,
1930
+ "grad_norm": 1.868900179862976,
1931
+ "learning_rate": 1.926194939081537e-05,
1932
+ "loss": 0.245,
1933
+ "step": 2520
1934
+ },
1935
+ {
1936
+ "epoch": 1.1855670103092784,
1937
+ "grad_norm": 1.8972773551940918,
1938
+ "learning_rate": 1.9259020618556704e-05,
1939
+ "loss": 0.2346,
1940
+ "step": 2530
1941
+ },
1942
+ {
1943
+ "epoch": 1.190253045923149,
1944
+ "grad_norm": 1.7982817888259888,
1945
+ "learning_rate": 1.9256091846298033e-05,
1946
+ "loss": 0.2182,
1947
+ "step": 2540
1948
+ },
1949
+ {
1950
+ "epoch": 1.1949390815370198,
1951
+ "grad_norm": 2.066537380218506,
1952
+ "learning_rate": 1.9253163074039366e-05,
1953
+ "loss": 0.2396,
1954
+ "step": 2550
1955
+ },
1956
+ {
1957
+ "epoch": 1.1996251171508903,
1958
+ "grad_norm": 2.3115556240081787,
1959
+ "learning_rate": 1.9250234301780696e-05,
1960
+ "loss": 0.2513,
1961
+ "step": 2560
1962
+ },
1963
+ {
1964
+ "epoch": 1.204311152764761,
1965
+ "grad_norm": 2.140615940093994,
1966
+ "learning_rate": 1.9247305529522025e-05,
1967
+ "loss": 0.258,
1968
+ "step": 2570
1969
+ },
1970
+ {
1971
+ "epoch": 1.2089971883786317,
1972
+ "grad_norm": 1.9183828830718994,
1973
+ "learning_rate": 1.9244376757263358e-05,
1974
+ "loss": 0.2249,
1975
+ "step": 2580
1976
+ },
1977
+ {
1978
+ "epoch": 1.2136832239925024,
1979
+ "grad_norm": 1.9000664949417114,
1980
+ "learning_rate": 1.9241447985004687e-05,
1981
+ "loss": 0.2469,
1982
+ "step": 2590
1983
+ },
1984
+ {
1985
+ "epoch": 1.218369259606373,
1986
+ "grad_norm": 2.2240567207336426,
1987
+ "learning_rate": 1.9238519212746017e-05,
1988
+ "loss": 0.2342,
1989
+ "step": 2600
1990
+ },
1991
+ {
1992
+ "epoch": 1.2230552952202436,
1993
+ "grad_norm": 2.227921724319458,
1994
+ "learning_rate": 1.923559044048735e-05,
1995
+ "loss": 0.2867,
1996
+ "step": 2610
1997
+ },
1998
+ {
1999
+ "epoch": 1.2277413308341143,
2000
+ "grad_norm": 1.8266960382461548,
2001
+ "learning_rate": 1.923266166822868e-05,
2002
+ "loss": 0.2541,
2003
+ "step": 2620
2004
+ },
2005
+ {
2006
+ "epoch": 1.232427366447985,
2007
+ "grad_norm": 2.0011913776397705,
2008
+ "learning_rate": 1.922973289597001e-05,
2009
+ "loss": 0.2404,
2010
+ "step": 2630
2011
+ },
2012
+ {
2013
+ "epoch": 1.2371134020618557,
2014
+ "grad_norm": 2.053349256515503,
2015
+ "learning_rate": 1.9226804123711342e-05,
2016
+ "loss": 0.2931,
2017
+ "step": 2640
2018
+ },
2019
+ {
2020
+ "epoch": 1.2417994376757264,
2021
+ "grad_norm": 1.3468660116195679,
2022
+ "learning_rate": 1.922387535145267e-05,
2023
+ "loss": 0.2326,
2024
+ "step": 2650
2025
+ },
2026
+ {
2027
+ "epoch": 1.246485473289597,
2028
+ "grad_norm": 2.1676151752471924,
2029
+ "learning_rate": 1.9220946579194004e-05,
2030
+ "loss": 0.2478,
2031
+ "step": 2660
2032
+ },
2033
+ {
2034
+ "epoch": 1.2511715089034676,
2035
+ "grad_norm": 1.9708117246627808,
2036
+ "learning_rate": 1.9218017806935334e-05,
2037
+ "loss": 0.2281,
2038
+ "step": 2670
2039
+ },
2040
+ {
2041
+ "epoch": 1.2558575445173383,
2042
+ "grad_norm": 1.8341456651687622,
2043
+ "learning_rate": 1.9215089034676667e-05,
2044
+ "loss": 0.2477,
2045
+ "step": 2680
2046
+ },
2047
+ {
2048
+ "epoch": 1.260543580131209,
2049
+ "grad_norm": 1.9229050874710083,
2050
+ "learning_rate": 1.9212160262417996e-05,
2051
+ "loss": 0.2424,
2052
+ "step": 2690
2053
+ },
2054
+ {
2055
+ "epoch": 1.2652296157450795,
2056
+ "grad_norm": 2.013848066329956,
2057
+ "learning_rate": 1.9209231490159326e-05,
2058
+ "loss": 0.272,
2059
+ "step": 2700
2060
+ },
2061
+ {
2062
+ "epoch": 1.2699156513589505,
2063
+ "grad_norm": 1.684461236000061,
2064
+ "learning_rate": 1.920630271790066e-05,
2065
+ "loss": 0.2432,
2066
+ "step": 2710
2067
+ },
2068
+ {
2069
+ "epoch": 1.274601686972821,
2070
+ "grad_norm": 1.300451636314392,
2071
+ "learning_rate": 1.9203373945641988e-05,
2072
+ "loss": 0.2006,
2073
+ "step": 2720
2074
+ },
2075
+ {
2076
+ "epoch": 1.2792877225866917,
2077
+ "grad_norm": 1.6482150554656982,
2078
+ "learning_rate": 1.920044517338332e-05,
2079
+ "loss": 0.2495,
2080
+ "step": 2730
2081
+ },
2082
+ {
2083
+ "epoch": 1.2839737582005624,
2084
+ "grad_norm": 1.7251955270767212,
2085
+ "learning_rate": 1.919751640112465e-05,
2086
+ "loss": 0.2469,
2087
+ "step": 2740
2088
+ },
2089
+ {
2090
+ "epoch": 1.2886597938144329,
2091
+ "grad_norm": 2.6221530437469482,
2092
+ "learning_rate": 1.9194587628865983e-05,
2093
+ "loss": 0.2841,
2094
+ "step": 2750
2095
+ },
2096
+ {
2097
+ "epoch": 1.2886597938144329,
2098
+ "eval_loss": 0.04372455179691315,
2099
+ "eval_pearson_cosine": 0.8073759135651102,
2100
+ "eval_pearson_dot": 0.7236907127152534,
2101
+ "eval_pearson_euclidean": 0.8053106447795528,
2102
+ "eval_pearson_manhattan": 0.806321043432086,
2103
+ "eval_runtime": 3.4277,
2104
+ "eval_samples_per_second": 437.612,
2105
+ "eval_spearman_cosine": 0.8100379018669143,
2106
+ "eval_spearman_dot": 0.7204492861182332,
2107
+ "eval_spearman_euclidean": 0.8129520365277123,
2108
+ "eval_spearman_manhattan": 0.813834648092825,
2109
+ "eval_steps_per_second": 27.424,
2110
+ "step": 2750
2111
+ },
2112
+ {
2113
+ "epoch": 1.2933458294283038,
2114
+ "grad_norm": 1.8232098817825317,
2115
+ "learning_rate": 1.9191658856607313e-05,
2116
+ "loss": 0.2717,
2117
+ "step": 2760
2118
+ },
2119
+ {
2120
+ "epoch": 1.2980318650421743,
2121
+ "grad_norm": 1.6222856044769287,
2122
+ "learning_rate": 1.9188730084348642e-05,
2123
+ "loss": 0.227,
2124
+ "step": 2770
2125
+ },
2126
+ {
2127
+ "epoch": 1.302717900656045,
2128
+ "grad_norm": 1.8859031200408936,
2129
+ "learning_rate": 1.9185801312089972e-05,
2130
+ "loss": 0.2428,
2131
+ "step": 2780
2132
+ },
2133
+ {
2134
+ "epoch": 1.3074039362699157,
2135
+ "grad_norm": 1.9047764539718628,
2136
+ "learning_rate": 1.9182872539831305e-05,
2137
+ "loss": 0.2322,
2138
+ "step": 2790
2139
+ },
2140
+ {
2141
+ "epoch": 1.3120899718837864,
2142
+ "grad_norm": 1.734043002128601,
2143
+ "learning_rate": 1.9179943767572634e-05,
2144
+ "loss": 0.2562,
2145
+ "step": 2800
2146
+ },
2147
+ {
2148
+ "epoch": 1.316776007497657,
2149
+ "grad_norm": 1.840468406677246,
2150
+ "learning_rate": 1.9177014995313964e-05,
2151
+ "loss": 0.2429,
2152
+ "step": 2810
2153
+ },
2154
+ {
2155
+ "epoch": 1.3214620431115276,
2156
+ "grad_norm": 1.6314520835876465,
2157
+ "learning_rate": 1.9174086223055296e-05,
2158
+ "loss": 0.232,
2159
+ "step": 2820
2160
+ },
2161
+ {
2162
+ "epoch": 1.3261480787253983,
2163
+ "grad_norm": 1.9515080451965332,
2164
+ "learning_rate": 1.9171157450796626e-05,
2165
+ "loss": 0.2289,
2166
+ "step": 2830
2167
+ },
2168
+ {
2169
+ "epoch": 1.330834114339269,
2170
+ "grad_norm": 1.7130558490753174,
2171
+ "learning_rate": 1.916822867853796e-05,
2172
+ "loss": 0.2349,
2173
+ "step": 2840
2174
+ },
2175
+ {
2176
+ "epoch": 1.3355201499531397,
2177
+ "grad_norm": 2.0093345642089844,
2178
+ "learning_rate": 1.916529990627929e-05,
2179
+ "loss": 0.216,
2180
+ "step": 2850
2181
+ },
2182
+ {
2183
+ "epoch": 1.3402061855670104,
2184
+ "grad_norm": 1.8628073930740356,
2185
+ "learning_rate": 1.916237113402062e-05,
2186
+ "loss": 0.2395,
2187
+ "step": 2860
2188
+ },
2189
+ {
2190
+ "epoch": 1.344892221180881,
2191
+ "grad_norm": 1.8130676746368408,
2192
+ "learning_rate": 1.915944236176195e-05,
2193
+ "loss": 0.2283,
2194
+ "step": 2870
2195
+ },
2196
+ {
2197
+ "epoch": 1.3495782567947516,
2198
+ "grad_norm": 1.8690656423568726,
2199
+ "learning_rate": 1.9156513589503284e-05,
2200
+ "loss": 0.2459,
2201
+ "step": 2880
2202
+ },
2203
+ {
2204
+ "epoch": 1.3542642924086223,
2205
+ "grad_norm": 1.607759714126587,
2206
+ "learning_rate": 1.9153584817244613e-05,
2207
+ "loss": 0.2128,
2208
+ "step": 2890
2209
+ },
2210
+ {
2211
+ "epoch": 1.358950328022493,
2212
+ "grad_norm": 1.827553629875183,
2213
+ "learning_rate": 1.9150656044985943e-05,
2214
+ "loss": 0.225,
2215
+ "step": 2900
2216
+ },
2217
+ {
2218
+ "epoch": 1.3636363636363638,
2219
+ "grad_norm": 1.599199891090393,
2220
+ "learning_rate": 1.9147727272727276e-05,
2221
+ "loss": 0.264,
2222
+ "step": 2910
2223
+ },
2224
+ {
2225
+ "epoch": 1.3683223992502342,
2226
+ "grad_norm": 2.0378599166870117,
2227
+ "learning_rate": 1.9144798500468605e-05,
2228
+ "loss": 0.2147,
2229
+ "step": 2920
2230
+ },
2231
+ {
2232
+ "epoch": 1.373008434864105,
2233
+ "grad_norm": 2.201847791671753,
2234
+ "learning_rate": 1.9141869728209938e-05,
2235
+ "loss": 0.2812,
2236
+ "step": 2930
2237
+ },
2238
+ {
2239
+ "epoch": 1.3776944704779757,
2240
+ "grad_norm": 1.5552937984466553,
2241
+ "learning_rate": 1.9138940955951267e-05,
2242
+ "loss": 0.2386,
2243
+ "step": 2940
2244
+ },
2245
+ {
2246
+ "epoch": 1.3823805060918464,
2247
+ "grad_norm": 1.7810213565826416,
2248
+ "learning_rate": 1.9136012183692597e-05,
2249
+ "loss": 0.2596,
2250
+ "step": 2950
2251
+ },
2252
+ {
2253
+ "epoch": 1.387066541705717,
2254
+ "grad_norm": 1.9728736877441406,
2255
+ "learning_rate": 1.913308341143393e-05,
2256
+ "loss": 0.2385,
2257
+ "step": 2960
2258
+ },
2259
+ {
2260
+ "epoch": 1.3917525773195876,
2261
+ "grad_norm": 1.868575930595398,
2262
+ "learning_rate": 1.913015463917526e-05,
2263
+ "loss": 0.2287,
2264
+ "step": 2970
2265
+ },
2266
+ {
2267
+ "epoch": 1.3964386129334583,
2268
+ "grad_norm": 1.4674561023712158,
2269
+ "learning_rate": 1.912722586691659e-05,
2270
+ "loss": 0.2619,
2271
+ "step": 2980
2272
+ },
2273
+ {
2274
+ "epoch": 1.401124648547329,
2275
+ "grad_norm": 1.9503545761108398,
2276
+ "learning_rate": 1.912429709465792e-05,
2277
+ "loss": 0.2319,
2278
+ "step": 2990
2279
+ },
2280
+ {
2281
+ "epoch": 1.4058106841611997,
2282
+ "grad_norm": 1.705707311630249,
2283
+ "learning_rate": 1.912136832239925e-05,
2284
+ "loss": 0.2462,
2285
+ "step": 3000
2286
+ },
2287
+ {
2288
+ "epoch": 1.4058106841611997,
2289
+ "eval_loss": 0.04194045811891556,
2290
+ "eval_pearson_cosine": 0.8164473390079152,
2291
+ "eval_pearson_dot": 0.7394876158890895,
2292
+ "eval_pearson_euclidean": 0.8038670284436478,
2293
+ "eval_pearson_manhattan": 0.8050459887967918,
2294
+ "eval_runtime": 3.394,
2295
+ "eval_samples_per_second": 441.953,
2296
+ "eval_spearman_cosine": 0.8192114591290918,
2297
+ "eval_spearman_dot": 0.7393194455764386,
2298
+ "eval_spearman_euclidean": 0.8132475290397445,
2299
+ "eval_spearman_manhattan": 0.8142678938453525,
2300
+ "eval_steps_per_second": 27.696,
2301
+ "step": 3000
2302
  }
2303
  ],
2304
  "logging_steps": 10,