CocoRoF commited on
Commit
7a180d7
·
verified ·
1 Parent(s): 13eb7ab

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8108b4c7505bf31f05a0583aabbf5fe4cfe4429dc292fc208409ab6e5fcfded
3
  size 598434152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71d0eabfbdd37b2fae13fa797c3f0436a08906b6795d79ec3027697d278db38e
3
  size 598434152
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ee29d79e39b5eeb941ba1801f2a551f0871fde9c5f8d5db87000ea005b4bca7
3
  size 1196955962
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65be9700ffb2ead35027d047544f87600617dedade6f5c3f6c069f12bb951ba1
3
  size 1196955962
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4728e4a27ef2a8c58f14bda178d8eac76290169f611c06f23f0154a1ac41085
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59efe62e4ca0647678855566a69eaafc20fb9e01c9af7b6b454bf0717d7bf5f7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f83b5db90f4185c28054d91912c7daf482661e1eee0f05c8ff078fbdacbc6754
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f7bf369a63d501e73812630c6dd5a11aa21e67b93b5316e7295ec988e3218d6
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9372071227741331,
5
  "eval_steps": 250,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1535,6 +1535,770 @@
1535
  "eval_spearman_manhattan": 0.7766391726420421,
1536
  "eval_steps_per_second": 32.826,
1537
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1538
  }
1539
  ],
1540
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4058106841611997,
5
  "eval_steps": 250,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1535
  "eval_spearman_manhattan": 0.7766391726420421,
1536
  "eval_steps_per_second": 32.826,
1537
  "step": 2000
1538
+ },
1539
+ {
1540
+ "epoch": 0.9418931583880038,
1541
+ "grad_norm": 5.383081912994385,
1542
+ "learning_rate": 9.411316776007498e-06,
1543
+ "loss": 0.5343,
1544
+ "step": 2010
1545
+ },
1546
+ {
1547
+ "epoch": 0.9465791940018744,
1548
+ "grad_norm": 5.533719539642334,
1549
+ "learning_rate": 9.408388003748829e-06,
1550
+ "loss": 0.5313,
1551
+ "step": 2020
1552
+ },
1553
+ {
1554
+ "epoch": 0.9512652296157451,
1555
+ "grad_norm": 4.267172336578369,
1556
+ "learning_rate": 9.40545923149016e-06,
1557
+ "loss": 0.5172,
1558
+ "step": 2030
1559
+ },
1560
+ {
1561
+ "epoch": 0.9559512652296157,
1562
+ "grad_norm": 4.8553009033203125,
1563
+ "learning_rate": 9.402530459231491e-06,
1564
+ "loss": 0.5104,
1565
+ "step": 2040
1566
+ },
1567
+ {
1568
+ "epoch": 0.9606373008434864,
1569
+ "grad_norm": 6.460834503173828,
1570
+ "learning_rate": 9.399601686972821e-06,
1571
+ "loss": 0.5225,
1572
+ "step": 2050
1573
+ },
1574
+ {
1575
+ "epoch": 0.9653233364573571,
1576
+ "grad_norm": 27.46290397644043,
1577
+ "learning_rate": 9.396672914714152e-06,
1578
+ "loss": 0.544,
1579
+ "step": 2060
1580
+ },
1581
+ {
1582
+ "epoch": 0.9700093720712277,
1583
+ "grad_norm": 4.89717435836792,
1584
+ "learning_rate": 9.393744142455483e-06,
1585
+ "loss": 0.5653,
1586
+ "step": 2070
1587
+ },
1588
+ {
1589
+ "epoch": 0.9746954076850984,
1590
+ "grad_norm": 4.803583145141602,
1591
+ "learning_rate": 9.390815370196814e-06,
1592
+ "loss": 0.5739,
1593
+ "step": 2080
1594
+ },
1595
+ {
1596
+ "epoch": 0.979381443298969,
1597
+ "grad_norm": 4.121029853820801,
1598
+ "learning_rate": 9.387886597938146e-06,
1599
+ "loss": 0.5192,
1600
+ "step": 2090
1601
+ },
1602
+ {
1603
+ "epoch": 0.9840674789128397,
1604
+ "grad_norm": 4.464984893798828,
1605
+ "learning_rate": 9.384957825679475e-06,
1606
+ "loss": 0.5393,
1607
+ "step": 2100
1608
+ },
1609
+ {
1610
+ "epoch": 0.9887535145267105,
1611
+ "grad_norm": 6.364498615264893,
1612
+ "learning_rate": 9.382029053420806e-06,
1613
+ "loss": 0.5764,
1614
+ "step": 2110
1615
+ },
1616
+ {
1617
+ "epoch": 0.993439550140581,
1618
+ "grad_norm": 3.743790864944458,
1619
+ "learning_rate": 9.379100281162138e-06,
1620
+ "loss": 0.5276,
1621
+ "step": 2120
1622
+ },
1623
+ {
1624
+ "epoch": 0.9981255857544518,
1625
+ "grad_norm": 4.737389087677002,
1626
+ "learning_rate": 9.376171508903469e-06,
1627
+ "loss": 0.5211,
1628
+ "step": 2130
1629
+ },
1630
+ {
1631
+ "epoch": 1.0028116213683225,
1632
+ "grad_norm": 3.622758626937866,
1633
+ "learning_rate": 9.3732427366448e-06,
1634
+ "loss": 0.5329,
1635
+ "step": 2140
1636
+ },
1637
+ {
1638
+ "epoch": 1.007497656982193,
1639
+ "grad_norm": 3.5359978675842285,
1640
+ "learning_rate": 9.37031396438613e-06,
1641
+ "loss": 0.4941,
1642
+ "step": 2150
1643
+ },
1644
+ {
1645
+ "epoch": 1.0121836925960637,
1646
+ "grad_norm": 4.669582843780518,
1647
+ "learning_rate": 9.36738519212746e-06,
1648
+ "loss": 0.4821,
1649
+ "step": 2160
1650
+ },
1651
+ {
1652
+ "epoch": 1.0168697282099344,
1653
+ "grad_norm": 3.767122507095337,
1654
+ "learning_rate": 9.364456419868792e-06,
1655
+ "loss": 0.4886,
1656
+ "step": 2170
1657
+ },
1658
+ {
1659
+ "epoch": 1.021555763823805,
1660
+ "grad_norm": 3.9681687355041504,
1661
+ "learning_rate": 9.361527647610123e-06,
1662
+ "loss": 0.493,
1663
+ "step": 2180
1664
+ },
1665
+ {
1666
+ "epoch": 1.0262417994376758,
1667
+ "grad_norm": 3.389897108078003,
1668
+ "learning_rate": 9.358598875351454e-06,
1669
+ "loss": 0.4688,
1670
+ "step": 2190
1671
+ },
1672
+ {
1673
+ "epoch": 1.0309278350515463,
1674
+ "grad_norm": 3.5152347087860107,
1675
+ "learning_rate": 9.355670103092784e-06,
1676
+ "loss": 0.4625,
1677
+ "step": 2200
1678
+ },
1679
+ {
1680
+ "epoch": 1.035613870665417,
1681
+ "grad_norm": 3.23901629447937,
1682
+ "learning_rate": 9.352741330834115e-06,
1683
+ "loss": 0.5143,
1684
+ "step": 2210
1685
+ },
1686
+ {
1687
+ "epoch": 1.0402999062792877,
1688
+ "grad_norm": 4.617633819580078,
1689
+ "learning_rate": 9.349812558575446e-06,
1690
+ "loss": 0.4732,
1691
+ "step": 2220
1692
+ },
1693
+ {
1694
+ "epoch": 1.0449859418931584,
1695
+ "grad_norm": 5.245469570159912,
1696
+ "learning_rate": 9.346883786316777e-06,
1697
+ "loss": 0.5213,
1698
+ "step": 2230
1699
+ },
1700
+ {
1701
+ "epoch": 1.0496719775070291,
1702
+ "grad_norm": 4.20419454574585,
1703
+ "learning_rate": 9.343955014058108e-06,
1704
+ "loss": 0.5042,
1705
+ "step": 2240
1706
+ },
1707
+ {
1708
+ "epoch": 1.0543580131208996,
1709
+ "grad_norm": 4.6322102546691895,
1710
+ "learning_rate": 9.341026241799438e-06,
1711
+ "loss": 0.4982,
1712
+ "step": 2250
1713
+ },
1714
+ {
1715
+ "epoch": 1.0543580131208996,
1716
+ "eval_loss": 0.05779802054166794,
1717
+ "eval_pearson_cosine": 0.7770314842083366,
1718
+ "eval_pearson_dot": 0.6498110843024136,
1719
+ "eval_pearson_euclidean": 0.7709013065859232,
1720
+ "eval_pearson_manhattan": 0.7695278239114174,
1721
+ "eval_runtime": 48.4856,
1722
+ "eval_samples_per_second": 30.937,
1723
+ "eval_spearman_cosine": 0.7783328375480574,
1724
+ "eval_spearman_dot": 0.6551905692522538,
1725
+ "eval_spearman_euclidean": 0.7802862933680744,
1726
+ "eval_spearman_manhattan": 0.7790525675974715,
1727
+ "eval_steps_per_second": 30.937,
1728
+ "step": 2250
1729
+ },
1730
+ {
1731
+ "epoch": 1.0590440487347703,
1732
+ "grad_norm": 4.474431991577148,
1733
+ "learning_rate": 9.33809746954077e-06,
1734
+ "loss": 0.5227,
1735
+ "step": 2260
1736
+ },
1737
+ {
1738
+ "epoch": 1.063730084348641,
1739
+ "grad_norm": 4.538947105407715,
1740
+ "learning_rate": 9.3351686972821e-06,
1741
+ "loss": 0.5158,
1742
+ "step": 2270
1743
+ },
1744
+ {
1745
+ "epoch": 1.0684161199625117,
1746
+ "grad_norm": 6.6143693923950195,
1747
+ "learning_rate": 9.332239925023432e-06,
1748
+ "loss": 0.461,
1749
+ "step": 2280
1750
+ },
1751
+ {
1752
+ "epoch": 1.0731021555763824,
1753
+ "grad_norm": 4.316189765930176,
1754
+ "learning_rate": 9.329311152764761e-06,
1755
+ "loss": 0.5079,
1756
+ "step": 2290
1757
+ },
1758
+ {
1759
+ "epoch": 1.077788191190253,
1760
+ "grad_norm": 4.054687976837158,
1761
+ "learning_rate": 9.326382380506092e-06,
1762
+ "loss": 0.5022,
1763
+ "step": 2300
1764
+ },
1765
+ {
1766
+ "epoch": 1.0824742268041236,
1767
+ "grad_norm": 4.232051849365234,
1768
+ "learning_rate": 9.323453608247423e-06,
1769
+ "loss": 0.5096,
1770
+ "step": 2310
1771
+ },
1772
+ {
1773
+ "epoch": 1.0871602624179943,
1774
+ "grad_norm": 3.7785236835479736,
1775
+ "learning_rate": 9.320524835988755e-06,
1776
+ "loss": 0.4614,
1777
+ "step": 2320
1778
+ },
1779
+ {
1780
+ "epoch": 1.091846298031865,
1781
+ "grad_norm": 4.865905284881592,
1782
+ "learning_rate": 9.317596063730086e-06,
1783
+ "loss": 0.5135,
1784
+ "step": 2330
1785
+ },
1786
+ {
1787
+ "epoch": 1.0965323336457358,
1788
+ "grad_norm": 4.681485176086426,
1789
+ "learning_rate": 9.314667291471417e-06,
1790
+ "loss": 0.5061,
1791
+ "step": 2340
1792
+ },
1793
+ {
1794
+ "epoch": 1.1012183692596063,
1795
+ "grad_norm": 4.256619453430176,
1796
+ "learning_rate": 9.311738519212747e-06,
1797
+ "loss": 0.4627,
1798
+ "step": 2350
1799
+ },
1800
+ {
1801
+ "epoch": 1.105904404873477,
1802
+ "grad_norm": 4.459606170654297,
1803
+ "learning_rate": 9.308809746954078e-06,
1804
+ "loss": 0.5171,
1805
+ "step": 2360
1806
+ },
1807
+ {
1808
+ "epoch": 1.1105904404873477,
1809
+ "grad_norm": 4.008665084838867,
1810
+ "learning_rate": 9.305880974695409e-06,
1811
+ "loss": 0.4422,
1812
+ "step": 2370
1813
+ },
1814
+ {
1815
+ "epoch": 1.1152764761012184,
1816
+ "grad_norm": 3.674177885055542,
1817
+ "learning_rate": 9.302952202436738e-06,
1818
+ "loss": 0.5233,
1819
+ "step": 2380
1820
+ },
1821
+ {
1822
+ "epoch": 1.119962511715089,
1823
+ "grad_norm": 4.463940620422363,
1824
+ "learning_rate": 9.30002343017807e-06,
1825
+ "loss": 0.4731,
1826
+ "step": 2390
1827
+ },
1828
+ {
1829
+ "epoch": 1.1246485473289598,
1830
+ "grad_norm": 3.9289097785949707,
1831
+ "learning_rate": 9.2970946579194e-06,
1832
+ "loss": 0.4869,
1833
+ "step": 2400
1834
+ },
1835
+ {
1836
+ "epoch": 1.1293345829428303,
1837
+ "grad_norm": 4.097565174102783,
1838
+ "learning_rate": 9.294165885660732e-06,
1839
+ "loss": 0.4594,
1840
+ "step": 2410
1841
+ },
1842
+ {
1843
+ "epoch": 1.134020618556701,
1844
+ "grad_norm": 4.55318546295166,
1845
+ "learning_rate": 9.291237113402063e-06,
1846
+ "loss": 0.494,
1847
+ "step": 2420
1848
+ },
1849
+ {
1850
+ "epoch": 1.1387066541705717,
1851
+ "grad_norm": 4.425617694854736,
1852
+ "learning_rate": 9.288308341143394e-06,
1853
+ "loss": 0.4829,
1854
+ "step": 2430
1855
+ },
1856
+ {
1857
+ "epoch": 1.1433926897844424,
1858
+ "grad_norm": 3.908015489578247,
1859
+ "learning_rate": 9.285379568884726e-06,
1860
+ "loss": 0.4793,
1861
+ "step": 2440
1862
+ },
1863
+ {
1864
+ "epoch": 1.148078725398313,
1865
+ "grad_norm": 3.7293996810913086,
1866
+ "learning_rate": 9.282450796626055e-06,
1867
+ "loss": 0.5399,
1868
+ "step": 2450
1869
+ },
1870
+ {
1871
+ "epoch": 1.1527647610121836,
1872
+ "grad_norm": 4.584681034088135,
1873
+ "learning_rate": 9.279522024367386e-06,
1874
+ "loss": 0.4479,
1875
+ "step": 2460
1876
+ },
1877
+ {
1878
+ "epoch": 1.1574507966260543,
1879
+ "grad_norm": 4.109914302825928,
1880
+ "learning_rate": 9.276593252108716e-06,
1881
+ "loss": 0.4599,
1882
+ "step": 2470
1883
+ },
1884
+ {
1885
+ "epoch": 1.162136832239925,
1886
+ "grad_norm": 4.446422100067139,
1887
+ "learning_rate": 9.273664479850047e-06,
1888
+ "loss": 0.4727,
1889
+ "step": 2480
1890
+ },
1891
+ {
1892
+ "epoch": 1.1668228678537957,
1893
+ "grad_norm": 5.975160598754883,
1894
+ "learning_rate": 9.270735707591378e-06,
1895
+ "loss": 0.4509,
1896
+ "step": 2490
1897
+ },
1898
+ {
1899
+ "epoch": 1.1715089034676662,
1900
+ "grad_norm": 4.379275321960449,
1901
+ "learning_rate": 9.26780693533271e-06,
1902
+ "loss": 0.4828,
1903
+ "step": 2500
1904
+ },
1905
+ {
1906
+ "epoch": 1.1715089034676662,
1907
+ "eval_loss": 0.05214480683207512,
1908
+ "eval_pearson_cosine": 0.7792755247272061,
1909
+ "eval_pearson_dot": 0.6569300577465214,
1910
+ "eval_pearson_euclidean": 0.7718322585231894,
1911
+ "eval_pearson_manhattan": 0.7703922250718165,
1912
+ "eval_runtime": 47.8089,
1913
+ "eval_samples_per_second": 31.375,
1914
+ "eval_spearman_cosine": 0.7799819701975583,
1915
+ "eval_spearman_dot": 0.662507389274304,
1916
+ "eval_spearman_euclidean": 0.7818437831063969,
1917
+ "eval_spearman_manhattan": 0.7805341558401507,
1918
+ "eval_steps_per_second": 31.375,
1919
+ "step": 2500
1920
+ },
1921
+ {
1922
+ "epoch": 1.176194939081537,
1923
+ "grad_norm": 3.5287399291992188,
1924
+ "learning_rate": 9.26487816307404e-06,
1925
+ "loss": 0.4591,
1926
+ "step": 2510
1927
+ },
1928
+ {
1929
+ "epoch": 1.1808809746954076,
1930
+ "grad_norm": 3.277655601501465,
1931
+ "learning_rate": 9.261949390815372e-06,
1932
+ "loss": 0.4479,
1933
+ "step": 2520
1934
+ },
1935
+ {
1936
+ "epoch": 1.1855670103092784,
1937
+ "grad_norm": 4.732039451599121,
1938
+ "learning_rate": 9.259020618556703e-06,
1939
+ "loss": 0.461,
1940
+ "step": 2530
1941
+ },
1942
+ {
1943
+ "epoch": 1.190253045923149,
1944
+ "grad_norm": 4.4760966300964355,
1945
+ "learning_rate": 9.256091846298032e-06,
1946
+ "loss": 0.4652,
1947
+ "step": 2540
1948
+ },
1949
+ {
1950
+ "epoch": 1.1949390815370198,
1951
+ "grad_norm": 7.485498428344727,
1952
+ "learning_rate": 9.253163074039364e-06,
1953
+ "loss": 0.4779,
1954
+ "step": 2550
1955
+ },
1956
+ {
1957
+ "epoch": 1.1996251171508903,
1958
+ "grad_norm": 3.9956140518188477,
1959
+ "learning_rate": 9.250234301780693e-06,
1960
+ "loss": 0.4567,
1961
+ "step": 2560
1962
+ },
1963
+ {
1964
+ "epoch": 1.204311152764761,
1965
+ "grad_norm": 3.547563314437866,
1966
+ "learning_rate": 9.247305529522024e-06,
1967
+ "loss": 0.4988,
1968
+ "step": 2570
1969
+ },
1970
+ {
1971
+ "epoch": 1.2089971883786317,
1972
+ "grad_norm": 5.354389667510986,
1973
+ "learning_rate": 9.244376757263355e-06,
1974
+ "loss": 0.464,
1975
+ "step": 2580
1976
+ },
1977
+ {
1978
+ "epoch": 1.2136832239925024,
1979
+ "grad_norm": 3.791760206222534,
1980
+ "learning_rate": 9.241447985004687e-06,
1981
+ "loss": 0.4441,
1982
+ "step": 2590
1983
+ },
1984
+ {
1985
+ "epoch": 1.218369259606373,
1986
+ "grad_norm": 4.77889347076416,
1987
+ "learning_rate": 9.238519212746018e-06,
1988
+ "loss": 0.4655,
1989
+ "step": 2600
1990
+ },
1991
+ {
1992
+ "epoch": 1.2230552952202436,
1993
+ "grad_norm": 5.804917335510254,
1994
+ "learning_rate": 9.235590440487349e-06,
1995
+ "loss": 0.4912,
1996
+ "step": 2610
1997
+ },
1998
+ {
1999
+ "epoch": 1.2277413308341143,
2000
+ "grad_norm": 3.841860771179199,
2001
+ "learning_rate": 9.23266166822868e-06,
2002
+ "loss": 0.472,
2003
+ "step": 2620
2004
+ },
2005
+ {
2006
+ "epoch": 1.232427366447985,
2007
+ "grad_norm": 4.4197540283203125,
2008
+ "learning_rate": 9.22973289597001e-06,
2009
+ "loss": 0.4821,
2010
+ "step": 2630
2011
+ },
2012
+ {
2013
+ "epoch": 1.2371134020618557,
2014
+ "grad_norm": 5.844490051269531,
2015
+ "learning_rate": 9.226804123711341e-06,
2016
+ "loss": 0.5655,
2017
+ "step": 2640
2018
+ },
2019
+ {
2020
+ "epoch": 1.2417994376757264,
2021
+ "grad_norm": 3.5442116260528564,
2022
+ "learning_rate": 9.223875351452672e-06,
2023
+ "loss": 0.4532,
2024
+ "step": 2650
2025
+ },
2026
+ {
2027
+ "epoch": 1.246485473289597,
2028
+ "grad_norm": 5.259571075439453,
2029
+ "learning_rate": 9.220946579194002e-06,
2030
+ "loss": 0.4856,
2031
+ "step": 2660
2032
+ },
2033
+ {
2034
+ "epoch": 1.2511715089034676,
2035
+ "grad_norm": 4.675846576690674,
2036
+ "learning_rate": 9.218017806935333e-06,
2037
+ "loss": 0.4576,
2038
+ "step": 2670
2039
+ },
2040
+ {
2041
+ "epoch": 1.2558575445173383,
2042
+ "grad_norm": 5.236482620239258,
2043
+ "learning_rate": 9.215089034676664e-06,
2044
+ "loss": 0.513,
2045
+ "step": 2680
2046
+ },
2047
+ {
2048
+ "epoch": 1.260543580131209,
2049
+ "grad_norm": 4.658278465270996,
2050
+ "learning_rate": 9.212160262417995e-06,
2051
+ "loss": 0.4734,
2052
+ "step": 2690
2053
+ },
2054
+ {
2055
+ "epoch": 1.2652296157450795,
2056
+ "grad_norm": 3.7085494995117188,
2057
+ "learning_rate": 9.209231490159326e-06,
2058
+ "loss": 0.5279,
2059
+ "step": 2700
2060
+ },
2061
+ {
2062
+ "epoch": 1.2699156513589505,
2063
+ "grad_norm": 3.4627673625946045,
2064
+ "learning_rate": 9.206302717900658e-06,
2065
+ "loss": 0.4773,
2066
+ "step": 2710
2067
+ },
2068
+ {
2069
+ "epoch": 1.274601686972821,
2070
+ "grad_norm": 4.618409633636475,
2071
+ "learning_rate": 9.203373945641987e-06,
2072
+ "loss": 0.4354,
2073
+ "step": 2720
2074
+ },
2075
+ {
2076
+ "epoch": 1.2792877225866917,
2077
+ "grad_norm": 3.1090590953826904,
2078
+ "learning_rate": 9.200445173383318e-06,
2079
+ "loss": 0.4409,
2080
+ "step": 2730
2081
+ },
2082
+ {
2083
+ "epoch": 1.2839737582005624,
2084
+ "grad_norm": 4.328725337982178,
2085
+ "learning_rate": 9.19751640112465e-06,
2086
+ "loss": 0.4799,
2087
+ "step": 2740
2088
+ },
2089
+ {
2090
+ "epoch": 1.2886597938144329,
2091
+ "grad_norm": 3.8362419605255127,
2092
+ "learning_rate": 9.194587628865979e-06,
2093
+ "loss": 0.5062,
2094
+ "step": 2750
2095
+ },
2096
+ {
2097
+ "epoch": 1.2886597938144329,
2098
+ "eval_loss": 0.05263364687561989,
2099
+ "eval_pearson_cosine": 0.7755555336434341,
2100
+ "eval_pearson_dot": 0.6502184577290961,
2101
+ "eval_pearson_euclidean": 0.7709853609297426,
2102
+ "eval_pearson_manhattan": 0.769572635033791,
2103
+ "eval_runtime": 44.8508,
2104
+ "eval_samples_per_second": 33.444,
2105
+ "eval_spearman_cosine": 0.7765036654281985,
2106
+ "eval_spearman_dot": 0.6558936409143281,
2107
+ "eval_spearman_euclidean": 0.7808945633743188,
2108
+ "eval_spearman_manhattan": 0.7795729380744477,
2109
+ "eval_steps_per_second": 33.444,
2110
+ "step": 2750
2111
+ },
2112
+ {
2113
+ "epoch": 1.2933458294283038,
2114
+ "grad_norm": 3.6972432136535645,
2115
+ "learning_rate": 9.19165885660731e-06,
2116
+ "loss": 0.488,
2117
+ "step": 2760
2118
+ },
2119
+ {
2120
+ "epoch": 1.2980318650421743,
2121
+ "grad_norm": 6.73103141784668,
2122
+ "learning_rate": 9.188730084348641e-06,
2123
+ "loss": 0.4553,
2124
+ "step": 2770
2125
+ },
2126
+ {
2127
+ "epoch": 1.302717900656045,
2128
+ "grad_norm": 4.371028423309326,
2129
+ "learning_rate": 9.185801312089973e-06,
2130
+ "loss": 0.4555,
2131
+ "step": 2780
2132
+ },
2133
+ {
2134
+ "epoch": 1.3074039362699157,
2135
+ "grad_norm": 3.4788401126861572,
2136
+ "learning_rate": 9.182872539831304e-06,
2137
+ "loss": 0.4561,
2138
+ "step": 2790
2139
+ },
2140
+ {
2141
+ "epoch": 1.3120899718837864,
2142
+ "grad_norm": 3.832277774810791,
2143
+ "learning_rate": 9.179943767572635e-06,
2144
+ "loss": 0.4838,
2145
+ "step": 2800
2146
+ },
2147
+ {
2148
+ "epoch": 1.316776007497657,
2149
+ "grad_norm": 3.5579423904418945,
2150
+ "learning_rate": 9.177014995313966e-06,
2151
+ "loss": 0.4404,
2152
+ "step": 2810
2153
+ },
2154
+ {
2155
+ "epoch": 1.3214620431115276,
2156
+ "grad_norm": 3.7768073081970215,
2157
+ "learning_rate": 9.174086223055296e-06,
2158
+ "loss": 0.4724,
2159
+ "step": 2820
2160
+ },
2161
+ {
2162
+ "epoch": 1.3261480787253983,
2163
+ "grad_norm": 3.957035779953003,
2164
+ "learning_rate": 9.171157450796627e-06,
2165
+ "loss": 0.471,
2166
+ "step": 2830
2167
+ },
2168
+ {
2169
+ "epoch": 1.330834114339269,
2170
+ "grad_norm": 3.6035895347595215,
2171
+ "learning_rate": 9.168228678537958e-06,
2172
+ "loss": 0.4645,
2173
+ "step": 2840
2174
+ },
2175
+ {
2176
+ "epoch": 1.3355201499531397,
2177
+ "grad_norm": 4.358327388763428,
2178
+ "learning_rate": 9.165299906279288e-06,
2179
+ "loss": 0.4301,
2180
+ "step": 2850
2181
+ },
2182
+ {
2183
+ "epoch": 1.3402061855670104,
2184
+ "grad_norm": 3.4666709899902344,
2185
+ "learning_rate": 9.162371134020619e-06,
2186
+ "loss": 0.4508,
2187
+ "step": 2860
2188
+ },
2189
+ {
2190
+ "epoch": 1.344892221180881,
2191
+ "grad_norm": 3.912290096282959,
2192
+ "learning_rate": 9.15944236176195e-06,
2193
+ "loss": 0.4379,
2194
+ "step": 2870
2195
+ },
2196
+ {
2197
+ "epoch": 1.3495782567947516,
2198
+ "grad_norm": 4.305796146392822,
2199
+ "learning_rate": 9.156513589503281e-06,
2200
+ "loss": 0.4194,
2201
+ "step": 2880
2202
+ },
2203
+ {
2204
+ "epoch": 1.3542642924086223,
2205
+ "grad_norm": 4.231681823730469,
2206
+ "learning_rate": 9.153584817244612e-06,
2207
+ "loss": 0.4017,
2208
+ "step": 2890
2209
+ },
2210
+ {
2211
+ "epoch": 1.358950328022493,
2212
+ "grad_norm": 4.43821382522583,
2213
+ "learning_rate": 9.150656044985944e-06,
2214
+ "loss": 0.4185,
2215
+ "step": 2900
2216
+ },
2217
+ {
2218
+ "epoch": 1.3636363636363638,
2219
+ "grad_norm": 4.922164440155029,
2220
+ "learning_rate": 9.147727272727273e-06,
2221
+ "loss": 0.5199,
2222
+ "step": 2910
2223
+ },
2224
+ {
2225
+ "epoch": 1.3683223992502342,
2226
+ "grad_norm": 4.577489852905273,
2227
+ "learning_rate": 9.144798500468604e-06,
2228
+ "loss": 0.4237,
2229
+ "step": 2920
2230
+ },
2231
+ {
2232
+ "epoch": 1.373008434864105,
2233
+ "grad_norm": 3.9537651538848877,
2234
+ "learning_rate": 9.141869728209935e-06,
2235
+ "loss": 0.4888,
2236
+ "step": 2930
2237
+ },
2238
+ {
2239
+ "epoch": 1.3776944704779757,
2240
+ "grad_norm": 4.165870189666748,
2241
+ "learning_rate": 9.138940955951267e-06,
2242
+ "loss": 0.4476,
2243
+ "step": 2940
2244
+ },
2245
+ {
2246
+ "epoch": 1.3823805060918464,
2247
+ "grad_norm": 4.492893218994141,
2248
+ "learning_rate": 9.136012183692596e-06,
2249
+ "loss": 0.5159,
2250
+ "step": 2950
2251
+ },
2252
+ {
2253
+ "epoch": 1.387066541705717,
2254
+ "grad_norm": 3.847490072250366,
2255
+ "learning_rate": 9.133083411433927e-06,
2256
+ "loss": 0.4497,
2257
+ "step": 2960
2258
+ },
2259
+ {
2260
+ "epoch": 1.3917525773195876,
2261
+ "grad_norm": 6.766137599945068,
2262
+ "learning_rate": 9.130154639175258e-06,
2263
+ "loss": 0.4379,
2264
+ "step": 2970
2265
+ },
2266
+ {
2267
+ "epoch": 1.3964386129334583,
2268
+ "grad_norm": 3.9198007583618164,
2269
+ "learning_rate": 9.12722586691659e-06,
2270
+ "loss": 0.4519,
2271
+ "step": 2980
2272
+ },
2273
+ {
2274
+ "epoch": 1.401124648547329,
2275
+ "grad_norm": 3.67480731010437,
2276
+ "learning_rate": 9.124297094657921e-06,
2277
+ "loss": 0.4108,
2278
+ "step": 2990
2279
+ },
2280
+ {
2281
+ "epoch": 1.4058106841611997,
2282
+ "grad_norm": 3.3013832569122314,
2283
+ "learning_rate": 9.12136832239925e-06,
2284
+ "loss": 0.433,
2285
+ "step": 3000
2286
+ },
2287
+ {
2288
+ "epoch": 1.4058106841611997,
2289
+ "eval_loss": 0.0497601218521595,
2290
+ "eval_pearson_cosine": 0.7834985989633054,
2291
+ "eval_pearson_dot": 0.6669524421664974,
2292
+ "eval_pearson_euclidean": 0.7743874834934843,
2293
+ "eval_pearson_manhattan": 0.7730376146204847,
2294
+ "eval_runtime": 47.8141,
2295
+ "eval_samples_per_second": 31.371,
2296
+ "eval_spearman_cosine": 0.7845889452017747,
2297
+ "eval_spearman_dot": 0.6729435548765089,
2298
+ "eval_spearman_euclidean": 0.784591658726837,
2299
+ "eval_spearman_manhattan": 0.7832975474858643,
2300
+ "eval_steps_per_second": 31.371,
2301
+ "step": 3000
2302
  }
2303
  ],
2304
  "logging_steps": 10,