CocoRoF committed
Commit 6313304 · verified · 1 parent: 2a6e3ef

Training in progress, step 3000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba21b9e70b08c8157a0be6f7f43a461e7115ad8add195cfd8d7ecd41a4a559ba
+oid sha256:719ef6a0a30163105c21960f0dd8b4a42fea79f576ca2432e13b51c25b82ea80
 size 738367848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f32f2ca1f450a8a345cef7a5a79a872c2bc4a1ba658bcfe499421b034c07bf73
+oid sha256:b6e48bb60ca108c3441e9bb2125a1aa87430a3765f91e3afbcc5c959898ed887
 size 1476823354
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ca51d4b33edcedf9568d5202767b896d828b5aeca18f2cdd82617688464b784
+oid sha256:57d1be83d8248a4d086961979df8c8adf273c0891e791d7b637d9e752cbaf971
 size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10e6ea705ea5a1704cd5773090c827a2013c8caab967a116ff24a5f57ce3ce90
+oid sha256:df16bc8587b83b59d73ffcb4774bab640ed2bbf6249aba7b7112751df7280b58
 size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6ff0b4da12dd0cdcb6e90b04160e41685d9ccc1fa1cc74bb7949edf700200d4
+oid sha256:b8ebd66766c47747d9d34f4ee4e6f1e09fb1843f9769ec17242277c256d80133
 size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:322470b09ac4f5d9443d55c37c8b8e7d0e8a1702208c81e52e3a58a8de515b5b
+oid sha256:e44591b56351d86ebac6b6310a6b9a58bf9ebd5af691efd9614e457180a22080
 size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ababe8505205ca2bcb959a2abbe2fbc8b6ad677bd43b1f2ee9055b3cb400b061
+oid sha256:e2488c2baf1f7983e7e82c869c2ff023bdc7796ba97390c46686a4df8544a046
 size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed1c3ba656fdb40a72824e366a08e148a30e1089a6ecd019eaf28fa4a17859fa
+oid sha256:a864d68e543f00211ae2c48a5b9f47a92cf862dc03f0cda64f0647177108efe6
 size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:042c62210b9e1c9ed394e0a3362b1c773c07591d94f2716a8e928676134742b7
+oid sha256:c4ab980d3568f3d6a91c3cc4b09b1c84c8bbbd77347d21d918824619ddb9bc7f
 size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb54f0278c663494261026658652f845bae43245e75ccc213c6897de179f542a
+oid sha256:4a7008a4087300200a04419d46f39b98daf870297f179e965bf970ef908f90f3
 size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5fe8eacf1bb6bef5171d133da4e7818c1e7ae0192bd879d86e40a8d923aeb81f
+oid sha256:d6c54bb82c2097d343d35db0c4f1a616af19cd7b594e6327d1786008f1daedc5
 size 1000
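
Each of the changes above rewrites a Git LFS pointer file: the repository tracks only the `version`, `oid sha256:…`, and `size` lines, while the actual blob lives in LFS storage, so a checkpoint update shows up as a one-line oid swap. A minimal sketch (assuming the real files have been materialized locally, e.g. via `git lfs pull`; the path is illustrative) for checking that a downloaded file matches its pointer:

```python
import hashlib
from pathlib import Path

def lfs_oid(path: Path, chunk_size: int = 1 << 20) -> str:
    """Compute the sha256 digest that Git LFS records as the pointer's oid."""
    h = hashlib.sha256()
    with path.open("rb") as f:
        # Hash in chunks so large checkpoint files never load fully into memory.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

path = Path("last-checkpoint/model.safetensors")  # hypothetical local checkout
print(lfs_oid(path))        # should equal the new oid, 719ef6a0a301...
print(path.stat().st_size)  # should equal the pointer's size, 738367848
```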
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.8744142455482662,
+  "epoch": 2.8116213683223994,
   "eval_steps": 250,
-  "global_step": 2000,
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1535,6 +1535,770 @@
       "eval_spearman_manhattan": 0.7747616492851076,
       "eval_steps_per_second": 8.47,
       "step": 2000
+    },
+    {
+      "epoch": 1.8837863167760074,
+      "grad_norm": 1.116107702255249,
+      "learning_rate": 9.985283134911919e-06,
+      "loss": 0.1775,
+      "step": 2010
+    },
+    {
+      "epoch": 1.8931583880037488,
+      "grad_norm": 1.280927300453186,
+      "learning_rate": 9.985209916677651e-06,
+      "loss": 0.1853,
+      "step": 2020
+    },
+    {
+      "epoch": 1.9025304592314902,
+      "grad_norm": 1.419044852256775,
+      "learning_rate": 9.98513669844338e-06,
+      "loss": 0.1767,
+      "step": 2030
+    },
+    {
+      "epoch": 1.9119025304592316,
+      "grad_norm": 1.4140015840530396,
+      "learning_rate": 9.985063480209111e-06,
+      "loss": 0.1968,
+      "step": 2040
+    },
+    {
+      "epoch": 1.9212746016869728,
+      "grad_norm": 1.23015296459198,
+      "learning_rate": 9.984990261974844e-06,
+      "loss": 0.1559,
+      "step": 2050
+    },
+    {
+      "epoch": 1.930646672914714,
+      "grad_norm": 1.4209731817245483,
+      "learning_rate": 9.984917043740574e-06,
+      "loss": 0.18,
+      "step": 2060
+    },
+    {
+      "epoch": 1.9400187441424555,
+      "grad_norm": 1.5270899534225464,
+      "learning_rate": 9.984843825506305e-06,
+      "loss": 0.1858,
+      "step": 2070
+    },
+    {
+      "epoch": 1.9493908153701969,
+      "grad_norm": 2.0037920475006104,
+      "learning_rate": 9.984770607272036e-06,
+      "loss": 0.1812,
+      "step": 2080
+    },
+    {
+      "epoch": 1.9587628865979383,
+      "grad_norm": 1.4397103786468506,
+      "learning_rate": 9.984697389037767e-06,
+      "loss": 0.1853,
+      "step": 2090
+    },
+    {
+      "epoch": 1.9681349578256795,
+      "grad_norm": 1.555161476135254,
+      "learning_rate": 9.984624170803497e-06,
+      "loss": 0.1758,
+      "step": 2100
+    },
+    {
+      "epoch": 1.9775070290534207,
+      "grad_norm": 1.1453354358673096,
+      "learning_rate": 9.984550952569228e-06,
+      "loss": 0.1821,
+      "step": 2110
+    },
+    {
+      "epoch": 1.986879100281162,
+      "grad_norm": 1.3050484657287598,
+      "learning_rate": 9.984477734334959e-06,
+      "loss": 0.1828,
+      "step": 2120
+    },
+    {
+      "epoch": 1.9962511715089035,
+      "grad_norm": 1.1858463287353516,
+      "learning_rate": 9.984404516100691e-06,
+      "loss": 0.1801,
+      "step": 2130
+    },
+    {
+      "epoch": 2.005623242736645,
+      "grad_norm": 1.2467753887176514,
+      "learning_rate": 9.984331297866422e-06,
+      "loss": 0.1651,
+      "step": 2140
+    },
+    {
+      "epoch": 2.014995313964386,
+      "grad_norm": 1.9730074405670166,
+      "learning_rate": 9.984258079632151e-06,
+      "loss": 0.1654,
+      "step": 2150
+    },
+    {
+      "epoch": 2.0243673851921273,
+      "grad_norm": 1.384181261062622,
+      "learning_rate": 9.984184861397884e-06,
+      "loss": 0.151,
+      "step": 2160
+    },
+    {
+      "epoch": 2.0337394564198688,
+      "grad_norm": 1.2262136936187744,
+      "learning_rate": 9.984111643163614e-06,
+      "loss": 0.1338,
+      "step": 2170
+    },
+    {
+      "epoch": 2.04311152764761,
+      "grad_norm": 1.3417856693267822,
+      "learning_rate": 9.984038424929345e-06,
+      "loss": 0.1445,
+      "step": 2180
+    },
+    {
+      "epoch": 2.0524835988753516,
+      "grad_norm": 1.3032526969909668,
+      "learning_rate": 9.983965206695076e-06,
+      "loss": 0.1675,
+      "step": 2190
+    },
+    {
+      "epoch": 2.0618556701030926,
+      "grad_norm": 1.4586397409439087,
+      "learning_rate": 9.983891988460808e-06,
+      "loss": 0.1503,
+      "step": 2200
+    },
+    {
+      "epoch": 2.071227741330834,
+      "grad_norm": 1.8017582893371582,
+      "learning_rate": 9.983818770226537e-06,
+      "loss": 0.1614,
+      "step": 2210
+    },
+    {
+      "epoch": 2.0805998125585754,
+      "grad_norm": 1.1136542558670044,
+      "learning_rate": 9.983745551992268e-06,
+      "loss": 0.1385,
+      "step": 2220
+    },
+    {
+      "epoch": 2.089971883786317,
+      "grad_norm": 1.48130202293396,
+      "learning_rate": 9.983672333758e-06,
+      "loss": 0.1448,
+      "step": 2230
+    },
+    {
+      "epoch": 2.0993439550140582,
+      "grad_norm": 1.1847114562988281,
+      "learning_rate": 9.983599115523731e-06,
+      "loss": 0.1263,
+      "step": 2240
+    },
+    {
+      "epoch": 2.108716026241799,
+      "grad_norm": 1.068515419960022,
+      "learning_rate": 9.983525897289462e-06,
+      "loss": 0.1448,
+      "step": 2250
+    },
+    {
+      "epoch": 2.108716026241799,
+      "eval_loss": 0.0637284442782402,
+      "eval_pearson_cosine": 0.766581654548645,
+      "eval_pearson_dot": 0.652958333492279,
+      "eval_pearson_euclidean": 0.76385897397995,
+      "eval_pearson_manhattan": 0.7643536329269409,
+      "eval_runtime": 24.9836,
+      "eval_samples_per_second": 60.039,
+      "eval_spearman_cosine": 0.7736502023043434,
+      "eval_spearman_dot": 0.6506365364740643,
+      "eval_spearman_euclidean": 0.7701725336122238,
+      "eval_spearman_manhattan": 0.7705851416924343,
+      "eval_steps_per_second": 7.525,
+      "step": 2250
+    },
+    {
+      "epoch": 2.1180880974695406,
+      "grad_norm": 1.2607600688934326,
+      "learning_rate": 9.983452679055193e-06,
+      "loss": 0.1405,
+      "step": 2260
+    },
+    {
+      "epoch": 2.127460168697282,
+      "grad_norm": 1.3096617460250854,
+      "learning_rate": 9.983379460820924e-06,
+      "loss": 0.159,
+      "step": 2270
+    },
+    {
+      "epoch": 2.1368322399250235,
+      "grad_norm": 1.4220956563949585,
+      "learning_rate": 9.983306242586654e-06,
+      "loss": 0.1634,
+      "step": 2280
+    },
+    {
+      "epoch": 2.146204311152765,
+      "grad_norm": 1.5565595626831055,
+      "learning_rate": 9.983233024352385e-06,
+      "loss": 0.1549,
+      "step": 2290
+    },
+    {
+      "epoch": 2.155576382380506,
+      "grad_norm": 1.357906460762024,
+      "learning_rate": 9.983159806118118e-06,
+      "loss": 0.1503,
+      "step": 2300
+    },
+    {
+      "epoch": 2.1649484536082473,
+      "grad_norm": 1.0181514024734497,
+      "learning_rate": 9.983086587883848e-06,
+      "loss": 0.1242,
+      "step": 2310
+    },
+    {
+      "epoch": 2.1743205248359887,
+      "grad_norm": 1.2936785221099854,
+      "learning_rate": 9.983013369649577e-06,
+      "loss": 0.1516,
+      "step": 2320
+    },
+    {
+      "epoch": 2.18369259606373,
+      "grad_norm": 1.353125810623169,
+      "learning_rate": 9.98294015141531e-06,
+      "loss": 0.1576,
+      "step": 2330
+    },
+    {
+      "epoch": 2.1930646672914715,
+      "grad_norm": 1.5978926420211792,
+      "learning_rate": 9.98286693318104e-06,
+      "loss": 0.143,
+      "step": 2340
+    },
+    {
+      "epoch": 2.2024367385192125,
+      "grad_norm": 1.643609642982483,
+      "learning_rate": 9.982793714946771e-06,
+      "loss": 0.1509,
+      "step": 2350
+    },
+    {
+      "epoch": 2.211808809746954,
+      "grad_norm": 1.2868740558624268,
+      "learning_rate": 9.982720496712502e-06,
+      "loss": 0.1407,
+      "step": 2360
+    },
+    {
+      "epoch": 2.2211808809746953,
+      "grad_norm": 1.662234902381897,
+      "learning_rate": 9.982647278478233e-06,
+      "loss": 0.1499,
+      "step": 2370
+    },
+    {
+      "epoch": 2.2305529522024368,
+      "grad_norm": 1.7390748262405396,
+      "learning_rate": 9.982574060243964e-06,
+      "loss": 0.139,
+      "step": 2380
+    },
+    {
+      "epoch": 2.239925023430178,
+      "grad_norm": 1.2645044326782227,
+      "learning_rate": 9.982500842009694e-06,
+      "loss": 0.1541,
+      "step": 2390
+    },
+    {
+      "epoch": 2.2492970946579196,
+      "grad_norm": 1.5143808126449585,
+      "learning_rate": 9.982427623775425e-06,
+      "loss": 0.15,
+      "step": 2400
+    },
+    {
+      "epoch": 2.2586691658856606,
+      "grad_norm": 1.516233205795288,
+      "learning_rate": 9.982354405541158e-06,
+      "loss": 0.1387,
+      "step": 2410
+    },
+    {
+      "epoch": 2.268041237113402,
+      "grad_norm": 1.607926368713379,
+      "learning_rate": 9.982281187306888e-06,
+      "loss": 0.1459,
+      "step": 2420
+    },
+    {
+      "epoch": 2.2774133083411434,
+      "grad_norm": 1.433325171470642,
+      "learning_rate": 9.982207969072617e-06,
+      "loss": 0.145,
+      "step": 2430
+    },
+    {
+      "epoch": 2.286785379568885,
+      "grad_norm": 1.4051145315170288,
+      "learning_rate": 9.98213475083835e-06,
+      "loss": 0.1433,
+      "step": 2440
+    },
+    {
+      "epoch": 2.296157450796626,
+      "grad_norm": 1.5076231956481934,
+      "learning_rate": 9.98206153260408e-06,
+      "loss": 0.1514,
+      "step": 2450
+    },
+    {
+      "epoch": 2.3055295220243672,
+      "grad_norm": 1.185927152633667,
+      "learning_rate": 9.981988314369811e-06,
+      "loss": 0.1315,
+      "step": 2460
+    },
+    {
+      "epoch": 2.3149015932521086,
+      "grad_norm": 1.1687299013137817,
+      "learning_rate": 9.981915096135542e-06,
+      "loss": 0.1611,
+      "step": 2470
+    },
+    {
+      "epoch": 2.32427366447985,
+      "grad_norm": 1.205338716506958,
+      "learning_rate": 9.981841877901274e-06,
+      "loss": 0.1587,
+      "step": 2480
+    },
+    {
+      "epoch": 2.3336457357075915,
+      "grad_norm": 1.1079684495925903,
+      "learning_rate": 9.981768659667004e-06,
+      "loss": 0.142,
+      "step": 2490
+    },
+    {
+      "epoch": 2.3430178069353325,
+      "grad_norm": 1.1689645051956177,
+      "learning_rate": 9.981695441432734e-06,
+      "loss": 0.1449,
+      "step": 2500
+    },
+    {
+      "epoch": 2.3430178069353325,
+      "eval_loss": 0.05785529315471649,
+      "eval_pearson_cosine": 0.7640599012374878,
+      "eval_pearson_dot": 0.6659318208694458,
+      "eval_pearson_euclidean": 0.7584241628646851,
+      "eval_pearson_manhattan": 0.7589800357818604,
+      "eval_runtime": 27.3942,
+      "eval_samples_per_second": 54.756,
+      "eval_spearman_cosine": 0.7698402659202235,
+      "eval_spearman_dot": 0.6637382071207051,
+      "eval_spearman_euclidean": 0.765183939076614,
+      "eval_spearman_manhattan": 0.7654494135153407,
+      "eval_steps_per_second": 6.863,
+      "step": 2500
+    },
+    {
+      "epoch": 2.352389878163074,
+      "grad_norm": 1.1410503387451172,
+      "learning_rate": 9.981622223198467e-06,
+      "loss": 0.1253,
+      "step": 2510
+    },
+    {
+      "epoch": 2.3617619493908153,
+      "grad_norm": 1.6562408208847046,
+      "learning_rate": 9.981549004964197e-06,
+      "loss": 0.1363,
+      "step": 2520
+    },
+    {
+      "epoch": 2.3711340206185567,
+      "grad_norm": 1.3503327369689941,
+      "learning_rate": 9.981475786729928e-06,
+      "loss": 0.141,
+      "step": 2530
+    },
+    {
+      "epoch": 2.380506091846298,
+      "grad_norm": 1.4653688669204712,
+      "learning_rate": 9.981402568495659e-06,
+      "loss": 0.1452,
+      "step": 2540
+    },
+    {
+      "epoch": 2.3898781630740396,
+      "grad_norm": 1.4135221242904663,
+      "learning_rate": 9.98132935026139e-06,
+      "loss": 0.1387,
+      "step": 2550
+    },
+    {
+      "epoch": 2.3992502343017805,
+      "grad_norm": 1.1758474111557007,
+      "learning_rate": 9.98125613202712e-06,
+      "loss": 0.1402,
+      "step": 2560
+    },
+    {
+      "epoch": 2.408622305529522,
+      "grad_norm": 1.6394227743148804,
+      "learning_rate": 9.981182913792851e-06,
+      "loss": 0.1434,
+      "step": 2570
+    },
+    {
+      "epoch": 2.4179943767572634,
+      "grad_norm": 1.5223402976989746,
+      "learning_rate": 9.981109695558584e-06,
+      "loss": 0.1433,
+      "step": 2580
+    },
+    {
+      "epoch": 2.427366447985005,
+      "grad_norm": 1.3722361326217651,
+      "learning_rate": 9.981036477324314e-06,
+      "loss": 0.145,
+      "step": 2590
+    },
+    {
+      "epoch": 2.436738519212746,
+      "grad_norm": 1.4288251399993896,
+      "learning_rate": 9.980963259090045e-06,
+      "loss": 0.1419,
+      "step": 2600
+    },
+    {
+      "epoch": 2.446110590440487,
+      "grad_norm": 1.3789891004562378,
+      "learning_rate": 9.980890040855776e-06,
+      "loss": 0.1428,
+      "step": 2610
+    },
+    {
+      "epoch": 2.4554826616682286,
+      "grad_norm": 1.3833218812942505,
+      "learning_rate": 9.980816822621507e-06,
+      "loss": 0.163,
+      "step": 2620
+    },
+    {
+      "epoch": 2.46485473289597,
+      "grad_norm": 1.2749391794204712,
+      "learning_rate": 9.980743604387237e-06,
+      "loss": 0.1457,
+      "step": 2630
+    },
+    {
+      "epoch": 2.4742268041237114,
+      "grad_norm": 1.3677037954330444,
+      "learning_rate": 9.980670386152968e-06,
+      "loss": 0.1393,
+      "step": 2640
+    },
+    {
+      "epoch": 2.483598875351453,
+      "grad_norm": 1.2386823892593384,
+      "learning_rate": 9.980597167918699e-06,
+      "loss": 0.1446,
+      "step": 2650
+    },
+    {
+      "epoch": 2.492970946579194,
+      "grad_norm": 1.6553146839141846,
+      "learning_rate": 9.98052394968443e-06,
+      "loss": 0.1399,
+      "step": 2660
+    },
+    {
+      "epoch": 2.5023430178069352,
+      "grad_norm": 1.2258574962615967,
+      "learning_rate": 9.98045073145016e-06,
+      "loss": 0.1557,
+      "step": 2670
+    },
+    {
+      "epoch": 2.5117150890346767,
+      "grad_norm": 1.1680238246917725,
+      "learning_rate": 9.980377513215891e-06,
+      "loss": 0.14,
+      "step": 2680
+    },
+    {
+      "epoch": 2.521087160262418,
+      "grad_norm": 1.3764533996582031,
+      "learning_rate": 9.980304294981624e-06,
+      "loss": 0.1429,
+      "step": 2690
+    },
+    {
+      "epoch": 2.530459231490159,
+      "grad_norm": 1.1607757806777954,
+      "learning_rate": 9.980231076747354e-06,
+      "loss": 0.156,
+      "step": 2700
+    },
+    {
+      "epoch": 2.539831302717901,
+      "grad_norm": 1.30258309841156,
+      "learning_rate": 9.980157858513085e-06,
+      "loss": 0.1334,
+      "step": 2710
+    },
+    {
+      "epoch": 2.549203373945642,
+      "grad_norm": 1.3965803384780884,
+      "learning_rate": 9.980084640278816e-06,
+      "loss": 0.1532,
+      "step": 2720
+    },
+    {
+      "epoch": 2.5585754451733833,
+      "grad_norm": 1.2492479085922241,
+      "learning_rate": 9.980011422044547e-06,
+      "loss": 0.1538,
+      "step": 2730
+    },
+    {
+      "epoch": 2.5679475164011247,
+      "grad_norm": 1.5879229307174683,
+      "learning_rate": 9.979938203810277e-06,
+      "loss": 0.1393,
+      "step": 2740
+    },
+    {
+      "epoch": 2.5773195876288657,
+      "grad_norm": 1.5499955415725708,
+      "learning_rate": 9.979864985576008e-06,
+      "loss": 0.1443,
+      "step": 2750
+    },
+    {
+      "epoch": 2.5773195876288657,
+      "eval_loss": 0.059572458267211914,
+      "eval_pearson_cosine": 0.7583234310150146,
+      "eval_pearson_dot": 0.6585268378257751,
+      "eval_pearson_euclidean": 0.7594324946403503,
+      "eval_pearson_manhattan": 0.7599164843559265,
+      "eval_runtime": 25.1198,
+      "eval_samples_per_second": 59.714,
+      "eval_spearman_cosine": 0.7658877891929784,
+      "eval_spearman_dot": 0.6550703356470525,
+      "eval_spearman_euclidean": 0.7651954936870381,
+      "eval_spearman_manhattan": 0.7656066832066194,
+      "eval_steps_per_second": 7.484,
+      "step": 2750
+    },
+    {
+      "epoch": 2.5866916588566076,
+      "grad_norm": 1.1182575225830078,
+      "learning_rate": 9.97979176734174e-06,
+      "loss": 0.1449,
+      "step": 2760
+    },
+    {
+      "epoch": 2.5960637300843485,
+      "grad_norm": 1.3228731155395508,
+      "learning_rate": 9.979718549107471e-06,
+      "loss": 0.1339,
+      "step": 2770
+    },
+    {
+      "epoch": 2.60543580131209,
+      "grad_norm": 1.3763021230697632,
+      "learning_rate": 9.9796453308732e-06,
+      "loss": 0.1379,
+      "step": 2780
+    },
+    {
+      "epoch": 2.6148078725398314,
+      "grad_norm": 1.6708637475967407,
+      "learning_rate": 9.979572112638933e-06,
+      "loss": 0.1491,
+      "step": 2790
+    },
+    {
+      "epoch": 2.624179943767573,
+      "grad_norm": 1.0826717615127563,
+      "learning_rate": 9.979498894404664e-06,
+      "loss": 0.1447,
+      "step": 2800
+    },
+    {
+      "epoch": 2.633552014995314,
+      "grad_norm": 1.4416155815124512,
+      "learning_rate": 9.979425676170394e-06,
+      "loss": 0.1398,
+      "step": 2810
+    },
+    {
+      "epoch": 2.642924086223055,
+      "grad_norm": 1.3966304063796997,
+      "learning_rate": 9.979352457936125e-06,
+      "loss": 0.1332,
+      "step": 2820
+    },
+    {
+      "epoch": 2.6522961574507966,
+      "grad_norm": 1.5255811214447021,
+      "learning_rate": 9.979279239701856e-06,
+      "loss": 0.1423,
+      "step": 2830
+    },
+    {
+      "epoch": 2.661668228678538,
+      "grad_norm": 1.3866652250289917,
+      "learning_rate": 9.979206021467587e-06,
+      "loss": 0.1554,
+      "step": 2840
+    },
+    {
+      "epoch": 2.6710402999062794,
+      "grad_norm": 1.3477802276611328,
+      "learning_rate": 9.979132803233317e-06,
+      "loss": 0.1547,
+      "step": 2850
+    },
+    {
+      "epoch": 2.680412371134021,
+      "grad_norm": 1.540963053703308,
+      "learning_rate": 9.97905958499905e-06,
+      "loss": 0.1229,
+      "step": 2860
+    },
+    {
+      "epoch": 2.689784442361762,
+      "grad_norm": 1.697350025177002,
+      "learning_rate": 9.97898636676478e-06,
+      "loss": 0.153,
+      "step": 2870
+    },
+    {
+      "epoch": 2.6991565135895033,
+      "grad_norm": 1.6020257472991943,
+      "learning_rate": 9.978913148530511e-06,
+      "loss": 0.1334,
+      "step": 2880
+    },
+    {
+      "epoch": 2.7085285848172447,
+      "grad_norm": 1.7637958526611328,
+      "learning_rate": 9.978839930296242e-06,
+      "loss": 0.1513,
+      "step": 2890
+    },
+    {
+      "epoch": 2.717900656044986,
+      "grad_norm": 1.2917182445526123,
+      "learning_rate": 9.978766712061973e-06,
+      "loss": 0.1296,
+      "step": 2900
+    },
+    {
+      "epoch": 2.7272727272727275,
+      "grad_norm": 1.42876136302948,
+      "learning_rate": 9.978693493827704e-06,
+      "loss": 0.1276,
+      "step": 2910
+    },
+    {
+      "epoch": 2.7366447985004685,
+      "grad_norm": 1.340184211730957,
+      "learning_rate": 9.978620275593434e-06,
+      "loss": 0.164,
+      "step": 2920
+    },
+    {
+      "epoch": 2.74601686972821,
+      "grad_norm": 1.1638396978378296,
+      "learning_rate": 9.978547057359165e-06,
+      "loss": 0.1372,
+      "step": 2930
+    },
+    {
+      "epoch": 2.7553889409559513,
+      "grad_norm": 1.5060447454452515,
+      "learning_rate": 9.978473839124897e-06,
+      "loss": 0.1489,
+      "step": 2940
+    },
+    {
+      "epoch": 2.7647610121836927,
+      "grad_norm": 1.3632638454437256,
+      "learning_rate": 9.978400620890627e-06,
+      "loss": 0.1242,
+      "step": 2950
+    },
+    {
+      "epoch": 2.774133083411434,
+      "grad_norm": 1.6402980089187622,
+      "learning_rate": 9.978327402656359e-06,
+      "loss": 0.1395,
+      "step": 2960
+    },
+    {
+      "epoch": 2.783505154639175,
+      "grad_norm": 1.8350452184677124,
+      "learning_rate": 9.97825418442209e-06,
+      "loss": 0.1501,
+      "step": 2970
+    },
+    {
+      "epoch": 2.7928772258669166,
+      "grad_norm": 1.6517874002456665,
+      "learning_rate": 9.97818096618782e-06,
+      "loss": 0.1596,
+      "step": 2980
+    },
+    {
+      "epoch": 2.802249297094658,
+      "grad_norm": 1.7441259622573853,
+      "learning_rate": 9.978107747953551e-06,
+      "loss": 0.1344,
+      "step": 2990
+    },
+    {
+      "epoch": 2.8116213683223994,
+      "grad_norm": 1.4474517107009888,
+      "learning_rate": 9.978034529719282e-06,
+      "loss": 0.1363,
+      "step": 3000
+    },
+    {
+      "epoch": 2.8116213683223994,
+      "eval_loss": 0.05750729516148567,
+      "eval_pearson_cosine": 0.767126202583313,
+      "eval_pearson_dot": 0.676889181137085,
+      "eval_pearson_euclidean": 0.756407618522644,
+      "eval_pearson_manhattan": 0.7570176124572754,
+      "eval_runtime": 25.3699,
+      "eval_samples_per_second": 59.125,
+      "eval_spearman_cosine": 0.7727339030438767,
+      "eval_spearman_dot": 0.6755843192398268,
+      "eval_spearman_euclidean": 0.7624238185076594,
+      "eval_spearman_manhattan": 0.7629469399526556,
+      "eval_steps_per_second": 7.41,
+      "step": 3000
     }
   ],
   "logging_steps": 10,