MDaytek committed on
Commit
24d6a5b
·
verified ·
1 Parent(s): 00bdaa9

Chess proven 1M

Browse files
Files changed (5) hide show
  1. README.md +11 -5
  2. config.json +3 -3
  3. model.py +1 -1
  4. model.safetensors +2 -2
  5. vocab.json +1 -201
README.md CHANGED
@@ -5,11 +5,17 @@ tags:
5
  license: mit
6
  ---
7
 
8
- # Chess GPT - Proven Architecture
9
 
10
- Params: 975,808
11
- Vocab: 1804 (TOP_K=2000)
12
- Dataset: 1M samples x 10 epochs
13
- UNK rate: 24.2%
 
 
 
 
 
 
14
 
15
  Target: 60-70% legal rate
 
5
  license: mit
6
  ---
7
 
8
+ # Chess GPT - Prof's Architecture
9
 
10
+ Params: 998,656
11
+ Vocab: 1604 (TOP_K=2000)
12
+ Dataset: 1M samples x 5 epochs
13
+
14
+ Config:
15
+ - n_embd: 128
16
+ - n_layer: 4
17
+ - n_head: 4
18
+ - LR: 5e-4
19
+ - UNK rate: 25.7%
20
 
21
  Target: 60-70% legal rate
config.json CHANGED
@@ -15,10 +15,10 @@
15
  "n_ctx": 256,
16
  "n_embd": 128,
17
  "n_head": 4,
18
- "n_inner": 320,
19
- "n_layer": 5,
20
  "pad_token_id": 0,
21
  "tie_weights": true,
22
  "transformers_version": "4.57.6",
23
- "vocab_size": 1804
24
  }
 
15
  "n_ctx": 256,
16
  "n_embd": 128,
17
  "n_head": 4,
18
+ "n_inner": 512,
19
+ "n_layer": 4,
20
  "pad_token_id": 0,
21
  "tie_weights": true,
22
  "transformers_version": "4.57.6",
23
+ "vocab_size": 1604
24
  }
model.py CHANGED
@@ -26,7 +26,7 @@ def apply_rope(q, k):
26
 
27
  class ChessConfig(PretrainedConfig):
28
  model_type = "chess_transformer"
29
- def __init__(self, vocab_size=1804, n_embd=128, n_layer=6, n_head=8, n_ctx=256, n_inner=None, dropout=0.1, layer_norm_epsilon=1e-05, tie_weights=True, pad_token_id=0, bos_token_id=1, eos_token_id=2, **kwargs):
30
  super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
31
  self.vocab_size = vocab_size
32
  self.n_embd = n_embd
 
26
 
27
  class ChessConfig(PretrainedConfig):
28
  model_type = "chess_transformer"
29
+ def __init__(self, vocab_size=1604, n_embd=128, n_layer=4, n_head=4, n_ctx=256, n_inner=None, dropout=0.1, layer_norm_epsilon=1e-05, tie_weights=True, pad_token_id=0, bos_token_id=1, eos_token_id=2, **kwargs):
30
  super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
31
  self.vocab_size = vocab_size
32
  self.n_embd = n_embd
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6e3cbb9b7f597f471f675808f06654046cde64cf9d14196d6226640eb5dc8c6
3
- size 3908576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55dfdcc3502c61d227c19892983221a8a00ae4c084f1ce855076fda72325c02b
3
+ size 3998960
vocab.json CHANGED
@@ -1602,205 +1602,5 @@
1602
  "WNc3d1(x)": 1600,
1603
  "BNe4g5(x)": 1601,
1604
  "BBe4g6": 1602,
1605
- "WNh3f2": 1603,
1606
- "BRa2b2": 1604,
1607
- "BQh4f6": 1605,
1608
- "WQe2c4(x)": 1606,
1609
- "BRf8f4": 1607,
1610
- "BQg5f6": 1608,
1611
- "WKe4e3": 1609,
1612
- "BRg8e8": 1610,
1613
- "WQd3e4(x)": 1611,
1614
- "BBf5d3": 1612,
1615
- "BRe8e3": 1613,
1616
- "WNg4e5": 1614,
1617
- "WKb3b4": 1615,
1618
- "BBh3g4": 1616,
1619
- "WQe1g3": 1617,
1620
- "BNd4b3(x)": 1618,
1621
- "BBe5g7": 1619,
1622
- "BKd5e5": 1620,
1623
- "BBc6f3(x)": 1621,
1624
- "BKh5g4": 1622,
1625
- "BNc4b2(x)": 1623,
1626
- "BBb4d2(x)": 1624,
1627
- "BRe8a8": 1625,
1628
- "WQg3g4": 1626,
1629
- "WKc4d5": 1627,
1630
- "WBb2d4": 1628,
1631
- "BBc6d5(x)": 1629,
1632
- "WRa1a6(x)": 1630,
1633
- "WBc1d2(x)": 1631,
1634
- "WBb3e6(x)": 1632,
1635
- "BQe7e6(x)": 1633,
1636
- "WQf3h5": 1634,
1637
- "BNc4e5": 1635,
1638
- "WBc4f7(x)": 1636,
1639
- "BRd2b2(x)": 1637,
1640
- "BRh8b8": 1638,
1641
- "WKd4c3": 1639,
1642
- "WNb5d6(+)": 1640,
1643
- "BRf8a8(x)": 1641,
1644
- "WKb3c2": 1642,
1645
- "WBf3c6(x)": 1643,
1646
- "WBd5b7(x)": 1644,
1647
- "WKd4d5": 1645,
1648
- "WQc2d3(x)": 1646,
1649
- "BBg7h6(x)": 1647,
1650
- "WBb5d7(x)": 1648,
1651
- "BQe6g6": 1649,
1652
- "BBg4h3": 1650,
1653
- "WKg4h3": 1651,
1654
- "WKc3b2": 1652,
1655
- "WQd2f4(x)": 1653,
1656
- "WKg5f6": 1654,
1657
- "WBd3a6(x)": 1655,
1658
- "BRd8e8(x)": 1656,
1659
- "BNe8d6": 1657,
1660
- "WQf3d5(x)": 1658,
1661
- "WRa7b7": 1659,
1662
- "WNb5c7(x)": 1660,
1663
- "WRf1e1(x)": 1661,
1664
- "WBd2c1": 1662,
1665
- "WKd4e4": 1663,
1666
- "WQb7a7(x)": 1664,
1667
- "WKb2c2": 1665,
1668
- "WRe2e3": 1666,
1669
- "BBc6d5": 1667,
1670
- "BPe7d6(x)": 1668,
1671
- "WQd3c3": 1669,
1672
- "WQb3d1": 1670,
1673
- "BNc6d8(x)": 1671,
1674
- "WQg3h4": 1672,
1675
- "WBf1a6(x)": 1673,
1676
- "BRc2a2(x)": 1674,
1677
- "WNd3c5": 1675,
1678
- "WBd2f4(x)": 1676,
1679
- "WBc2e4(x)": 1677,
1680
- "BRa2a1(+)": 1678,
1681
- "BKb5c4": 1679,
1682
- "WKe5f6": 1680,
1683
- "BPd2d1(Q)": 1681,
1684
- "WBb3d5": 1682,
1685
- "WNe4f6(x)": 1683,
1686
- "BQd6c6": 1684,
1687
- "WKb3a4": 1685,
1688
- "WRd7b7(x)": 1686,
1689
- "BKe5e6": 1687,
1690
- "WRc1c6": 1688,
1691
- "BQd6c7": 1689,
1692
- "BBg6e4": 1690,
1693
- "WNe3c4": 1691,
1694
- "WQf3e4": 1692,
1695
- "WKc4d3": 1693,
1696
- "WPd7d8(Q)": 1694,
1697
- "BRd8d2(+)": 1695,
1698
- "WBe3c1": 1696,
1699
- "WRf2f1": 1697,
1700
- "BQd7f7": 1698,
1701
- "WKf5g6": 1699,
1702
- "WNf4e2": 1700,
1703
- "WBd5c6(x)": 1701,
1704
- "BKc7d8": 1702,
1705
- "BNd6c4": 1703,
1706
- "WNc5e4": 1704,
1707
- "WBc3b4": 1705,
1708
- "BQe7b4": 1706,
1709
- "WQe3e2": 1707,
1710
- "BBd6b8": 1708,
1711
- "WQe2d3(x)": 1709,
1712
- "WNf5e7(+)": 1710,
1713
- "BBd4b6": 1711,
1714
- "WKd5c6": 1712,
1715
- "WRh1h5(x)": 1713,
1716
- "BQe5f6": 1714,
1717
- "WQb3c3": 1715,
1718
- "BKf4e3": 1716,
1719
- "WNe2c1": 1717,
1720
- "WKf5e6": 1718,
1721
- "BQf6b2(x)": 1719,
1722
- "WQb3a4": 1720,
1723
- "BBf8b4(x)": 1721,
1724
- "BKc6c7": 1722,
1725
- "BQd7d8": 1723,
1726
- "WQd1b1": 1724,
1727
- "WRe1e7(+)": 1725,
1728
- "WKg5g6": 1726,
1729
- "WRh1d1(x)": 1727,
1730
- "WNg5h7(x)": 1728,
1731
- "BBe6g4(x)": 1729,
1732
- "BRa2a3": 1730,
1733
- "WNf5e3": 1731,
1734
- "BRa8a7(x)": 1732,
1735
- "WPg4g5(+)": 1733,
1736
- "BRd7c7": 1734,
1737
- "WPe6f7(x+)": 1735,
1738
- "BKg5g6": 1736,
1739
- "BPf5f4(+)": 1737,
1740
- "BQb4b2(x)": 1738,
1741
- "BPd7e6(x)": 1739,
1742
- "BKf4f3": 1740,
1743
- "BNf4e6": 1741,
1744
- "BNc4b6": 1742,
1745
- "BQc7b8": 1743,
1746
- "BKc4b3": 1744,
1747
- "WQc2d1": 1745,
1748
- "BRe8h8": 1746,
1749
- "BNf4g6": 1747,
1750
- "BNe6c5": 1748,
1751
- "BBd4e5": 1749,
1752
- "WQd2c3(x)": 1750,
1753
- "WQe4f3": 1751,
1754
- "WNd6e4": 1752,
1755
- "WPf7f8(Q)": 1753,
1756
- "WNe1d3": 1754,
1757
- "BNe5f3(x)": 1755,
1758
- "WRd2c2": 1756,
1759
- "WBe5g7(x)": 1757,
1760
- "WRa1a4(x)": 1758,
1761
- "WKb3c3": 1759,
1762
- "BRa8a5(x)": 1760,
1763
- "BQe7e5": 1761,
1764
- "BBh5e2(x)": 1762,
1765
- "BNf6g4(+)": 1763,
1766
- "WQd3b3": 1764,
1767
- "WBe4c6(x)": 1765,
1768
- "WRe2f2": 1766,
1769
- "BKg5h6": 1767,
1770
- "BKc5b6": 1768,
1771
- "BBd4b2(x)": 1769,
1772
- "WNc4d6(x)": 1770,
1773
- "WKf5f6": 1771,
1774
- "BBf8a3(x)": 1772,
1775
- "BNc2d4": 1773,
1776
- "BBa6c4(x)": 1774,
1777
- "WRa1a7": 1775,
1778
- "WQd4d2": 1776,
1779
- "BRf8f2": 1777,
1780
- "BNc7e6": 1778,
1781
- "WPe2f3(x)": 1779,
1782
- "WBg2e4": 1780,
1783
- "BRc8c1(x+)": 1781,
1784
- "BRa8a4(x)": 1782,
1785
- "WNe5c4(x)": 1783,
1786
- "BQd5e6(+)": 1784,
1787
- "WNd2b1": 1785,
1788
- "WQd2d3(x)": 1786,
1789
- "BRh8d8(x)": 1787,
1790
- "WRc7a7(x)": 1788,
1791
- "WQe2e5(x)": 1789,
1792
- "WRf1f5": 1790,
1793
- "BBf5h7": 1791,
1794
- "WRc1c4": 1792,
1795
- "BKb7c8": 1793,
1796
- "BNd8e6": 1794,
1797
- "WRc1c8(x+)": 1795,
1798
- "BNg5e4": 1796,
1799
- "BQf6d6": 1797,
1800
- "BRf7g7": 1798,
1801
- "BNc4e3(x)": 1799,
1802
- "WKc2c1": 1800,
1803
- "WNf4e6": 1801,
1804
- "BKe4f3": 1802,
1805
- "BKg7f6(x)": 1803
1806
  }
 
1602
  "WNc3d1(x)": 1600,
1603
  "BNe4g5(x)": 1601,
1604
  "BBe4g6": 1602,
1605
+ "WNh3f2": 1603
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1606
  }