Chess proven 1M
Browse files- README.md +11 -5
- config.json +3 -3
- model.py +1 -1
- model.safetensors +2 -2
- vocab.json +1 -201
README.md
CHANGED
|
@@ -5,11 +5,17 @@ tags:
|
|
| 5 |
license: mit
|
| 6 |
---
|
| 7 |
|
| 8 |
-
# Chess GPT -
|
| 9 |
|
| 10 |
-
Params:
|
| 11 |
-
Vocab:
|
| 12 |
-
Dataset: 1M samples x
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
Target: 60-70% legal rate
|
|
|
|
| 5 |
license: mit
|
| 6 |
---
|
| 7 |
|
| 8 |
+
# Chess GPT - Prof's Architecture
|
| 9 |
|
| 10 |
+
Params: 998,656
|
| 11 |
+
Vocab: 1604 (TOP_K=2000)
|
| 12 |
+
Dataset: 1M samples x 5 epochs
|
| 13 |
+
|
| 14 |
+
Config:
|
| 15 |
+
- n_embd: 128
|
| 16 |
+
- n_layer: 4
|
| 17 |
+
- n_head: 4
|
| 18 |
+
- LR: 5e-4
|
| 19 |
+
- UNK rate: 25.7%
|
| 20 |
|
| 21 |
Target: 60-70% legal rate
|
config.json
CHANGED
|
@@ -15,10 +15,10 @@
|
|
| 15 |
"n_ctx": 256,
|
| 16 |
"n_embd": 128,
|
| 17 |
"n_head": 4,
|
| 18 |
-
"n_inner":
|
| 19 |
-
"n_layer":
|
| 20 |
"pad_token_id": 0,
|
| 21 |
"tie_weights": true,
|
| 22 |
"transformers_version": "4.57.6",
|
| 23 |
-
"vocab_size":
|
| 24 |
}
|
|
|
|
| 15 |
"n_ctx": 256,
|
| 16 |
"n_embd": 128,
|
| 17 |
"n_head": 4,
|
| 18 |
+
"n_inner": 512,
|
| 19 |
+
"n_layer": 4,
|
| 20 |
"pad_token_id": 0,
|
| 21 |
"tie_weights": true,
|
| 22 |
"transformers_version": "4.57.6",
|
| 23 |
+
"vocab_size": 1604
|
| 24 |
}
|
model.py
CHANGED
|
@@ -26,7 +26,7 @@ def apply_rope(q, k):
|
|
| 26 |
|
| 27 |
class ChessConfig(PretrainedConfig):
|
| 28 |
model_type = "chess_transformer"
|
| 29 |
-
def __init__(self, vocab_size=
|
| 30 |
super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
|
| 31 |
self.vocab_size = vocab_size
|
| 32 |
self.n_embd = n_embd
|
|
|
|
| 26 |
|
| 27 |
class ChessConfig(PretrainedConfig):
|
| 28 |
model_type = "chess_transformer"
|
| 29 |
+
def __init__(self, vocab_size=1604, n_embd=128, n_layer=4, n_head=4, n_ctx=256, n_inner=None, dropout=0.1, layer_norm_epsilon=1e-05, tie_weights=True, pad_token_id=0, bos_token_id=1, eos_token_id=2, **kwargs):
|
| 30 |
super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
|
| 31 |
self.vocab_size = vocab_size
|
| 32 |
self.n_embd = n_embd
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55dfdcc3502c61d227c19892983221a8a00ae4c084f1ce855076fda72325c02b
|
| 3 |
+
size 3998960
|
vocab.json
CHANGED
|
@@ -1602,205 +1602,5 @@
|
|
| 1602 |
"WNc3d1(x)": 1600,
|
| 1603 |
"BNe4g5(x)": 1601,
|
| 1604 |
"BBe4g6": 1602,
|
| 1605 |
-
"WNh3f2": 1603
|
| 1606 |
-
"BRa2b2": 1604,
|
| 1607 |
-
"BQh4f6": 1605,
|
| 1608 |
-
"WQe2c4(x)": 1606,
|
| 1609 |
-
"BRf8f4": 1607,
|
| 1610 |
-
"BQg5f6": 1608,
|
| 1611 |
-
"WKe4e3": 1609,
|
| 1612 |
-
"BRg8e8": 1610,
|
| 1613 |
-
"WQd3e4(x)": 1611,
|
| 1614 |
-
"BBf5d3": 1612,
|
| 1615 |
-
"BRe8e3": 1613,
|
| 1616 |
-
"WNg4e5": 1614,
|
| 1617 |
-
"WKb3b4": 1615,
|
| 1618 |
-
"BBh3g4": 1616,
|
| 1619 |
-
"WQe1g3": 1617,
|
| 1620 |
-
"BNd4b3(x)": 1618,
|
| 1621 |
-
"BBe5g7": 1619,
|
| 1622 |
-
"BKd5e5": 1620,
|
| 1623 |
-
"BBc6f3(x)": 1621,
|
| 1624 |
-
"BKh5g4": 1622,
|
| 1625 |
-
"BNc4b2(x)": 1623,
|
| 1626 |
-
"BBb4d2(x)": 1624,
|
| 1627 |
-
"BRe8a8": 1625,
|
| 1628 |
-
"WQg3g4": 1626,
|
| 1629 |
-
"WKc4d5": 1627,
|
| 1630 |
-
"WBb2d4": 1628,
|
| 1631 |
-
"BBc6d5(x)": 1629,
|
| 1632 |
-
"WRa1a6(x)": 1630,
|
| 1633 |
-
"WBc1d2(x)": 1631,
|
| 1634 |
-
"WBb3e6(x)": 1632,
|
| 1635 |
-
"BQe7e6(x)": 1633,
|
| 1636 |
-
"WQf3h5": 1634,
|
| 1637 |
-
"BNc4e5": 1635,
|
| 1638 |
-
"WBc4f7(x)": 1636,
|
| 1639 |
-
"BRd2b2(x)": 1637,
|
| 1640 |
-
"BRh8b8": 1638,
|
| 1641 |
-
"WKd4c3": 1639,
|
| 1642 |
-
"WNb5d6(+)": 1640,
|
| 1643 |
-
"BRf8a8(x)": 1641,
|
| 1644 |
-
"WKb3c2": 1642,
|
| 1645 |
-
"WBf3c6(x)": 1643,
|
| 1646 |
-
"WBd5b7(x)": 1644,
|
| 1647 |
-
"WKd4d5": 1645,
|
| 1648 |
-
"WQc2d3(x)": 1646,
|
| 1649 |
-
"BBg7h6(x)": 1647,
|
| 1650 |
-
"WBb5d7(x)": 1648,
|
| 1651 |
-
"BQe6g6": 1649,
|
| 1652 |
-
"BBg4h3": 1650,
|
| 1653 |
-
"WKg4h3": 1651,
|
| 1654 |
-
"WKc3b2": 1652,
|
| 1655 |
-
"WQd2f4(x)": 1653,
|
| 1656 |
-
"WKg5f6": 1654,
|
| 1657 |
-
"WBd3a6(x)": 1655,
|
| 1658 |
-
"BRd8e8(x)": 1656,
|
| 1659 |
-
"BNe8d6": 1657,
|
| 1660 |
-
"WQf3d5(x)": 1658,
|
| 1661 |
-
"WRa7b7": 1659,
|
| 1662 |
-
"WNb5c7(x)": 1660,
|
| 1663 |
-
"WRf1e1(x)": 1661,
|
| 1664 |
-
"WBd2c1": 1662,
|
| 1665 |
-
"WKd4e4": 1663,
|
| 1666 |
-
"WQb7a7(x)": 1664,
|
| 1667 |
-
"WKb2c2": 1665,
|
| 1668 |
-
"WRe2e3": 1666,
|
| 1669 |
-
"BBc6d5": 1667,
|
| 1670 |
-
"BPe7d6(x)": 1668,
|
| 1671 |
-
"WQd3c3": 1669,
|
| 1672 |
-
"WQb3d1": 1670,
|
| 1673 |
-
"BNc6d8(x)": 1671,
|
| 1674 |
-
"WQg3h4": 1672,
|
| 1675 |
-
"WBf1a6(x)": 1673,
|
| 1676 |
-
"BRc2a2(x)": 1674,
|
| 1677 |
-
"WNd3c5": 1675,
|
| 1678 |
-
"WBd2f4(x)": 1676,
|
| 1679 |
-
"WBc2e4(x)": 1677,
|
| 1680 |
-
"BRa2a1(+)": 1678,
|
| 1681 |
-
"BKb5c4": 1679,
|
| 1682 |
-
"WKe5f6": 1680,
|
| 1683 |
-
"BPd2d1(Q)": 1681,
|
| 1684 |
-
"WBb3d5": 1682,
|
| 1685 |
-
"WNe4f6(x)": 1683,
|
| 1686 |
-
"BQd6c6": 1684,
|
| 1687 |
-
"WKb3a4": 1685,
|
| 1688 |
-
"WRd7b7(x)": 1686,
|
| 1689 |
-
"BKe5e6": 1687,
|
| 1690 |
-
"WRc1c6": 1688,
|
| 1691 |
-
"BQd6c7": 1689,
|
| 1692 |
-
"BBg6e4": 1690,
|
| 1693 |
-
"WNe3c4": 1691,
|
| 1694 |
-
"WQf3e4": 1692,
|
| 1695 |
-
"WKc4d3": 1693,
|
| 1696 |
-
"WPd7d8(Q)": 1694,
|
| 1697 |
-
"BRd8d2(+)": 1695,
|
| 1698 |
-
"WBe3c1": 1696,
|
| 1699 |
-
"WRf2f1": 1697,
|
| 1700 |
-
"BQd7f7": 1698,
|
| 1701 |
-
"WKf5g6": 1699,
|
| 1702 |
-
"WNf4e2": 1700,
|
| 1703 |
-
"WBd5c6(x)": 1701,
|
| 1704 |
-
"BKc7d8": 1702,
|
| 1705 |
-
"BNd6c4": 1703,
|
| 1706 |
-
"WNc5e4": 1704,
|
| 1707 |
-
"WBc3b4": 1705,
|
| 1708 |
-
"BQe7b4": 1706,
|
| 1709 |
-
"WQe3e2": 1707,
|
| 1710 |
-
"BBd6b8": 1708,
|
| 1711 |
-
"WQe2d3(x)": 1709,
|
| 1712 |
-
"WNf5e7(+)": 1710,
|
| 1713 |
-
"BBd4b6": 1711,
|
| 1714 |
-
"WKd5c6": 1712,
|
| 1715 |
-
"WRh1h5(x)": 1713,
|
| 1716 |
-
"BQe5f6": 1714,
|
| 1717 |
-
"WQb3c3": 1715,
|
| 1718 |
-
"BKf4e3": 1716,
|
| 1719 |
-
"WNe2c1": 1717,
|
| 1720 |
-
"WKf5e6": 1718,
|
| 1721 |
-
"BQf6b2(x)": 1719,
|
| 1722 |
-
"WQb3a4": 1720,
|
| 1723 |
-
"BBf8b4(x)": 1721,
|
| 1724 |
-
"BKc6c7": 1722,
|
| 1725 |
-
"BQd7d8": 1723,
|
| 1726 |
-
"WQd1b1": 1724,
|
| 1727 |
-
"WRe1e7(+)": 1725,
|
| 1728 |
-
"WKg5g6": 1726,
|
| 1729 |
-
"WRh1d1(x)": 1727,
|
| 1730 |
-
"WNg5h7(x)": 1728,
|
| 1731 |
-
"BBe6g4(x)": 1729,
|
| 1732 |
-
"BRa2a3": 1730,
|
| 1733 |
-
"WNf5e3": 1731,
|
| 1734 |
-
"BRa8a7(x)": 1732,
|
| 1735 |
-
"WPg4g5(+)": 1733,
|
| 1736 |
-
"BRd7c7": 1734,
|
| 1737 |
-
"WPe6f7(x+)": 1735,
|
| 1738 |
-
"BKg5g6": 1736,
|
| 1739 |
-
"BPf5f4(+)": 1737,
|
| 1740 |
-
"BQb4b2(x)": 1738,
|
| 1741 |
-
"BPd7e6(x)": 1739,
|
| 1742 |
-
"BKf4f3": 1740,
|
| 1743 |
-
"BNf4e6": 1741,
|
| 1744 |
-
"BNc4b6": 1742,
|
| 1745 |
-
"BQc7b8": 1743,
|
| 1746 |
-
"BKc4b3": 1744,
|
| 1747 |
-
"WQc2d1": 1745,
|
| 1748 |
-
"BRe8h8": 1746,
|
| 1749 |
-
"BNf4g6": 1747,
|
| 1750 |
-
"BNe6c5": 1748,
|
| 1751 |
-
"BBd4e5": 1749,
|
| 1752 |
-
"WQd2c3(x)": 1750,
|
| 1753 |
-
"WQe4f3": 1751,
|
| 1754 |
-
"WNd6e4": 1752,
|
| 1755 |
-
"WPf7f8(Q)": 1753,
|
| 1756 |
-
"WNe1d3": 1754,
|
| 1757 |
-
"BNe5f3(x)": 1755,
|
| 1758 |
-
"WRd2c2": 1756,
|
| 1759 |
-
"WBe5g7(x)": 1757,
|
| 1760 |
-
"WRa1a4(x)": 1758,
|
| 1761 |
-
"WKb3c3": 1759,
|
| 1762 |
-
"BRa8a5(x)": 1760,
|
| 1763 |
-
"BQe7e5": 1761,
|
| 1764 |
-
"BBh5e2(x)": 1762,
|
| 1765 |
-
"BNf6g4(+)": 1763,
|
| 1766 |
-
"WQd3b3": 1764,
|
| 1767 |
-
"WBe4c6(x)": 1765,
|
| 1768 |
-
"WRe2f2": 1766,
|
| 1769 |
-
"BKg5h6": 1767,
|
| 1770 |
-
"BKc5b6": 1768,
|
| 1771 |
-
"BBd4b2(x)": 1769,
|
| 1772 |
-
"WNc4d6(x)": 1770,
|
| 1773 |
-
"WKf5f6": 1771,
|
| 1774 |
-
"BBf8a3(x)": 1772,
|
| 1775 |
-
"BNc2d4": 1773,
|
| 1776 |
-
"BBa6c4(x)": 1774,
|
| 1777 |
-
"WRa1a7": 1775,
|
| 1778 |
-
"WQd4d2": 1776,
|
| 1779 |
-
"BRf8f2": 1777,
|
| 1780 |
-
"BNc7e6": 1778,
|
| 1781 |
-
"WPe2f3(x)": 1779,
|
| 1782 |
-
"WBg2e4": 1780,
|
| 1783 |
-
"BRc8c1(x+)": 1781,
|
| 1784 |
-
"BRa8a4(x)": 1782,
|
| 1785 |
-
"WNe5c4(x)": 1783,
|
| 1786 |
-
"BQd5e6(+)": 1784,
|
| 1787 |
-
"WNd2b1": 1785,
|
| 1788 |
-
"WQd2d3(x)": 1786,
|
| 1789 |
-
"BRh8d8(x)": 1787,
|
| 1790 |
-
"WRc7a7(x)": 1788,
|
| 1791 |
-
"WQe2e5(x)": 1789,
|
| 1792 |
-
"WRf1f5": 1790,
|
| 1793 |
-
"BBf5h7": 1791,
|
| 1794 |
-
"WRc1c4": 1792,
|
| 1795 |
-
"BKb7c8": 1793,
|
| 1796 |
-
"BNd8e6": 1794,
|
| 1797 |
-
"WRc1c8(x+)": 1795,
|
| 1798 |
-
"BNg5e4": 1796,
|
| 1799 |
-
"BQf6d6": 1797,
|
| 1800 |
-
"BRf7g7": 1798,
|
| 1801 |
-
"BNc4e3(x)": 1799,
|
| 1802 |
-
"WKc2c1": 1800,
|
| 1803 |
-
"WNf4e6": 1801,
|
| 1804 |
-
"BKe4f3": 1802,
|
| 1805 |
-
"BKg7f6(x)": 1803
|
| 1806 |
}
|
|
|
|
| 1602 |
"WNc3d1(x)": 1600,
|
| 1603 |
"BNe4g5(x)": 1601,
|
| 1604 |
"BBe4g6": 1602,
|
| 1605 |
+
"WNh3f2": 1603
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1606 |
}
|