agroudiev committed on
Commit
a000336
·
verified ·
1 Parent(s): 01a54ef

Chess Challenge submission by agroudiev

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. config.json +7 -6
  3. model.safetensors +2 -2
  4. vocab.json +77 -130
README.md CHANGED
@@ -14,13 +14,13 @@ Chess model submitted to the LLM Course Chess Challenge.
14
  ## Submission Info
15
 
16
  - **Submitted by**: [agroudiev](https://huggingface.co/agroudiev)
17
- - **Parameters**: 990,004
18
  - **Organization**: LLM-course
19
 
20
  ## Model Details
21
 
22
  - **Architecture**: Chess Transformer (GPT-style)
23
- - **Vocab size**: 134
24
  - **Embedding dim**: 128
25
  - **Layers**: 6
26
- - **Heads**: 4
 
14
  ## Submission Info
15
 
16
  - **Submitted by**: [agroudiev](https://huggingface.co/agroudiev)
17
+ - **Parameters**: 998,600
18
  - **Organization**: LLM-course
19
 
20
  ## Model Details
21
 
22
  - **Architecture**: Chess Transformer (GPT-style)
23
+ - **Vocab size**: 81
24
  - **Embedding dim**: 128
25
  - **Layers**: 6
26
+ - **Heads**: 8
config.json CHANGED
@@ -8,13 +8,14 @@
8
  "eos_token_id": 2,
9
  "layer_norm_epsilon": 1e-05,
10
  "model_type": "chess_transformer",
11
- "n_ctx": 256,
12
  "n_embd": 128,
13
- "n_head": 4,
14
- "n_inner": 350,
15
  "n_layer": 6,
16
  "pad_token_id": 0,
17
- "tie_weights": true,
18
- "transformers_version": "4.57.4",
19
- "vocab_size": 134
 
20
  }
 
8
  "eos_token_id": 2,
9
  "layer_norm_epsilon": 1e-05,
10
  "model_type": "chess_transformer",
11
+ "n_ctx": 512,
12
  "n_embd": 128,
13
+ "n_head": 8,
14
+ "n_inner": 332,
15
  "n_layer": 6,
16
  "pad_token_id": 0,
17
+ "tie_weights": false,
18
+ "tie_word_embeddings": false,
19
+ "transformers_version": "4.57.3",
20
+ "vocab_size": 81
21
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24c1decf60ef24cda76b5d1e328823253e4d38bdc08dfae6ad569aed71570aa5
3
- size 3966464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03e33bbe23635a3cb196ff34f7f4b4c1dc559d415f609fcb4ff6c5175543699f
3
+ size 4000928
vocab.json CHANGED
@@ -3,134 +3,81 @@
3
  "[BOS]": 1,
4
  "[EOS]": 2,
5
  "[UNK]": 3,
6
- "BBc8b7": 4,
7
- "BBc8d7": 5,
8
- "BBc8e6": 6,
9
- "BBc8f5": 7,
10
- "BBc8g4": 8,
11
- "BBf8b4": 9,
12
- "BBf8c5": 10,
13
- "BBf8d6": 11,
14
- "BBf8e7": 12,
15
- "BBf8g7": 13,
16
- "BBg4f3(x)": 14,
17
- "BKe8c8(O)": 15,
18
- "BKe8g8(o)": 16,
19
- "BKg8g7": 17,
20
- "BKg8h8": 18,
21
- "BNb8c6": 19,
22
- "BNb8d7": 20,
23
- "BNc6d4": 21,
24
- "BNf6d5": 22,
25
- "BNf6d5(x)": 23,
26
- "BNf6e4": 24,
27
- "BNf6e4(x)": 25,
28
- "BNg8e7": 26,
29
- "BNg8f6": 27,
30
- "BPa5a4": 28,
31
- "BPa6a5": 29,
32
- "BPa7a5": 30,
33
- "BPa7a6": 31,
34
- "BPb5b4": 32,
35
- "BPb7b5": 33,
36
- "BPb7b6": 34,
37
- "BPb7c6(x)": 35,
38
- "BPc5c4": 36,
39
- "BPc5d4(x)": 37,
40
- "BPc6c5": 38,
41
- "BPc6d5(x)": 39,
42
- "BPc7c5": 40,
43
- "BPc7c6": 41,
44
- "BPd5d4": 42,
45
- "BPd5e4(x)": 43,
46
- "BPd6d5": 44,
47
- "BPd6e5(x)": 45,
48
- "BPd7d5": 46,
49
- "BPd7d6": 47,
50
- "BPe5d4(x)": 48,
51
- "BPe5e4": 49,
52
- "BPe6d5(x)": 50,
53
- "BPe6e5": 51,
54
- "BPe7e5": 52,
55
- "BPe7e6": 53,
56
- "BPf7f5": 54,
57
- "BPf7f6": 55,
58
- "BPg6g5": 56,
59
- "BPg7g5": 57,
60
- "BPg7g6": 58,
61
- "BPh7h5": 59,
62
- "BPh7h6": 60,
63
- "BQd8b6": 61,
64
- "BQd8c7": 62,
65
- "BQd8d7": 63,
66
- "BQd8e7": 64,
67
- "BRa8b8": 65,
68
- "BRa8c8": 66,
69
- "BRa8d8": 67,
70
- "BRf8e8": 68,
71
- "WBc1b2": 69,
72
- "WBc1d2": 70,
73
- "WBc1e3": 71,
74
- "WBc1f4": 72,
75
- "WBc1g5": 73,
76
- "WBf1b5": 74,
77
- "WBf1c4": 75,
78
- "WBf1d3": 76,
79
- "WBf1e2": 77,
80
- "WBf1g2": 78,
81
- "WKe1c1(O)": 79,
82
- "WKe1g1(o)": 80,
83
- "WKg1g2": 81,
84
- "WKg1h1": 82,
85
- "WKg1h2": 83,
86
- "WNb1c3": 84,
87
- "WNb1d2": 85,
88
- "WNc3d5": 86,
89
- "WNf3d4(x)": 87,
90
- "WNf3e5": 88,
91
- "WNf3e5(x)": 89,
92
- "WNf3g5": 90,
93
- "WNg1e2": 91,
94
- "WNg1f3": 92,
95
- "WPa2a3": 93,
96
- "WPa2a4": 94,
97
- "WPa4a5": 95,
98
- "WPb2b3": 96,
99
- "WPb2b4": 97,
100
- "WPb2c3(x)": 98,
101
- "WPb4b5": 99,
102
- "WPc2c3": 100,
103
- "WPc2c4": 101,
104
- "WPc3c4": 102,
105
- "WPc3d4(x)": 103,
106
- "WPc4c5": 104,
107
- "WPc4d5(x)": 105,
108
- "WPd2d3": 106,
109
- "WPd2d4": 107,
110
- "WPd3d4": 108,
111
- "WPd4d5": 109,
112
- "WPd4e5(x)": 110,
113
- "WPe2e3": 111,
114
- "WPe2e4": 112,
115
- "WPe3e4": 113,
116
- "WPe4d5(x)": 114,
117
- "WPe4e5": 115,
118
- "WPf2f3": 116,
119
- "WPf2f4": 117,
120
- "WPf4f5": 118,
121
- "WPg2g3": 119,
122
- "WPg2g4": 120,
123
- "WPg4g5": 121,
124
- "WPh2h3": 122,
125
- "WPh2h4": 123,
126
- "WPh3h4": 124,
127
- "WPh4h5": 125,
128
- "WQd1c2": 126,
129
- "WQd1d2": 127,
130
- "WQd1e2": 128,
131
- "WRa1b1": 129,
132
- "WRa1c1": 130,
133
- "WRa1d1": 131,
134
- "WRa1e1": 132,
135
- "WRf1e1": 133
136
  }
 
3
  "[BOS]": 1,
4
  "[EOS]": 2,
5
  "[UNK]": 3,
6
+ "W": 4,
7
+ "B": 9,
8
+ "K": 6,
9
+ "Q": 7,
10
+ "R": 8,
11
+ "N": 10,
12
+ "P": 11,
13
+ "a1": 12,
14
+ "a2": 13,
15
+ "a3": 14,
16
+ "a4": 15,
17
+ "a5": 16,
18
+ "a6": 17,
19
+ "a7": 18,
20
+ "a8": 19,
21
+ "b1": 20,
22
+ "b2": 21,
23
+ "b3": 22,
24
+ "b4": 23,
25
+ "b5": 24,
26
+ "b6": 25,
27
+ "b7": 26,
28
+ "b8": 27,
29
+ "c1": 28,
30
+ "c2": 29,
31
+ "c3": 30,
32
+ "c4": 31,
33
+ "c5": 32,
34
+ "c6": 33,
35
+ "c7": 34,
36
+ "c8": 35,
37
+ "d1": 36,
38
+ "d2": 37,
39
+ "d3": 38,
40
+ "d4": 39,
41
+ "d5": 40,
42
+ "d6": 41,
43
+ "d7": 42,
44
+ "d8": 43,
45
+ "e1": 44,
46
+ "e2": 45,
47
+ "e3": 46,
48
+ "e4": 47,
49
+ "e5": 48,
50
+ "e6": 49,
51
+ "e7": 50,
52
+ "e8": 51,
53
+ "f1": 52,
54
+ "f2": 53,
55
+ "f3": 54,
56
+ "f4": 55,
57
+ "f5": 56,
58
+ "f6": 57,
59
+ "f7": 58,
60
+ "f8": 59,
61
+ "g1": 60,
62
+ "g2": 61,
63
+ "g3": 62,
64
+ "g4": 63,
65
+ "g5": 64,
66
+ "g6": 65,
67
+ "g7": 66,
68
+ "g8": 67,
69
+ "h1": 68,
70
+ "h2": 69,
71
+ "h3": 70,
72
+ "h4": 71,
73
+ "h5": 72,
74
+ "h6": 73,
75
+ "h7": 74,
76
+ "h8": 75,
77
+ "(x)": 76,
78
+ "(+)": 77,
79
+ "(+*)": 78,
80
+ "(o)": 79,
81
+ "(O)": 80,
82
+ " ": 81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  }