MDaytek commited on
Commit
f669547
·
verified ·
1 Parent(s): f892ca9

Full submission with code by MDaytek

Browse files
Files changed (3) hide show
  1. README.md +2 -16
  2. model.py +80 -0
  3. tokenizer.py +68 -0
README.md CHANGED
@@ -6,20 +6,6 @@ tags:
6
  - chess-challenge
7
  license: mit
8
  ---
9
-
10
  # chess-v2-head
11
-
12
- Chess model submitted to the LLM Course Chess Challenge.
13
-
14
- ## Submission Info
15
- - **Submitted by**: [MDaytek](https://huggingface.co/MDaytek)
16
- - **Parameters**: 999,936
17
- - **Epochs**: 4
18
- - **Training Samples**: 1,000,000
19
-
20
- ## Architecture
21
- - **Type**: Custom Chess Transformer
22
- - **Vocab size**: 1344
23
- - **Layers**: 6
24
- - **Heads**: 8
25
- - **Embed Dim**: 128
 
6
  - chess-challenge
7
  license: mit
8
  ---
 
9
  # chess-v2-head
10
+ Chess model submitted by MDaytek to the LLM Course Chess Challenge.
11
+ **Parameters:** 999,936
 
 
 
 
 
 
 
 
 
 
 
 
 
model.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ import torch.nn as nn
4
+ from transformers import PreTrainedModel, PretrainedConfig, GenerationMixin
5
+ from transformers.modeling_outputs import CausalLMOutput
6
+
7
class ChessConfig(PretrainedConfig):
    """Hyperparameter container for the chess transformer, HF-compatible."""

    model_type = "chess_transformer"

    def __init__(
        self,
        vocab_size=1000,
        n_embd=128,
        n_layer=4,
        n_head=4,
        n_inner=512,
        n_ctx=256,
        **kwargs,
    ):
        """Store the GPT-style hyperparameters and mirror them under the
        canonical Hugging Face attribute names."""
        super().__init__(**kwargs)

        # Names used throughout model.py.
        self.vocab_size = vocab_size
        self.n_embd = n_embd
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_inner = n_inner
        self.n_ctx = n_ctx

        # Canonical HF aliases; utilities such as .generate() look these up
        # by their standard names.
        self.num_hidden_layers = n_layer
        self.hidden_size = n_embd
        self.num_attention_heads = n_head
22
+
23
class Block(nn.Module):
    """Pre-LayerNorm transformer block: self-attention then an MLP.

    Residual structure: h = x + Attn(LN1(x)); out = h + MLP(LN2(h)).
    """

    def __init__(self, config):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        # batch_first=True -> tensors are (batch, seq, embed).
        self.attn = nn.MultiheadAttention(config.n_embd, config.n_head, batch_first=True)
        self.ln2 = nn.LayerNorm(config.n_embd)
        self.mlp = nn.Sequential(nn.Linear(config.n_embd, config.n_inner), nn.GELU(), nn.Linear(config.n_inner, config.n_embd))

    def forward(self, x, mask=None):
        """Apply the block.

        Args:
            x: (batch, seq, n_embd) activations.
            mask: optional additive attention mask (e.g. a causal -inf mask)
                passed through to nn.MultiheadAttention's attn_mask.

        Returns:
            (batch, seq, n_embd) activations.
        """
        # PERF: normalize once. The original evaluated self.ln1(x) three
        # separate times for query, key and value — identical result,
        # triple the work.
        normed = self.ln1(x)
        attn_out, _ = self.attn(normed, normed, normed, attn_mask=mask, need_weights=False)
        h = x + attn_out
        return h + self.mlp(self.ln2(h))
34
+
35
class ChessForCausalLM(PreTrainedModel, GenerationMixin):
    """Decoder-only chess transformer compatible with HF `.generate()`.

    Token + learned positional embeddings feed a stack of `Block`s; a final
    LayerNorm and a weight-tied linear head produce next-token logits.
    """

    config_class = ChessConfig

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.token_emb = nn.Embedding(config.vocab_size, config.n_embd)
        # Learned positional embeddings: a sequence longer than config.n_ctx
        # will raise an index error in forward().
        self.pos_emb = nn.Embedding(config.n_ctx, config.n_embd)
        self.blocks = nn.ModuleList([Block(config) for _ in range(config.n_layer)])
        self.ln_f = nn.LayerNorm(config.n_embd)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        # Weight tying: lm_head shares the token embedding matrix.
        self.lm_head.weight = self.token_emb.weight
        # NOTE(review): apply() visits both token_emb and lm_head, so the
        # shared tensor is re-initialized twice (harmless — same distribution).
        self.apply(self._init_weights)

    def _init_weights(self, module):
        # GPT-style init: N(0, 0.02) for every Linear/Embedding weight.
        # LayerNorm keeps its default (weight=1, bias=0) initialization.
        if isinstance(module, (nn.Linear, nn.Embedding)):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    # Required so HF's .generate() knows how to build each decoding step's
    # inputs (no KV cache here, so the full sequence is re-fed every step).
    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        return {"input_ids": input_ids}

    def forward(self, input_ids, attention_mask=None, labels=None, **kwargs):
        """Run the model.

        Args:
            input_ids: (batch, seq) token ids; seq must be <= config.n_ctx.
            attention_mask: accepted for HF compatibility but UNUSED — padding
                positions are attended to like any other token.
            labels: optional (batch, seq) target ids; -100 entries are ignored
                by the loss.

        Returns:
            CausalLMOutput with `logits` of shape (batch, seq, vocab_size)
            and, when labels are given, the cross-entropy `loss`.
        """
        B, T = input_ids.shape
        x = self.token_emb(input_ids) + self.pos_emb(torch.arange(T, device=input_ids.device))

        # Causal mask: -inf strictly above the diagonal forbids attending to
        # future positions.
        mask = torch.triu(torch.ones(T, T, device=input_ids.device) * float('-inf'), diagonal=1)

        for block in self.blocks:
            x = block(x, mask=mask)

        logits = self.lm_head(self.ln_f(x))

        loss = None
        if labels is not None:
            # Shift: logits at position t predict the token at position t+1
            # (input [BOS, A, B] -> targets [A, B, C]). The tokenizer's [BOS]
            # prefix makes this alignment correct.
            loss = nn.CrossEntropyLoss(ignore_index=-100)(
                logits[..., :-1, :].contiguous().view(-1, self.config.vocab_size),
                labels[..., 1:].contiguous().view(-1)
            )

        return CausalLMOutput(loss=loss, logits=logits)
tokenizer.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import PreTrainedTokenizer
3
+ import json
4
+ import os
5
+
6
class ChessTokenizer(PreTrainedTokenizer):
    """Whitespace tokenizer for chess move sequences backed by a JSON vocab.

    The vocabulary file must contain ``{"token_to_id": {...}, "id_to_token": {...}}``.
    Encoding splits the input on whitespace, maps each move to its id, and
    wraps the sequence in [BOS] ... [EOS].
    """

    model_input_names = ["input_ids", "attention_mask"]

    def __init__(self, vocab_file="vocab.json", **kwargs):
        """Load the vocabulary from ``vocab_file``.

        Raises:
            ValueError: if ``vocab_file`` does not exist.
        """
        if os.path.exists(vocab_file):
            with open(vocab_file, 'r') as f:
                data = json.load(f)
            self.token_to_id = data["token_to_id"]
            # JSON object keys are strings; restore the integer ids.
            self.id_to_token = {int(k): v for k, v in data["id_to_token"].items()}
        else:
            raise ValueError(f"Vocabulary file {vocab_file} not found.")

        # Special tokens are set before the base-class constructor runs.
        self.unk_token = "[UNK]"
        self.pad_token = "[PAD]"
        self.bos_token = "[BOS]"
        self.eos_token = "[EOS]"

        # .get() so a vocab missing a special token yields None rather than
        # raising here.
        self.bos_token_id = self.token_to_id.get("[BOS]")
        self.eos_token_id = self.token_to_id.get("[EOS]")
        self.unk_token_id = self.token_to_id.get("[UNK]")

        super().__init__(pad_token="[PAD]", bos_token="[BOS]", eos_token="[EOS]", unk_token="[UNK]", **kwargs)

    @property
    def vocab_size(self):
        """Number of entries in the vocabulary."""
        return len(self.token_to_id)

    def get_vocab(self):
        """Return the token -> id mapping."""
        return self.token_to_id

    def _convert_token_to_id(self, token):
        # Unknown moves fall back to [UNK].
        return self.token_to_id.get(token, self.unk_token_id)

    def _convert_id_to_token(self, index):
        return self.id_to_token.get(index, "[UNK]")

    def __call__(self, text, **kwargs):
        """Encode a move string (or list of strings) into model inputs.

        Returns a dict with ``input_ids`` and ``attention_mask`` (all ones —
        no padding is applied here). Truncates to ``kwargs['max_length']``
        (default 256) AFTER adding [BOS]/[EOS], so [EOS] may be dropped on
        over-long inputs.
        """
        # Batched input: encode each element independently.
        if isinstance(text, list):
            encoded = [self.__call__(t, **kwargs) for t in text]
            return {
                "input_ids": [e["input_ids"] for e in encoded],
                "attention_mask": [e["attention_mask"] for e in encoded],
            }

        moves = text.split()
        ids = [self.token_to_id.get(m, self.unk_token_id) for m in moves]

        # Automatically wrap with [BOS]/[EOS]; the model's shifted-label loss
        # relies on the [BOS] prefix.
        if self.bos_token_id is not None:
            ids = [self.bos_token_id] + ids
        if self.eos_token_id is not None:
            ids = ids + [self.eos_token_id]

        max_len = kwargs.get('max_length', 256)
        if len(ids) > max_len:
            ids = ids[:max_len]

        # BUGFIX: model_input_names declares attention_mask, but it was never
        # returned; HF collators expect it alongside input_ids.
        return {"input_ids": ids, "attention_mask": [1] * len(ids)}

    def save_pretrained(self, save_directory, **kwargs):
        """Write vocab.json and a minimal tokenizer_config.json to ``save_directory``."""
        with open(os.path.join(save_directory, "vocab.json"), "w") as f:
            json.dump({"token_to_id": self.token_to_id, "id_to_token": self.id_to_token}, f)
        with open(os.path.join(save_directory, "tokenizer_config.json"), "w") as f:
            json.dump({"model_type": "chess_transformer"}, f)

    @classmethod
    def from_pretrained(cls, path, **kwargs):
        """Load from ``path``/vocab.json if present, else construct with defaults."""
        vocab_path = os.path.join(path, "vocab.json")
        if os.path.exists(vocab_path):
            return cls(vocab_file=vocab_path, **kwargs)
        return cls(**kwargs)