Full submission with code by MDaytek
Browse files- README.md +2 -16
- model.py +80 -0
- tokenizer.py +68 -0
README.md
CHANGED
|
@@ -6,20 +6,6 @@ tags:
|
|
| 6 |
- chess-challenge
|
| 7 |
license: mit
|
| 8 |
---
|
| 9 |
-
|
| 10 |
# chess-v2-head
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
## Submission Info
|
| 15 |
-
- **Submitted by**: [MDaytek](https://huggingface.co/MDaytek)
|
| 16 |
-
- **Parameters**: 999,936
|
| 17 |
-
- **Epochs**: 4
|
| 18 |
-
- **Training Samples**: 1,000,000
|
| 19 |
-
|
| 20 |
-
## Architecture
|
| 21 |
-
- **Type**: Custom Chess Transformer
|
| 22 |
-
- **Vocab size**: 1344
|
| 23 |
-
- **Layers**: 6
|
| 24 |
-
- **Heads**: 8
|
| 25 |
-
- **Embed Dim**: 128
|
|
|
|
| 6 |
- chess-challenge
|
| 7 |
license: mit
|
| 8 |
---
|
|
|
|
| 9 |
# chess-v2-head
|
| 10 |
+
Model submitted by MDaytek.
|
| 11 |
+
**Parameters:** 999,936
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
from transformers import PreTrainedModel, PretrainedConfig, GenerationMixin
|
| 5 |
+
from transformers.modeling_outputs import CausalLMOutput
|
| 6 |
+
|
| 7 |
+
class ChessConfig(PretrainedConfig):
    """Hyper-parameters for the custom chess transformer."""

    model_type = "chess_transformer"

    def __init__(self, vocab_size=1000, n_embd=128, n_layer=4, n_head=4, n_inner=512, n_ctx=256, **kwargs):
        super().__init__(**kwargs)
        # Core architecture hyper-parameters.
        self.vocab_size, self.n_embd = vocab_size, n_embd
        self.n_layer, self.n_head = n_layer, n_head
        self.n_inner, self.n_ctx = n_inner, n_ctx

        # Aliases expected by the Hugging Face machinery (required for .generate()).
        self.num_hidden_layers = n_layer
        self.hidden_size = n_embd
        self.num_attention_heads = n_head
|
| 22 |
+
|
| 23 |
+
class Block(nn.Module):
    """Pre-norm transformer block: self-attention followed by a GELU MLP.

    Both sub-layers use residual connections; LayerNorm is applied to the
    input of each sub-layer (pre-norm).
    """

    def __init__(self, config):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.attn = nn.MultiheadAttention(config.n_embd, config.n_head, batch_first=True)
        self.ln2 = nn.LayerNorm(config.n_embd)
        self.mlp = nn.Sequential(
            nn.Linear(config.n_embd, config.n_inner),
            nn.GELU(),
            nn.Linear(config.n_inner, config.n_embd),
        )

    def forward(self, x, mask=None):
        """Apply self-attention then the MLP, each with a residual connection.

        Args:
            x: activations of shape (batch, seq, n_embd).
            mask: optional additive attention mask (e.g. a causal mask with
                -inf above the diagonal), passed straight to the attention.

        Returns:
            Tensor of the same shape as ``x``.
        """
        # Normalize once instead of three times: the original computed
        # self.ln1(x) separately for q, k and v — identical results, 3x work.
        h = self.ln1(x)
        attn_out, _ = self.attn(h, h, h, attn_mask=mask, need_weights=False)
        x = x + attn_out
        return x + self.mlp(self.ln2(x))
|
| 34 |
+
|
| 35 |
+
class ChessForCausalLM(PreTrainedModel, GenerationMixin):
    """Minimal decoder-only transformer for chess-move language modelling.

    Token + learned positional embeddings feed a stack of pre-norm blocks,
    followed by a final LayerNorm and a weight-tied LM head.
    """

    config_class = ChessConfig

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.token_emb = nn.Embedding(config.vocab_size, config.n_embd)
        self.pos_emb = nn.Embedding(config.n_ctx, config.n_embd)
        self.blocks = nn.ModuleList([Block(config) for _ in range(config.n_layer)])
        self.ln_f = nn.LayerNorm(config.n_embd)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        # Weight tying: the LM head shares the token embedding matrix.
        self.lm_head.weight = self.token_emb.weight
        self.apply(self._init_weights)

    def _init_weights(self, module):
        # GPT-style small normal init for all linear/embedding weights.
        if isinstance(module, (nn.Linear, nn.Embedding)):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    # Required for .generate()
    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        return {"input_ids": input_ids}

    def forward(self, input_ids, attention_mask=None, labels=None, **kwargs):
        """Run the model; compute the causal-LM loss when labels are given.

        Args:
            input_ids: (batch, seq) token ids; seq must not exceed n_ctx.
            attention_mask: accepted for HF compatibility.
            labels: optional (batch, seq) ids; positions with -100 are ignored.

        Returns:
            CausalLMOutput with ``logits`` (batch, seq, vocab_size) and,
            when labels were provided, the shifted cross-entropy ``loss``.

        Raises:
            ValueError: if the sequence is longer than the positional table.
        """
        B, T = input_ids.shape
        # Fail early with a clear message instead of an opaque embedding
        # index error when the sequence exceeds the positional table.
        if T > self.config.n_ctx:
            raise ValueError(
                f"Sequence length {T} exceeds maximum context size {self.config.n_ctx}."
            )
        x = self.token_emb(input_ids) + self.pos_emb(torch.arange(T, device=input_ids.device))

        # Causal mask: -inf above the diagonal forbids attending to the future.
        mask = torch.triu(torch.ones(T, T, device=input_ids.device) * float('-inf'), diagonal=1)

        # NOTE(review): attention_mask is accepted but never applied, so
        # padding tokens still participate in attention — confirm that all
        # training/inference batches are unpadded, or wire the mask in.
        for block in self.blocks:
            x = block(x, mask=mask)

        logits = self.lm_head(self.ln_f(x))

        loss = None
        if labels is not None:
            # Shift the labels: position t predicts token t+1
            # (input [BOS, A, B] -> predicts [A, B, C]). The tokenizer's
            # automatic [BOS] makes this alignment correct.
            loss = nn.CrossEntropyLoss(ignore_index=-100)(
                logits[..., :-1, :].contiguous().view(-1, self.config.vocab_size),
                labels[..., 1:].contiguous().view(-1),
            )

        return CausalLMOutput(loss=loss, logits=logits)
|
tokenizer.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from transformers import PreTrainedTokenizer
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
class ChessTokenizer(PreTrainedTokenizer):
    """Whitespace tokenizer for chess-move sequences backed by a fixed vocab.

    The vocabulary is loaded from a ``vocab.json`` file containing two maps,
    ``token_to_id`` and ``id_to_token``. Encoding wraps each sequence in
    [BOS] ... [EOS] automatically.
    """

    model_input_names = ["input_ids", "attention_mask"]

    def __init__(self, vocab_file="vocab.json", **kwargs):
        # Load the vocab before super().__init__, which may query
        # vocab_size / get_vocab during base-class set-up.
        if os.path.exists(vocab_file):
            with open(vocab_file, 'r') as f:
                data = json.load(f)
            self.token_to_id = data["token_to_id"]
            # JSON object keys are strings; restore the int ids.
            self.id_to_token = {int(k): v for k, v in data["id_to_token"].items()}
        else:
            raise ValueError(f"Vocabulary file {vocab_file} not found.")

        self.unk_token = "[UNK]"
        self.pad_token = "[PAD]"
        self.bos_token = "[BOS]"
        self.eos_token = "[EOS]"

        # May be None if the special token is absent from the vocab.
        self.bos_token_id = self.token_to_id.get("[BOS]")
        self.eos_token_id = self.token_to_id.get("[EOS]")
        self.unk_token_id = self.token_to_id.get("[UNK]")

        super().__init__(pad_token="[PAD]", bos_token="[BOS]", eos_token="[EOS]", unk_token="[UNK]", **kwargs)

    @property
    def vocab_size(self):
        return len(self.token_to_id)

    def get_vocab(self):
        return self.token_to_id

    def _convert_token_to_id(self, token):
        return self.token_to_id.get(token, self.unk_token_id)

    def _convert_id_to_token(self, index):
        return self.id_to_token.get(index, "[UNK]")

    def __call__(self, text, **kwargs):
        """Encode a move string (or list of strings) into model inputs.

        Moves are split on whitespace, mapped through the vocab (unknown
        moves become [UNK]), wrapped in [BOS]/[EOS], and truncated to
        ``max_length`` (default 256).

        Returns:
            dict with ``input_ids`` and a matching all-ones
            ``attention_mask`` — the mask was missing from the original
            output even though ``model_input_names`` declares it.
        """
        # Handle batches by encoding each text individually.
        if isinstance(text, list):
            encoded = [self.__call__(t, **kwargs) for t in text]
            return {
                "input_ids": [e["input_ids"] for e in encoded],
                "attention_mask": [e["attention_mask"] for e in encoded],
            }

        moves = text.split()
        ids = [self.token_to_id.get(m, self.unk_token_id) for m in moves]

        # Automatic [BOS]/[EOS] wrapping (crucial for the label shift in the loss).
        if self.bos_token_id is not None:
            ids = [self.bos_token_id] + ids
        if self.eos_token_id is not None:
            ids = ids + [self.eos_token_id]

        # NOTE(review): truncation happens after [EOS] is appended, so a
        # truncated sequence loses its [EOS] token — confirm this is intended.
        max_len = kwargs.get('max_length', 256)
        if len(ids) > max_len:
            ids = ids[:max_len]

        return {"input_ids": ids, "attention_mask": [1] * len(ids)}

    def save_pretrained(self, save_directory, **kwargs):
        """Write vocab.json plus a minimal tokenizer_config.json."""
        with open(os.path.join(save_directory, "vocab.json"), "w") as f:
            json.dump({"token_to_id": self.token_to_id, "id_to_token": self.id_to_token}, f)
        with open(os.path.join(save_directory, "tokenizer_config.json"), "w") as f:
            json.dump({"model_type": "chess_transformer"}, f)

    @classmethod
    def from_pretrained(cls, path, **kwargs):
        """Load from ``path/vocab.json``; fall back to the default file name."""
        vocab_path = os.path.join(path, "vocab.json")
        if os.path.exists(vocab_path):
            return cls(vocab_file=vocab_path, **kwargs)
        return cls(**kwargs)
|