Full submission with code by MDaytek
Browse files- README.md +2 -16
- model.py +80 -0
- tokenizer.py +68 -0
README.md
CHANGED
|
@@ -6,20 +6,6 @@ tags:
|
|
| 6 |
- chess-challenge
|
| 7 |
license: mit
|
| 8 |
---
|
| 9 |
-
|
| 10 |
# chess-v2-head
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
## Submission Info
|
| 15 |
-
- **Submitted by**: [MDaytek](https://huggingface.co/MDaytek)
|
| 16 |
-
- **Parameters**: 999,936
|
| 17 |
-
- **Epochs**: 4
|
| 18 |
-
- **Training Samples**: 1,000,000
|
| 19 |
-
|
| 20 |
-
## Architecture
|
| 21 |
-
- **Type**: Custom Chess Transformer
|
| 22 |
-
- **Vocab size**: 1344
|
| 23 |
-
- **Layers**: 6
|
| 24 |
-
- **Heads**: 8
|
| 25 |
-
- **Embed Dim**: 128
|
|
|
|
| 6 |
- chess-challenge
|
| 7 |
license: mit
|
| 8 |
---
|
|
|
|
| 9 |
# chess-v2-head
|
| 10 |
+
Model submitted by MDaytek.
|
| 11 |
+
**Parameters:** 999,936
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
from transformers import PreTrainedModel, PretrainedConfig, GenerationMixin
|
| 5 |
+
from transformers.modeling_outputs import CausalLMOutput
|
| 6 |
+
|
| 7 |
+
class ChessConfig(PretrainedConfig):
    """Hyper-parameters for the custom chess transformer."""

    model_type = "chess_transformer"

    def __init__(self, vocab_size=1000, n_embd=128, n_layer=4, n_head=4, n_inner=512, n_ctx=256, **kwargs):
        super().__init__(**kwargs)
        # Core architecture hyper-parameters.
        self.vocab_size, self.n_embd = vocab_size, n_embd
        self.n_layer, self.n_head = n_layer, n_head
        self.n_inner, self.n_ctx = n_inner, n_ctx

        # Aliases expected by the Hugging Face machinery (required for .generate()).
        self.num_hidden_layers = n_layer
        self.hidden_size = n_embd
        self.num_attention_heads = n_head
|
| 22 |
+
|
| 23 |
+
class Block(nn.Module):
    """Pre-norm transformer block: self-attention followed by a GELU MLP.

    Both sub-layers use residual connections; LayerNorm is applied to the
    input of each sub-layer (pre-norm).
    """

    def __init__(self, config):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.attn = nn.MultiheadAttention(config.n_embd, config.n_head, batch_first=True)
        self.ln2 = nn.LayerNorm(config.n_embd)
        self.mlp = nn.Sequential(
            nn.Linear(config.n_embd, config.n_inner),
            nn.GELU(),
            nn.Linear(config.n_inner, config.n_embd),
        )

    def forward(self, x, mask=None):
        """Apply self-attention then the MLP, each with a residual connection.

        Args:
            x: activations of shape (batch, seq, n_embd).
            mask: optional additive attention mask (e.g. a causal mask with
                -inf above the diagonal), passed straight to the attention.

        Returns:
            Tensor of the same shape as ``x``.
        """
        # Normalize once instead of three times: the original computed
        # self.ln1(x) separately for q, k and v — identical results, 3x work.
        h = self.ln1(x)
        attn_out, _ = self.attn(h, h, h, attn_mask=mask, need_weights=False)
        x = x + attn_out
        return x + self.mlp(self.ln2(x))
|
| 34 |
+
|
| 35 |
+
class ChessForCausalLM(PreTrainedModel, GenerationMixin):
    """Minimal decoder-only transformer for chess-move language modelling.

    Token + learned positional embeddings feed a stack of pre-norm blocks,
    followed by a final LayerNorm and a weight-tied LM head.
    """

    config_class = ChessConfig

    def __init__(self, config):
        super().__init__(config)
        self.config = config
        self.token_emb = nn.Embedding(config.vocab_size, config.n_embd)
        self.pos_emb = nn.Embedding(config.n_ctx, config.n_embd)
        self.blocks = nn.ModuleList([Block(config) for _ in range(config.n_layer)])
        self.ln_f = nn.LayerNorm(config.n_embd)
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        # Weight tying: the LM head shares the token embedding matrix.
        self.lm_head.weight = self.token_emb.weight
        self.apply(self._init_weights)

    def _init_weights(self, module):
        # GPT-style small normal init for all linear/embedding weights.
        if isinstance(module, (nn.Linear, nn.Embedding)):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    # Required for .generate()
    def prepare_inputs_for_generation(self, input_ids, **kwargs):
        return {"input_ids": input_ids}

    def forward(self, input_ids, attention_mask=None, labels=None, **kwargs):
        """Run the model; compute the causal-LM loss when labels are given.

        Args:
            input_ids: (batch, seq) token ids; seq must not exceed n_ctx.
            attention_mask: accepted for HF compatibility.
            labels: optional (batch, seq) ids; positions with -100 are ignored.

        Returns:
            CausalLMOutput with ``logits`` (batch, seq, vocab_size) and,
            when labels were provided, the shifted cross-entropy ``loss``.

        Raises:
            ValueError: if the sequence is longer than the positional table.
        """
        B, T = input_ids.shape
        # Fail early with a clear message instead of an opaque embedding
        # index error when the sequence exceeds the positional table.
        if T > self.config.n_ctx:
            raise ValueError(
                f"Sequence length {T} exceeds maximum context size {self.config.n_ctx}."
            )
        x = self.token_emb(input_ids) + self.pos_emb(torch.arange(T, device=input_ids.device))

        # Causal mask: -inf above the diagonal forbids attending to the future.
        mask = torch.triu(torch.ones(T, T, device=input_ids.device) * float('-inf'), diagonal=1)

        # NOTE(review): attention_mask is accepted but never applied, so
        # padding tokens still participate in attention — confirm that all
        # training/inference batches are unpadded, or wire the mask in.
        for block in self.blocks:
            x = block(x, mask=mask)

        logits = self.lm_head(self.ln_f(x))

        loss = None
        if labels is not None:
            # Shift the labels: position t predicts token t+1
            # (input [BOS, A, B] -> predicts [A, B, C]). The tokenizer's
            # automatic [BOS] makes this alignment correct.
            loss = nn.CrossEntropyLoss(ignore_index=-100)(
                logits[..., :-1, :].contiguous().view(-1, self.config.vocab_size),
                labels[..., 1:].contiguous().view(-1),
            )

        return CausalLMOutput(loss=loss, logits=logits)
|
tokenizer.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from transformers import PreTrainedTokenizer
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
class ChessTokenizer(PreTrainedTokenizer):
    """Whitespace tokenizer for chess-move sequences backed by a fixed vocab.

    The vocabulary is loaded from a ``vocab.json`` file containing two maps,
    ``token_to_id`` and ``id_to_token``. Encoding wraps each sequence in
    [BOS] ... [EOS] automatically.
    """

    model_input_names = ["input_ids", "attention_mask"]

    def __init__(self, vocab_file="vocab.json", **kwargs):
        # Load the vocab before super().__init__, which may query
        # vocab_size / get_vocab during base-class set-up.
        if os.path.exists(vocab_file):
            with open(vocab_file, 'r') as f:
                data = json.load(f)
            self.token_to_id = data["token_to_id"]
            # JSON object keys are strings; restore the int ids.
            self.id_to_token = {int(k): v for k, v in data["id_to_token"].items()}
        else:
            raise ValueError(f"Vocabulary file {vocab_file} not found.")

        self.unk_token = "[UNK]"
        self.pad_token = "[PAD]"
        self.bos_token = "[BOS]"
        self.eos_token = "[EOS]"

        # May be None if the special token is absent from the vocab.
        self.bos_token_id = self.token_to_id.get("[BOS]")
        self.eos_token_id = self.token_to_id.get("[EOS]")
        self.unk_token_id = self.token_to_id.get("[UNK]")

        super().__init__(pad_token="[PAD]", bos_token="[BOS]", eos_token="[EOS]", unk_token="[UNK]", **kwargs)

    @property
    def vocab_size(self):
        return len(self.token_to_id)

    def get_vocab(self):
        return self.token_to_id

    def _convert_token_to_id(self, token):
        return self.token_to_id.get(token, self.unk_token_id)

    def _convert_id_to_token(self, index):
        return self.id_to_token.get(index, "[UNK]")

    def __call__(self, text, **kwargs):
        """Encode a move string (or list of strings) into model inputs.

        Moves are split on whitespace, mapped through the vocab (unknown
        moves become [UNK]), wrapped in [BOS]/[EOS], and truncated to
        ``max_length`` (default 256).

        Returns:
            dict with ``input_ids`` and a matching all-ones
            ``attention_mask`` — the mask was missing from the original
            output even though ``model_input_names`` declares it.
        """
        # Handle batches by encoding each text individually.
        if isinstance(text, list):
            encoded = [self.__call__(t, **kwargs) for t in text]
            return {
                "input_ids": [e["input_ids"] for e in encoded],
                "attention_mask": [e["attention_mask"] for e in encoded],
            }

        moves = text.split()
        ids = [self.token_to_id.get(m, self.unk_token_id) for m in moves]

        # Automatic [BOS]/[EOS] wrapping (crucial for the label shift in the loss).
        if self.bos_token_id is not None:
            ids = [self.bos_token_id] + ids
        if self.eos_token_id is not None:
            ids = ids + [self.eos_token_id]

        # NOTE(review): truncation happens after [EOS] is appended, so a
        # truncated sequence loses its [EOS] token — confirm this is intended.
        max_len = kwargs.get('max_length', 256)
        if len(ids) > max_len:
            ids = ids[:max_len]

        return {"input_ids": ids, "attention_mask": [1] * len(ids)}

    def save_pretrained(self, save_directory, **kwargs):
        """Write vocab.json plus a minimal tokenizer_config.json."""
        with open(os.path.join(save_directory, "vocab.json"), "w") as f:
            json.dump({"token_to_id": self.token_to_id, "id_to_token": self.id_to_token}, f)
        with open(os.path.join(save_directory, "tokenizer_config.json"), "w") as f:
            json.dump({"model_type": "chess_transformer"}, f)

    @classmethod
    def from_pretrained(cls, path, **kwargs):
        """Load from ``path/vocab.json``; fall back to the default file name."""
        vocab_path = os.path.join(path, "vocab.json")
        if os.path.exists(vocab_path):
            return cls(vocab_file=vocab_path, **kwargs)
        return cls(**kwargs)
|