Willy Vo committed on
Commit
5ca11c7
·
1 Parent(s): 27d0b30

Changement 7

Browse files
Files changed (2) hide show
  1. tokenizer.py +7 -0
  2. tokenizer_config.json +8 -43
tokenizer.py CHANGED
@@ -86,6 +86,13 @@ class ChessTokenizer(PreTrainedTokenizer):
86
  self._ids_to_tokens = {v: k for k, v in self._vocab.items()}
87
 
88
  # Call parent init AFTER setting up vocab
 
 
 
 
 
 
 
89
  super().__init__(
90
  pad_token=self._pad_token,
91
  bos_token=self._bos_token,
 
86
  self._ids_to_tokens = {v: k for k, v in self._vocab.items()}
87
 
88
  # Call parent init AFTER setting up vocab
89
+ kwargs.pop("added_tokens_decoder", None)
90
+ kwargs.pop("added_tokens_encoder", None)
91
+ kwargs.pop("special_tokens_map", None)
92
+ kwargs.pop("tokenizer_class", None)
93
+ kwargs.pop("auto_map", None)
94
+
95
+ extra = kwargs.pop("extra_special_tokens", None)
96
  super().__init__(
97
  pad_token=self._pad_token,
98
  bos_token=self._bos_token,
tokenizer_config.json CHANGED
@@ -1,47 +1,12 @@
1
  {
2
- "added_tokens_decoder": {
3
- "0": {
4
- "content": "[PAD]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "1": {
12
- "content": "[BOS]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "2": {
20
- "content": "[EOS]",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "3": {
28
- "content": "[UNK]",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- }
35
- },
36
  "bos_token": "[BOS]",
37
- "clean_up_tokenization_spaces": false,
38
  "eos_token": "[EOS]",
39
- "extra_special_tokens": [],
40
- "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "[PAD]",
42
- "tokenizer_class": "ChessTokenizer",
43
- "auto_map": {
44
- "AutoTokenizer": "tokenizer.ChessTokenizer"
45
- },
46
- "unk_token": "[UNK]"
47
- }
 
1
  {
2
+ "tokenizer_class": "ChessTokenizer",
3
+ "auto_map": { "AutoTokenizer": "tokenizer.ChessTokenizer" },
4
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "bos_token": "[BOS]",
 
6
  "eos_token": "[EOS]",
 
 
7
  "pad_token": "[PAD]",
8
+ "unk_token": "[UNK]",
9
+
10
+ "clean_up_tokenization_spaces": false,
11
+ "model_max_length": 192
12
+ }