Faaz committed on
Commit
02eef51
·
1 Parent(s): 24b5fb1

Fix extra_special_tokens: list to dict for transformers 4.55

Browse files
data/tokenizer/mindi_tokenizer/tokenizer_config.json CHANGED
@@ -5,34 +5,34 @@
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "<|im_end|>",
7
  "errors": "replace",
8
- "extra_special_tokens": [
9
- "<|mindi_start|>",
10
- "<|mindi_end|>",
11
- "<|code_start|>",
12
- "<|code_end|>",
13
- "<|vision_start|>",
14
- "<|vision_end|>",
15
- "<|critique_start|>",
16
- "<|critique_end|>",
17
- "<|suggest_start|>",
18
- "<|suggest_end|>",
19
- "<|think_start|>",
20
- "<|think_end|>",
21
- "<|file_start|>",
22
- "<|file_end|>",
23
- "<|search_start|>",
24
- "<|search_end|>",
25
- "<|sandbox_start|>",
26
- "<|sandbox_end|>",
27
- "<|error_start|>",
28
- "<|error_end|>",
29
- "<|fix_start|>",
30
- "<|fix_end|>"
31
- ],
32
  "is_local": true,
33
  "model_max_length": 32768,
34
  "pad_token": "<|endoftext|>",
35
  "split_special_tokens": false,
36
  "tokenizer_class": "Qwen2Tokenizer",
37
  "unk_token": null
38
- }
 
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "<|im_end|>",
7
  "errors": "replace",
8
+ "extra_special_tokens": {
9
+ "<|mindi_start|>": "<|mindi_start|>",
10
+ "<|mindi_end|>": "<|mindi_end|>",
11
+ "<|code_start|>": "<|code_start|>",
12
+ "<|code_end|>": "<|code_end|>",
13
+ "<|vision_start|>": "<|vision_start|>",
14
+ "<|vision_end|>": "<|vision_end|>",
15
+ "<|critique_start|>": "<|critique_start|>",
16
+ "<|critique_end|>": "<|critique_end|>",
17
+ "<|suggest_start|>": "<|suggest_start|>",
18
+ "<|suggest_end|>": "<|suggest_end|>",
19
+ "<|think_start|>": "<|think_start|>",
20
+ "<|think_end|>": "<|think_end|>",
21
+ "<|file_start|>": "<|file_start|>",
22
+ "<|file_end|>": "<|file_end|>",
23
+ "<|search_start|>": "<|search_start|>",
24
+ "<|search_end|>": "<|search_end|>",
25
+ "<|sandbox_start|>": "<|sandbox_start|>",
26
+ "<|sandbox_end|>": "<|sandbox_end|>",
27
+ "<|error_start|>": "<|error_start|>",
28
+ "<|error_end|>": "<|error_end|>",
29
+ "<|fix_start|>": "<|fix_start|>",
30
+ "<|fix_end|>": "<|fix_end|>"
31
+ },
32
  "is_local": true,
33
  "model_max_length": 32768,
34
  "pad_token": "<|endoftext|>",
35
  "split_special_tokens": false,
36
  "tokenizer_class": "Qwen2Tokenizer",
37
  "unk_token": null
38
+ }