JingzeShi committed on
Commit
5808c2a
·
verified ·
1 Parent(s): efafa07

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -22,6 +22,7 @@
22
  "<|fim_prefix|>": 128026,
23
  "<|fim_suffix|>": 128028,
24
  "<|im_end|>": 128003,
 
25
  "<|im_start|>": 128002,
26
  "<|image_pad|>": 128024,
27
  "<|object_ref_end|>": 128016,
@@ -162,7 +163,6 @@
162
  "<|reserved_special_token_220|>": 128252,
163
  "<|reserved_special_token_221|>": 128253,
164
  "<|reserved_special_token_222|>": 128254,
165
- "<|reserved_special_token_223|>": 128255,
166
  "<|reserved_special_token_22|>": 128054,
167
  "<|reserved_special_token_23|>": 128055,
168
  "<|reserved_special_token_24|>": 128056,
 
22
  "<|fim_prefix|>": 128026,
23
  "<|fim_suffix|>": 128028,
24
  "<|im_end|>": 128003,
25
+ "<|im_mask|>": 128255,
26
  "<|im_start|>": 128002,
27
  "<|image_pad|>": 128024,
28
  "<|object_ref_end|>": 128016,
 
163
  "<|reserved_special_token_220|>": 128252,
164
  "<|reserved_special_token_221|>": 128253,
165
  "<|reserved_special_token_222|>": 128254,
 
166
  "<|reserved_special_token_22|>": 128054,
167
  "<|reserved_special_token_23|>": 128055,
168
  "<|reserved_special_token_24|>": 128056,
special_tokens_map.json CHANGED
@@ -14,7 +14,7 @@
14
  "single_word": false
15
  },
16
  "mask_token": {
17
- "content": "<|end_of_text|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
14
  "single_word": false
15
  },
16
  "mask_token": {
17
+ "content": "<|im_mask|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf32d7719f1732eef0b7d9752ec708a65efe14c1d8901b9243fa0dc8af325ece
3
- size 17209390
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6994d9adc5f6c1cec4a446e3a3f98ffa7efeefd4a76c057accf09971fbf68420
3
+ size 17209371
tokenizer_config.json CHANGED
@@ -2043,7 +2043,7 @@
2043
  "special": true
2044
  },
2045
  "128255": {
2046
- "content": "<|reserved_special_token_223|>",
2047
  "lstrip": false,
2048
  "normalized": false,
2049
  "rstrip": false,
@@ -2056,10 +2056,11 @@
2056
  "eos_token": "<|im_end|>",
2057
  "errors": "replace",
2058
  "extra_special_tokens": {},
2059
- "mask_token": "<|end_of_text|>",
2060
  "model_input_names": [
2061
  "input_ids",
2062
- "attention_mask"
 
2063
  ],
2064
  "model_max_length": 131072,
2065
  "pad_token": "<|end_of_text|>",
 
2043
  "special": true
2044
  },
2045
  "128255": {
2046
+ "content": "<|im_mask|>",
2047
  "lstrip": false,
2048
  "normalized": false,
2049
  "rstrip": false,
 
2056
  "eos_token": "<|im_end|>",
2057
  "errors": "replace",
2058
  "extra_special_tokens": {},
2059
+ "mask_token": "<|im_mask|>",
2060
  "model_input_names": [
2061
  "input_ids",
2062
+ "attention_mask",
2063
+ "position_ids"
2064
  ],
2065
  "model_max_length": 131072,
2066
  "pad_token": "<|end_of_text|>",