AlonKellner-Jounce committed on
Commit
d46aef9
·
verified ·
1 Parent(s): b7fc258

special tokens without brackets

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fb0d6686f6933f72ed6dc1838b9a6084d735e50e9f8554615ddf5488a7c6624
3
  size 142920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:619fe89397d93fd0a9d08c5ac59c8a0f3235948849bad255c9f4a570f15860b7
3
  size 142920
special_tokens_map.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "bos_token": "[CLS]",
3
- "cls_token": "[CLS]",
4
- "eos_token": "[SEP]",
5
- "mask_token": "[MASK]",
6
- "pad_token": "[PAD]",
7
- "sep_token": "[SEP]",
8
- "unk_token": "[UNK]"
9
  }
 
1
  {
2
+ "bos_token": "CLS",
3
+ "cls_token": "CLS",
4
+ "eos_token": "SEP",
5
+ "mask_token": "MASK",
6
+ "pad_token": "PAD",
7
+ "sep_token": "SEP",
8
+ "unk_token": "UNK"
9
  }
tokenizer.json CHANGED
@@ -5,7 +5,7 @@
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
- "content": "[UNK]",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "id": 1,
17
- "content": "[CLS]",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
@@ -23,7 +23,7 @@
23
  },
24
  {
25
  "id": 2,
26
- "content": "[SEP]",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
@@ -32,7 +32,7 @@
32
  },
33
  {
34
  "id": 3,
35
- "content": "[PAD]",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
@@ -41,7 +41,7 @@
41
  },
42
  {
43
  "id": 4,
44
- "content": "[MASK]",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
@@ -80,7 +80,7 @@
80
  "single": [
81
  {
82
  "SpecialToken": {
83
- "id": "[CLS]",
84
  "type_id": 0
85
  }
86
  },
@@ -92,7 +92,7 @@
92
  },
93
  {
94
  "SpecialToken": {
95
- "id": "[SEP]",
96
  "type_id": 0
97
  }
98
  }
@@ -100,7 +100,7 @@
100
  "pair": [
101
  {
102
  "SpecialToken": {
103
- "id": "[CLS]",
104
  "type_id": 0
105
  }
106
  },
@@ -112,7 +112,7 @@
112
  },
113
  {
114
  "SpecialToken": {
115
- "id": "[SEP]",
116
  "type_id": 0
117
  }
118
  },
@@ -124,28 +124,28 @@
124
  },
125
  {
126
  "SpecialToken": {
127
- "id": "[SEP]",
128
  "type_id": 1
129
  }
130
  }
131
  ],
132
  "special_tokens": {
133
- "[CLS]": {
134
- "id": "[CLS]",
135
  "ids": [
136
  1
137
  ],
138
  "tokens": [
139
- "[CLS]"
140
  ]
141
  },
142
- "[SEP]": {
143
- "id": "[SEP]",
144
  "ids": [
145
  2
146
  ],
147
  "tokens": [
148
- "[SEP]"
149
  ]
150
  }
151
  }
@@ -154,11 +154,11 @@
154
  "model": {
155
  "type": "WordLevel",
156
  "vocab": {
157
- "[UNK]": 0,
158
- "[CLS]": 1,
159
- "[SEP]": 2,
160
- "[PAD]": 3,
161
- "[MASK]": 4,
162
  "boken": 5,
163
  "coken": 6,
164
  "doken": 7,
@@ -180,6 +180,6 @@
180
  "xoken": 23,
181
  "yoken": 24
182
  },
183
- "unk_token": "[UNK]"
184
  }
185
  }
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
8
+ "content": "UNK",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
 
14
  },
15
  {
16
  "id": 1,
17
+ "content": "CLS",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
23
  },
24
  {
25
  "id": 2,
26
+ "content": "SEP",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
32
  },
33
  {
34
  "id": 3,
35
+ "content": "PAD",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
 
41
  },
42
  {
43
  "id": 4,
44
+ "content": "MASK",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
 
80
  "single": [
81
  {
82
  "SpecialToken": {
83
+ "id": "CLS",
84
  "type_id": 0
85
  }
86
  },
 
92
  },
93
  {
94
  "SpecialToken": {
95
+ "id": "SEP",
96
  "type_id": 0
97
  }
98
  }
 
100
  "pair": [
101
  {
102
  "SpecialToken": {
103
+ "id": "CLS",
104
  "type_id": 0
105
  }
106
  },
 
112
  },
113
  {
114
  "SpecialToken": {
115
+ "id": "SEP",
116
  "type_id": 0
117
  }
118
  },
 
124
  },
125
  {
126
  "SpecialToken": {
127
+ "id": "SEP",
128
  "type_id": 1
129
  }
130
  }
131
  ],
132
  "special_tokens": {
133
+ "CLS": {
134
+ "id": "CLS",
135
  "ids": [
136
  1
137
  ],
138
  "tokens": [
139
+ "CLS"
140
  ]
141
  },
142
+ "SEP": {
143
+ "id": "SEP",
144
  "ids": [
145
  2
146
  ],
147
  "tokens": [
148
+ "SEP"
149
  ]
150
  }
151
  }
 
154
  "model": {
155
  "type": "WordLevel",
156
  "vocab": {
157
+ "UNK": 0,
158
+ "CLS": 1,
159
+ "SEP": 2,
160
+ "PAD": 3,
161
+ "MASK": 4,
162
  "boken": 5,
163
  "coken": 6,
164
  "doken": 7,
 
180
  "xoken": 23,
181
  "yoken": 24
182
  },
183
+ "unk_token": "UNK"
184
  }
185
  }
tokenizer_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
- "content": "[UNK]",
5
  "lstrip": false,
6
  "normalized": true,
7
  "rstrip": false,
@@ -9,7 +9,7 @@
9
  "special": false
10
  },
11
  "1": {
12
- "content": "[CLS]",
13
  "lstrip": false,
14
  "normalized": true,
15
  "rstrip": false,
@@ -17,7 +17,7 @@
17
  "special": false
18
  },
19
  "2": {
20
- "content": "[SEP]",
21
  "lstrip": false,
22
  "normalized": true,
23
  "rstrip": false,
@@ -25,7 +25,7 @@
25
  "special": false
26
  },
27
  "3": {
28
- "content": "[PAD]",
29
  "lstrip": false,
30
  "normalized": true,
31
  "rstrip": false,
@@ -33,7 +33,7 @@
33
  "special": false
34
  },
35
  "4": {
36
- "content": "[MASK]",
37
  "lstrip": false,
38
  "normalized": true,
39
  "rstrip": false,
@@ -41,17 +41,17 @@
41
  "special": false
42
  }
43
  },
44
- "bos_token": "[CLS]",
45
  "chat_template": "{% for message in messages %}{{ message['content'] }}{% endfor %}",
46
  "clean_up_tokenization_spaces": false,
47
- "cls_token": "[CLS]",
48
- "eos_token": "[SEP]",
49
- "mask_token": "[MASK]",
50
  "max_new_tokens": 1048576,
51
  "model_max_length": 1048576,
52
- "pad_token": "[PAD]",
53
  "padding_side": "right",
54
- "sep_token": "[SEP]",
55
  "tokenizer_class": "PreTrainedTokenizerFast",
56
- "unk_token": "[UNK]"
57
  }
 
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
+ "content": "UNK",
5
  "lstrip": false,
6
  "normalized": true,
7
  "rstrip": false,
 
9
  "special": false
10
  },
11
  "1": {
12
+ "content": "CLS",
13
  "lstrip": false,
14
  "normalized": true,
15
  "rstrip": false,
 
17
  "special": false
18
  },
19
  "2": {
20
+ "content": "SEP",
21
  "lstrip": false,
22
  "normalized": true,
23
  "rstrip": false,
 
25
  "special": false
26
  },
27
  "3": {
28
+ "content": "PAD",
29
  "lstrip": false,
30
  "normalized": true,
31
  "rstrip": false,
 
33
  "special": false
34
  },
35
  "4": {
36
+ "content": "MASK",
37
  "lstrip": false,
38
  "normalized": true,
39
  "rstrip": false,
 
41
  "special": false
42
  }
43
  },
44
+ "bos_token": "CLS",
45
  "chat_template": "{% for message in messages %}{{ message['content'] }}{% endfor %}",
46
  "clean_up_tokenization_spaces": false,
47
+ "cls_token": "CLS",
48
+ "eos_token": "SEP",
49
+ "mask_token": "MASK",
50
  "max_new_tokens": 1048576,
51
  "model_max_length": 1048576,
52
+ "pad_token": "PAD",
53
  "padding_side": "right",
54
+ "sep_token": "SEP",
55
  "tokenizer_class": "PreTrainedTokenizerFast",
56
+ "unk_token": "UNK"
57
  }