Ahmad-11 committed on
Commit
901cd39
·
verified ·
1 Parent(s): b1af576

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -13,7 +13,13 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|endoftext|>",
 
 
 
 
 
 
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
  "unk_token": {
24
  "content": "<unk>",
25
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 510,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 510
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 32000,
16
- "pad_type_id": 0,
17
- "pad_token": "<|endoftext|>"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
@@ -175,12 +161,6 @@
175
  "id": "A",
176
  "type_id": 0
177
  }
178
- },
179
- {
180
- "SpecialToken": {
181
- "id": "<|endoftext|>",
182
- "type_id": 0
183
- }
184
  }
185
  ],
186
  "pair": [
@@ -196,12 +176,6 @@
196
  "type_id": 0
197
  }
198
  },
199
- {
200
- "SpecialToken": {
201
- "id": "<|endoftext|>",
202
- "type_id": 0
203
- }
204
- },
205
  {
206
  "SpecialToken": {
207
  "id": "<s>",
@@ -213,12 +187,6 @@
213
  "id": "B",
214
  "type_id": 1
215
  }
216
- },
217
- {
218
- "SpecialToken": {
219
- "id": "<|endoftext|>",
220
- "type_id": 1
221
- }
222
  }
223
  ],
224
  "special_tokens": {
@@ -230,15 +198,6 @@
230
  "tokens": [
231
  "<s>"
232
  ]
233
- },
234
- "<|endoftext|>": {
235
- "id": "<|endoftext|>",
236
- "ids": [
237
- 32000
238
- ],
239
- "tokens": [
240
- "<|endoftext|>"
241
- ]
242
  }
243
  }
244
  },
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
161
  "id": "A",
162
  "type_id": 0
163
  }
 
 
 
 
 
 
164
  }
165
  ],
166
  "pair": [
 
176
  "type_id": 0
177
  }
178
  },
 
 
 
 
 
 
179
  {
180
  "SpecialToken": {
181
  "id": "<s>",
 
187
  "id": "B",
188
  "type_id": 1
189
  }
 
 
 
 
 
 
190
  }
191
  ],
192
  "special_tokens": {
 
198
  "tokens": [
199
  "<s>"
200
  ]
 
 
 
 
 
 
 
 
 
201
  }
202
  }
203
  },
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": true,
3
- "add_eos_token": true,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -122,7 +122,7 @@
122
  "legacy": false,
123
  "model_max_length": 4096,
124
  "pad_token": "<|endoftext|>",
125
- "padding_side": "right",
126
  "sp_model_kwargs": {},
127
  "tokenizer_class": "LlamaTokenizer",
128
  "unk_token": "<unk>",
 
1
  {
2
  "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
122
  "legacy": false,
123
  "model_max_length": 4096,
124
  "pad_token": "<|endoftext|>",
125
+ "padding_side": "left",
126
  "sp_model_kwargs": {},
127
  "tokenizer_class": "LlamaTokenizer",
128
  "unk_token": "<unk>",