scthornton committed on
Commit
dba0249
·
verified ·
1 Parent(s): 75e84ab

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +5 -53
tokenizer.json CHANGED
@@ -10,7 +10,7 @@
10
  "strategy": {
11
  "Fixed": 4096
12
  },
13
- "direction": "Left",
14
  "pad_to_multiple_of": null,
15
  "pad_id": 32021,
16
  "pad_type_id": 0,
@@ -268,58 +268,10 @@
268
  ]
269
  },
270
  "post_processor": {
271
- "type": "TemplateProcessing",
272
- "single": [
273
- {
274
- "SpecialToken": {
275
- "id": "<|begin▁of▁sentence|>",
276
- "type_id": 0
277
- }
278
- },
279
- {
280
- "Sequence": {
281
- "id": "A",
282
- "type_id": 0
283
- }
284
- }
285
- ],
286
- "pair": [
287
- {
288
- "SpecialToken": {
289
- "id": "<|begin▁of▁sentence|>",
290
- "type_id": 0
291
- }
292
- },
293
- {
294
- "Sequence": {
295
- "id": "A",
296
- "type_id": 0
297
- }
298
- },
299
- {
300
- "SpecialToken": {
301
- "id": "<|begin▁of▁sentence|>",
302
- "type_id": 1
303
- }
304
- },
305
- {
306
- "Sequence": {
307
- "id": "B",
308
- "type_id": 1
309
- }
310
- }
311
- ],
312
- "special_tokens": {
313
- "<|begin▁of▁sentence|>": {
314
- "id": "<|begin▁of▁sentence|>",
315
- "ids": [
316
- 32013
317
- ],
318
- "tokens": [
319
- "<|begin▁of▁sentence|>"
320
- ]
321
- }
322
- }
323
  },
324
  "decoder": {
325
  "type": "ByteLevel",
 
10
  "strategy": {
11
  "Fixed": 4096
12
  },
13
+ "direction": "Right",
14
  "pad_to_multiple_of": null,
15
  "pad_id": 32021,
16
  "pad_type_id": 0,
 
268
  ]
269
  },
270
  "post_processor": {
271
+ "type": "ByteLevel",
272
+ "add_prefix_space": true,
273
+ "trim_offsets": false,
274
+ "use_regex": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  },
276
  "decoder": {
277
  "type": "ByteLevel",