dda71427 committed on
Commit
34bf488
·
verified ·
1 Parent(s): d1bf205

Create . tokenizer.json

Browse files
Files changed (1) hide show
  1. . tokenizer.json +27 -0
. tokenizer.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {"id": 0, "special": false, "content": "hello"},
7
+ {"id": 1, "special": false, "content": "world"},
8
+ {"id": 2, "special": true, "content": "<unk>"}
9
+ ],
10
+ "normalizer": {
11
+ "type": "BertNormalizer",
12
+ "clean_text": true,
13
+ "handle_chinese_chars": true,
14
+ "strip_accents": null,
15
+ "lowercase": true
16
+ },
17
+ "pre_tokenizer": {"type": "Whitespace"},
18
+ "model": {
19
+ "type": "WordLevel",
20
+ "vocab": {"hello": 0, "world": 1, "<unk>": 2},
21
+ "unk_token": "<unk>"
22
+ },
23
+ "post_processor": {
24
+ "type": "ByteLevel",
25
+ "trim_offsets": true
26
+ }
27
+ }