{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{"id": 0, "special": false, "content": "hello"},
{"id": 1, "special": false, "content": "world"},
{"id": 2, "special": true, "content": "<unk>"}
],
"normalizer": {
"type": "BertNormalizer",
"clean_text": true,
"handle_chinese_chars": true,
"strip_accents": null,
"lowercase": true
},
"pre_tokenizer": {"type": "Whitespace"},
"model": {
"type": "WordLevel",
"vocab": {"hello": 0, "world": 1, "<unk>": 2},
"unk_token": "<unk>"
},
"post_processor": {
"type": "ByteLevel",
"trim_offsets": true
}
}