cricguru / tokenizer.json
RavindraSingh22's picture
Initial model scaffold
7e546e9
raw
history blame contribute delete
509 Bytes
{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "[PAD]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "[UNK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "[CLS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 3,
      "content": "[SEP]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 4,
      "content": "[MASK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "BertNormalizer",
    "clean_text": true,
    "handle_chinese_chars": true,
    "strip_accents": true,
    "lowercase": true
  },
  "pre_tokenizer": {
    "type": "Whitespace"
  },
  "model": {
    "type": "WordLevel",
    "vocab": {
      "[PAD]": 0,
      "[UNK]": 1,
      "[CLS]": 2,
      "[SEP]": 3,
      "[MASK]": 4,
      "hello": 5,
      "world": 6
    },
    "unk_token": "[UNK]"
  }
}