File size: 750 Bytes
ae91f30
7e8e702
 
 
 
 
ae91f30
 
 
 
 
7e8e702
 
ae91f30
d5947d7
7e8e702
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae91f30
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
{
  "name": "hindi-tokenizer",
  "version": "1.0.0",
  "model_type": "sentencepiece",
  "sp_model_type": "unigram",
  "vocab_size": 16000,
  "model_max_length": 512,
  "do_lower_case": false,
  "tokenizer_class": "SentencePieceTokenizer",
  "tokenizer_file": "tokenizer.model",
  "auto_map": {
    "AutoTokenizer": ["PreTrainedTokenizerFast", null]
  },
  "bos_token": "<s>",
  "eos_token": "</s>",
  "pad_token": "<pad>",
  "unk_token": "<unk>",
  "mask_token": "<mask>",
  "cls_token": "<cls>",
  "sep_token": "<sep>",
  "special_tokens_map": {
    "pad_token": "<pad>",
    "unk_token": "<unk>",
    "bos_token": "<s>",
    "eos_token": "</s>",
    "mask_token": "<mask>",
    "sep_token": "<sep>",
    "cls_token": "<cls>"
  }
}