File size: 1,784 Bytes
551066e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
{
  "<eop>": "<eop>",
  "<sop>": "<sop>",
  "<|assistant|>": "<|assistant|>",
  "<|begin_of_audio|>": "<|begin_of_audio|>",
  "<|begin_of_image|>": "<|begin_of_image|>",
  "<|begin_of_transcription|>": "<|begin_of_transcription|>",
  "<|begin_of_video|>": "<|begin_of_video|>",
  "<|end_of_audio|>": "<|end_of_audio|>",
  "<|end_of_image|>": "<|end_of_image|>",
  "<|end_of_transcription|>": "<|end_of_transcription|>",
  "<|end_of_video|>": "<|end_of_video|>",
  "<|endoftext|>": "<|endoftext|>",
  "<|observation|>": "<|observation|>",
  "<|system|>": "<|system|>",
  "<|user|>": "<|user|>",
  "[MASK]": "[MASK]",
  "[gMASK]": "[gMASK]",
  "[sMASK]": "[sMASK]",
  "backend": "tokenizers",
  "clean_up_tokenization_spaces": false,
  "do_lower_case": false,
  "eos_token": "<|endoftext|>",
  "is_local": false,
  "max_length": null,
  "model_max_length": 128000,
  "model_specific_special_tokens": {
    "<eop>": "<eop>",
    "<sop>": "<sop>",
    "<|assistant|>": "<|assistant|>",
    "<|begin_of_audio|>": "<|begin_of_audio|>",
    "<|begin_of_image|>": "<|begin_of_image|>",
    "<|begin_of_transcription|>": "<|begin_of_transcription|>",
    "<|begin_of_video|>": "<|begin_of_video|>",
    "<|end_of_audio|>": "<|end_of_audio|>",
    "<|end_of_image|>": "<|end_of_image|>",
    "<|end_of_transcription|>": "<|end_of_transcription|>",
    "<|end_of_video|>": "<|end_of_video|>",
    "<|endoftext|>": "<|endoftext|>",
    "<|observation|>": "<|observation|>",
    "<|system|>": "<|system|>",
    "<|user|>": "<|user|>",
    "[MASK]": "[MASK]",
    "[gMASK]": "[gMASK]",
    "[sMASK]": "[sMASK]"
  },
  "pad_to_multiple_of": null,
  "pad_token": "<|endoftext|>",
  "pad_token_type_id": 0,
  "padding_side": "left",
  "remove_space": false,
  "tokenizer_class": "TokenizersBackend"
}