File size: 5,785 Bytes
8e4436e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
{
  "added_tokens_decoder": {
    "151643": {"content": "[BOS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151644": {"content": "[EOS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151645": {"content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151646": {"content": "<|im_user|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151647": {"content": "<|im_assistant|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151648": {"content": "<|reserved_token_0|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151649": {"content": "<|start_header_id|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151650": {"content": "<|end_header_id|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151651": {"content": "<|reserved_token_1|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151652": {"content": "[EOT]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151653": {"content": "<|im_system|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151654": {"content": "<|reserved_token_2|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151655": {"content": "<|reserved_token_3|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151656": {"content": "<|reserved_token_4|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151657": {"content": "<|reserved_token_5|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151658": {"content": "<|reserved_token_6|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151659": {"content": "<|reserved_token_7|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151660": {"content": "<|im_middle|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151661": {"content": "<|media_begin|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151662": {"content": "<|media_content|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151663": {"content": "<|media_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151664": {"content": "<|media_placeholder|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},

    "151665": {"content": "<|vision_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151666": {"content": "<|vision_end|>",   "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151667": {"content": "<|image_pad|>",     "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "151668": {"content": "<|video_pad|>",     "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},

    "152062": {"content": "[UNK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true},
    "152063": {"content": "[PAD]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true}
  },

  "additional_special_tokens": [
    "<|im_end|>", "<|im_user|>", "<|im_assistant|>",
    "<|reserved_token_0|>", "<|start_header_id|>", "<|end_header_id|>",
    "<|reserved_token_1|>", "[EOT]", "<|im_system|>",
    "<|reserved_token_2|>", "<|reserved_token_3|>", "<|reserved_token_4|>",
    "<|reserved_token_5|>", "<|reserved_token_6|>", "<|reserved_token_7|>",
    "<|im_middle|>",
    "<|media_begin|>", "<|media_content|>", "<|media_end|>", "<|media_placeholder|>",
    "<|vision_start|>", "<|vision_end|>", "<|image_pad|>", "<|video_pad|>"
  ],

  "bos_token": "[BOS]",
  "clean_up_tokenization_spaces": false,
  "eos_token": "[EOS]",
  "extra_special_tokens": {},
  "chat_template": "{%- for message in messages -%}{%- if loop.first and messages[0]['role'] != 'system' -%}{{'<|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|>'}}{%- endif -%}{%- if message['role'] == 'system' -%}{{'<|im_system|>'}}{%- endif -%}{%- if message['role'] == 'user' -%}{{'<|im_user|>'}}{%- endif -%}{%- if message['role'] == 'assistant' -%}{{'<|im_assistant|>'}}{%- endif -%}{{- message['role'] -}}{{'<|im_middle|>'}}{%- if message['content'] is string -%}{{- message['content'] + '<|im_end|>' -}}{%- else -%}{%- for content in message['content'] -%}{%- if content['type'] == 'image' or 'image' in content or 'image_url' in content -%}{{'<|media_begin|>image<|media_content|><|media_placeholder|><|media_end|>'}}{%- else -%}{{content['text']}}{%- endif -%}{%- endfor -%}{{'<|im_end|>'}}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{'<|im_assistant|>assistant<|im_middle|>'}}{%- endif -%}",
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "[PAD]",
  "tokenizer_class": "TikTokenV3",
  "unk_token": "[UNK]",
  "auto_map": {
    "AutoTokenizer": ["tokenization_opencua.TikTokenV3", null]
  }
}