File size: 2,895 Bytes
866bcdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
{
    "add_prefix_space": false,
    "added_tokens_decoder": {
        "0": {
            "content": "<|endoftext|>",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "32768": {
            "content": "<|im_start|>",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "32769": {
            "content": "<|im_end|>",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": true
        },
        "32770": {
            "content": "<think>",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": false
        },
        "32771": {
            "content": "</think>",
            "lstrip": false,
            "normalized": false,
            "rstrip": false,
            "single_word": false,
            "special": false
        }
    },
    "additional_special_tokens": [
        "<|im_start|>",
        "<|im_end|>"
    ],
    "bos_token": null,
    "eos_token": "<|im_end|>",
    "pad_token": "<|endoftext|>",
    "unk_token": null,
    "chat_template": "{#- Emit a leading system turn. Non-string content falls back to an empty string, matching the per-message handling in the loop below; the messages check keeps an empty conversation from raising. #}\n{%- if messages and messages[0].role == 'system' %}\n    {%- if messages[0].content is string %}\n        {%- set system_content = messages[0].content %}\n    {%- else %}\n        {%- set system_content = '' %}\n    {%- endif %}\n    {{- '<|im_start|>system\\n' + system_content + '<|im_end|>\\n' }}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message.content is string %}\n        {%- set content = message.content %}\n    {%- else %}\n        {%- set content = '' %}\n    {%- endif %}\n    {%- if message.role == \"user\" %}\n        {{- '<|im_start|>user\\n' + content + '<|im_end|>\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set has_think = false %}\n        {%- set think_content = '' %}\n        {%- set main_content = content %}\n        \n        {%- if '</think>' in content %}\n            {%- set think_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n            {%- set main_content = content.split('</think>')[-1].lstrip('\\n') %}\n            {%- set has_think = true %}\n        {%- endif %}\n        \n        {{- '<|im_start|>assistant\\n' }}\n        {%- if has_think %}\n            {{- '<think>\\n' + think_content + '\\n</think>\\n\\n' + main_content }}\n        {%- else %}\n            {{- content }}\n        {%- endif %}\n        {{- '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}",
    "clean_up_tokenization_spaces": false,
    "errors": "replace",
    "model_max_length": 32768,
    "split_special_tokens": false,
    "tokenizer_class": "PreTrainedTokenizerFast",
    "add_bos_token": false
}