File size: 6,018 Bytes
932aa54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<repo_name>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "4": {
      "content": "<reponame>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "5": {
      "content": "<file_sep>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "6": {
      "content": "<filename>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "7": {
      "content": "<gh_stars>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "8": {
      "content": "<issue_start>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "9": {
      "content": "<issue_comment>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "10": {
      "content": "<issue_closed>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "11": {
      "content": "<jupyter_start>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "12": {
      "content": "<jupyter_text>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "13": {
      "content": "<jupyter_code>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "14": {
      "content": "<jupyter_output>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "15": {
      "content": "<jupyter_script>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "16": {
      "content": "<empty_output>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>"
  ],
  "bos_token": "<|im_start|>",
  "chat_template": "{%- set first_is_system = (messages and messages[0]['role'] == 'system') -%}\n\n{# ---------- SYSTEM MESSAGE WITH TOOL INSTRUCTIONS ---------- #}\n<|im_start|>system\n{%- if first_is_system -%}\n{{ messages[0]['content'] }}\n{%- else -%}\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face.\n{%- endif %}\n\nYou can call external tools (\"functions\") to get real-time data or take actions for the user.\n\nTOOL CALL PROTOCOL:\n1. When you decide a tool is needed, respond with ONLY one or more <tool_call>...</tool_call> blocks.\n   Do not include anything else in that turn.\n2. Each <tool_call> block MUST contain valid JSON with this shape:\n   {\n     \"id\": \"<unique_call_id>\",\n     \"type\": \"function\",\n     \"function\": {\n       \"name\": \"<tool_name>\",\n       \"arguments\": { ... }\n     }\n   }\n   \"arguments\" MUST be valid JSON. Use the correct parameter names and types.\n3. After the tool(s) run, you will receive one or more <tool_response>...</tool_response> messages.\n   Call another tool or respond as assistant, using the tool results.\n4. If no tool is required, answer directly in natural language.\n\nAVAILABLE TOOLS:\n{%- if tools is defined and tools|length > 0 %}\n{%- for t in tools %}\n- {{ t[\"function\"][\"name\"] }} :\n  {{ t[\"function\"][\"description\"] }}\n  params: {{ t[\"function\"][\"parameters\"] | tojson }}\n{%- endfor %}\n{%- else %}\n(no tools provided)\n{%- endif %}\n\nYou must follow the protocol exactly.\n<|im_end|>\n\n{# ---------- REST OF MESSAGES ---------- #}\n{%- for m in (messages[1:] if first_is_system else messages) %}\n\n{# Assistant turn that issues tool calls #}\n{%- if m['role'] == 'assistant' and ('tool_calls' in m) and m['tool_calls'] %}\n<|im_start|>assistant\n{%- for c in m['tool_calls'] %}\n<tool_call>{{ {\n  \"id\": c[\"id\"],\n  \"type\": c.get(\"type\", \"function\"),\n  \"function\": {\n    \"name\": c[\"function\"][\"name\"],\n    \"arguments\": (\n        c[\"function\"][\"arguments\"]\n        if c[\"function\"][\"arguments\"] is not none\n        else {}\n    )\n  }\n} | tojson }}</tool_call>\n{%- endfor %}\n<|im_end|>\n\n{# Tool result turn #}\n{%- elif m['role'] == 'tool' %}\n<|im_start|>user\n<tool_response{% if m.get('tool_call_id') %} id=\"{{ m['tool_call_id'] }}\"{% endif %}>\n{{ m['content'] }}\n</tool_response>\n<|im_end|>\n\n{# Normal assistant / user / other turns #}\n{%- else %}\n<|im_start|>{{ m['role'] }}\n{{ m['content'] }}\n<|im_end|>\n{%- endif %}\n\n{%- endfor %}\n\n{# ---------- GENERATION PROMPT ---------- #}\n{%- if add_generation_prompt %}\n<|im_start|>assistant\n{%- endif %}\n",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "model_max_length": 8192,
  "pad_token": "<|im_end|>",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>",
  "vocab_size": 49152
}