Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Chat Templates","local":"chat-templates","sections":[{"title":"Introduction","local":"introduction","sections":[],"depth":2},{"title":"How do I use chat templates?","local":"how-do-i-use-chat-templates","sections":[],"depth":2},{"title":"Is there an automated pipeline for chat?","local":"is-there-an-automated-pipeline-for-chat","sections":[],"depth":2},{"title":"What are “generation prompts”?","local":"what-are-generation-prompts","sections":[],"depth":2},{"title":"What does “continue_final_message” do?","local":"what-does-continuefinalmessage-do","sections":[],"depth":2},{"title":"Can I use chat templates in training?","local":"can-i-use-chat-templates-in-training","sections":[],"depth":2},{"title":"Advanced: Extra inputs to chat templates","local":"advanced-extra-inputs-to-chat-templates","sections":[],"depth":2},{"title":"Advanced: Tool use / function calling","local":"advanced-tool-use--function-calling","sections":[{"title":"Passing tool results to the model","local":"passing-tool-results-to-the-model","sections":[],"depth":3},{"title":"A complete tool use example","local":"a-complete-tool-use-example","sections":[],"depth":3},{"title":"Understanding tool schemas","local":"understanding-tool-schemas","sections":[],"depth":3}],"depth":2},{"title":"Advanced: Retrieval-augmented generation","local":"advanced-retrieval-augmented-generation","sections":[],"depth":2},{"title":"Advanced: How do chat templates work?","local":"advanced-how-do-chat-templates-work","sections":[],"depth":2},{"title":"Advanced: Adding and editing chat templates","local":"advanced-adding-and-editing-chat-templates","sections":[{"title":"How do I create a chat template?","local":"how-do-i-create-a-chat-template","sections":[],"depth":3},{"title":"Why do some models have multiple templates?","local":"why-do-some-models-have-multiple-templates","sections":[],"depth":3},{"title":"What template should I use?","local":"what-template-should-i-use","sections":[],"depth":3},{"title":"I want to add some chat templates! How should I get started?","local":"i-want-to-add-some-chat-templates-how-should-i-get-started","sections":[],"depth":3}],"depth":2},{"title":"Advanced: Template writing tips","local":"advanced-template-writing-tips","sections":[{"title":"Trimming whitespace","local":"trimming-whitespace","sections":[],"depth":3},{"title":"Special variables","local":"special-variables","sections":[],"depth":3},{"title":"Callable functions","local":"callable-functions","sections":[],"depth":3},{"title":"Compatibility with non-Python Jinja","local":"compatibility-with-non-python-jinja","sections":[],"depth":3},{"title":"Writing generation prompts","local":"writing-generation-prompts","sections":[],"depth":3},{"title":"Writing and debugging larger templates","local":"writing-and-debugging-larger-templates","sections":[],"depth":3},{"title":"Writing templates for tools","local":"writing-templates-for-tools","sections":[{"title":"Tool definitions","local":"tool-definitions","sections":[],"depth":4},{"title":"Tool calls","local":"tool-calls","sections":[],"depth":4},{"title":"Tool responses","local":"tool-responses","sections":[],"depth":4}],"depth":3}],"depth":2}],"depth":1}"> | |
| <link href="/docs/transformers/pr_33913/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/entry/start.b67f883f.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/scheduler.25b97de1.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/singletons.62a184e0.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/index.e188933d.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/paths.51881b9e.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/entry/app.e436b1f2.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/index.d9030fc9.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/nodes/0.05e395f5.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/nodes/12.a1d46de6.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/Tip.baa67368.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/CodeBlock.e6cd0d95.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_33913/en/_app/immutable/chunks/EditOnGithub.91d95064.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Chat Templates","local":"chat-templates","sections":[{"title":"Introduction","local":"introduction","sections":[],"depth":2},{"title":"How do I use chat templates?","local":"how-do-i-use-chat-templates","sections":[],"depth":2},{"title":"Is there an automated pipeline for chat?","local":"is-there-an-automated-pipeline-for-chat","sections":[],"depth":2},{"title":"What are “generation prompts”?","local":"what-are-generation-prompts","sections":[],"depth":2},{"title":"What does “continue_final_message” do?","local":"what-does-continuefinalmessage-do","sections":[],"depth":2},{"title":"Can I use chat templates in training?","local":"can-i-use-chat-templates-in-training","sections":[],"depth":2},{"title":"Advanced: Extra inputs to chat templates","local":"advanced-extra-inputs-to-chat-templates","sections":[],"depth":2},{"title":"Advanced: Tool use / function calling","local":"advanced-tool-use--function-calling","sections":[{"title":"Passing tool results to the model","local":"passing-tool-results-to-the-model","sections":[],"depth":3},{"title":"A complete tool use example","local":"a-complete-tool-use-example","sections":[],"depth":3},{"title":"Understanding tool schemas","local":"understanding-tool-schemas","sections":[],"depth":3}],"depth":2},{"title":"Advanced: Retrieval-augmented generation","local":"advanced-retrieval-augmented-generation","sections":[],"depth":2},{"title":"Advanced: How do chat templates work?","local":"advanced-how-do-chat-templates-work","sections":[],"depth":2},{"title":"Advanced: Adding and editing chat templates","local":"advanced-adding-and-editing-chat-templates","sections":[{"title":"How do I create a chat template?","local":"how-do-i-create-a-chat-template","sections":[],"depth":3},{"title":"Why do some models have multiple templates?","local":"why-do-some-models-have-multiple-templates","sections":[],"depth":3},{"title":"What template should I use?","local":"what-template-should-i-use","sections":[],"depth":3},{"title":"I want to add some chat templates! How should I get started?","local":"i-want-to-add-some-chat-templates-how-should-i-get-started","sections":[],"depth":3}],"depth":2},{"title":"Advanced: Template writing tips","local":"advanced-template-writing-tips","sections":[{"title":"Trimming whitespace","local":"trimming-whitespace","sections":[],"depth":3},{"title":"Special variables","local":"special-variables","sections":[],"depth":3},{"title":"Callable functions","local":"callable-functions","sections":[],"depth":3},{"title":"Compatibility with non-Python Jinja","local":"compatibility-with-non-python-jinja","sections":[],"depth":3},{"title":"Writing generation prompts","local":"writing-generation-prompts","sections":[],"depth":3},{"title":"Writing and debugging larger templates","local":"writing-and-debugging-larger-templates","sections":[],"depth":3},{"title":"Writing templates for tools","local":"writing-templates-for-tools","sections":[{"title":"Tool definitions","local":"tool-definitions","sections":[],"depth":4},{"title":"Tool calls","local":"tool-calls","sections":[],"depth":4},{"title":"Tool responses","local":"tool-responses","sections":[],"depth":4}],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="chat-templates" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#chat-templates"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Chat Templates</span></h1> <h2 class="relative group"><a id="introduction" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#introduction"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Introduction</span></h2> <p data-svelte-h="svelte-ydi30o">An increasingly common use case for LLMs is <strong>chat</strong>. In a chat context, rather than continuing a single string | |
| of text (as is the case with a standard language model), the model instead continues a conversation that consists | |
| of one or more <strong>messages</strong>, each of which includes a <strong>role</strong>, like “user” or “assistant”, as well as message text.</p> <p data-svelte-h="svelte-1p8dq8">Much like tokenization, different models expect very different input formats for chat. This is the reason we added | |
| <strong>chat templates</strong> as a feature. Chat templates are part of the tokenizer. They specify how to convert conversations, | |
| represented as lists of messages, into a single tokenizable string in the format that the model expects.</p> <p data-svelte-h="svelte-19ob0qf">Let’s make this concrete with a quick example using the <code>mistralai/Mistral-7B-Instruct-v0.1</code> model:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"mistralai/Mistral-7B-Instruct-v0.1"</span>) | |
| <span class="hljs-meta">>>> </span>chat = [ | |
| <span class="hljs-meta">... </span> {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Hello, how are you?"</span>}, | |
| <span class="hljs-meta">... </span> {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'm doing great. How can I help you today?"</span>}, | |
| <span class="hljs-meta">... </span> {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'd like to show off how chat templating works!"</span>}, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>tokenizer.apply_chat_template(chat, tokenize=<span class="hljs-literal">False</span>) | |
| <span class="hljs-string">"<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-gwj0xd">Notice how the tokenizer has added the control tokens [INST] and [/INST] to indicate the start and end of | |
| user messages (but not assistant messages!), and the entire chat is condensed into a single string. | |
| If we use <code>tokenize=True</code>, which is the default setting, that string will also be tokenized for us.</p> <p data-svelte-h="svelte-14wqu22">Now, try the same code, but swap in the <code>HuggingFaceH4/zephyr-7b-beta</code> model instead, and you should get:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><|user|> | |
| Hello, how are you?</s> | |
| <|assistant|> | |
| I'm doing great. How can I help you today?</s> | |
| <|user|> | |
| I'd like to show off how chat templating works!</s><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1010uaq">Both Zephyr and Mistral-Instruct were fine-tuned from the same base model, <code>Mistral-7B-v0.1</code>. However, they were trained | |
| with totally different chat formats. Without chat templates, you would have to write manual formatting code for each | |
| model, and it’s very easy to make minor errors that hurt performance! Chat templates handle the details of formatting | |
| for you, allowing you to write universal code that works for any model.</p> <h2 class="relative group"><a id="how-do-i-use-chat-templates" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-do-i-use-chat-templates"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How do I use chat templates?</span></h2> <p data-svelte-h="svelte-1bwod97">As you can see in the example above, chat templates are easy to use. Simply build a list of messages, with <code>role</code> | |
| and <code>content</code> keys, and then pass it to the <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a> method. Once you do that, | |
| you’ll get output that’s ready to go! When using chat templates as input for model generation, it’s also a good idea | |
| to use <code>add_generation_prompt=True</code> to add a <a href="#what-are-generation-prompts">generation prompt</a>.</p> <p data-svelte-h="svelte-1vun9x2">Here’s an example of preparing input for <code>model.generate()</code>, using <code>Zephyr</code> again:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| checkpoint = <span class="hljs-string">"HuggingFaceH4/zephyr-7b-beta"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| model = AutoModelForCausalLM.from_pretrained(checkpoint) <span class="hljs-comment"># You may want to use bfloat16 and/or move to GPU here</span> | |
| messages = [ | |
| { | |
| <span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, | |
| <span class="hljs-string">"content"</span>: <span class="hljs-string">"You are a friendly chatbot who always responds in the style of a pirate"</span>, | |
| }, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"How many helicopters can a human eat in one sitting?"</span>}, | |
| ] | |
| tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=<span class="hljs-literal">True</span>, add_generation_prompt=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-built_in">print</span>(tokenizer.decode(tokenized_chat[<span class="hljs-number">0</span>]))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1vy7akj">This will yield a string in the input format that Zephyr expects.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><|system|> | |
| You are a friendly chatbot who always responds in the style of a pirate</s> | |
| <|user|> | |
| How many helicopters can a human eat in one sitting?</s> | |
| <|assistant|><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-hj60o5">Now that our input is formatted correctly for Zephyr, we can use the model to generate a response to the user’s question:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->outputs = model.generate(tokenized_chat, max_new_tokens=<span class="hljs-number">128</span>) | |
| <span class="hljs-built_in">print</span>(tokenizer.decode(outputs[<span class="hljs-number">0</span>]))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1bfcqd3">This will yield:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><|system|> | |
| You are a friendly chatbot who always responds in the style of a pirate</s> | |
| <|user|> | |
| How many helicopters can a human eat in one sitting?</s> | |
| <|assistant|> | |
| Matey, I'm afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o' grog, a savory bowl o' stew, or a delicious loaf o' bread. But helicopters, they be for transportin' and movin' around, not for eatin'. So, I'd say none, me hearties. None at all.<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-k9m2iy">Arr, ‘twas easy after all!</p> <h2 class="relative group"><a id="is-there-an-automated-pipeline-for-chat" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#is-there-an-automated-pipeline-for-chat"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Is there an automated pipeline for chat?</span></h2> <p data-svelte-h="svelte-cu9bqa">Yes, there is! Our text generation pipelines support chat inputs, which makes it easy to use chat models. In the past, | |
| we used to use a dedicated “ConversationalPipeline” class, but this has now been deprecated and its functionality | |
| has been merged into the <a href="/docs/transformers/pr_33913/en/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a>. Let’s try the <code>Zephyr</code> example again, but this time using | |
| a pipeline:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| pipe = pipeline(<span class="hljs-string">"text-generation"</span>, <span class="hljs-string">"HuggingFaceH4/zephyr-7b-beta"</span>) | |
| messages = [ | |
| { | |
| <span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, | |
| <span class="hljs-string">"content"</span>: <span class="hljs-string">"You are a friendly chatbot who always responds in the style of a pirate"</span>, | |
| }, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"How many helicopters can a human eat in one sitting?"</span>}, | |
| ] | |
| <span class="hljs-built_in">print</span>(pipe(messages, max_new_tokens=<span class="hljs-number">128</span>)[<span class="hljs-number">0</span>][<span class="hljs-string">'generated_text'</span>][-<span class="hljs-number">1</span>]) <span class="hljs-comment"># Print the assistant's response</span><!-- HTML_TAG_END --></pre></div> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{'role': 'assistant', 'content': "Matey, I'm afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o' grog, a savory bowl o' stew, or a delicious loaf o' bread. But helicopters, they be for transportin' and movin' around, not for eatin'. So, I'd say none, me hearties. None at all."}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-5umvde">The pipeline will take care of all the details of tokenization and calling <code>apply_chat_template</code> for you - | |
| once the model has a chat template, all you need to do is initialize the pipeline and pass it the list of messages!</p> <h2 class="relative group"><a id="what-are-generation-prompts" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-are-generation-prompts"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What are “generation prompts”?</span></h2> <p data-svelte-h="svelte-rhnu79">You may have noticed that the <code>apply_chat_template</code> method has an <code>add_generation_prompt</code> argument. This argument tells | |
| the template to add tokens that indicate the start of a bot response. For example, consider the following chat:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->messages = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Hi there!"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Nice to meet you!"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Can I ask a question?"</span>} | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-jpbe39">Here’s what this will look like without a generation prompt, for a model that uses standard “ChatML” formatting:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer.apply_chat_template(messages, tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">False</span>) | |
| <span class="hljs-string">"""<|im_start|>user | |
| Hi there!<|im_end|> | |
| <|im_start|>assistant | |
| Nice to meet you!<|im_end|> | |
| <|im_start|>user | |
| Can I ask a question?<|im_end|> | |
| """</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-61bp3d">And here’s what it looks like <strong>with</strong> a generation prompt:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer.apply_chat_template(messages, tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">True</span>) | |
| <span class="hljs-string">"""<|im_start|>user | |
| Hi there!<|im_end|> | |
| <|im_start|>assistant | |
| Nice to meet you!<|im_end|> | |
| <|im_start|>user | |
| Can I ask a question?<|im_end|> | |
| <|im_start|>assistant | |
| """</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-inq88f">Note that this time, we’ve added the tokens that indicate the start of a bot response. This ensures that when the model | |
| generates text it will write a bot response instead of doing something unexpected, like continuing the user’s | |
| message. Remember, chat models are still just language models - they’re trained to continue text, and chat is just a | |
| special kind of text to them! You need to guide them with appropriate control tokens, so they know what they’re | |
| supposed to be doing.</p> <p data-svelte-h="svelte-uz30n4">Not all models require generation prompts. Some models, like LLaMA, don’t have any | |
| special tokens before bot responses. In these cases, the <code>add_generation_prompt</code> argument will have no effect. The exact | |
| effect that <code>add_generation_prompt</code> has will depend on the template being used.</p> <h2 class="relative group"><a id="what-does-continuefinalmessage-do" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-does-continuefinalmessage-do"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What does “continue_final_message” do?</span></h2> <p data-svelte-h="svelte-y1327w">When passing a list of messages to <code>apply_chat_template</code> or <code>TextGenerationPipeline</code>, you can choose | |
| to format the chat so the model will continue the final message in the chat instead of starting a new one. This is done | |
| by removing any end-of-sequence tokens that indicate the end of the final message, so that the model will simply | |
| extend the final message when it begins to generate text. This is useful for “prefilling” the model’s response.</p> <p data-svelte-h="svelte-wwwyth">Here’s an example:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->chat = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Can you format the answer in JSON?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">'{"name": "'</span>}, | |
| ] | |
| formatted_chat = tokenizer.apply_chat_template(chat, tokenize=<span class="hljs-literal">True</span>, return_dict=<span class="hljs-literal">True</span>, continue_final_message=<span class="hljs-literal">True</span>) | |
| model.generate(**formatted_chat)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-j6m3wj">The model will generate text that continues the JSON string, rather than starting a new message. This approach | |
| can be very useful for improving the accuracy of the model’s instruction-following when you know how you want | |
| it to start its replies.</p> <p data-svelte-h="svelte-hfpwig">Because <code>add_generation_prompt</code> adds the tokens that start a new message, and <code>continue_final_message</code> removes any | |
| end-of-message tokens from the final message, it does not make sense to use them together. As a result, you’ll | |
| get an error if you try!</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-95bbxf">The default behaviour of <code>TextGenerationPipeline</code> is to set <code>add_generation_prompt=True</code> so that it starts a new | |
| message. However, if the final message in the input chat has the “assistant” role, it will assume that this message is | |
| a prefill and switch to <code>continue_final_message=True</code> instead, because most models do not support multiple | |
| consecutive assistant messages. You can override this behaviour by explicitly passing the <code>continue_final_message</code> | |
| argument when calling the pipeline.</p></div> <h2 class="relative group"><a id="can-i-use-chat-templates-in-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#can-i-use-chat-templates-in-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Can I use chat templates in training?</span></h2> <p data-svelte-h="svelte-i3l9c1">Yes! This is a good way to ensure that the chat template matches the tokens the model sees during training. | |
| We recommend that you apply the chat template as a preprocessing step for your dataset. After this, you | |
| can simply continue like any other language model training task. When training, you should usually set | |
| <code>add_generation_prompt=False</code>, because the added tokens to prompt an assistant response will not be helpful during | |
| training. Let’s see an example:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"HuggingFaceH4/zephyr-7b-beta"</span>) | |
| chat1 = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Which is bigger, the moon or the sun?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"The sun."</span>} | |
| ] | |
| chat2 = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Which is bigger, a virus or a bacterium?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"A bacterium."</span>} | |
| ] | |
| dataset = Dataset.from_dict({<span class="hljs-string">"chat"</span>: [chat1, chat2]}) | |
| dataset = dataset.<span class="hljs-built_in">map</span>(<span class="hljs-keyword">lambda</span> x: {<span class="hljs-string">"formatted_chat"</span>: tokenizer.apply_chat_template(x[<span class="hljs-string">"chat"</span>], tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">False</span>)}) | |
| <span class="hljs-built_in">print</span>(dataset[<span class="hljs-string">'formatted_chat'</span>][<span class="hljs-number">0</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13505nn">And we get:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><|user|> | |
| Which is bigger, the moon or the sun?</s> | |
| <|assistant|> | |
| The sun.</s><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ziuqkt">From here, just continue training like you would with a standard language modelling task, using the <code>formatted_chat</code> column.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-148xjo3">By default, some tokenizers add special tokens like <code><bos></code> and <code><eos></code> to text they tokenize. Chat templates should | |
| already include all the special tokens they need, and so additional special tokens will often be incorrect or | |
| duplicated, which will hurt model performance.</p> <p data-svelte-h="svelte-1hgzema">Therefore, if you format text with <code>apply_chat_template(tokenize=False)</code>, you should set the argument | |
| <code>add_special_tokens=False</code> when you tokenize that text later. If you use <code>apply_chat_template(tokenize=True)</code>, you don’t need to worry about this!</p></div> <h2 class="relative group"><a id="advanced-extra-inputs-to-chat-templates" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-extra-inputs-to-chat-templates"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: Extra inputs to chat templates</span></h2> <p data-svelte-h="svelte-dd615e">The only argument that <code>apply_chat_template</code> requires is <code>messages</code>. However, you can pass any keyword | |
| argument to <code>apply_chat_template</code> and it will be accessible inside the template. This gives you a lot of freedom to use | |
| chat templates for many things. There are no restrictions on the names or the format of these arguments - you can pass | |
| strings, lists, dicts or whatever else you want.</p> <p data-svelte-h="svelte-dcun4m">That said, there are some common use-cases for these extra arguments, | |
| such as passing tools for function calling, or documents for retrieval-augmented generation. In these common cases, | |
| we have some opinionated recommendations about what the names and formats of these arguments should be, which are | |
| described in the sections below. We encourage model authors to make their chat templates compatible with this format, | |
| to make it easy to transfer tool-calling code between models.</p> <h2 class="relative group"><a id="advanced-tool-use--function-calling" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-tool-use--function-calling"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: Tool use / function calling</span></h2> <p data-svelte-h="svelte-6sd0wq">“Tool use” LLMs can choose to call functions as external tools before generating an answer. When passing tools | |
| to a tool-use model, you can simply pass a list of functions to the <code>tools</code> argument:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> datetime | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">current_time</span>(): | |
| <span class="hljs-string">"""Get the current local time as a string."""</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-built_in">str</span>(datetime.now()) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">multiply</span>(<span class="hljs-params">a: <span class="hljs-built_in">float</span>, b: <span class="hljs-built_in">float</span></span>): | |
| <span class="hljs-string">""" | |
| A function that multiplies two numbers | |
| Args: | |
| a: The first number to multiply | |
| b: The second number to multiply | |
| """</span> | |
| <span class="hljs-keyword">return</span> a * b | |
| tools = [current_time, multiply] | |
| model_input = tokenizer.apply_chat_template( | |
| messages, | |
| tools=tools | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-608o9m">In order for this to work correctly, you should write your functions in the format above, so that they can be parsed | |
| correctly as tools. Specifically, you should follow these rules:</p> <ul data-svelte-h="svelte-n1b3zm"><li>The function should have a descriptive name</li> <li>Every argument must have a type hint</li> <li>The function must have a docstring in the standard Google style (in other words, an initial function description<br> | |
| followed by an <code>Args:</code> block that describes the arguments, unless the function does not have any arguments.</li> <li>Do not include types in the <code>Args:</code> block. In other words, write <code>a: The first number to multiply</code>, not | |
| <code>a (int): The first number to multiply</code>. Type hints should go in the function header instead.</li> <li>The function can have a return type and a <code>Returns:</code> block in the docstring. However, these are optional | |
| because most tool-use models ignore them.</li></ul> <h3 class="relative group"><a id="passing-tool-results-to-the-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#passing-tool-results-to-the-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Passing tool results to the model</span></h3> <p data-svelte-h="svelte-11962fa">The sample code above is enough to list the available tools for your model, but what happens if it wants to actually use | |
| one? If that happens, you should:</p> <ol data-svelte-h="svelte-1vd84s7"><li>Parse the model’s output to get the tool name(s) and arguments.</li> <li>Add the model’s tool call(s) to the conversation.</li> <li>Call the corresponding function(s) with those arguments.</li> <li>Add the result(s) to the conversation</li></ol> <h3 class="relative group"><a id="a-complete-tool-use-example" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#a-complete-tool-use-example"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>A complete tool use example</span></h3> <p data-svelte-h="svelte-1oi0gsn">Let’s walk through a tool use example, step by step. For this example, we will use an 8B <code>Hermes-2-Pro</code> model, | |
| as it is one of the highest-performing tool-use models in its size category at the time of writing. If you have the | |
| memory, you can consider using a larger model instead like <a href="https://huggingface.co/CohereForAI/c4ai-command-r-v01" rel="nofollow">Command-R</a> | |
| or <a href="https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1" rel="nofollow">Mixtral-8x22B</a>, both of which also support tool use | |
| and offer even stronger performance.</p> <p data-svelte-h="svelte-o8n6v4">First, let’s load our model and tokenizer:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| checkpoint = <span class="hljs-string">"NousResearch/Hermes-2-Pro-Llama-3-8B"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16, device_map=<span class="hljs-string">"auto"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1q7358y">Next, let’s define a list of tools:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">get_current_temperature</span>(<span class="hljs-params">location: <span class="hljs-built_in">str</span>, unit: <span class="hljs-built_in">str</span></span>) -> <span class="hljs-built_in">float</span>: | |
| <span class="hljs-string">""" | |
| Get the current temperature at a location. | |
| Args: | |
| location: The location to get the temperature for, in the format "City, Country" | |
| unit: The unit to return the temperature in. (choices: ["celsius", "fahrenheit"]) | |
| Returns: | |
| The current temperature at the specified location in the specified units, as a float. | |
| """</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-number">22.</span> <span class="hljs-comment"># A real function should probably actually get the temperature!</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">get_current_wind_speed</span>(<span class="hljs-params">location: <span class="hljs-built_in">str</span></span>) -> <span class="hljs-built_in">float</span>: | |
| <span class="hljs-string">""" | |
| Get the current wind speed in km/h at a given location. | |
| Args: | |
| location: The location to get the temperature for, in the format "City, Country" | |
| Returns: | |
| The current wind speed at the given location in km/h, as a float. | |
| """</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-number">6.</span> <span class="hljs-comment"># A real function should probably actually get the wind speed!</span> | |
| tools = [get_current_temperature, get_current_wind_speed]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11hfyaa">Now, let’s set up a conversation for our bot:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->messages = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"You are a bot that responds to weather queries. You should reply with the unit used in the queried location."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Hey, what's the temperature in Paris right now?"</span>} | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1usrd3e">Now, let’s apply the chat template and generate a response:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->inputs = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=<span class="hljs-literal">True</span>, return_dict=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| inputs = {k: v.to(model.device) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> inputs.items()} | |
| out = model.generate(**inputs, max_new_tokens=<span class="hljs-number">128</span>) | |
| <span class="hljs-built_in">print</span>(tokenizer.decode(out[<span class="hljs-number">0</span>][<span class="hljs-built_in">len</span>(inputs[<span class="hljs-string">"input_ids"</span>][<span class="hljs-number">0</span>]):]))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13505nn">And we get:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><tool_call> | |
| {"arguments": {"location": "Paris, France", "unit": "celsius"}, "name": "get_current_temperature"} | |
| </tool_call><|im_end|><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-nxltbo">The model has called the function with valid arguments, in the format requested by the function docstring. It has | |
| inferred that we’re most likely referring to the Paris in France, and it remembered that, as the home of SI units, | |
| the temperature in France should certainly be displayed in Celsius.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1wfdwuk">The output format above is specific to the <code>Hermes-2-Pro</code> model we’re using in this example. Other models may emit different | |
| tool call formats, and you may need to do some manual parsing at this step. For example, <code>Llama-3.1</code> models will emit | |
| slightly different JSON, with <code>parameters</code> instead of <code>arguments</code>. Regardless of the format the model outputs, you | |
| should add the tool call to the conversation in the format below, with <code>tool_calls</code>, <code>function</code> and <code>arguments</code> keys.</p></div> <p data-svelte-h="svelte-336ooj">Next, let’s append the model’s tool call to the conversation.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tool_call = {<span class="hljs-string">"name"</span>: <span class="hljs-string">"get_current_temperature"</span>, <span class="hljs-string">"arguments"</span>: {<span class="hljs-string">"location"</span>: <span class="hljs-string">"Paris, France"</span>, <span class="hljs-string">"unit"</span>: <span class="hljs-string">"celsius"</span>}} | |
| messages.append({<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"tool_calls"</span>: [{<span class="hljs-string">"type"</span>: <span class="hljs-string">"function"</span>, <span class="hljs-string">"function"</span>: tool_call}]})<!-- HTML_TAG_END --></pre></div> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-fq11ea">If you’re familiar with the OpenAI API, you should pay attention to an important difference here - the <code>tool_call</code> is | |
| a dict, but in the OpenAI API it’s a JSON string. Passing a string may cause errors or strange model behaviour!</p></div> <p data-svelte-h="svelte-1mv1vl9">Now that we’ve added the tool call to the conversation, we can call the function and append the result to the | |
| conversation. Since we’re just using a dummy function for this example that always returns 22.0, we can just append | |
| that result directly.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->messages.append({<span class="hljs-string">"role"</span>: <span class="hljs-string">"tool"</span>, <span class="hljs-string">"name"</span>: <span class="hljs-string">"get_current_temperature"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"22.0"</span>})<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-70hqps">Some model architectures, notably Mistral/Mixtral, also require a <code>tool_call_id</code> here, which should be | |
| 9 randomly-generated alphanumeric characters, and assigned to the <code>id</code> key of the tool call | |
| dictionary. The same key should also be assigned to the <code>tool_call_id</code> key of the tool response dictionary below, so | |
| that tool calls can be matched to tool responses. So, for Mistral/Mixtral models, the code above would be:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tool_call_id = <span class="hljs-string">"9Ae3bDc2F"</span> <span class="hljs-comment"># Random ID, 9 alphanumeric characters</span> | |
| tool_call = {<span class="hljs-string">"name"</span>: <span class="hljs-string">"get_current_temperature"</span>, <span class="hljs-string">"arguments"</span>: {<span class="hljs-string">"location"</span>: <span class="hljs-string">"Paris, France"</span>, <span class="hljs-string">"unit"</span>: <span class="hljs-string">"celsius"</span>}} | |
| messages.append({<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"tool_calls"</span>: [{<span class="hljs-string">"type"</span>: <span class="hljs-string">"function"</span>, <span class="hljs-string">"id"</span>: tool_call_id, <span class="hljs-string">"function"</span>: tool_call}]})<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qlona5">and</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->messages.append({<span class="hljs-string">"role"</span>: <span class="hljs-string">"tool"</span>, <span class="hljs-string">"tool_call_id"</span>: tool_call_id, <span class="hljs-string">"name"</span>: <span class="hljs-string">"get_current_temperature"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"22.0"</span>})<!-- HTML_TAG_END --></pre></div></div> <p data-svelte-h="svelte-1qjybqz">Finally, let’s let the assistant read the function outputs and continue chatting with the user:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->inputs = tokenizer.apply_chat_template(messages, tools=tools, add_generation_prompt=<span class="hljs-literal">True</span>, return_dict=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| inputs = {k: v.to(model.device) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> inputs.items()} | |
| out = model.generate(**inputs, max_new_tokens=<span class="hljs-number">128</span>) | |
| <span class="hljs-built_in">print</span>(tokenizer.decode(out[<span class="hljs-number">0</span>][<span class="hljs-built_in">len</span>(inputs[<span class="hljs-string">"input_ids"</span>][<span class="hljs-number">0</span>]):]))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13505nn">And we get:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->The current temperature in Paris, France is 22.0 ° Celsius.<|im_end|><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1evxmus">Although this was a simple demo with dummy tools and a single call, the same technique works with | |
| multiple real tools and longer conversations. This can be a powerful way to extend the capabilities of conversational | |
| agents with real-time information, computational tools like calculators, or access to large databases.</p> <h3 class="relative group"><a id="understanding-tool-schemas" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#understanding-tool-schemas"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Understanding tool schemas</span></h3> <p data-svelte-h="svelte-pl4mbs">Each function you pass to the <code>tools</code> argument of <code>apply_chat_template</code> is converted into a | |
| <a href="https://json-schema.org/learn/getting-started-step-by-step" rel="nofollow">JSON schema</a>. These schemas | |
| are then passed to the model chat template. In other words, tool-use models do not see your functions directly, and they | |
| never see the actual code inside them. What they care about is the function <strong>definitions</strong> and the <strong>arguments</strong> they | |
| need to pass to them - they care about what the tools do and how to use them, not how they work! It is up to you | |
| to read their outputs, detect if they have requested to use a tool, pass their arguments to the tool function, and | |
| return the response in the chat.</p> <p data-svelte-h="svelte-37xmdz">Generating JSON schemas to pass to the template should be automatic and invisible as long as your functions | |
| follow the specification above, but if you encounter problems, or you simply want more control over the conversion, | |
| you can handle the conversion manually. Here is an example of a manual schema conversion.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers.utils <span class="hljs-keyword">import</span> get_json_schema | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">multiply</span>(<span class="hljs-params">a: <span class="hljs-built_in">float</span>, b: <span class="hljs-built_in">float</span></span>): | |
| <span class="hljs-string">""" | |
| A function that multiplies two numbers | |
| Args: | |
| a: The first number to multiply | |
| b: The second number to multiply | |
| """</span> | |
| <span class="hljs-keyword">return</span> a * b | |
| schema = get_json_schema(multiply) | |
| <span class="hljs-built_in">print</span>(schema)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1bfcqd3">This will yield:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"function"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"function"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"multiply"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"description"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"A function that multiplies two numbers"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"parameters"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"object"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"properties"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"a"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"number"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"description"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"The first number to multiply"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"b"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"number"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"description"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"The second number to multiply"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"required"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">"a"</span><span class="hljs-punctuation">,</span> <span class="hljs-string">"b"</span><span class="hljs-punctuation">]</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19t6fs5">If you wish, you can edit these schemas, or even write them from scratch yourself without using <code>get_json_schema</code> at | |
| all. JSON schemas can be passed directly to the <code>tools</code> argument of | |
| <code>apply_chat_template</code> - this gives you a lot of power to define precise schemas for more complex functions. Be careful, | |
| though - the more complex your schemas, the more likely the model is to get confused when dealing with them! We | |
| recommend simple function signatures where possible, keeping arguments (and especially complex, nested arguments) | |
| to a minimum.</p> <p data-svelte-h="svelte-1nlyrys">Here is an example of defining schemas by hand, and passing them directly to <code>apply_chat_template</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># A simple function that takes no arguments</span> | |
| current_time = { | |
| <span class="hljs-string">"type"</span>: <span class="hljs-string">"function"</span>, | |
| <span class="hljs-string">"function"</span>: { | |
| <span class="hljs-string">"name"</span>: <span class="hljs-string">"current_time"</span>, | |
| <span class="hljs-string">"description"</span>: <span class="hljs-string">"Get the current local time as a string."</span>, | |
| <span class="hljs-string">"parameters"</span>: { | |
| <span class="hljs-string">'type'</span>: <span class="hljs-string">'object'</span>, | |
| <span class="hljs-string">'properties'</span>: {} | |
| } | |
| } | |
| } | |
| <span class="hljs-comment"># A more complete function that takes two numerical arguments</span> | |
| multiply = { | |
| <span class="hljs-string">'type'</span>: <span class="hljs-string">'function'</span>, | |
| <span class="hljs-string">'function'</span>: { | |
| <span class="hljs-string">'name'</span>: <span class="hljs-string">'multiply'</span>, | |
| <span class="hljs-string">'description'</span>: <span class="hljs-string">'A function that multiplies two numbers'</span>, | |
| <span class="hljs-string">'parameters'</span>: { | |
| <span class="hljs-string">'type'</span>: <span class="hljs-string">'object'</span>, | |
| <span class="hljs-string">'properties'</span>: { | |
| <span class="hljs-string">'a'</span>: { | |
| <span class="hljs-string">'type'</span>: <span class="hljs-string">'number'</span>, | |
| <span class="hljs-string">'description'</span>: <span class="hljs-string">'The first number to multiply'</span> | |
| }, | |
| <span class="hljs-string">'b'</span>: { | |
| <span class="hljs-string">'type'</span>: <span class="hljs-string">'number'</span>, <span class="hljs-string">'description'</span>: <span class="hljs-string">'The second number to multiply'</span> | |
| } | |
| }, | |
| <span class="hljs-string">'required'</span>: [<span class="hljs-string">'a'</span>, <span class="hljs-string">'b'</span>] | |
| } | |
| } | |
| } | |
| model_input = tokenizer.apply_chat_template( | |
| messages, | |
| tools = [current_time, multiply] | |
| )<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="advanced-retrieval-augmented-generation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-retrieval-augmented-generation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: Retrieval-augmented generation</span></h2> <p data-svelte-h="svelte-1977j4z">“Retrieval-augmented generation” or “RAG” LLMs can search a corpus of documents for information before responding | |
| to a query. This allows models to vastly expand their knowledge base beyond their limited context size. Our | |
| recommendation for RAG models is that their template | |
| should accept a <code>documents</code> argument. This should be a list of documents, where each “document” | |
| is a single dict with <code>title</code> and <code>contents</code> keys, both of which are strings. Because this format is much simpler | |
| than the JSON schemas used for tools, no helper functions are necessary.</p> <p data-svelte-h="svelte-1xmnzcc">Here’s an example of a RAG template in action:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForCausalLM | |
| <span class="hljs-comment"># Load the model and tokenizer</span> | |
| model_id = <span class="hljs-string">"CohereForAI/c4ai-command-r-v01-4bit"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(model_id) | |
| model = AutoModelForCausalLM.from_pretrained(model_id, device_map=<span class="hljs-string">"auto"</span>) | |
| device = model.device <span class="hljs-comment"># Get the device the model is loaded on</span> | |
| <span class="hljs-comment"># Define conversation input</span> | |
| conversation = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What has Man always dreamed of?"</span>} | |
| ] | |
| <span class="hljs-comment"># Define documents for retrieval-based generation</span> | |
| documents = [ | |
| { | |
| <span class="hljs-string">"title"</span>: <span class="hljs-string">"The Moon: Our Age-Old Foe"</span>, | |
| <span class="hljs-string">"text"</span>: <span class="hljs-string">"Man has always dreamed of destroying the moon. In this essay, I shall..."</span> | |
| }, | |
| { | |
| <span class="hljs-string">"title"</span>: <span class="hljs-string">"The Sun: Our Age-Old Friend"</span>, | |
| <span class="hljs-string">"text"</span>: <span class="hljs-string">"Although often underappreciated, the sun provides several notable benefits..."</span> | |
| } | |
| ] | |
| <span class="hljs-comment"># Tokenize conversation and documents using a RAG template, returning PyTorch tensors.</span> | |
| input_ids = tokenizer.apply_chat_template( | |
| conversation=conversation, | |
| documents=documents, | |
| chat_template=<span class="hljs-string">"rag"</span>, | |
| tokenize=<span class="hljs-literal">True</span>, | |
| add_generation_prompt=<span class="hljs-literal">True</span>, | |
| return_tensors=<span class="hljs-string">"pt"</span>).to(device) | |
| <span class="hljs-comment"># Generate a response </span> | |
| gen_tokens = model.generate( | |
| input_ids, | |
| max_new_tokens=<span class="hljs-number">100</span>, | |
| do_sample=<span class="hljs-literal">True</span>, | |
| temperature=<span class="hljs-number">0.3</span>, | |
| ) | |
| <span class="hljs-comment"># Decode and print the generated text along with generation prompt</span> | |
| gen_text = tokenizer.decode(gen_tokens[<span class="hljs-number">0</span>]) | |
| <span class="hljs-built_in">print</span>(gen_text)<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-bl710l">The <code>documents</code> input for retrieval-augmented generation is not widely supported, and many models have chat templates which simply ignore this input.</p> <p data-svelte-h="svelte-qpz2lz">To verify if a model supports the <code>documents</code> input, you can read its model card, or <code>print(tokenizer.chat_template)</code> to see if the <code>documents</code> key is used anywhere.</p> <p data-svelte-h="svelte-kg7zp3">One model class that does support it, though, is Cohere’s <a href="https://huggingface.co/CohereForAI/c4ai-command-r-08-2024" rel="nofollow">Command-R</a> and <a href="https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024" rel="nofollow">Command-R+</a>, through their <code>rag</code> chat template. You can see additional examples of grounded generation using this feature in their model cards.</p></div> <h2 class="relative group"><a id="advanced-how-do-chat-templates-work" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-how-do-chat-templates-work"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: How do chat templates work?</span></h2> <p data-svelte-h="svelte-lanz4l">The chat template for a model is stored on the <code>tokenizer.chat_template</code> attribute. If no chat template is set, the | |
| default template for that model class is used instead. Let’s take a look at a <code>Zephyr</code> chat template, though note this | |
| one is a little simplified from the actual one!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">for</span></span> message <span class="hljs-keyword">in</span> messages %}</span><span class="language-xml"> | |
| </span><span class="hljs-template-variable">{{- '<|' + message['role'] + |>\n' }}</span><span class="language-xml"> | |
| </span><span class="hljs-template-variable">{{- message['content'] + eos_token }}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">endfor</span></span> %}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">if</span></span> add_generation_prompt %}</span><span class="language-xml"> | |
| </span><span class="hljs-template-variable">{{- '<|assistant|>\n' }}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">endif</span></span> %}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zet1qo">If you’ve never seen one of these before, this is a <a href="https://jinja.palletsprojects.com/en/3.1.x/templates/" rel="nofollow">Jinja template</a>. | |
| Jinja is a templating language that allows you to write simple code that generates text. In many ways, the code and | |
| syntax resembles Python. In pure Python, this template would look something like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">for</span> message <span class="hljs-keyword">in</span> messages: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f'<|<span class="hljs-subst">{message[<span class="hljs-string">"role"</span>]}</span>|>'</span>) | |
| <span class="hljs-built_in">print</span>(message[<span class="hljs-string">'content'</span>] + eos_token) | |
| <span class="hljs-keyword">if</span> add_generation_prompt: | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">'<|assistant|>'</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9bdwn1">Effectively, the template does three things:</p> <ol data-svelte-h="svelte-1ax9ke1"><li>For each message, print the role enclosed in <code><|</code> and <code>|></code>, like <code><|user|></code> or <code><|assistant|></code>.</li> <li>Next, print the content of the message, followed by the end-of-sequence token.</li> <li>Finally, if <code>add_generation_prompt</code> is set, print the assistant token, so that the model knows to start generating | |
| an assistant response.</li></ol> <p data-svelte-h="svelte-4zn2hx">This is a pretty simple template but Jinja gives you a lot of flexibility to do more complex things! Let’s see a Jinja | |
| template that can format inputs similarly to the way LLaMA formats them (note that the real LLaMA template includes | |
| handling for default system messages and slightly different system message handling in general - don’t use this one | |
| in your actual code!)</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{%- <span class="hljs-keyword">for</span> message <span class="hljs-keyword">in</span> messages %} | |
| {%- <span class="hljs-keyword">if</span> message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'user'</span> %} | |
| {{- bos_token + <span class="hljs-string">'[INST] '</span> + message[<span class="hljs-string">'content'</span>] + <span class="hljs-string">' [/INST]'</span> }} | |
| {%- elif message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'system'</span> %} | |
| {{- <span class="hljs-string">'<<SYS>>\\n'</span> + message[<span class="hljs-string">'content'</span>] + <span class="hljs-string">'\\n<</SYS>>\\n\\n'</span> }} | |
| {%- elif message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'assistant'</span> %} | |
| {{- <span class="hljs-string">' '</span> + message[<span class="hljs-string">'content'</span>] + <span class="hljs-string">' '</span> + eos_token }} | |
| {%- endif %} | |
| {%- endfor %}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-prz4we">Hopefully if you stare at this for a little bit you can see what this template is doing - it adds specific tokens like | |
| <code>[INST]</code> and <code>[/INST]</code> based on the role of each message. User, assistant and system messages are clearly | |
| distinguishable to the model because of the tokens they’re wrapped in.</p> <h2 class="relative group"><a id="advanced-adding-and-editing-chat-templates" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-adding-and-editing-chat-templates"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: Adding and editing chat templates</span></h2> <h3 class="relative group"><a id="how-do-i-create-a-chat-template" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-do-i-create-a-chat-template"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How do I create a chat template?</span></h3> <p data-svelte-h="svelte-1ubxgh9">Simple, just write a jinja template and set <code>tokenizer.chat_template</code>. You may find it easier to start with an | |
| existing template from another model and simply edit it for your needs! For example, we could take the LLaMA template | |
| above and add ”[ASST]” and ”[/ASST]” to assistant messages:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{%- <span class="hljs-keyword">for</span> message <span class="hljs-keyword">in</span> messages %} | |
| {%- <span class="hljs-keyword">if</span> message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'user'</span> %} | |
| {{- bos_token + <span class="hljs-string">'[INST] '</span> + message[<span class="hljs-string">'content'</span>].<span class="hljs-keyword">strip</span>() + <span class="hljs-string">' [/INST]'</span> }} | |
| {%- elif message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'system'</span> %} | |
| {{- <span class="hljs-string">'<<SYS>>\\n'</span> + message[<span class="hljs-string">'content'</span>].<span class="hljs-keyword">strip</span>() + <span class="hljs-string">'\\n<</SYS>>\\n\\n'</span> }} | |
| {%- elif message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'assistant'</span> %} | |
| {{- <span class="hljs-string">'[ASST] '</span> + message[<span class="hljs-string">'content'</span>] + <span class="hljs-string">' [/ASST]'</span> + eos_token }} | |
| {%- endif %} | |
| {%- endfor %}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1djfz0o">Now, simply set the <code>tokenizer.chat_template</code> attribute. Next time you use <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a>, it will | |
| use your new template! This attribute will be saved in the <code>tokenizer_config.json</code> file, so you can use | |
| <a href="/docs/transformers/pr_33913/en/main_classes/model#transformers.utils.PushToHubMixin.push_to_hub">push_to_hub()</a> to upload your new template to the Hub and make sure everyone’s using the right | |
| template for your model!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->template = tokenizer.chat_template | |
| template = template.replace(<span class="hljs-string">"SYS"</span>, <span class="hljs-string">"SYSTEM"</span>) <span class="hljs-comment"># Change the system token</span> | |
| tokenizer.chat_template = template <span class="hljs-comment"># Set the new template</span> | |
| tokenizer.push_to_hub(<span class="hljs-string">"model_name"</span>) <span class="hljs-comment"># Upload your new template to the Hub!</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ep35f7">The method <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a> which uses your chat template is called by the <a href="/docs/transformers/pr_33913/en/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a> class, so | |
| once you set the correct chat template, your model will automatically become compatible with <a href="/docs/transformers/pr_33913/en/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a>.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400">If you're fine-tuning a model for chat, in addition to setting a chat template, you should probably add any new chat | |
| control tokens as special tokens in the tokenizer. Special tokens are never split, | |
| ensuring that your control tokens are always handled as single tokens rather than being tokenized in pieces. You | |
| should also set the tokenizer's `eos_token` attribute to the token that marks the end of assistant generations in your | |
| template. This will ensure that text generation tools can correctly figure out when to stop generating text.</div> <h3 class="relative group"><a id="why-do-some-models-have-multiple-templates" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#why-do-some-models-have-multiple-templates"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Why do some models have multiple templates?</span></h3> <p data-svelte-h="svelte-1d7cql4">Some models use different templates for different use cases. For example, they might use one template for normal chat | |
| and another for tool-use, or retrieval-augmented generation. In these cases, <code>tokenizer.chat_template</code> is a dictionary. | |
| This can cause some confusion, and where possible, we recommend using a single template for all use-cases. You can use | |
| Jinja statements like <code>if tools is defined</code> and <code>{% macro %}</code> definitions to easily wrap multiple code paths in a | |
| single template.</p> <p data-svelte-h="svelte-1u88h1j">When a tokenizer has multiple templates, <code>tokenizer.chat_template</code> will be a <code>dict</code>, where each key is the name | |
| of a template. The <code>apply_chat_template</code> method has special handling for certain template names: Specifically, it will | |
| look for a template named <code>default</code> in most cases, and will raise an error if it can’t find one. However, if a template | |
| named <code>tool_use</code> exists when the user has passed a <code>tools</code> argument, it will use that instead. To access templates | |
| with other names, pass the name of the template you want to the <code>chat_template</code> argument of | |
| <code>apply_chat_template()</code>.</p> <p data-svelte-h="svelte-1g7ri12">We find that this can be a bit confusing for users, though - so if you’re writing a template yourself, we recommend | |
| trying to put it all in a single template where possible!</p> <h3 class="relative group"><a id="what-template-should-i-use" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-template-should-i-use"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What template should I use?</span></h3> <p data-svelte-h="svelte-5u6sqi">When setting the template for a model that’s already been trained for chat, you should ensure that the template | |
| exactly matches the message formatting that the model saw during training, or else you will probably experience | |
| performance degradation. This is true even if you’re training the model further - you will probably get the best | |
| performance if you keep the chat tokens constant. This is very analogous to tokenization - you generally get the | |
| best performance for inference or fine-tuning when you precisely match the tokenization used during training.</p> <p data-svelte-h="svelte-ffreiw">If you’re training a model from scratch, or fine-tuning a base language model for chat, on the other hand, | |
| you have a lot of freedom to choose an appropriate template! LLMs are smart enough to learn to handle lots of different | |
| input formats. One popular choice is the <code>ChatML</code> format, and this is a good, flexible choice for many use-cases. | |
| It looks like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="language-xml">{%- for message in messages %} | |
| </span><span class="hljs-template-variable">{{<span class="hljs-name">-</span> <span class="hljs-string">'<|im_start|>'</span> + message['role'] + <span class="hljs-string">'\n'</span> + message['content'] + <span class="hljs-string">'<|im_end|>'</span> + <span class="hljs-string">'\n'</span> }}</span><span class="language-xml"> | |
| {%- endfor %}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1cj1ql7">If you like this one, here it is in one-liner form, ready to copy into your code. The one-liner also includes | |
| handy support for <a href="#what-are-generation-prompts">generation prompts</a>, but note that it doesn’t add BOS or EOS tokens! | |
| If your model expects those, they won’t be added automatically by <code>apply_chat_template</code> - in other words, the | |
| text will be tokenized with <code>add_special_tokens=False</code>. This is to avoid potential conflicts between the template and | |
| the <code>add_special_tokens</code> logic. If your model expects special tokens, make sure to add them to the template!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer.chat_template = <span class="hljs-string">"{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-soh9qu">This template wraps each message in <code><|im_start|></code> and <code><|im_end|></code> tokens, and simply writes the role as a string, which | |
| allows for flexibility in the roles you train with. The output looks like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><|im_start|>system | |
| You are a helpful chatbot that will do its best not to say anything so stupid that people tweet about it.<|im_end|> | |
| <|im_start|>user | |
| How are you?<|im_end|> | |
| <|im_start|>assistant | |
| I'm doing great!<|im_end|><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-cacagz">The “user”, “system” and “assistant” roles are the standard for chat, and we recommend using them when it makes sense, | |
| particularly if you want your model to operate well with <a href="/docs/transformers/pr_33913/en/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a>. However, you are not limited | |
| to these roles - templating is extremely flexible, and any string can be a role.</p> <h3 class="relative group"><a id="i-want-to-add-some-chat-templates-how-should-i-get-started" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-want-to-add-some-chat-templates-how-should-i-get-started"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I want to add some chat templates! How should I get started?</span></h3> <p data-svelte-h="svelte-b733c0">If you have any chat models, you should set their <code>tokenizer.chat_template</code> attribute and test it using | |
| <a href="/docs/transformers/pr_33913/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a>, then push the updated tokenizer to the Hub. This applies even if you’re | |
| not the model owner - if you’re using a model with an empty chat template, or one that’s still using the default class | |
| template, please open a <a href="https://huggingface.co/docs/hub/repositories-pull-requests-discussions" rel="nofollow">pull request</a> to the model repository so that this attribute can be set properly!</p> <p data-svelte-h="svelte-kn2i6o">Once the attribute is set, that’s it, you’re done! <code>tokenizer.apply_chat_template</code> will now work correctly for that | |
| model, which means it is also automatically supported in places like <code>TextGenerationPipeline</code>!</p> <p data-svelte-h="svelte-197jyne">By ensuring that models have this attribute, we can make sure that the whole community gets to use the full power of | |
| open-source models. Formatting mismatches have been haunting the field and silently harming performance for too long - | |
| it’s time to put an end to them!</p> <h2 class="relative group"><a id="advanced-template-writing-tips" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-template-writing-tips"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: Template writing tips</span></h2> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-qy60pj">The easiest way to get started with writing Jinja templates is to take a look at some existing ones. You can use | |
| <code>print(tokenizer.chat_template)</code> for any chat model to see what template it’s using. In general, models that support tool use have | |
| much more complex templates than other models - so when you’re just getting started, they’re probably a bad example | |
| to learn from! You can also take a look at the | |
| <a href="https://jinja.palletsprojects.com/en/3.1.x/templates/#synopsis" rel="nofollow">Jinja documentation</a> for details | |
| of general Jinja formatting and syntax.</p></div> <p data-svelte-h="svelte-1eu5v04">Jinja templates in <code>transformers</code> are identical to Jinja templates elsewhere. The main thing to know is that | |
| the conversation history will be accessible inside your template as a variable called <code>messages</code>.<br> | |
| You will be able to access <code>messages</code> in your template just like you can in Python, which means you can loop over | |
| it with <code>{% for message in messages %}</code> or access individual messages with <code>{{ messages[0] }}</code>, for example.</p> <p data-svelte-h="svelte-10wmjwo">You can also use the following tips to write clean, efficient Jinja templates:</p> <h3 class="relative group"><a id="trimming-whitespace" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trimming-whitespace"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Trimming whitespace</span></h3> <p data-svelte-h="svelte-1ttgeg7">By default, Jinja will print any whitespace that comes before or after a block. This can be a problem for chat | |
| templates, which generally want to be very precise with whitespace! To avoid this, we strongly recommend writing | |
| your templates like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="language-xml">{%- for message in messages %} | |
| </span><span class="hljs-template-variable">{{<span class="hljs-name">-</span> message['role'] + message['content'] }}</span><span class="language-xml"> | |
| {%- endfor %}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-qihux6">rather than like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{%</span> <span class="hljs-name">for</span> message <span class="hljs-keyword">in</span> messages <span class="hljs-template-tag">%}</span><span class="language-xml"> | |
| </span><span class="hljs-template-variable">{{ message[<span class="hljs-string">'role'</span>] + message[<span class="hljs-string">'content'</span>] }}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endfor</span> <span class="hljs-template-tag">%}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pefrh0">Adding <code>-</code> will strip any whitespace that comes before the block. The second example looks innocent, but the newline | |
| and indentation may end up being included in the output, which is probably not what you want!</p> <h3 class="relative group"><a id="special-variables" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#special-variables"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Special variables</span></h3> <p data-svelte-h="svelte-1s7e55r">Inside your template, you will have access several special variables. The most important of these is <code>messages</code>, | |
| which contains the chat history as a list of message dicts. However, there are several others. Not every | |
| variable will be used in every template. The most common other variables are:</p> <ul data-svelte-h="svelte-1rdzqgp"><li><code>tools</code> contains a list of tools in JSON schema format. Will be <code>None</code> or undefined if no tools are passed.</li> <li><code>documents</code> contains a list of documents in the format <code>{"title": "Title", "contents": "Contents"}</code>, used for retrieval-augmented generation. Will be <code>None</code> or undefined if no documents are passed.</li> <li><code>add_generation_prompt</code> is a bool that is <code>True</code> if the user has requested a generation prompt, and <code>False</code> otherwise. If this is set, your template should add the header for an assistant message to the end of the conversation. If your model doesn’t have a specific header for assistant messages, you can ignore this flag.</li> <li><strong>Special tokens</strong> like <code>bos_token</code> and <code>eos_token</code>. These are extracted from <code>tokenizer.special_tokens_map</code>. The exact tokens available inside each template will differ depending on the parent tokenizer.</li></ul> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-cvlh0x">You can actually pass any <code>kwarg</code> to <code>apply_chat_template</code>, and it will be accessible inside the template as a variable. In general, | |
| we recommend trying to stick to the core variables above, as it will make your model harder to use if users have | |
| to write custom code to pass model-specific <code>kwargs</code>. However, we’re aware that this field moves quickly, so if you | |
| have a new use-case that doesn’t fit in the core API, feel free to use a new <code>kwarg</code> for it! If a new <code>kwarg</code> | |
| becomes common we may promote it into the core API and create a standard, documented format for it.</p></div> <h3 class="relative group"><a id="callable-functions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#callable-functions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Callable functions</span></h3> <p data-svelte-h="svelte-w29vry">There is also a short list of callable functions available to you inside your templates. These are:</p> <ul data-svelte-h="svelte-1fmzhdy"><li><code>raise_exception(msg)</code>: Raises a <code>TemplateException</code>. This is useful for debugging, and for telling users when they’re | |
| doing something that your template doesn’t support.</li> <li><code>strftime_now(format_str)</code>: Equivalent to <code>datetime.now().strftime(format_str)</code> in Python. This is used for getting | |
| the current date/time in a specific format, which is sometimes included in system messages.</li></ul> <h3 class="relative group"><a id="compatibility-with-non-python-jinja" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#compatibility-with-non-python-jinja"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Compatibility with non-Python Jinja</span></h3> <p data-svelte-h="svelte-jvvtjt">There are multiple implementations of Jinja in various languages. They generally have the same syntax, | |
| but a key difference is that when you’re writing a template in Python you can use Python methods, such as | |
| <code>.lower()</code> on strings or <code>.items()</code> on dicts. This will break if someone tries to use your template on a non-Python | |
| implementation of Jinja. Non-Python implementations are particularly common in deployment environments, where JS | |
| and Rust are very popular.</p> <p data-svelte-h="svelte-f0ucf0">Don’t panic, though! There are a few easy changes you can make to your templates to ensure they’re compatible across | |
| all implementations of Jinja:</p> <ul data-svelte-h="svelte-doa6oc"><li>Replace Python methods with Jinja filters. These usually have the same name, for example <code>string.lower()</code> becomes | |
| <code>string|lower</code>, and <code>dict.items()</code> becomes <code>dict|items</code>. One notable change is that <code>string.strip()</code> becomes <code>string|trim</code>. | |
| See the <a href="https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters" rel="nofollow">list of built-in filters</a> | |
| in the Jinja documentation for more.</li> <li>Replace <code>True</code>, <code>False</code> and <code>None</code>, which are Python-specific, with <code>true</code>, <code>false</code> and <code>none</code>.</li> <li>Directly rendering a dict or list may give different results in other implementations (for example, string entries | |
| might change from single-quoted to double-quoted). Adding the <code>tojson</code> filter can help to ensure consistency here.</li></ul> <h3 class="relative group"><a id="writing-generation-prompts" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#writing-generation-prompts"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Writing generation prompts</span></h3> <p data-svelte-h="svelte-13g6lvo">We mentioned above that <code>add_generation_prompt</code> is a special variable that will be accessible inside your template, | |
| and is controlled by the user setting the <code>add_generation_prompt</code> flag. If your model expects a header for | |
| assistant messages, then your template must support adding the header when <code>add_generation_prompt</code> is set.</p> <p data-svelte-h="svelte-i5qalm">Here is an example of a template that formats messages ChatML-style, with generation prompt support:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{{- bos_token }} | |
| {%- for message in messages %} | |
| {{- '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }} | |
| {%- endfor %} | |
| {%- if add_generation_prompt %} | |
| {{- '<|im_start|>assistant\n' }} | |
| {%- endif %}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-148o5ra">The exact content of the assistant header will depend on your specific model, but it should always be <strong>the string | |
| that represents the start of an assistant message</strong>, so that if the user applies your template with | |
| <code>add_generation_prompt=True</code> and then generates text, the model will write an assistant response. Also note that some | |
| models do not need a generation prompt, because assistant messages always begin immediately after user messages. | |
| This is particularly common for LLaMA and Mistral models, where assistant messages begin immediately after the <code>[/INST]</code> | |
| token that ends user messages. In these cases, the template can ignore the <code>add_generation_prompt</code> flag.</p> <p data-svelte-h="svelte-17es6d5">Generation prompts are important! If your model requires a generation prompt but it is not set in the template, then | |
| model generations will likely be severely degraded, or the model may display unusual behaviour like continuing | |
| the final user message!</p> <h3 class="relative group"><a id="writing-and-debugging-larger-templates" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#writing-and-debugging-larger-templates"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Writing and debugging larger templates</span></h3> <p data-svelte-h="svelte-180ap61">When this feature was introduced, most templates were quite small, the Jinja equivalent of a “one-liner” script. | |
| However, with new models and features like tool-use and RAG, some templates can be 100 lines long or more. When | |
| writing templates like these, it’s a good idea to write them in a separate file, using a text editor. You can easily | |
| extract a chat template to a file:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">open</span>(<span class="hljs-string">"template.jinja"</span>, <span class="hljs-string">"w"</span>).write(tokenizer.chat_template)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1bv602">Or load the edited template back into the tokenizer:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer.chat_template = <span class="hljs-built_in">open</span>(<span class="hljs-string">"template.jinja"</span>).read()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mq7g8k">As an added bonus, when you write a long, multi-line template in a separate file, line numbers in that file will | |
| exactly correspond to line numbers in template parsing or execution errors. This will make it much easier to | |
| identify the source of issues.</p> <h3 class="relative group"><a id="writing-templates-for-tools" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#writing-templates-for-tools"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Writing templates for tools</span></h3> <p data-svelte-h="svelte-kt3iym">Although chat templates do not enforce a specific API for tools (or for anything, really), we recommend | |
| template authors try to stick to a standard API where possible. The whole point of chat templates is to allow code | |
| to be transferable across models, so deviating from the standard tools API means users will have to write | |
| custom code to use tools with your model. Sometimes it’s unavoidable, but often with clever templating you can | |
| make the standard API work!</p> <p data-svelte-h="svelte-1nlwaki">Below, we’ll list the elements of the standard API, and give tips on writing templates that will work well with it.</p> <h4 class="relative group"><a id="tool-definitions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tool-definitions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tool definitions</span></h4> <p data-svelte-h="svelte-wrq3v1">Your template should expect that the variable <code>tools</code> will either be null (if no tools are passed), or is a list | |
| of JSON schema dicts. Our chat template methods allow users to pass tools as either JSON schema or Python functions, but when | |
| functions are passed, we automatically generate JSON schema and pass that to your template. As a result, the | |
| <code>tools</code> variable that your template receives will always be a list of JSON schema. Here is | |
| a sample tool JSON schema:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"function"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"function"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"multiply"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"description"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"A function that multiplies two numbers"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"parameters"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"object"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"properties"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"a"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"number"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"description"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"The first number to multiply"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"b"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"number"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"description"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"The second number to multiply"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"required"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">"a"</span><span class="hljs-punctuation">,</span> <span class="hljs-string">"b"</span><span class="hljs-punctuation">]</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1woymkp">And here is some example code for handling tools in your chat template. Remember, this is just an example for a | |
| specific format - your model will probably need different formatting!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{%- if tools %} | |
| {%- for tool in tools %} | |
| {{- '<tool>' + tool['function']['name'] + '\n' }} | |
| {%- for argument in tool['function']['parameters']['properties'] %} | |
| {{- argument + ': ' + tool['function']['parameters']['properties'][argument]['description'] + '\n' }} | |
| {%- endfor %} | |
| {{- '\n</tool>' }} | |
| {%- endif %} | |
| {%- endif %}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1vngoc7">The specific tokens and tool descriptions your template renders should of course be chosen to match the ones your model | |
| was trained with. There is no requirement that your <strong>model</strong> understands JSON schema input, only that your template can translate | |
| JSON schema into your model’s format. For example, <a href="https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024" rel="nofollow">Command-R</a> | |
| was trained with tools defined using Python function headers, but the Command-R tool template accepts JSON schema, | |
| converts types internally and renders the input tools as Python headers. You can do a lot with templates!</p> <h4 class="relative group"><a id="tool-calls" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tool-calls"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tool calls</span></h4> <p data-svelte-h="svelte-1x6y95z">Tool calls, if present, will be a list attached to a message with the “assistant” role. Note that <code>tool_calls</code> is | |
| always a list, even though most tool-calling models only support single tool calls at a time, which means | |
| the list will usually only have a single element. Here is a sample message dict containing a tool call:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"role"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"assistant"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"tool_calls"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span> | |
| <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"function"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"function"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"multiply"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"arguments"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"a"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">5</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"b"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">6</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">]</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-my9opo">And a common pattern for handling them would be something like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{%- if message['role'] == 'assistant' and 'tool_calls' in message %} | |
| {%- for tool_call in message['tool_calls'] %} | |
| {{- '<tool_call>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments']|tojson + '\n</tool_call>' }} | |
| {%- endif %} | |
| {%- endfor %} | |
| {%- endif %}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-hsoefm">Again, you should render the tool call with the formatting and special tokens that your model expects.</p> <h4 class="relative group"><a id="tool-responses" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tool-responses"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tool responses</span></h4> <p data-svelte-h="svelte-1hx88do">Tool responses have a simple format: They are a message dict with the “tool” role, a “name” key giving the name | |
| of the called function, and a “content” key containing the result of the tool call. Here is a sample tool response:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"role"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"tool"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"multiply"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"content"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"30"</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1okuuze">You don’t need to use all of the keys in the tool response. For example, if your model doesn’t expect the function | |
| name to be included in the tool response, then rendering it can be as simple as:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{%- if message['role'] == 'tool' %} | |
| {{- "<tool_result>" + message['content'] + "</tool_result>" }} | |
| {%- endif %}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13n2q2c">Again, remember that the actual formatting and special tokens are model-specific - you should take a lot of care | |
| to ensure that tokens, whitespace and everything else exactly match the format your model was trained with!</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/en/chat_templating.md" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_z647wz = { | |
| assets: "/docs/transformers/pr_33913/en", | |
| base: "/docs/transformers/pr_33913/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/transformers/pr_33913/en/_app/immutable/entry/start.b67f883f.js"), | |
| import("/docs/transformers/pr_33913/en/_app/immutable/entry/app.e436b1f2.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 12], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 188 kB
- Xet hash:
- 63372e79ac3ec7c9357161a4ef4975bbd4418f2f60bae90c9d7dc6c7feb4f0f7
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.