Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Templates for Chat Models","local":"templates-for-chat-models","sections":[{"title":"Introduction","local":"introduction","sections":[],"depth":2},{"title":"How do I use chat templates?","local":"how-do-i-use-chat-templates","sections":[],"depth":2},{"title":"Is there an automated pipeline for chat?","local":"is-there-an-automated-pipeline-for-chat","sections":[],"depth":2},{"title":"What are “generation prompts”?","local":"what-are-generation-prompts","sections":[],"depth":2},{"title":"Can I use chat templates in training?","local":"can-i-use-chat-templates-in-training","sections":[],"depth":2},{"title":"Advanced: Extra inputs to chat templates","local":"advanced-extra-inputs-to-chat-templates","sections":[],"depth":2},{"title":"Advanced: Tool use / function calling","local":"advanced-tool-use--function-calling","sections":[{"title":"Passing tool results to the model","local":"passing-tool-results-to-the-model","sections":[],"depth":3},{"title":"A complete tool use example","local":"a-complete-tool-use-example","sections":[],"depth":3},{"title":"Understanding tool schemas","local":"understanding-tool-schemas","sections":[],"depth":3}],"depth":2},{"title":"Advanced: Retrieval-augmented generation","local":"advanced-retrieval-augmented-generation","sections":[],"depth":2},{"title":"Advanced: How do chat templates work?","local":"advanced-how-do-chat-templates-work","sections":[],"depth":2},{"title":"Advanced: Adding and editing chat templates","local":"advanced-adding-and-editing-chat-templates","sections":[{"title":"How do I create a chat template?","local":"how-do-i-create-a-chat-template","sections":[],"depth":3},{"title":"Why do some models have multiple templates?","local":"why-do-some-models-have-multiple-templates","sections":[],"depth":3},{"title":"What are “default” templates?","local":"what-are-default-templates","sections":[],"depth":3},{"title":"What template should I 
use?","local":"what-template-should-i-use","sections":[],"depth":3},{"title":"I want to add some chat templates! How should I get started?","local":"i-want-to-add-some-chat-templates-how-should-i-get-started","sections":[],"depth":3}],"depth":2},{"title":"Advanced: Template writing tips","local":"advanced-template-writing-tips","sections":[{"title":"Trimming whitespace","local":"trimming-whitespace","sections":[],"depth":3},{"title":"For loops","local":"for-loops","sections":[],"depth":3},{"title":"If statements","local":"if-statements","sections":[],"depth":3},{"title":"Special variables","local":"special-variables","sections":[],"depth":3},{"title":"Compatibility with non-Python Jinja","local":"compatibility-with-non-python-jinja","sections":[],"depth":3}],"depth":2}],"depth":1}"> | |
| <link href="/docs/transformers/pr_30862/en/_app/immutable/assets/0.e3b0c442.css" rel="preload" as="style"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/entry/start.4daeeae2.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/chunks/scheduler.25b97de1.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/chunks/singletons.3041786c.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/chunks/index.e188933d.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/chunks/paths.8203287c.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/entry/app.7c143034.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/chunks/index.d9030fc9.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/nodes/0.2f8b49cd.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/nodes/11.5eaaa30b.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/chunks/Tip.baa67368.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/chunks/CodeBlock.e6cd0d95.js"> | |
| <link rel="modulepreload" href="/docs/transformers/pr_30862/en/_app/immutable/chunks/EditOnGithub.91d95064.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Templates for Chat Models","local":"templates-for-chat-models","sections":[{"title":"Introduction","local":"introduction","sections":[],"depth":2},{"title":"How do I use chat templates?","local":"how-do-i-use-chat-templates","sections":[],"depth":2},{"title":"Is there an automated pipeline for chat?","local":"is-there-an-automated-pipeline-for-chat","sections":[],"depth":2},{"title":"What are “generation prompts”?","local":"what-are-generation-prompts","sections":[],"depth":2},{"title":"Can I use chat templates in training?","local":"can-i-use-chat-templates-in-training","sections":[],"depth":2},{"title":"Advanced: Extra inputs to chat templates","local":"advanced-extra-inputs-to-chat-templates","sections":[],"depth":2},{"title":"Advanced: Tool use / function calling","local":"advanced-tool-use--function-calling","sections":[{"title":"Passing tool results to the model","local":"passing-tool-results-to-the-model","sections":[],"depth":3},{"title":"A complete tool use example","local":"a-complete-tool-use-example","sections":[],"depth":3},{"title":"Understanding tool schemas","local":"understanding-tool-schemas","sections":[],"depth":3}],"depth":2},{"title":"Advanced: Retrieval-augmented generation","local":"advanced-retrieval-augmented-generation","sections":[],"depth":2},{"title":"Advanced: How do chat templates work?","local":"advanced-how-do-chat-templates-work","sections":[],"depth":2},{"title":"Advanced: Adding and editing chat templates","local":"advanced-adding-and-editing-chat-templates","sections":[{"title":"How do I create a chat template?","local":"how-do-i-create-a-chat-template","sections":[],"depth":3},{"title":"Why do some models have multiple templates?","local":"why-do-some-models-have-multiple-templates","sections":[],"depth":3},{"title":"What are “default” 
templates?","local":"what-are-default-templates","sections":[],"depth":3},{"title":"What template should I use?","local":"what-template-should-i-use","sections":[],"depth":3},{"title":"I want to add some chat templates! How should I get started?","local":"i-want-to-add-some-chat-templates-how-should-i-get-started","sections":[],"depth":3}],"depth":2},{"title":"Advanced: Template writing tips","local":"advanced-template-writing-tips","sections":[{"title":"Trimming whitespace","local":"trimming-whitespace","sections":[],"depth":3},{"title":"For loops","local":"for-loops","sections":[],"depth":3},{"title":"If statements","local":"if-statements","sections":[],"depth":3},{"title":"Special variables","local":"special-variables","sections":[],"depth":3},{"title":"Compatibility with non-Python Jinja","local":"compatibility-with-non-python-jinja","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="templates-for-chat-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#templates-for-chat-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Templates for Chat 
Models</span></h1> <h2 class="relative group"><a id="introduction" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#introduction"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Introduction</span></h2> <p data-svelte-h="svelte-ydi30o">An increasingly common use case for LLMs is <strong>chat</strong>. In a chat context, rather than continuing a single string | |
| of text (as is the case with a standard language model), the model instead continues a conversation that consists | |
| of one or more <strong>messages</strong>, each of which includes a <strong>role</strong>, like “user” or “assistant”, as well as message text.</p> <p data-svelte-h="svelte-1p8dq8">Much like tokenization, different models expect very different input formats for chat. This is the reason we added | |
| <strong>chat templates</strong> as a feature. Chat templates are part of the tokenizer. They specify how to convert conversations, | |
| represented as lists of messages, into a single tokenizable string in the format that the model expects.</p> <p data-svelte-h="svelte-1n6wf0k">Let’s make this concrete with a quick example using the <code>BlenderBot</code> model. BlenderBot has an extremely simple default | |
| template, which mostly just adds whitespace between rounds of dialogue:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"facebook/blenderbot-400M-distill"</span>) | |
| <span class="hljs-meta">>>> </span>chat = [ | |
| <span class="hljs-meta">... </span> {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Hello, how are you?"</span>}, | |
| <span class="hljs-meta">... </span> {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'm doing great. How can I help you today?"</span>}, | |
| <span class="hljs-meta">... </span> {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'd like to show off how chat templating works!"</span>}, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>tokenizer.apply_chat_template(chat, tokenize=<span class="hljs-literal">False</span>) | |
| <span class="hljs-string">" Hello, how are you? I'm doing great. How can I help you today? I'd like to show off how chat templating works!&lt;/s&gt;"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1e3bvfs">Notice how the entire chat is condensed into a single string. If we use <code>tokenize=True</code>, which is the default setting, | |
| that string will also be tokenized for us. To see a more complex template in action, though, let’s use the | |
| <code>mistralai/Mistral-7B-Instruct-v0.1</code> model.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"mistralai/Mistral-7B-Instruct-v0.1"</span>) | |
| <span class="hljs-meta">>>> </span>chat = [ | |
| <span class="hljs-meta">... </span> {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Hello, how are you?"</span>}, | |
| <span class="hljs-meta">... </span> {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'm doing great. How can I help you today?"</span>}, | |
| <span class="hljs-meta">... </span> {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'd like to show off how chat templating works!"</span>}, | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span>tokenizer.apply_chat_template(chat, tokenize=<span class="hljs-literal">False</span>) | |
| <span class="hljs-string">"&lt;s&gt;[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?&lt;/s&gt; [INST] I'd like to show off how chat templating works! [/INST]"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-15k3bj3">Note that this time, the tokenizer has added the control tokens [INST] and [/INST] to indicate the start and end of | |
| user messages (but not assistant messages!). Mistral-instruct was trained with these tokens, but BlenderBot was not.</p> <h2 class="relative group"><a id="how-do-i-use-chat-templates" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-do-i-use-chat-templates"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How do I use chat templates?</span></h2> <p data-svelte-h="svelte-g5oriz">As you can see in the example above, chat templates are easy to use. Simply build a list of messages, with <code>role</code> | |
| and <code>content</code> keys, and then pass it to the <a href="/docs/transformers/pr_30862/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a> method. Once you do that, | |
| you’ll get output that’s ready to go! When using chat templates as input for model generation, it’s also a good idea | |
| to use <code>add_generation_prompt=True</code> to add a <a href="#what-are-generation-prompts">generation prompt</a>.</p> <p data-svelte-h="svelte-gmslqw">Here’s an example of preparing input for <code>model.generate()</code>, using the <code>Zephyr</code> assistant model:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| checkpoint = <span class="hljs-string">"HuggingFaceH4/zephyr-7b-beta"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint) | |
| model = AutoModelForCausalLM.from_pretrained(checkpoint) <span class="hljs-comment"># You may want to use bfloat16 and/or move to GPU here</span> | |
| messages = [ | |
| { | |
| <span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, | |
| <span class="hljs-string">"content"</span>: <span class="hljs-string">"You are a friendly chatbot who always responds in the style of a pirate"</span>, | |
| }, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"How many helicopters can a human eat in one sitting?"</span>}, | |
| ] | |
| tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=<span class="hljs-literal">True</span>, add_generation_prompt=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-built_in">print</span>(tokenizer.decode(tokenized_chat[<span class="hljs-number">0</span>]))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1vy7akj">This will yield a string in the input format that Zephyr expects.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><|system|> | |
| You are a friendly chatbot who always responds in the style of a pirate&lt;/s&gt; | |
| &lt;|user|&gt; | |
| How many helicopters can a human eat in one sitting?&lt;/s&gt; | |
| <|assistant|><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-hj60o5">Now that our input is formatted correctly for Zephyr, we can use the model to generate a response to the user’s question:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->outputs = model.generate(tokenized_chat, max_new_tokens=<span class="hljs-number">128</span>) | |
| <span class="hljs-built_in">print</span>(tokenizer.decode(outputs[<span class="hljs-number">0</span>]))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1bfcqd3">This will yield:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><|system|> | |
| You are a friendly chatbot who always responds in the style of a pirate&lt;/s&gt; | |
| &lt;|user|&gt; | |
| How many helicopters can a human eat in one sitting?&lt;/s&gt; | |
| &lt;|assistant|&gt; | |
| Matey, I'm afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o' grog, a savory bowl o' stew, or a delicious loaf o' bread. But helicopters, they be for transportin' and movin' around, not for eatin'. So, I'd say none, me hearties. None at all.<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-k9m2iy">Arr, ‘twas easy after all!</p> <h2 class="relative group"><a id="is-there-an-automated-pipeline-for-chat" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#is-there-an-automated-pipeline-for-chat"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Is there an automated pipeline for chat?</span></h2> <p data-svelte-h="svelte-thucuy">Yes, there is! Our text generation pipelines support chat inputs, which makes it easy to use chat models. In the past, | |
| we used to use a dedicated “ConversationalPipeline” class, but this has now been deprecated and its functionality | |
| has been merged into the <a href="/docs/transformers/pr_30862/en/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a>. Let’s try the <code>Zephyr</code> example again, but this time using | |
| a pipeline:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| pipe = pipeline(<span class="hljs-string">"text-generation"</span>, <span class="hljs-string">"HuggingFaceH4/zephyr-7b-beta"</span>) | |
| messages = [ | |
| { | |
| <span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, | |
| <span class="hljs-string">"content"</span>: <span class="hljs-string">"You are a friendly chatbot who always responds in the style of a pirate"</span>, | |
| }, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"How many helicopters can a human eat in one sitting?"</span>}, | |
| ] | |
| <span class="hljs-built_in">print</span>(pipe(messages, max_new_tokens=<span class="hljs-number">128</span>)[<span class="hljs-number">0</span>][<span class="hljs-string">'generated_text'</span>][-<span class="hljs-number">1</span>]) <span class="hljs-comment"># Print the assistant's response</span><!-- HTML_TAG_END --></pre></div> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{'role': 'assistant', 'content': "Matey, I'm afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o' grog, a savory bowl o' stew, or a delicious loaf o' bread. But helicopters, they be for transportin' and movin' around, not for eatin'. So, I'd say none, me hearties. 
None at all."}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-5umvde">The pipeline will take care of all the details of tokenization and calling <code>apply_chat_template</code> for you - | |
| once the model has a chat template, all you need to do is initialize the pipeline and pass it the list of messages!</p> <h2 class="relative group"><a id="what-are-generation-prompts" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-are-generation-prompts"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What are “generation prompts”?</span></h2> <p data-svelte-h="svelte-rhnu79">You may have noticed that the <code>apply_chat_template</code> method has an <code>add_generation_prompt</code> argument. This argument tells | |
| the template to add tokens that indicate the start of a bot response. For example, consider the following chat:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->messages = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Hi there!"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Nice to meet you!"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Can I ask a question?"</span>} | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1g5nifq">Here’s what this will look like without a generation prompt, using the ChatML template we saw in the Zephyr example:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer.apply_chat_template(messages, tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">False</span>) | |
| <span class="hljs-string">"""<|im_start|>user | |
| Hi there!<|im_end|> | |
| <|im_start|>assistant | |
| Nice to meet you!<|im_end|> | |
| <|im_start|>user | |
| Can I ask a question?<|im_end|> | |
| """</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-61bp3d">And here’s what it looks like <strong>with</strong> a generation prompt:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer.apply_chat_template(messages, tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">True</span>) | |
| <span class="hljs-string">"""<|im_start|>user | |
| Hi there!<|im_end|> | |
| <|im_start|>assistant | |
| Nice to meet you!<|im_end|> | |
| <|im_start|>user | |
| Can I ask a question?<|im_end|> | |
| <|im_start|>assistant | |
| """</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-inq88f">Note that this time, we’ve added the tokens that indicate the start of a bot response. This ensures that when the model | |
| generates text it will write a bot response instead of doing something unexpected, like continuing the user’s | |
| message. Remember, chat models are still just language models - they’re trained to continue text, and chat is just a | |
| special kind of text to them! You need to guide them with appropriate control tokens, so they know what they’re | |
| supposed to be doing.</p> <p data-svelte-h="svelte-76qw2e">Not all models require generation prompts. Some models, like BlenderBot and LLaMA, don’t have any | |
| special tokens before bot responses. In these cases, the <code>add_generation_prompt</code> argument will have no effect. The exact | |
| effect that <code>add_generation_prompt</code> has will depend on the template being used.</p> <h2 class="relative group"><a id="can-i-use-chat-templates-in-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#can-i-use-chat-templates-in-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Can I use chat templates in training?</span></h2> <p data-svelte-h="svelte-i3l9c1">Yes! This is a good way to ensure that the chat template matches the tokens the model sees during training. | |
| We recommend that you apply the chat template as a preprocessing step for your dataset. After this, you | |
| can simply continue like any other language model training task. When training, you should usually set | |
| <code>add_generation_prompt=False</code>, because the added tokens to prompt an assistant response will not be helpful during | |
| training. Let’s see an example:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"HuggingFaceH4/zephyr-7b-beta"</span>) | |
| chat1 = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Which is bigger, the moon or the sun?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"The sun."</span>} | |
| ] | |
| chat2 = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Which is bigger, a virus or a bacterium?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"A bacterium."</span>} | |
| ] | |
| dataset = Dataset.from_dict({<span class="hljs-string">"chat"</span>: [chat1, chat2]}) | |
| dataset = dataset.<span class="hljs-built_in">map</span>(<span class="hljs-keyword">lambda</span> x: {<span class="hljs-string">"formatted_chat"</span>: tokenizer.apply_chat_template(x[<span class="hljs-string">"chat"</span>], tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">False</span>)}) | |
| <span class="hljs-built_in">print</span>(dataset[<span class="hljs-string">'formatted_chat'</span>][<span class="hljs-number">0</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13505nn">And we get:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><|user|> | |
| Which is bigger, the moon or the sun?</s> | |
| <|assistant|> | |
| The sun.</s><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ziuqkt">From here, just continue training like you would with a standard language modelling task, using the <code>formatted_chat</code> column.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400">If you format text with <code>apply_chat_template(tokenize=False)</code> and then tokenize it in a separate step, you should set the argument | |
| <code>add_special_tokens=False</code>. If you use <code>apply_chat_template(tokenize=True)</code>, you don't need to worry about this! | |
| <p data-svelte-h="svelte-tedoa1">By default, some tokenizers add special tokens like <code><bos></code> and <code><eos></code> to text they tokenize. Chat templates should | |
| always include all of the special tokens they need, and so adding extra special tokens with | |
| the default <code>add_special_tokens=True</code> can result in incorrect or duplicated special tokens, which will hurt model | |
| performance.</p></div> <h2 class="relative group"><a id="advanced-extra-inputs-to-chat-templates" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-extra-inputs-to-chat-templates"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: Extra inputs to chat templates</span></h2> <p data-svelte-h="svelte-dd615e">The only argument that <code>apply_chat_template</code> requires is <code>messages</code>. However, you can pass any keyword | |
| argument to <code>apply_chat_template</code> and it will be accessible inside the template. This gives you a lot of freedom to use | |
| chat templates for many things. There are no restrictions on the names or the format of these arguments - you can pass | |
| strings, lists, dicts or whatever else you want.</p> <p data-svelte-h="svelte-dcun4m">That said, there are some common use-cases for these extra arguments, | |
| such as passing tools for function calling, or documents for retrieval-augmented generation. In these common cases, | |
| we have some opinionated recommendations about what the names and formats of these arguments should be, which are | |
| described in the sections below. We encourage model authors to make their chat templates compatible with this format, | |
| to make it easy to transfer tool-calling code between models.</p> <h2 class="relative group"><a id="advanced-tool-use--function-calling" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-tool-use--function-calling"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: Tool use / function calling</span></h2> <p data-svelte-h="svelte-6sd0wq">“Tool use” LLMs can choose to call functions as external tools before generating an answer. When passing tools | |
| to a tool-use model, you can simply pass a list of functions to the <code>tools</code> argument:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> datetime | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">current_time</span>(): | |
| <span class="hljs-string">"""Get the current local time as a string."""</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-built_in">str</span>(datetime.datetime.now()) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">multiply</span>(<span class="hljs-params">a: <span class="hljs-built_in">float</span>, b: <span class="hljs-built_in">float</span></span>): | |
| <span class="hljs-string">""" | |
| A function that multiplies two numbers | |
| Args: | |
| a: The first number to multiply | |
| b: The second number to multiply | |
| """</span> | |
| <span class="hljs-keyword">return</span> a * b | |
| tools = [current_time, multiply] | |
| model_input = tokenizer.apply_chat_template( | |
| messages, | |
| tools=tools | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-608o9m">In order for this to work correctly, you should write your functions in the format above, so that they can be parsed | |
| correctly as tools. Specifically, you should follow these rules:</p> <ul data-svelte-h="svelte-n1b3zm"><li>The function should have a descriptive name</li> <li>Every argument must have a type hint</li> <li>The function must have a docstring in the standard Google style (in other words, an initial function description<br> | |
| followed by an <code>Args:</code> block that describes the arguments, unless the function does not have any arguments).</li> <li>Do not include types in the <code>Args:</code> block. In other words, write <code>a: The first number to multiply</code>, not | |
| <code>a (int): The first number to multiply</code>. Type hints should go in the function header instead.</li> <li>The function can have a return type and a <code>Returns:</code> block in the docstring. However, these are optional | |
| because most tool-use models ignore them.</li></ul> <h3 class="relative group"><a id="passing-tool-results-to-the-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#passing-tool-results-to-the-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Passing tool results to the model</span></h3> <p data-svelte-h="svelte-11962fa">The sample code above is enough to list the available tools for your model, but what happens if it wants to actually use | |
| one? If that happens, you should:</p> <ol data-svelte-h="svelte-1vd84s7"><li>Parse the model’s output to get the tool name(s) and arguments.</li> <li>Add the model’s tool call(s) to the conversation.</li> <li>Call the corresponding function(s) with those arguments.</li> <li>Add the result(s) to the conversation</li></ol> <h3 class="relative group"><a id="a-complete-tool-use-example" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#a-complete-tool-use-example"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>A complete tool use example</span></h3> <p data-svelte-h="svelte-1oi0gsn">Let’s walk through a tool use example, step by step. For this example, we will use an 8B <code>Hermes-2-Pro</code> model, | |
| as it is one of the highest-performing tool-use models in its size category at the time of writing. If you have the | |
| memory, you can consider using a larger model instead like <a href="https://huggingface.co/CohereForAI/c4ai-command-r-v01" rel="nofollow">Command-R</a> | |
| or <a href="https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1" rel="nofollow">Mixtral-8x22B</a>, both of which also support tool use | |
| and offer even stronger performance.</p> <p data-svelte-h="svelte-o8n6v4">First, let’s load our model and tokenizer:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer | |
| checkpoint = <span class="hljs-string">"NousResearch/Hermes-2-Pro-Llama-3-8B"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(checkpoint, revision=<span class="hljs-string">"pr/13"</span>) | |
| model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16, device_map=<span class="hljs-string">"auto"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1q7358y">Next, let’s define a list of tools:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">get_current_temperature</span>(<span class="hljs-params">location: <span class="hljs-built_in">str</span>, unit: <span class="hljs-built_in">str</span></span>) -> <span class="hljs-built_in">float</span>: | |
| <span class="hljs-string">""" | |
| Get the current temperature at a location. | |
| Args: | |
| location: The location to get the temperature for, in the format "City, Country" | |
| unit: The unit to return the temperature in. (choices: ["celsius", "fahrenheit"]) | |
| Returns: | |
| The current temperature at the specified location in the specified units, as a float. | |
| """</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-number">22.</span> <span class="hljs-comment"># A real function should probably actually get the temperature!</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">get_current_wind_speed</span>(<span class="hljs-params">location: <span class="hljs-built_in">str</span></span>) -> <span class="hljs-built_in">float</span>: | |
| <span class="hljs-string">""" | |
| Get the current wind speed in km/h at a given location. | |
| Args: | |
| location: The location to get the wind speed for, in the format "City, Country" | |
| Returns: | |
| The current wind speed at the given location in km/h, as a float. | |
| """</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-number">6.</span> <span class="hljs-comment"># A real function should probably actually get the wind speed!</span> | |
| tools = [get_current_temperature, get_current_wind_speed]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11hfyaa">Now, let’s set up a conversation for our bot:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->messages = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"system"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"You are a bot that responds to weather queries. You should reply with the unit used in the queried location."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Hey, what's the temperature in Paris right now?"</span>} | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1usrd3e">Now, let’s apply the chat template and generate a response:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->inputs = tokenizer.apply_chat_template(messages, chat_template=<span class="hljs-string">"tool_use"</span>, tools=tools, add_generation_prompt=<span class="hljs-literal">True</span>, return_dict=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| inputs = {k: v.to(model.device) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> inputs.items()} | |
| out = model.generate(**inputs, max_new_tokens=<span class="hljs-number">128</span>) | |
| <span class="hljs-built_in">print</span>(tokenizer.decode(out[<span class="hljs-number">0</span>][<span class="hljs-built_in">len</span>(inputs[<span class="hljs-string">"input_ids"</span>][<span class="hljs-number">0</span>]):]))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13505nn">And we get:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><tool_call> | |
| {"arguments": {"location": "Paris, France", "unit": "celsius"}, "name": "get_current_temperature"} | |
| </tool_call><|im_end|><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-nxltbo">The model has called the function with valid arguments, in the format requested by the function docstring. It has | |
| inferred that we’re most likely referring to the Paris in France, and it remembered that, as the home of SI units, | |
| the temperature in France should certainly be displayed in Celsius.</p> <p data-svelte-h="svelte-1fjq90r">Let’s append the model’s tool call to the conversation. Note that we generate a random <code>tool_call_id</code> here. These IDs | |
| are not used by all models, but they allow models to issue multiple tool calls at once and keep track of which response | |
| corresponds to which call. You can generate them any way you like, but they should be unique within each chat.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tool_call_id = <span class="hljs-string">"vAHdf3"</span> <span class="hljs-comment"># Random ID, should be unique for each tool call</span> | |
| tool_call = {<span class="hljs-string">"name"</span>: <span class="hljs-string">"get_current_temperature"</span>, <span class="hljs-string">"arguments"</span>: {<span class="hljs-string">"location"</span>: <span class="hljs-string">"Paris, France"</span>, <span class="hljs-string">"unit"</span>: <span class="hljs-string">"celsius"</span>}} | |
| messages.append({<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"tool_calls"</span>: [{<span class="hljs-string">"id"</span>: tool_call_id, <span class="hljs-string">"type"</span>: <span class="hljs-string">"function"</span>, <span class="hljs-string">"function"</span>: tool_call}]})<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-6h0phb">Now that we’ve added the tool call to the conversation, we can call the function and append the result to the | |
| conversation. Since we’re just using a dummy function for this example that always returns 22.0, we can just append | |
| that result directly. Again, note the <code>tool_call_id</code> - this should match the ID used in the tool call above.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->messages.append({<span class="hljs-string">"role"</span>: <span class="hljs-string">"tool"</span>, <span class="hljs-string">"tool_call_id"</span>: tool_call_id, <span class="hljs-string">"name"</span>: <span class="hljs-string">"get_current_temperature"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"22.0"</span>})<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qjybqz">Finally, let’s let the assistant read the function outputs and continue chatting with the user:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 
cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->inputs = tokenizer.apply_chat_template(messages, chat_template=<span class="hljs-string">"tool_use"</span>, tools=tools, add_generation_prompt=<span class="hljs-literal">True</span>, return_dict=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">"pt"</span>) | |
| inputs = {k: v.to(model.device) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> inputs.items()} | |
| out = model.generate(**inputs, max_new_tokens=<span class="hljs-number">128</span>) | |
| <span class="hljs-built_in">print</span>(tokenizer.decode(out[<span class="hljs-number">0</span>][<span class="hljs-built_in">len</span>(inputs[<span class="hljs-string">"input_ids"</span>][<span class="hljs-number">0</span>]):]))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13505nn">And we get:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->The current temperature in Paris, France is 22.0 ° Celsius.<|im_end|><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1evxmus">Although this was a simple demo with dummy tools and a single call, the same technique works with | |
| multiple real tools and longer conversations. This can be a powerful way to extend the capabilities of conversational | |
| agents with real-time information, computational tools like calculators, or access to large databases.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400">Not all of the tool-calling features shown above are used by all models. Some use tool call IDs, others simply use the function name and | |
| match tool calls to results using the ordering, and there are several models that use neither and only issue one tool | |
| call at a time to avoid confusion. If you want your code to be compatible across as many models as possible, we | |
| recommend structuring your tool calls like we've shown here, and returning tool results in the order that | |
| they were issued by the model. The chat templates on each model should handle the rest.</div> <h3 class="relative group"><a id="understanding-tool-schemas" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#understanding-tool-schemas"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Understanding tool schemas</span></h3> <p data-svelte-h="svelte-pl4mbs">Each function you pass to the <code>tools</code> argument of <code>apply_chat_template</code> is converted into a | |
| <a href="https://json-schema.org/learn/getting-started-step-by-step" rel="nofollow">JSON schema</a>. These schemas | |
| are then passed to the model chat template. In other words, tool-use models do not see your functions directly, and they | |
| never see the actual code inside them. What they care about is the function <strong>definitions</strong> and the <strong>arguments</strong> they | |
| need to pass to them - they care about what the tools do and how to use them, not how they work! It is up to you | |
| to read their outputs, detect if they have requested to use a tool, pass their arguments to the tool function, and | |
| return the response in the chat.</p> <p data-svelte-h="svelte-37xmdz">Generating JSON schemas to pass to the template should be automatic and invisible as long as your functions | |
| follow the specification above, but if you encounter problems, or you simply want more control over the conversion, | |
| you can handle the conversion manually. Here is an example of a manual schema conversion.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers.utils <span class="hljs-keyword">import</span> get_json_schema | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">multiply</span>(<span class="hljs-params">a: <span class="hljs-built_in">float</span>, b: <span class="hljs-built_in">float</span></span>): | |
| <span class="hljs-string">""" | |
| A function that multiplies two numbers | |
| Args: | |
| a: The first number to multiply | |
| b: The second number to multiply | |
| """</span> | |
| <span class="hljs-keyword">return</span> a * b | |
| schema = get_json_schema(multiply) | |
| <span class="hljs-built_in">print</span>(schema)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1bfcqd3">This will yield:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"function"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"function"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"name"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"multiply"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"description"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"A function that multiplies two numbers"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"parameters"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"object"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"properties"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"a"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"number"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"description"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"The first number to multiply"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"b"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"number"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"description"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"The second number to multiply"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"required"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">[</span><span class="hljs-string">"a"</span><span class="hljs-punctuation">,</span> <span class="hljs-string">"b"</span><span class="hljs-punctuation">]</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19t6fs5">If you wish, you can edit these schemas, or even write them from scratch yourself without using <code>get_json_schema</code> at | |
| all. JSON schemas can be passed directly to the <code>tools</code> argument of | |
| <code>apply_chat_template</code> - this gives you a lot of power to define precise schemas for more complex functions. Be careful, | |
| though - the more complex your schemas, the more likely the model is to get confused when dealing with them! We | |
| recommend simple function signatures where possible, keeping arguments (and especially complex, nested arguments) | |
| to a minimum.</p> <p data-svelte-h="svelte-1nlyrys">Here is an example of defining schemas by hand, and passing them directly to <code>apply_chat_template</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># A simple function that takes no arguments</span> | |
| current_time = { | |
| <span class="hljs-string">"type"</span>: <span class="hljs-string">"function"</span>, | |
| <span class="hljs-string">"function"</span>: { | |
| <span class="hljs-string">"name"</span>: <span class="hljs-string">"current_time"</span>, | |
| <span class="hljs-string">"description"</span>: <span class="hljs-string">"Get the current local time as a string."</span>, | |
| <span class="hljs-string">"parameters"</span>: { | |
| <span class="hljs-string">'type'</span>: <span class="hljs-string">'object'</span>, | |
| <span class="hljs-string">'properties'</span>: {} | |
| } | |
| } | |
| } | |
| <span class="hljs-comment"># A more complete function that takes two numerical arguments</span> | |
| multiply = { | |
| <span class="hljs-string">'type'</span>: <span class="hljs-string">'function'</span>, | |
| <span class="hljs-string">'function'</span>: { | |
| <span class="hljs-string">'name'</span>: <span class="hljs-string">'multiply'</span>, | |
| <span class="hljs-string">'description'</span>: <span class="hljs-string">'A function that multiplies two numbers'</span>, | |
| <span class="hljs-string">'parameters'</span>: { | |
| <span class="hljs-string">'type'</span>: <span class="hljs-string">'object'</span>, | |
| <span class="hljs-string">'properties'</span>: { | |
| <span class="hljs-string">'a'</span>: { | |
| <span class="hljs-string">'type'</span>: <span class="hljs-string">'number'</span>, | |
| <span class="hljs-string">'description'</span>: <span class="hljs-string">'The first number to multiply'</span> | |
| }, | |
| <span class="hljs-string">'b'</span>: { | |
| <span class="hljs-string">'type'</span>: <span class="hljs-string">'number'</span>, <span class="hljs-string">'description'</span>: <span class="hljs-string">'The second number to multiply'</span> | |
| } | |
| }, | |
| <span class="hljs-string">'required'</span>: [<span class="hljs-string">'a'</span>, <span class="hljs-string">'b'</span>] | |
| } | |
| } | |
| } | |
| model_input = tokenizer.apply_chat_template( | |
| messages, | |
| tools = [current_time, multiply] | |
| )<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="advanced-retrieval-augmented-generation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-retrieval-augmented-generation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: Retrieval-augmented generation</span></h2> <p data-svelte-h="svelte-1977j4z">“Retrieval-augmented generation” or “RAG” LLMs can search a corpus of documents for information before responding | |
| to a query. This allows models to vastly expand their knowledge base beyond their limited context size. Our | |
| recommendation for RAG models is that their template | |
| should accept a <code>documents</code> argument. This should be a list of documents, where each “document” | |
| is a single dict with <code>title</code> and <code>contents</code> keys, both of which have string values. Because this format is much simpler | |
| than the JSON schemas used for tools, no helper functions are necessary.</p> <p data-svelte-h="svelte-1xmnzcc">Here’s an example of a RAG template in action:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->document1 = { | |
| <span class="hljs-string">"title"</span>: <span class="hljs-string">"The Moon: Our Age-Old Foe"</span>, | |
| <span class="hljs-string">"contents"</span>: <span class="hljs-string">"Man has always dreamed of destroying the moon. In this essay, I shall..."</span> | |
| } | |
| document2 = { | |
| <span class="hljs-string">"title"</span>: <span class="hljs-string">"The Sun: Our Age-Old Friend"</span>, | |
| <span class="hljs-string">"contents"</span>: <span class="hljs-string">"Although often underappreciated, the sun provides several notable benefits..."</span> | |
| } | |
| model_input = tokenizer.apply_chat_template( | |
| messages, | |
| documents=[document1, document2] | |
| )<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="advanced-how-do-chat-templates-work" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-how-do-chat-templates-work"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: How do chat templates work?</span></h2> <p data-svelte-h="svelte-9hx4bd">The chat template for a model is stored on the <code>tokenizer.chat_template</code> attribute. If no chat template is set, the | |
| default template for that model class is used instead. Let’s take a look at the template for <code>BlenderBot</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"facebook/blenderbot-400M-distill"</span>) | |
| <span class="hljs-meta">>>> </span>tokenizer.default_chat_template | |
| <span class="hljs-string">"{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ ' ' }}{% endif %}{% endfor %}{{ eos_token }}"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1adekhh">That’s kind of intimidating. Let’s clean it up a little to make it more readable. In the process, though, we also make | |
| sure that the newlines and indentation we add don’t end up being included in the template output - see the tip on | |
| <a href="#trimming-whitespace">trimming whitespace</a> below!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">for</span></span> message <span class="hljs-keyword">in</span> messages %}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">if</span></span> message['role'] == 'user' %}</span><span class="language-xml"> | |
| </span><span class="hljs-template-variable">{{- ' ' }}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">endif</span></span> %}</span><span class="language-xml"> | |
| </span><span class="hljs-template-variable">{{- message['content'] }}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">if</span></span> not loop.last %}</span><span class="language-xml"> | |
| </span><span class="hljs-template-variable">{{- ' ' }}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">endif</span></span> %}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">endfor</span></span> %}</span><span class="language-xml"> | |
| </span><span class="hljs-template-variable">{{- eos_token }}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zet1qo">If you’ve never seen one of these before, this is a <a href="https://jinja.palletsprojects.com/en/3.1.x/templates/" rel="nofollow">Jinja template</a>. | |
| Jinja is a templating language that allows you to write simple code that generates text. In many ways, the code and | |
| syntax resembles Python. In pure Python, this template would look something like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">for</span> idx, message <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(messages): | |
|     <span class="hljs-keyword">if</span> message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'user'</span>: | |
|         <span class="hljs-built_in">print</span>(<span class="hljs-string">' '</span>) | |
|     <span class="hljs-built_in">print</span>(message[<span class="hljs-string">'content'</span>]) | |
|     <span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> idx == <span class="hljs-built_in">len</span>(messages) - <span class="hljs-number">1</span>: <span class="hljs-comment"># Check for the last message in the conversation</span> | |
|         <span class="hljs-built_in">print</span>(<span class="hljs-string">'  '</span>) | |
| <span class="hljs-built_in">print</span>(eos_token)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9bdwn1">Effectively, the template does three things:</p> <ol data-svelte-h="svelte-yy2gop"><li>For each message, if the message is a user message, add a blank space before it; otherwise, print nothing.</li> <li>Add the message content.</li> <li>If the message is not the last message, add two spaces after it. After the final message, print the EOS token.</li></ol> <p data-svelte-h="svelte-jxu6rq">This is a pretty simple template - it doesn’t add any control tokens, and it doesn’t support “system” messages, which | |
| are a common way to give the model directives about how it should behave in the subsequent conversation. | |
| But Jinja gives you a lot of flexibility to do those things! Let’s see a Jinja template that can format inputs | |
| similarly to the way LLaMA formats them (note that the real LLaMA template includes handling for default system | |
| messages and slightly different system message handling in general - don’t use this one in your actual code!)</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{%- <span class="hljs-keyword">for</span> message <span class="hljs-keyword">in</span> messages %} | |
| {%- <span class="hljs-keyword">if</span> message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'user'</span> %} | |
| {{- bos_token + <span class="hljs-string">'[INST] '</span> + message[<span class="hljs-string">'content'</span>] + <span class="hljs-string">' [/INST]'</span> }} | |
| {%- elif message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'system'</span> %} | |
| {{- <span class="hljs-string">'&lt;&lt;SYS&gt;&gt;\\n'</span> + message[<span class="hljs-string">'content'</span>] + <span class="hljs-string">'\\n&lt;&lt;/SYS&gt;&gt;\\n\\n'</span> }} | |
| {%- elif message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'assistant'</span> %} | |
| {{- <span class="hljs-string">' '</span> + message[<span class="hljs-string">'content'</span>] + <span class="hljs-string">' '</span> + eos_token }} | |
| {%- endif %} | |
| {%- endfor %}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-dqaxjt">Hopefully if you stare at this for a little bit you can see what this template is doing - it adds specific tokens based | |
| on the “role” of each message, which represents who sent it. User, assistant and system messages are clearly | |
| distinguishable to the model because of the tokens they’re wrapped in.</p> <h2 class="relative group"><a id="advanced-adding-and-editing-chat-templates" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-adding-and-editing-chat-templates"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: Adding and editing chat templates</span></h2> <h3 class="relative group"><a id="how-do-i-create-a-chat-template" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-do-i-create-a-chat-template"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 
0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How do I create a chat template?</span></h3> <p data-svelte-h="svelte-1ubxgh9">Simple, just write a jinja template and set <code>tokenizer.chat_template</code>. You may find it easier to start with an | |
| existing template from another model and simply edit it for your needs! For example, we could take the LLaMA template | |
| above and add ”[ASST]” and ”[/ASST]” to assistant messages:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{%- <span class="hljs-keyword">for</span> message <span class="hljs-keyword">in</span> messages %} | |
| {%- <span class="hljs-keyword">if</span> message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'user'</span> %} | |
| {{- bos_token + <span class="hljs-string">'[INST] '</span> + message[<span class="hljs-string">'content'</span>].<span class="hljs-keyword">strip</span>() + <span class="hljs-string">' [/INST]'</span> }} | |
| {%- elif message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'system'</span> %} | |
| {{- <span class="hljs-string">'&lt;&lt;SYS&gt;&gt;\\n'</span> + message[<span class="hljs-string">'content'</span>].<span class="hljs-keyword">strip</span>() + <span class="hljs-string">'\\n&lt;&lt;/SYS&gt;&gt;\\n\\n'</span> }} | |
| {%- elif message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'assistant'</span> %} | |
| {{- <span class="hljs-string">'[ASST] '</span> + message[<span class="hljs-string">'content'</span>] + <span class="hljs-string">' [/ASST]'</span> + eos_token }} | |
| {%- endif %} | |
| {%- endfor %}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-jqsxu8">Now, simply set the <code>tokenizer.chat_template</code> attribute. Next time you use <a href="/docs/transformers/pr_30862/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a>, it will | |
| use your new template! This attribute will be saved in the <code>tokenizer_config.json</code> file, so you can use | |
| <a href="/docs/transformers/pr_30862/en/main_classes/model#transformers.utils.PushToHubMixin.push_to_hub">push_to_hub()</a> to upload your new template to the Hub and make sure everyone’s using the right | |
| template for your model!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->template = tokenizer.chat_template | |
| template = template.replace(<span class="hljs-string">"SYS"</span>, <span class="hljs-string">"SYSTEM"</span>) <span class="hljs-comment"># Change the system token</span> | |
| tokenizer.chat_template = template <span class="hljs-comment"># Set the new template</span> | |
| tokenizer.push_to_hub(<span class="hljs-string">"model_name"</span>) <span class="hljs-comment"># Upload your new template to the Hub!</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-kqqxkz">The method <a href="/docs/transformers/pr_30862/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a> which uses your chat template is called by the <a href="/docs/transformers/pr_30862/en/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a> class, so | |
| once you set the correct chat template, your model will automatically become compatible with <a href="/docs/transformers/pr_30862/en/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a>.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400">If you're fine-tuning a model for chat, in addition to setting a chat template, you should probably add any new chat | |
| control tokens as special tokens in the tokenizer. Special tokens are never split, | |
| ensuring that your control tokens are always handled as single tokens rather than being tokenized in pieces. You | |
| should also set the tokenizer's <code>eos_token</code> attribute to the token that marks the end of assistant generations in your | |
| template. This will ensure that text generation tools can correctly figure out when to stop generating text.</div> <h3 class="relative group"><a id="why-do-some-models-have-multiple-templates" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#why-do-some-models-have-multiple-templates"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Why do some models have multiple templates?</span></h3> <p data-svelte-h="svelte-1d7cql4">Some models use different templates for different use cases. For example, they might use one template for normal chat | |
| and another for tool-use, or retrieval-augmented generation. In these cases, <code>tokenizer.chat_template</code> is a dictionary. | |
| This can cause some confusion, and where possible, we recommend using a single template for all use-cases. You can use | |
| Jinja statements like <code>if tools is defined</code> and <code>{% macro %}</code> definitions to easily wrap multiple code paths in a | |
| single template.</p> <p data-svelte-h="svelte-1u88h1j">When a tokenizer has multiple templates, <code>tokenizer.chat_template</code> will be a <code>dict</code>, where each key is the name | |
| of a template. The <code>apply_chat_template</code> method has special handling for certain template names: Specifically, it will | |
| look for a template named <code>default</code> in most cases, and will raise an error if it can’t find one. However, if a template | |
| named <code>tool_use</code> exists when the user has passed a <code>tools</code> argument, it will use that instead. To access templates | |
| with other names, pass the name of the template you want to the <code>chat_template</code> argument of | |
| <code>apply_chat_template()</code>.</p> <p data-svelte-h="svelte-1g7ri12">We find that this can be a bit confusing for users, though - so if you’re writing a template yourself, we recommend | |
| trying to put it all in a single template where possible!</p> <h3 class="relative group"><a id="what-are-default-templates" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-are-default-templates"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What are “default” templates?</span></h3> <p data-svelte-h="svelte-wev7i6">Before the introduction of chat templates, chat handling was hardcoded at the model class level. For backwards | |
| compatibility, we have retained this class-specific handling as default templates, also set at the class level. If a | |
| model does not have a chat template set, but there is a default template for its model class, the <code>TextGenerationPipeline</code> | |
| class and methods like <code>apply_chat_template</code> will use the class template instead. You can find out what the default | |
| template for your tokenizer is by checking the <code>tokenizer.default_chat_template</code> attribute.</p> <p data-svelte-h="svelte-1o0lzen">This is something we do purely for backward compatibility reasons, to avoid breaking any existing workflows. Even when | |
| the class template is appropriate for your model, we strongly recommend overriding the default template by | |
| setting the <code>chat_template</code> attribute explicitly to make it clear to users that your model has been correctly configured | |
| for chat.</p> <p data-svelte-h="svelte-1o7n9tk">Now that actual chat templates have been adopted more widely, default templates have been deprecated and will be | |
| removed in a future release. We strongly recommend setting the <code>chat_template</code> attribute for any tokenizers that | |
| still depend on them!</p> <h3 class="relative group"><a id="what-template-should-i-use" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-template-should-i-use"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What template should I use?</span></h3> <p data-svelte-h="svelte-5u6sqi">When setting the template for a model that’s already been trained for chat, you should ensure that the template | |
| exactly matches the message formatting that the model saw during training, or else you will probably experience | |
| performance degradation. This is true even if you’re training the model further - you will probably get the best | |
| performance if you keep the chat tokens constant. This is very analogous to tokenization - you generally get the | |
| best performance for inference or fine-tuning when you precisely match the tokenization used during training.</p> <p data-svelte-h="svelte-ffreiw">If you’re training a model from scratch, or fine-tuning a base language model for chat, on the other hand, | |
| you have a lot of freedom to choose an appropriate template! LLMs are smart enough to learn to handle lots of different | |
| input formats. One popular choice is the <code>ChatML</code> format, and this is a good, flexible choice for many use-cases. | |
| It looks like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="language-xml">{%- for message in messages %} | |
| </span><span class="hljs-template-variable">{{<span class="hljs-name">-</span> <span class="hljs-string">'<|im_start|>'</span> + message['role'] + <span class="hljs-string">'\n'</span> + message['content'] + <span class="hljs-string">'<|im_end|>'</span> + <span class="hljs-string">'\n'</span> }}</span><span class="language-xml"> | |
| {%- endfor %}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1cj1ql7">If you like this one, here it is in one-liner form, ready to copy into your code. The one-liner also includes | |
| handy support for <a href="#what-are-generation-prompts">generation prompts</a>, but note that it doesn’t add BOS or EOS tokens! | |
| If your model expects those, they won’t be added automatically by <code>apply_chat_template</code> - in other words, the | |
| text will be tokenized with <code>add_special_tokens=False</code>. This is to avoid potential conflicts between the template and | |
| the <code>add_special_tokens</code> logic. If your model expects special tokens, make sure to add them to the template!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer.chat_template = <span class="hljs-string">"{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-soh9qu">This template wraps each message in <code><|im_start|></code> and <code><|im_end|></code> tokens, and simply writes the role as a string, which | |
| allows for flexibility in the roles you train with. The output looks like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><|im_start|>system | |
| You are a helpful chatbot that will do its best not to say anything so stupid that people tweet about it.<|im_end|> | |
| <|im_start|>user | |
| How are you?<|im_end|> | |
| <|im_start|>assistant | |
| I'm doing great!<|im_end|><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ohvduz">The “user”, “system” and “assistant” roles are the standard for chat, and we recommend using them when it makes sense, | |
| particularly if you want your model to operate well with <a href="/docs/transformers/pr_30862/en/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a>. However, you are not limited | |
| to these roles - templating is extremely flexible, and any string can be a role.</p> <h3 class="relative group"><a id="i-want-to-add-some-chat-templates-how-should-i-get-started" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#i-want-to-add-some-chat-templates-how-should-i-get-started"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>I want to add some chat templates! How should I get started?</span></h3> <p data-svelte-h="svelte-1t2huxs">If you have any chat models, you should set their <code>tokenizer.chat_template</code> attribute and test it using | |
| <a href="/docs/transformers/pr_30862/en/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a>, then push the updated tokenizer to the Hub. This applies even if you’re | |
| not the model owner - if you’re using a model with an empty chat template, or one that’s still using the default class | |
| template, please open a <a href="https://huggingface.co/docs/hub/repositories-pull-requests-discussions" rel="nofollow">pull request</a> to the model repository so that this attribute can be set properly!</p> <p data-svelte-h="svelte-kn2i6o">Once the attribute is set, that’s it, you’re done! <code>tokenizer.apply_chat_template</code> will now work correctly for that | |
| model, which means it is also automatically supported in places like <code>TextGenerationPipeline</code>!</p> <p data-svelte-h="svelte-197jyne">By ensuring that models have this attribute, we can make sure that the whole community gets to use the full power of | |
| open-source models. Formatting mismatches have been haunting the field and silently harming performance for too long - | |
| it’s time to put an end to them!</p> <h2 class="relative group"><a id="advanced-template-writing-tips" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#advanced-template-writing-tips"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Advanced: Template writing tips</span></h2> <p data-svelte-h="svelte-64od1c">If you’re unfamiliar with Jinja, we generally find that the easiest way to write a chat template is to first | |
| write a short Python script that formats messages the way you want, and then convert that script into a template.</p> <p data-svelte-h="svelte-1wubuld">Remember that the template handler will receive the conversation history as a variable called <code>messages</code>.<br> | |
| You will be able to access <code>messages</code> in your template just like you can in Python, which means you can loop over | |
| it with <code>{% for message in messages %}</code> or access individual messages with <code>{{ messages[0] }}</code>, for example.</p> <p data-svelte-h="svelte-1hxm0m6">You can also use the following tips to convert your code to Jinja:</p> <h3 class="relative group"><a id="trimming-whitespace" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#trimming-whitespace"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Trimming whitespace</span></h3> <p data-svelte-h="svelte-1ttgeg7">By default, Jinja will print any whitespace that comes before or after a block. This can be a problem for chat | |
| templates, which generally want to be very precise with whitespace! To avoid this, we strongly recommend writing | |
| your templates like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="language-xml">{%- for message in messages %} | |
| </span><span class="hljs-template-variable">{{<span class="hljs-name">-</span> message['role'] + message['content'] }}</span><span class="language-xml"> | |
| {%- endfor %}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-qihux6">rather than like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{%</span> <span class="hljs-name">for</span> message <span class="hljs-keyword">in</span> messages <span class="hljs-template-tag">%}</span><span class="language-xml"> | |
| </span><span class="hljs-template-variable">{{ message[<span class="hljs-string">'role'</span>] + message[<span class="hljs-string">'content'</span>] }}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endfor</span> <span class="hljs-template-tag">%}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pefrh0">Adding <code>-</code> right after the opening delimiter (as in <code>{%-</code> or <code>{{-</code>) will strip any whitespace that comes before the block. The second example looks innocent, but the newline | |
| and indentation may end up being included in the output, which is probably not what you want!</p> <h3 class="relative group"><a id="for-loops" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#for-loops"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>For loops</span></h3> <p data-svelte-h="svelte-1mepej5">For loops in Jinja look like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none 
transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">for</span></span> message <span class="hljs-keyword">in</span> messages %}</span><span class="language-xml"> | |
| </span><span class="hljs-template-variable">{{- message['content'] }}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">endfor</span></span> %}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-rw0syz">Note that whatever’s inside the <code>{{ expression block }}</code> will be printed to the output. You can use operators like | |
| <code>+</code> to combine strings inside expression blocks.</p> <h3 class="relative group"><a id="if-statements" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#if-statements"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>If statements</span></h3> <p data-svelte-h="svelte-audzqy">If statements in Jinja look like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity 
bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{%- <span class="hljs-keyword">if</span> message[<span class="hljs-string">'role'</span>] == <span class="hljs-string">'user'</span> %} | |
| {{- message[<span class="hljs-string">'content'</span>] }} | |
| {%- endif %}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-gc7ich">Note that whereas Python uses whitespace to mark the beginnings and ends of <code>for</code> and <code>if</code> blocks, Jinja requires you | |
| to explicitly end them with <code>{% endfor %}</code> and <code>{% endif %}</code>.</p> <h3 class="relative group"><a id="special-variables" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#special-variables"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Special variables</span></h3> <p data-svelte-h="svelte-1ky9l6m">Inside your template, you will have access to the list of <code>messages</code>, but you can also access several other special | |
| variables. These include special tokens like <code>bos_token</code> and <code>eos_token</code>, as well as the <code>add_generation_prompt</code> | |
| variable that we discussed above. You can also use the <code>loop</code> variable to access information about the current loop | |
| iteration, for example using <code>{% if loop.last %}</code> to check if the current message is the last message in the | |
| conversation. Here’s an example that puts these ideas together to add a generation prompt at the end of the | |
| conversation if add_generation_prompt is <code>True</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">if</span></span> loop.last and add_generation_prompt %}</span><span class="language-xml"> | |
| </span><span class="hljs-template-variable">{{- bos_token + 'Assistant:\n' }}</span><span class="language-xml"> | |
| </span><span class="hljs-template-tag">{%- <span class="hljs-name"><span class="hljs-name">endif</span></span> %}</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="compatibility-with-non-python-jinja" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#compatibility-with-non-python-jinja"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Compatibility with non-Python Jinja</span></h3> <p data-svelte-h="svelte-jvvtjt">There are multiple implementations of Jinja in various languages. They generally have the same syntax, | |
| but a key difference is that when you’re writing a template in Python you can use Python methods, such as | |
| <code>.lower()</code> on strings or <code>.items()</code> on dicts. This will break if someone tries to use your template on a non-Python | |
| implementation of Jinja. Non-Python implementations are particularly common in deployment environments, where JS | |
| and Rust are very popular.</p> <p data-svelte-h="svelte-f0ucf0">Don’t panic, though! There are a few easy changes you can make to your templates to ensure they’re compatible across | |
| all implementations of Jinja:</p> <ul data-svelte-h="svelte-doa6oc"><li>Replace Python methods with Jinja filters. These usually have the same name, for example <code>string.lower()</code> becomes | |
| <code>string|lower</code>, and <code>dict.items()</code> becomes <code>dict|items</code>. One notable change is that <code>string.strip()</code> becomes <code>string|trim</code>. | |
| See the <a href="https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters" rel="nofollow">list of built-in filters</a> | |
| in the Jinja documentation for more.</li> <li>Replace <code>True</code>, <code>False</code> and <code>None</code>, which are Python-specific, with <code>true</code>, <code>false</code> and <code>none</code>.</li> <li>Directly rendering a dict or list may give different results in other implementations (for example, string entries | |
| might change from single-quoted to double-quoted). Adding the <code>tojson</code> filter can help to ensure consistency here.</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/en/chat_templating.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_qdzrj2 = { | |
| assets: "/docs/transformers/pr_30862/en", | |
| base: "/docs/transformers/pr_30862/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/transformers/pr_30862/en/_app/immutable/entry/start.4daeeae2.js"), | |
| import("/docs/transformers/pr_30862/en/_app/immutable/entry/app.7c143034.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 11], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 154 kB
- Xet hash:
- 7bae7eab751effd4c463bac56855990dc8a61d3216bb9841e1556a3f29a24577
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.