Buckets:

hf-doc-build/doc-dev / transformers /main /zh /chat_templating.html
rtrm's picture
download
raw
95.2 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;聊天模型的模板&quot;,&quot;local&quot;:&quot;聊天模型的模板&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;介绍&quot;,&quot;local&quot;:&quot;介绍&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;我如何使用聊天模板?&quot;,&quot;local&quot;:&quot;我如何使用聊天模板&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;有自动化的聊天 pipeline 吗?&quot;,&quot;local&quot;:&quot;有自动化的聊天-pipeline-吗&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;什么是”generation prompts”?&quot;,&quot;local&quot;:&quot;什么是generation-prompts&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;我可以在训练中使用聊天模板吗?&quot;,&quot;local&quot;:&quot;我可以在训练中使用聊天模板吗&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;高级:聊天模板是如何工作的?&quot;,&quot;local&quot;:&quot;高级聊天模板是如何工作的&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;高级:编辑聊天模板&quot;,&quot;local&quot;:&quot;高级编辑聊天模板&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;如何创建聊天模板?&quot;,&quot;local&quot;:&quot;如何创建聊天模板&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;“默认”模板是什么?&quot;,&quot;local&quot;:&quot;默认模板是什么&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;我应该使用哪个模板?&quot;,&quot;local&quot;:&quot;我应该使用哪个模板&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;如何添加聊天模板?&quot;,&quot;local&quot;:&quot;如何添加聊天模板&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;高级:模板写作技巧&quot;,&quot;local&quot;:&quot;高级模板写作技巧&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;For循环&quot;,&quot;local&quot;:&quot;for循环&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;If语句&quot;,&quot;local&quot;:&quot;if语句&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;特殊变量&quot;,&quot;local&quot;:&quot;特殊变量&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;空格的注意事项&quot;,&quot;local&quot;:&quot;空格的注意事项&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/zh/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/entry/start.a61b9c50.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/scheduler.9991993c.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/singletons.2822fe91.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/index.02cfeb18.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/paths.d66588b4.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/entry/app.99775688.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/index.7fc9a5e7.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/nodes/0.f4c5a5c1.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/nodes/6.a09c7a32.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/CodeBlock.e11cba92.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/EditOnGithub.84ab7f0e.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;聊天模型的模板&quot;,&quot;local&quot;:&quot;聊天模型的模板&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;介绍&quot;,&quot;local&quot;:&quot;介绍&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;我如何使用聊天模板?&quot;,&quot;local&quot;:&quot;我如何使用聊天模板&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;有自动化的聊天 pipeline 吗?&quot;,&quot;local&quot;:&quot;有自动化的聊天-pipeline-吗&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;什么是”generation prompts”?&quot;,&quot;local&quot;:&quot;什么是generation-prompts&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;我可以在训练中使用聊天模板吗?&quot;,&quot;local&quot;:&quot;我可以在训练中使用聊天模板吗&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;高级:聊天模板是如何工作的?&quot;,&quot;local&quot;:&quot;高级聊天模板是如何工作的&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;高级:编辑聊天模板&quot;,&quot;local&quot;:&quot;高级编辑聊天模板&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;如何创建聊天模板?&quot;,&quot;local&quot;:&quot;如何创建聊天模板&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;“默认”模板是什么?&quot;,&quot;local&quot;:&quot;默认模板是什么&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;我应该使用哪个模板?&quot;,&quot;local&quot;:&quot;我应该使用哪个模板&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;如何添加聊天模板?&quot;,&quot;local&quot;:&quot;如何添加聊天模板&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;高级:模板写作技巧&quot;,&quot;local&quot;:&quot;高级模板写作技巧&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;For循环&quot;,&quot;local&quot;:&quot;for循环&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;If语句&quot;,&quot;local&quot;:&quot;if语句&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;特殊变量&quot;,&quot;local&quot;:&quot;特殊变量&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;空格的注意事项&quot;,&quot;local&quot;:&quot;空格的注意事项&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="聊天模型的模板" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#聊天模型的模板"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>聊天模型的模板</span></h1> <h2 class="relative group"><a id="介绍" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#介绍"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>介绍</span></h2> <p data-svelte-h="svelte-12gr1zf">LLM 的一个常见应用场景是聊天。在聊天上下文中,不再是连续的文本字符串构成的语句(不同于标准的语言模型),
聊天模型由一条或多条消息组成的对话组成,每条消息都有一个“用户”或“助手”等 <strong>角色</strong>,还包括消息文本。</p> <p data-svelte-h="svelte-jqg83q"><code>Tokenizer</code>类似,不同的模型对聊天的输入格式要求也不同。这就是我们添加<strong>聊天模板</strong>作为一个功能的原因。
聊天模板是<code>Tokenizer</code>的一部分。用来把问答的对话内容转换为模型的输入<code>prompt</code></p> <p data-svelte-h="svelte-1hl9y7g">让我们通过一个快速的示例来具体说明,使用<code>BlenderBot</code>模型。
BlenderBot有一个非常简单的默认模板,主要是在对话轮之间添加空格:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;facebook/blenderbot-400M-distill&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>chat = [
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Hello, how are you?&quot;</span>},
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;I&#x27;m doing great. How can I help you today?&quot;</span>},
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;I&#x27;d like to show off how chat templating works!&quot;</span>},
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer.apply_chat_template(chat, tokenize=<span class="hljs-literal">False</span>)
<span class="hljs-string">&quot; Hello, how are you? I&#x27;m doing great. How can I help you today? I&#x27;d like to show off how chat templating works!&lt;/s&gt;&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-2gdq6c">注意,整个聊天对话内容被压缩成了一整个字符串。如果我们使用默认设置的<code>tokenize=True</code>,那么该字符串也将被tokenized处理。
不过,为了看到更复杂的模板实际运行,让我们使用<code>mistralai/Mistral-7B-Instruct-v0.1</code>模型。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;mistralai/Mistral-7B-Instruct-v0.1&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>chat = [
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Hello, how are you?&quot;</span>},
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;I&#x27;m doing great. How can I help you today?&quot;</span>},
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;I&#x27;d like to show off how chat templating works!&quot;</span>},
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer.apply_chat_template(chat, tokenize=<span class="hljs-literal">False</span>)
<span class="hljs-string">&quot;&lt;s&gt;[INST] Hello, how are you? [/INST]I&#x27;m doing great. How can I help you today?&lt;/s&gt; [INST] I&#x27;d like to show off how chat templating works! [/INST]&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1uu5txi">可以看到,这一次tokenizer已经添加了[INST]和[/INST]来表示用户消息的开始和结束。
Mistral-instruct是有使用这些token进行训练的,但BlenderBot没有。</p> <h2 class="relative group"><a id="我如何使用聊天模板" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#我如何使用聊天模板"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>我如何使用聊天模板?</span></h2> <p data-svelte-h="svelte-1x36djy">正如您在上面的示例中所看到的,聊天模板非常容易使用。只需构建一系列带有<code>role</code><code>content</code>键的消息,
然后将其传递给<a href="/docs/transformers/main/zh/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a>方法。
另外,在将聊天模板用作模型预测的输入时,还建议使用<code>add_generation_prompt=True</code>来添加<a href="#%E4%BB%80%E4%B9%88%E6%98%AFgeneration-prompts">generation prompt</a></p> <p data-svelte-h="svelte-18wsdqk">这是一个准备<code>model.generate()</code>的示例,使用<code>Zephyr</code>模型:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer
checkpoint = <span class="hljs-string">&quot;HuggingFaceH4/zephyr-7b-beta&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint) <span class="hljs-comment"># You may want to use bfloat16 and/or move to GPU here</span>
messages = [
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;You are a friendly chatbot who always responds in the style of a pirate&quot;</span>,
},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;How many helicopters can a human eat in one sitting?&quot;</span>},
]
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=<span class="hljs-literal">True</span>, add_generation_prompt=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-built_in">print</span>(tokenizer.decode(tokenized_chat[<span class="hljs-number">0</span>]))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-szjh6z">这将生成Zephyr期望的输入格式的字符串。它看起来像这样:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->&lt;|system|&gt;
You are a friendly chatbot who always responds in the style of a pirate&lt;/s&gt;
&lt;|user|&gt;
How many helicopters can a human eat in one sitting?&lt;/s&gt;
&lt;|assistant|&gt;<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1mnwvlc">现在我们已经按照<code>Zephyr</code>的要求传入prompt了,我们可以使用模型来生成对用户问题的回复:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->outputs = model.generate(tokenized_chat, max_new_tokens=<span class="hljs-number">128</span>)
<span class="hljs-built_in">print</span>(tokenizer.decode(outputs[<span class="hljs-number">0</span>]))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-z7s6bp">输出结果是:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->&lt;|system|&gt;
You are a friendly chatbot who always responds in the style of a pirate&lt;/s&gt;
&lt;|user|&gt;
How many helicopters can a human eat in one sitting?&lt;/s&gt;
&lt;|assistant|&gt;
Matey, I&#x27;m afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o&#x27; grog, a savory bowl o&#x27; stew, or a delicious loaf o&#x27; bread. But helicopters, they be for transportin&#x27; and movin&#x27; around, not for eatin&#x27;. So, I&#x27;d say none, me hearties. None at all.<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-6li582">啊,原来这么容易!</p> <h2 class="relative group"><a id="有自动化的聊天-pipeline-吗" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#有自动化的聊天-pipeline-吗"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>有自动化的聊天 pipeline 吗?</span></h2> <p data-svelte-h="svelte-1j82d6m">有的,<a href="/docs/transformers/main/zh/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a>。这个<code>pipeline</code>的设计是为了方便使用聊天模型。让我们再试一次 Zephyr 的例子,但这次使用<code>pipeline</code></p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
pipe = pipeline(<span class="hljs-string">&quot;text-generation&quot;</span>, <span class="hljs-string">&quot;HuggingFaceH4/zephyr-7b-beta&quot;</span>)
messages = [
{
<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;system&quot;</span>,
<span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;You are a friendly chatbot who always responds in the style of a pirate&quot;</span>,
},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;How many helicopters can a human eat in one sitting?&quot;</span>},
]
<span class="hljs-built_in">print</span>(pipe(messages, max_new_tokens=<span class="hljs-number">256</span>)[<span class="hljs-string">&#x27;generated_text&#x27;</span>][-<span class="hljs-number">1</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{&#x27;role&#x27;: &#x27;assistant&#x27;, &#x27;content&#x27;: &quot;Matey, I&#x27;m afraid I must inform ye that humans cannot eat helicopters. Helicopters are not food, they are flying machines. Food is meant to be eaten, like a hearty plate o&#x27; grog, a savory bowl o&#x27; stew, or a delicious loaf o&#x27; bread. But helicopters, they be for transportin&#x27; and movin&#x27; around, not for eatin&#x27;. So, I&#x27;d say none, me hearties. None at all.&quot;}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1tow0da"><a href="/docs/transformers/main/zh/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a>将负责处理所有的<code>tokenized</code>并调用<code>apply_chat_template</code>,一旦模型有了聊天模板,您只需要初始化pipeline并传递消息列表!</p> <h2 class="relative group"><a id="什么是generation-prompts" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#什么是generation-prompts"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>什么是”generation prompts”?</span></h2> <p data-svelte-h="svelte-iu4par">您可能已经注意到<code>apply_chat_template</code>方法有一个<code>add_generation_prompt</code>参数。
这个参数告诉模板添加模型开始答复的标记。例如,考虑以下对话:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->messages = [
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Hi there!&quot;</span>},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Nice to meet you!&quot;</span>},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Can I ask a question?&quot;</span>}
]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-n9il4r">这是<code>add_generation_prompt=False</code>的结果,使用ChatML模板:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer.apply_chat_template(messages, tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">False</span>)
<span class="hljs-string">&quot;&quot;&quot;&lt;|im_start|&gt;user
Hi there!&lt;|im_end|&gt;
&lt;|im_start|&gt;assistant
Nice to meet you!&lt;|im_end|&gt;
&lt;|im_start|&gt;user
Can I ask a question?&lt;|im_end|&gt;
&quot;&quot;&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-53qjqh">下面这是<code>add_generation_prompt=True</code>的结果:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer.apply_chat_template(messages, tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">True</span>)
<span class="hljs-string">&quot;&quot;&quot;&lt;|im_start|&gt;user
Hi there!&lt;|im_end|&gt;
&lt;|im_start|&gt;assistant
Nice to meet you!&lt;|im_end|&gt;
&lt;|im_start|&gt;user
Can I ask a question?&lt;|im_end|&gt;
&lt;|im_start|&gt;assistant
&quot;&quot;&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-d7wbjj">这一次我们添加了模型开始答复的标记。这可以确保模型生成文本时只会给出答复,而不会做出意外的行为,比如继续用户的消息。
记住,聊天模型只是语言模型,它们被训练来继续文本,而聊天对它们来说只是一种特殊的文本!
你需要用适当的控制标记来引导它们,让它们知道自己应该做什么。</p> <p data-svelte-h="svelte-1w6qwu1">并非所有模型都需要生成提示。一些模型,如BlenderBot和LLaMA,在模型回复之前没有任何特殊标记。
在这些情况下,<code>add_generation_prompt</code>参数将不起作用。<code>add_generation_prompt</code>参数取决于你所使用的模板。</p> <h2 class="relative group"><a id="我可以在训练中使用聊天模板吗" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#我可以在训练中使用聊天模板吗"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>我可以在训练中使用聊天模板吗?</span></h2> <p data-svelte-h="svelte-b1lzbk">可以!我们建议您将聊天模板应用为数据集的预处理步骤。之后,您可以像进行任何其他语言模型训练任务一样继续。
在训练时,通常应该设置<code>add_generation_prompt=False</code>,因为添加的助手标记在训练过程中并不会有帮助。
让我们看一个例子:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset
tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;HuggingFaceH4/zephyr-7b-beta&quot;</span>)
chat1 = [
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Which is bigger, the moon or the sun?&quot;</span>},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;The sun.&quot;</span>}
]
chat2 = [
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Which is bigger, a virus or a bacterium?&quot;</span>},
{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;A bacterium.&quot;</span>}
]
dataset = Dataset.from_dict({<span class="hljs-string">&quot;chat&quot;</span>: [chat1, chat2]})
dataset = dataset.<span class="hljs-built_in">map</span>(<span class="hljs-keyword">lambda</span> x: {<span class="hljs-string">&quot;formatted_chat&quot;</span>: tokenizer.apply_chat_template(x[<span class="hljs-string">&quot;chat&quot;</span>], tokenize=<span class="hljs-literal">False</span>, add_generation_prompt=<span class="hljs-literal">False</span>)})
<span class="hljs-built_in">print</span>(dataset[<span class="hljs-string">&#x27;formatted_chat&#x27;</span>][<span class="hljs-number">0</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1j2yk26">结果是:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->&lt;|user|&gt;
Which is bigger, the moon or the sun?&lt;/s&gt;
&lt;|assistant|&gt;
The sun.&lt;/s&gt;<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-g2qoag">这样,后面你可以使用<code>formatted_chat</code>列,跟标准语言建模任务中一样训练即可。</p> <h2 class="relative group"><a id="高级聊天模板是如何工作的" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#高级聊天模板是如何工作的"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>高级:聊天模板是如何工作的?</span></h2> <p data-svelte-h="svelte-faviix">模型的聊天模板存储在<code>tokenizer.chat_template</code>属性上。如果没有设置,则将使用该模型的默认模板。
让我们来看看<code>BlenderBot</code>的模板:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;facebook/blenderbot-400M-distill&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer.chat_template
<span class="hljs-string">&quot;{% for message in messages %}{% if message[&#x27;role&#x27;] == &#x27;user&#x27; %}{{ &#x27; &#x27; }}{% endif %}{{ message[&#x27;content&#x27;] }}{% if not loop.last %}{{ &#x27; &#x27; }}{% endif %}{% endfor %}{{ eos_token }}&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ur5734">这看着有点复杂。让我们添加一些换行和缩进,使其更易读。
请注意,默认情况下忽略每个块后的第一个换行以及块之前的任何前导空格,
使用Jinja的<code>trim_blocks</code><code>lstrip_blocks</code>标签。
这里,请注意空格的使用。我们强烈建议您仔细检查模板是否打印了多余的空格!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{%</span> <span class="hljs-name">for</span> message <span class="hljs-keyword">in</span> messages <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">if</span> message[<span class="hljs-string">&#x27;role&#x27;</span>] == <span class="hljs-string">&#x27;user&#x27;</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ <span class="hljs-string">&#x27; &#x27;</span> }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endif</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ message[<span class="hljs-string">&#x27;content&#x27;</span>] }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">if</span> not loop.last <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ <span class="hljs-string">&#x27; &#x27;</span> }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endif</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endfor</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ eos_token }}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1xg0oyk">如果你之前不了解<a href="https://jinja.palletsprojects.com/en/3.1.x/templates/" rel="nofollow">Jinja template</a>
Jinja是一种模板语言,允许你编写简单的代码来生成文本。
在许多方面,代码和语法类似于Python。在纯Python中,这个模板看起来会像这样:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">for</span> idx, message <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(messages):
<span class="hljs-keyword">if</span> message[<span class="hljs-string">&#x27;role&#x27;</span>] == <span class="hljs-string">&#x27;user&#x27;</span>:
<span class="hljs-built_in">print</span>(<span class="hljs-string">&#x27; &#x27;</span>)
<span class="hljs-built_in">print</span>(message[<span class="hljs-string">&#x27;content&#x27;</span>])
<span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> idx == <span class="hljs-built_in">len</span>(messages) - <span class="hljs-number">1</span>: <span class="hljs-comment"># Check for the last message in the conversation</span>
<span class="hljs-built_in">print</span>(<span class="hljs-string">&#x27; &#x27;</span>)
<span class="hljs-built_in">print</span>(eos_token)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-126w0pj">这里使用Jinja模板处理如下三步:</p> <ol data-svelte-h="svelte-1i1qfyx"><li>对于每条消息,如果消息是用户消息,则在其前面添加一个空格,否则不打印任何内容</li> <li>添加消息内容</li> <li>如果消息不是最后一条,请在其后添加两个空格。在最后一条消息之后,打印<code>EOS</code></li></ol> <p data-svelte-h="svelte-1f8rf5v">这是一个简单的模板,它不添加任何控制tokens,也不支持<code>system</code>消息(常用于指导模型在后续对话中如何表现)。
但 Jinja 给了你很大的灵活性来做这些事情!让我们看一个 Jinja 模板,
它可以实现类似于LLaMA的prompt输入(请注意,真正的LLaMA模板包括<code>system</code>消息,请不要在实际代码中使用这个简单模板!)</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{%</span> <span class="hljs-name">for</span> message <span class="hljs-keyword">in</span> messages <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">if</span> message[<span class="hljs-string">&#x27;role&#x27;</span>] == <span class="hljs-string">&#x27;user&#x27;</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ bos_token + <span class="hljs-string">&#x27;[INST] &#x27;</span> + message[<span class="hljs-string">&#x27;content&#x27;</span>] + <span class="hljs-string">&#x27; [/INST]&#x27;</span> }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">elif</span> message[<span class="hljs-string">&#x27;role&#x27;</span>] == <span class="hljs-string">&#x27;system&#x27;</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ <span class="hljs-string">&#x27;&lt;&lt;SYS&gt;&gt;\\n&#x27;</span> + message[<span class="hljs-string">&#x27;content&#x27;</span>] + <span class="hljs-string">&#x27;\\n&lt;&lt;/SYS&gt;&gt;\\n\\n&#x27;</span> }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">elif</span> message[<span class="hljs-string">&#x27;role&#x27;</span>] == <span class="hljs-string">&#x27;assistant&#x27;</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ <span class="hljs-string">&#x27; &#x27;</span> + message[<span class="hljs-string">&#x27;content&#x27;</span>] + <span class="hljs-string">&#x27; &#x27;</span> + eos_token }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endif</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endfor</span> <span class="hljs-template-tag">%}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-16pwtdq">这里稍微看一下,就能明白这个模板的作用:它根据每条消息的“角色”添加对应的消息。
<code>user</code><code>assistant</code><code>system</code>的消息需要分别处理,因为它们代表不同的角色输入。</p> <h2 class="relative group"><a id="高级编辑聊天模板" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#高级编辑聊天模板"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>高级:编辑聊天模板</span></h2> <h3 class="relative group"><a id="如何创建聊天模板" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#如何创建聊天模板"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>如何创建聊天模板?</span></h3> <p data-svelte-h="svelte-qjomqd">很简单,你只需编写一个jinja模板并设置<code>tokenizer.chat_template</code>。你也可以从一个现有模板开始,只需要简单编辑便可以!
例如,我们可以采用上面的LLaMA模板,并在助手消息中添加”[ASST]“和”[/ASST]“:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{%</span> <span class="hljs-name">for</span> message <span class="hljs-keyword">in</span> messages <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">if</span> message[<span class="hljs-string">&#x27;role&#x27;</span>] == <span class="hljs-string">&#x27;user&#x27;</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ bos_token + <span class="hljs-string">&#x27;[INST] &#x27;</span> + message[<span class="hljs-string">&#x27;content&#x27;</span>].strip() + <span class="hljs-string">&#x27; [/INST]&#x27;</span> }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">elif</span> message[<span class="hljs-string">&#x27;role&#x27;</span>] == <span class="hljs-string">&#x27;system&#x27;</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ <span class="hljs-string">&#x27;&lt;&lt;SYS&gt;&gt;\\n&#x27;</span> + message[<span class="hljs-string">&#x27;content&#x27;</span>].strip() + <span class="hljs-string">&#x27;\\n&lt;&lt;/SYS&gt;&gt;\\n\\n&#x27;</span> }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">elif</span> message[<span class="hljs-string">&#x27;role&#x27;</span>] == <span class="hljs-string">&#x27;assistant&#x27;</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ <span class="hljs-string">&#x27;[ASST] &#x27;</span> + message[<span class="hljs-string">&#x27;content&#x27;</span>] + <span class="hljs-string">&#x27; [/ASST]&#x27;</span> + eos_token }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endif</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endfor</span> <span class="hljs-template-tag">%}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1d5yh10">现在,只需设置<code>tokenizer.chat_template</code>属性。下次使用<a href="/docs/transformers/main/zh/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a>时,它将使用您的新模板!
此属性将保存在<code>tokenizer_config.json</code>文件中,因此您可以使用<a href="/docs/transformers/main/zh/main_classes/model#transformers.utils.PushToHubMixin.push_to_hub">push_to_hub()</a>将新模板上传到 Hub,
这样每个人都可以使用你模型的模板!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->template = tokenizer.chat_template
template = template.replace(<span class="hljs-string">&quot;SYS&quot;</span>, <span class="hljs-string">&quot;SYSTEM&quot;</span>) <span class="hljs-comment"># Change the system token</span>
tokenizer.chat_template = template <span class="hljs-comment"># Set the new template</span>
tokenizer.push_to_hub(<span class="hljs-string">&quot;model_name&quot;</span>) <span class="hljs-comment"># Upload your new template to the Hub!</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-10id108">由于<a href="/docs/transformers/main/zh/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a>方法是由<a href="/docs/transformers/main/zh/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a>类调用,
因此一旦你设置了聊天模板,您的模型将自动与<a href="/docs/transformers/main/zh/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a>兼容。</p> <h3 class="relative group"><a id="默认模板是什么" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#默认模板是什么"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>“默认”模板是什么?</span></h3> <p data-svelte-h="svelte-p3sf7i">在引入聊天模板(chat_template)之前,聊天prompt是在模型中通过硬编码处理的。为了向前兼容,我们保留了这种硬编码处理聊天prompt的方法。
如果一个模型没有设置聊天模板,但其模型有默认模板,<code>TextGenerationPipeline</code>类和<code>apply_chat_template</code>等方法将使用该模型的聊天模板。
您可以通过检查<code>tokenizer.default_chat_template</code>属性来查找<code>tokenizer</code>的默认模板。</p> <p data-svelte-h="svelte-uaicwn">这是我们纯粹为了向前兼容性而做的事情,以避免破坏任何现有的工作流程。即使默认的聊天模板适用于您的模型,
我们强烈建议通过显式设置<code>chat_template</code>属性来覆盖默认模板,以便向用户清楚地表明您的模型已经正确的配置了聊天模板,
并且为了未来防范默认模板被修改或弃用的情况。</p> <h3 class="relative group"><a id="我应该使用哪个模板" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#我应该使用哪个模板"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>我应该使用哪个模板?</span></h3> <p data-svelte-h="svelte-1h7775y">在为已经训练过的聊天模型设置模板时,您应确保模板与模型在训练期间看到的消息格式完全匹配,否则可能会导致性能下降。
即使您继续对模型进行训练,也应保持聊天模板不变,这样可能会获得最佳性能。
这与<code>tokenization</code>非常类似,在推断时,你选用跟训练时一样的<code>tokenization</code>,通常会获得最佳性能。</p> <p data-svelte-h="svelte-1ewfwsy">如果您从头开始训练模型,或者在微调基础语言模型进行聊天时,您有很大的自由选择适当的模板!
LLMs足够聪明,可以学会处理许多不同的输入格式。我们为没有特定类别模板的模型提供一个默认模板,该模板遵循
<code>ChatML</code> format格式要求,对于许多用例来说,
这是一个很好的、灵活的选择。</p> <p data-svelte-h="svelte-1t3g01w">默认模板看起来像这样:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{%</span> <span class="hljs-name">for</span> message <span class="hljs-keyword">in</span> messages <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{<span class="hljs-string">&#x27;&lt;|im_start|&gt;&#x27;</span> + message[<span class="hljs-string">&#x27;role&#x27;</span>] + <span class="hljs-string">&#x27;\n&#x27;</span> + message[<span class="hljs-string">&#x27;content&#x27;</span>] + <span class="hljs-string">&#x27;&lt;|im_end|&gt;&#x27;</span> + <span class="hljs-string">&#x27;\n&#x27;</span>}}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endfor</span> <span class="hljs-template-tag">%}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1t0o56l">如果您喜欢这个模板,下面是一行代码的模板形式,它可以直接复制到您的代码中。这一行代码还包括了[generation prompts](#什么是”generation prompts”?),
但请注意它不会添加<code>BOS</code><code>EOS</code>token。
如果您的模型需要这些token,它们不会被<code>apply_chat_template</code>自动添加,换句话说,文本的默认处理参数是<code>add_special_tokens=False</code>
这是为了避免模板和<code>add_special_tokens</code>逻辑产生冲突,如果您的模型需要特殊tokens,请确保将它们添加到模板中!</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="language-xml">tokenizer.chat_template = &quot;</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">if</span> not add_generation_prompt is defined <span class="hljs-template-tag">%}</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">set</span> add_generation_prompt = false <span class="hljs-template-tag">%}</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endif</span> <span class="hljs-template-tag">%}</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">for</span> message <span class="hljs-keyword">in</span> messages <span class="hljs-template-tag">%}</span><span class="hljs-template-variable">{{<span class="hljs-string">&#x27;&lt;|im_start|&gt;&#x27;</span> + message[<span class="hljs-string">&#x27;role&#x27;</span>] + <span class="hljs-string">&#x27;\n&#x27;</span> + message[<span class="hljs-string">&#x27;content&#x27;</span>] + <span class="hljs-string">&#x27;&lt;|im_end|&gt;&#x27;</span> + <span class="hljs-string">&#x27;\n&#x27;</span>}}</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endfor</span> <span class="hljs-template-tag">%}</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">if</span> add_generation_prompt <span class="hljs-template-tag">%}</span><span class="hljs-template-variable">{{ <span class="hljs-string">&#x27;&lt;|im_start|&gt;assistant\n&#x27;</span> }}</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endif</span> <span class="hljs-template-tag">%}</span><span class="language-xml">&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11rufda">该模板将每条消息包装在<code>&lt;|im_start|&gt;</code><code>&lt;|im_end|&gt;</code>tokens里面,并将角色简单地写为字符串,这样可以灵活地训练角色。输出如下:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->&lt;|im_start|&gt;system
You are a helpful chatbot that will do its best not to say anything so stupid that people tweet about it.&lt;|im_end|&gt;
&lt;|im_start|&gt;user
How are you?&lt;|im_end|&gt;
&lt;|im_start|&gt;assistant
I&#x27;m doing great!&lt;|im_end|&gt;<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9rgz4t"><code>user</code><code>system</code><code>assistant</code>是对话助手模型的标准角色,如果您的模型要与<a href="/docs/transformers/main/zh/main_classes/pipelines#transformers.TextGenerationPipeline">TextGenerationPipeline</a>兼容,我们建议你使用这些角色。
但您可以不局限于这些角色,模板非常灵活,任何字符串都可以成为角色。</p> <h3 class="relative group"><a id="如何添加聊天模板" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#如何添加聊天模板"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>如何添加聊天模板?</span></h3> <p data-svelte-h="svelte-pgliis">如果您有任何聊天模型,您应该设置它们的<code>tokenizer.chat_template</code>属性,并使用<a href="/docs/transformers/main/zh/internal/tokenization_utils#transformers.PreTrainedTokenizerBase.apply_chat_template">apply_chat_template()</a>测试,
然后将更新后的<code>tokenizer</code>推送到 Hub。
即使您不是模型所有者,如果您正在使用一个空的聊天模板或者仍在使用默认的聊天模板,
请发起一个<a href="https://huggingface.co/docs/hub/repositories-pull-requests-discussions" rel="nofollow">pull request</a>,以便正确设置该属性!</p> <p data-svelte-h="svelte-13qjuka">一旦属性设置完成,就完成了!<code>tokenizer.apply_chat_template</code>现在将在该模型中正常工作,
这意味着它也会自动支持在诸如<code>TextGenerationPipeline</code>的地方!</p> <p data-svelte-h="svelte-1qxn4vk">通过确保模型具有这一属性,我们可以确保整个社区都能充分利用开源模型的全部功能。
格式不匹配已经困扰这个领域并悄悄地损害了性能太久了,是时候结束它们了!</p> <h2 class="relative group"><a id="高级模板写作技巧" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#高级模板写作技巧"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>高级:模板写作技巧</span></h2> <p data-svelte-h="svelte-t1396b">如果你对Jinja不熟悉,我们通常发现编写聊天模板的最简单方法是先编写一个简短的Python脚本,按照你想要的方式格式化消息,然后将该脚本转换为模板。</p> <p data-svelte-h="svelte-59uiu8">请记住,模板处理程序将接收对话历史作为名为<code>messages</code>的变量。每条<code>message</code>都是一个带有两个键<code>role</code><code>content</code>的字典。
您可以在模板中像在Python中一样访问<code>messages</code>,这意味着您可以使用<code>{% for message in messages %}</code>进行循环,
或者例如使用<code>{{ messages[0] }}</code>访问单个消息。</p> <p data-svelte-h="svelte-1aooy9k">您也可以使用以下提示将您的代码转换为Jinja:</p> <h3 class="relative group"><a id="for循环" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#for循环"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>For循环</span></h3> <p data-svelte-h="svelte-1qr6npj">在Jinja中,for循环看起来像这样:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{% <span class="hljs-name"><span class="hljs-name">for</span></span> message <span class="hljs-keyword">in</span> messages %}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ message[&#x27;content&#x27;] }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{% <span class="hljs-name"><span class="hljs-name">endfor</span></span> %}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1necxra">请注意,<code>{{ expression block }}</code>中的内容将被打印到输出。您可以在表达式块中使用像<code>+</code>这样的运算符来组合字符串。</p> <h3 class="relative group"><a id="if语句" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#if语句"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>If语句</span></h3> <p data-svelte-h="svelte-1ioywy7">Jinja中的if语句如下所示:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{%</span> <span class="hljs-name">if</span> message[<span class="hljs-string">&#x27;role&#x27;</span>] == <span class="hljs-string">&#x27;user&#x27;</span> <span class="hljs-template-tag">%}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ message[<span class="hljs-string">&#x27;content&#x27;</span>] }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{%</span> <span class="hljs-name">endif</span> <span class="hljs-template-tag">%}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-6wil9x">注意Jinja使用<code>{% endfor %}</code><code>{% endif %}</code>来表示<code>for</code><code>if</code>的结束。</p> <h3 class="relative group"><a id="特殊变量" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#特殊变量"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>特殊变量</span></h3> <p data-svelte-h="svelte-y2yvc0">在您的模板中,您将可以访问<code>messages</code>列表,但您还可以访问其他几个特殊变量。
这些包括特殊<code>token</code>,如<code>bos_token</code><code>eos_token</code>,以及我们上面讨论过的<code>add_generation_prompt</code>变量。
您还可以使用<code>loop</code>变量来访问有关当前循环迭代的信息,例如使用<code>{% if loop.last %}</code>来检查当前消息是否是对话中的最后一条消息。</p> <p data-svelte-h="svelte-mdpqzu">以下是一个示例,如果<code>add_generation_prompt=True</code>需要在对话结束时添加<code>generate_prompt</code></p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-template-tag">{% <span class="hljs-name"><span class="hljs-name">if</span></span> loop.last and add_generation_prompt %}</span><span class="language-xml">
</span><span class="hljs-template-variable">{{ bos_token + &#x27;Assistant:\n&#x27; }}</span><span class="language-xml">
</span><span class="hljs-template-tag">{% <span class="hljs-name"><span class="hljs-name">endif</span></span> %}</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="空格的注意事项" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#空格的注意事项"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>空格的注意事项</span></h3> <p data-svelte-h="svelte-u83arb">我们已经尽可能尝试让Jinja忽略除<code>{{ expressions }}</code>之外的空格。
然而,请注意Jinja是一个通用的模板引擎,它可能会将同一行文本块之间的空格视为重要,并将其打印到输出中。
我们<strong>强烈</strong>建议在上传模板之前检查一下,确保模板没有在不应该的地方打印额外的空格!</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/zh/chat_templating.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_173uja2 = {
assets: "/docs/transformers/main/zh",
base: "/docs/transformers/main/zh",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/main/zh/_app/immutable/entry/start.a61b9c50.js"),
import("/docs/transformers/main/zh/_app/immutable/entry/app.99775688.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 6],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
95.2 kB
·
Xet hash:
577173a79e8612d2e5c99ed5a9b2f3a71d0f52abb38f67bd953003b0628681fa

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.