Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content='{"title":"Dataset formats and types","local":"dataset-formats-and-types","sections":[{"title":"Overview of the dataset formats and types","local":"overview-of-the-dataset-formats-and-types","sections":[{"title":"Formats","local":"formats","sections":[{"title":"Standard","local":"standard","sections":[],"depth":4},{"title":"Conversational","local":"conversational","sections":[],"depth":4},{"title":"Tool Calling","local":"tool-calling","sections":[],"depth":4}],"depth":3},{"title":"Harmony","local":"harmony","sections":[],"depth":3},{"title":"Types","local":"types","sections":[{"title":"Language modeling","local":"language-modeling","sections":[],"depth":4},{"title":"Prompt-only","local":"prompt-only","sections":[],"depth":4},{"title":"Prompt-completion","local":"prompt-completion","sections":[],"depth":4},{"title":"Preference","local":"preference","sections":[],"depth":4},{"title":"Unpaired preference","local":"unpaired-preference","sections":[],"depth":4},{"title":"Stepwise supervision","local":"stepwise-supervision","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"Which dataset type to use?","local":"which-dataset-type-to-use","sections":[],"depth":2},{"title":"Using any dataset with TRL: preprocessing and conversion","local":"using-any-dataset-with-trl-preprocessing-and-conversion","sections":[{"title":"Example: UltraFeedback dataset","local":"example-ultrafeedback-dataset","sections":[],"depth":3}],"depth":2},{"title":"Utilities for converting dataset types","local":"utilities-for-converting-dataset-types","sections":[{"title":"From prompt-completion to language modeling dataset","local":"from-prompt-completion-to-language-modeling-dataset","sections":[],"depth":3},{"title":"From prompt-completion to prompt-only dataset","local":"from-prompt-completion-to-prompt-only-dataset","sections":[],"depth":3},{"title":"From preference with implicit prompt to language modeling dataset","local":"from-preference-with-implicit-prompt-to-language-modeling-dataset","sections":[],"depth":3},{"title":"From preference with implicit prompt to prompt-completion dataset","local":"from-preference-with-implicit-prompt-to-prompt-completion-dataset","sections":[],"depth":3},{"title":"From preference with implicit prompt to prompt-only dataset","local":"from-preference-with-implicit-prompt-to-prompt-only-dataset","sections":[],"depth":3},{"title":"From implicit to explicit prompt preference dataset","local":"from-implicit-to-explicit-prompt-preference-dataset","sections":[],"depth":3},{"title":"From preference with implicit prompt to unpaired preference dataset","local":"from-preference-with-implicit-prompt-to-unpaired-preference-dataset","sections":[],"depth":3},{"title":"From preference to language modeling dataset","local":"from-preference-to-language-modeling-dataset","sections":[],"depth":3},{"title":"From preference to prompt-completion dataset","local":"from-preference-to-prompt-completion-dataset","sections":[],"depth":3},{"title":"From preference to prompt-only dataset","local":"from-preference-to-prompt-only-dataset","sections":[],"depth":3},{"title":"From explicit to implicit prompt preference dataset","local":"from-explicit-to-implicit-prompt-preference-dataset","sections":[],"depth":3},{"title":"From preference to unpaired preference dataset","local":"from-preference-to-unpaired-preference-dataset","sections":[],"depth":3},{"title":"From unpaired preference to language modeling dataset","local":"from-unpaired-preference-to-language-modeling-dataset","sections":[],"depth":3},{"title":"From unpaired preference to prompt-completion dataset","local":"from-unpaired-preference-to-prompt-completion-dataset","sections":[],"depth":3},{"title":"From unpaired preference to prompt-only dataset","local":"from-unpaired-preference-to-prompt-only-dataset","sections":[],"depth":3},{"title":"From stepwise supervision to language modeling dataset","local":"from-stepwise-supervision-to-language-modeling-dataset","sections":[],"depth":3},{"title":"From stepwise supervision to prompt-completion dataset","local":"from-stepwise-supervision-to-prompt-completion-dataset","sections":[],"depth":3},{"title":"From stepwise supervision to prompt-only dataset","local":"from-stepwise-supervision-to-prompt-only-dataset","sections":[],"depth":3},{"title":"From stepwise supervision to unpaired preference dataset","local":"from-stepwise-supervision-to-unpaired-preference-dataset","sections":[],"depth":3}],"depth":2},{"title":"Vision datasets","local":"vision-datasets","sections":[],"depth":2}],"depth":1}'> | |
| <link href="/docs/trl/pr_5607/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/entry/start.151d81bd.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/scheduler.7b731bd4.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/singletons.2cf51804.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/index.ac28c20f.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/paths.ba01f37d.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/entry/app.3d9a91c0.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/preload-helper.e1689b3a.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/index.cc268345.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/nodes/0.cd288160.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/nodes/12.b0e3ac71.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.f0d99f98.js"> | |
| <link rel="modulepreload" href="/docs/trl/pr_5607/en/_app/immutable/chunks/CodeBlock.169a125f.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content='{"title":"Dataset formats and types","local":"dataset-formats-and-types","sections":[{"title":"Overview of the dataset formats and types","local":"overview-of-the-dataset-formats-and-types","sections":[{"title":"Formats","local":"formats","sections":[{"title":"Standard","local":"standard","sections":[],"depth":4},{"title":"Conversational","local":"conversational","sections":[],"depth":4},{"title":"Tool Calling","local":"tool-calling","sections":[],"depth":4}],"depth":3},{"title":"Harmony","local":"harmony","sections":[],"depth":3},{"title":"Types","local":"types","sections":[{"title":"Language modeling","local":"language-modeling","sections":[],"depth":4},{"title":"Prompt-only","local":"prompt-only","sections":[],"depth":4},{"title":"Prompt-completion","local":"prompt-completion","sections":[],"depth":4},{"title":"Preference","local":"preference","sections":[],"depth":4},{"title":"Unpaired preference","local":"unpaired-preference","sections":[],"depth":4},{"title":"Stepwise supervision","local":"stepwise-supervision","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"Which dataset type to use?","local":"which-dataset-type-to-use","sections":[],"depth":2},{"title":"Using any dataset with TRL: preprocessing and conversion","local":"using-any-dataset-with-trl-preprocessing-and-conversion","sections":[{"title":"Example: UltraFeedback dataset","local":"example-ultrafeedback-dataset","sections":[],"depth":3}],"depth":2},{"title":"Utilities for converting dataset types","local":"utilities-for-converting-dataset-types","sections":[{"title":"From prompt-completion to language modeling dataset","local":"from-prompt-completion-to-language-modeling-dataset","sections":[],"depth":3},{"title":"From prompt-completion to prompt-only dataset","local":"from-prompt-completion-to-prompt-only-dataset","sections":[],"depth":3},{"title":"From preference with implicit prompt to language modeling dataset","local":"from-preference-with-implicit-prompt-to-language-modeling-dataset","sections":[],"depth":3},{"title":"From preference with implicit prompt to prompt-completion dataset","local":"from-preference-with-implicit-prompt-to-prompt-completion-dataset","sections":[],"depth":3},{"title":"From preference with implicit prompt to prompt-only dataset","local":"from-preference-with-implicit-prompt-to-prompt-only-dataset","sections":[],"depth":3},{"title":"From implicit to explicit prompt preference dataset","local":"from-implicit-to-explicit-prompt-preference-dataset","sections":[],"depth":3},{"title":"From preference with implicit prompt to unpaired preference dataset","local":"from-preference-with-implicit-prompt-to-unpaired-preference-dataset","sections":[],"depth":3},{"title":"From preference to language modeling dataset","local":"from-preference-to-language-modeling-dataset","sections":[],"depth":3},{"title":"From preference to prompt-completion dataset","local":"from-preference-to-prompt-completion-dataset","sections":[],"depth":3},{"title":"From preference to prompt-only dataset","local":"from-preference-to-prompt-only-dataset","sections":[],"depth":3},{"title":"From explicit to implicit prompt preference dataset","local":"from-explicit-to-implicit-prompt-preference-dataset","sections":[],"depth":3},{"title":"From preference to unpaired preference dataset","local":"from-preference-to-unpaired-preference-dataset","sections":[],"depth":3},{"title":"From unpaired preference to language modeling dataset","local":"from-unpaired-preference-to-language-modeling-dataset","sections":[],"depth":3},{"title":"From unpaired preference to prompt-completion dataset","local":"from-unpaired-preference-to-prompt-completion-dataset","sections":[],"depth":3},{"title":"From unpaired preference to prompt-only dataset","local":"from-unpaired-preference-to-prompt-only-dataset","sections":[],"depth":3},{"title":"From stepwise supervision to language modeling dataset","local":"from-stepwise-supervision-to-language-modeling-dataset","sections":[],"depth":3},{"title":"From stepwise supervision to prompt-completion dataset","local":"from-stepwise-supervision-to-prompt-completion-dataset","sections":[],"depth":3},{"title":"From stepwise supervision to prompt-only dataset","local":"from-stepwise-supervision-to-prompt-only-dataset","sections":[],"depth":3},{"title":"From stepwise supervision to unpaired preference dataset","local":"from-stepwise-supervision-to-unpaired-preference-dataset","sections":[],"depth":3}],"depth":2},{"title":"Vision datasets","local":"vision-datasets","sections":[],"depth":2}],"depth":1}'><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" 
height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="dataset-formats-and-types" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dataset-formats-and-types"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dataset formats and types</span></h1> <p data-svelte-h="svelte-1976wsu">This guide provides an overview of the dataset formats and types supported by each trainer in TRL.</p> <h2 
class="relative group"><a id="overview-of-the-dataset-formats-and-types" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#overview-of-the-dataset-formats-and-types"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Overview of the dataset formats and types</span></h2> <ul data-svelte-h="svelte-fvfcsu"><li>The <em>format</em> of a dataset refers to how the data is structured, typically categorized as either <em>standard</em> or <em>conversational</em>.</li> <li>The <em>type</em> is associated with the specific task the dataset is designed for, such as <em>prompt-only</em> or <em>preference</em>. Each type is characterized by its columns, which vary according to the task, as shown in the table.</li></ul> <table data-svelte-h="svelte-10kjmc8"><tbody><tr><th>Type \ Format</th> <th>Standard</th> <th>Conversational</th></tr> <tr><td>Language modeling</td> <td><pre><code>{"text": "The sky is blue."}</code></pre></td> <td><pre><code>{"messages": [{"role": "user", "content": "What color is the sky?"}, | |
| {"role": "assistant", "content": "It is blue."}]}</code></pre></td></tr> <tr><td>Prompt-only</td> <td><pre><code>{"prompt": "The sky is"}</code></pre></td> <td><pre><code>{"prompt": [{"role": "user", "content": "What color is the sky?"}]}</code></pre></td></tr> <tr><td>Prompt-completion</td> <td><pre><code>{"prompt": "The sky is", | |
| "completion": " blue."}</code></pre></td> <td><pre><code>{"prompt": [{"role": "user", "content": "What color is the sky?"}], | |
| "completion": [{"role": "assistant", "content": "It is blue."}]}</code></pre></td></tr> <tr><td>Preference</td> <td><pre><code>{"prompt": "The sky is", | |
| "chosen": " blue.", | |
| "rejected": " green."}</code></pre> | |
| or, with implicit prompt: | |
| <pre><code>{"chosen": "The sky is blue.", | |
| "rejected": "The sky is green."}</code></pre></td> <td><pre><code>{"prompt": [{"role": "user", "content": "What color is the sky?"}], | |
| "chosen": [{"role": "assistant", "content": "It is blue."}], | |
| "rejected": [{"role": "assistant", "content": "It is green."}]}</code></pre> | |
| or, with implicit prompt: | |
| <pre><code>{"chosen": [{"role": "user", "content": "What color is the sky?"}, | |
| {"role": "assistant", "content": "It is blue."}], | |
| "rejected": [{"role": "user", "content": "What color is the sky?"}, | |
| {"role": "assistant", "content": "It is green."}]}</code></pre></td></tr> <tr><td>Unpaired preference</td> <td><pre><code>{"prompt": "The sky is", | |
| "completion": " blue.", | |
| "label": True}</code></pre></td> <td><pre><code>{"prompt": [{"role": "user", "content": "What color is the sky?"}], | |
| "completion": [{"role": "assistant", "content": "It is green."}], | |
| "label": False}</code></pre></td></tr> <tr><td>Stepwise supervision</td> <td><pre><code>{"prompt": "Which number is larger, 9.8 or 9.11?", | |
| "completions": ["The fractional part of 9.8 is 0.8.", | |
| "The fractional part of 9.11 is 0.11.", | |
| "0.11 is greater than 0.8.", | |
| "Hence, 9.11 > 9.8."], | |
| "labels": [True, True, False, False]}</code></pre></td> <td></td></tr></tbody></table> <h3 class="relative group"><a id="formats" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#formats"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Formats</span></h3> <h4 class="relative group"><a id="standard" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#standard"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 
28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Standard</span></h4> <p data-svelte-h="svelte-n22pbc">The standard dataset format typically consists of plain text strings. The columns in the dataset vary depending on the task. This is the format expected by TRL trainers. Below are examples of standard dataset formats for different tasks:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Language modeling</span> | |
| language_modeling_example = {<span class="hljs-string">"text"</span>: <span class="hljs-string">"The sky is blue."</span>} | |
| <span class="hljs-comment"># Preference</span> | |
| preference_example = {<span class="hljs-string">"prompt"</span>: <span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"chosen"</span>: <span class="hljs-string">" blue."</span>, <span class="hljs-string">"rejected"</span>: <span class="hljs-string">" green."</span>} | |
| <span class="hljs-comment"># Unpaired preference</span> | |
| unpaired_preference_example = {<span class="hljs-string">"prompt"</span>: <span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"completion"</span>: <span class="hljs-string">" blue."</span>, <span class="hljs-string">"label"</span>: <span class="hljs-literal">True</span>}<!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="conversational" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#conversational"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Conversational</span></h4> <p data-svelte-h="svelte-ftn6so">Conversational datasets are used for tasks involving dialogues or chat interactions between users and assistants. 
Unlike standard dataset formats, these contain sequences of messages where each message has a <code>role</code> (e.g., <code>"user"</code> or <code>"assistant"</code>) and <code>content</code> (the message text).</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->messages = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Hello, how are you?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'm doing great. How can I help you today?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'd like to show off how chat templating works!"</span>}, | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1n2btf9">Just like standard datasets, the columns in conversational datasets vary depending on the task. Below are examples of conversational dataset formats for different tasks:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Prompt-completion</span> | |
| prompt_completion_example = {<span class="hljs-string">"prompt"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}], | |
| <span class="hljs-string">"completion"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}]} | |
| <span class="hljs-comment"># Preference</span> | |
| preference_example = { | |
| <span class="hljs-string">"prompt"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}], | |
| <span class="hljs-string">"chosen"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}], | |
| <span class="hljs-string">"rejected"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is green."</span>}], | |
| }<!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="tool-calling" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tool-calling"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tool Calling</span></h4> <p data-svelte-h="svelte-14xxjkt">Some chat templates support <em>tool calling</em>, which allows the model to interact with external functions—referred to as <strong>tools</strong>—during generation. This extends the conversational capabilities of the model by enabling it to output a <code>"tool_calls"</code> field instead of a standard <code>"content"</code> message whenever it decides to invoke a tool.</p> <p data-svelte-h="svelte-ni0rj5">After the assistant initiates a tool call, the tool executes and returns its output. 
The assistant can then process this output and continue the conversation accordingly.</p> <p data-svelte-h="svelte-ahhjs">Here’s a simple example of a tool-calling interaction:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->messages = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Turn on the living room lights."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"tool_calls"</span>: [ | |
| {<span class="hljs-string">"type"</span>: <span class="hljs-string">"function"</span>, <span class="hljs-string">"function"</span>: { | |
| <span class="hljs-string">"name"</span>: <span class="hljs-string">"control_light"</span>, | |
| <span class="hljs-string">"arguments"</span>: {<span class="hljs-string">"room"</span>: <span class="hljs-string">"living room"</span>, <span class="hljs-string">"state"</span>: <span class="hljs-string">"on"</span>} | |
| }}] | |
| }, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"tool"</span>, <span class="hljs-string">"name"</span>: <span class="hljs-string">"control_light"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"The lights in the living room are now on."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Done!"</span>} | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ivsv5q">When preparing datasets for Supervised Fine-Tuning (SFT) with tool calling, it is important that your dataset includes an additional column named <code>tools</code>. This column contains the list of available tools for the model, which is usually used by the chat template to construct the system prompt.</p> <p data-svelte-h="svelte-1wc60el">The tools must be specified in a codified JSON schema format. You can automatically generate this schema from Python function signatures using the <code>get_json_schema</code> utility:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> json | |
| <span class="hljs-keyword">from</span> transformers.utils <span class="hljs-keyword">import</span> get_json_schema | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">control_light</span>(<span class="hljs-params">room: <span class="hljs-built_in">str</span>, state: <span class="hljs-built_in">str</span></span>) -> <span class="hljs-built_in">str</span>: | |
| <span class="hljs-string">""" | |
| Controls the lights in a room. | |
| Args: | |
| room: The name of the room. | |
| state: The desired state of the light ("on" or "off"). | |
| Returns: | |
| str: A message indicating the new state of the lights. | |
| """</span> | |
| <span class="hljs-keyword">return</span> <span class="hljs-string">f"The lights in <span class="hljs-subst">{room}</span> are now <span class="hljs-subst">{state}</span>."</span> | |
| <span class="hljs-comment"># Generate JSON schema</span> | |
| json_schema = get_json_schema(control_light)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1e3dkbk">The generated schema would look like:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-string">"type"</span>: <span class="hljs-string">"function"</span>, <span class="hljs-string">"function"</span>: {<span class="hljs-string">"name"</span>: <span class="hljs-string">"control_light"</span>, <span class="hljs-string">"description"</span>: <span class="hljs-string">"Controls the lights in a room."</span>, <span class="hljs-string">"parameters"</span>: {<span class="hljs-string">"type"</span>: <span class="hljs-string">"object"</span>, <span class="hljs-string">"properties"</span>: {<span class="hljs-string">"room"</span>: {<span class="hljs-string">"type"</span>: <span 
class="hljs-string">"string"</span>, <span class="hljs-string">"description"</span>: <span class="hljs-string">"The name of the room."</span>}, <span class="hljs-string">"state"</span>: {<span class="hljs-string">"type"</span>: <span class="hljs-string">"string"</span>, <span class="hljs-string">"description"</span>: <span class="hljs-string">"The desired state of the light (\"on\" or \"off\")."</span>}}, <span class="hljs-string">"required"</span>: [<span class="hljs-string">"room"</span>, <span class="hljs-string">"state"</span>]}, <span class="hljs-string">"return"</span>: {<span class="hljs-string">"type"</span>: <span class="hljs-string">"string"</span>, <span class="hljs-string">"description"</span>: <span class="hljs-string">"str: A message indicating the new state of the lights."</span>}}}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1rdwc3r">A complete dataset entry for SFT might look like:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" 
style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-string">"messages"</span>: messages, <span class="hljs-string">"tools"</span>: [json_schema]}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1axxesh">To get a <code>Dataset</code> you need to use the <code>Json()</code> type for tool arguments since they are arbitrary JSON objects, and not dictionaries with fixed fields and types:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| data = [ | |
| {<span class="hljs-string">"messages"</span>: messages1, <span class="hljs-string">"tools"</span>: [json_schema1]}, | |
| {<span class="hljs-string">"messages"</span>: messages2, <span class="hljs-string">"tools"</span>: [json_schema2]}, | |
| ] | |
| <span class="hljs-comment"># auto-apply the Json() type</span> | |
| dataset = Dataset.from_list(data, on_mixed_types=<span class="hljs-string">"use_json"</span>) | |
| <span class="hljs-comment"># or specify the features manually</span> | |
| <span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Features, Json, <span class="hljs-type">List</span>, Value | |
| features = Features( | |
| { | |
| <span class="hljs-string">"messages"</span>: <span class="hljs-type">List</span>({<span class="hljs-string">"role"</span>: Value(<span class="hljs-string">"string"</span>), <span class="hljs-string">"content"</span>: Value(<span class="hljs-string">"string"</span>), <span class="hljs-string">"tool_calls"</span>: <span class="hljs-type">List</span>(Json())}), | |
| <span class="hljs-string">"tools"</span>: <span class="hljs-type">List</span>(Json()), | |
| } | |
| ) | |
| dataset = Dataset.from_list(data, features=features)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-n0cefj">On older versions of <code>datasets</code> (<4.7.0) that don’t have the <code>Json()</code> type, you should store <code>tools</code> as a JSON <code>str</code> (with <code>json.dumps([...])</code>):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->dataset = Dataset.from_list( | |
| [{<span class="hljs-string">"messages"</span>: messages1, <span class="hljs-string">"tools"</span>: json.dumps([json_schema1])}, | |
| {<span class="hljs-string">"messages"</span>: messages2, <span class="hljs-string">"tools"</span>: json.dumps([json_schema2])}] | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-158aotm">For more detailed information on tool calling, refer to the <a href="https://huggingface.co/docs/transformers/chat_extras#tools-and-rag" rel="nofollow">Tool Calling section in the <code>transformers</code> documentation</a> and the blog post <a href="https://huggingface.co/blog/unified-tool-use" rel="nofollow">Tool Use, Unified</a>.</p> <h3 class="relative group"><a id="harmony" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#harmony"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Harmony</span></h3> <p data-svelte-h="svelte-1ab456i">The <a href="https://cookbook.openai.com/articles/openai-harmony" rel="nofollow">Harmony response format</a> was introduced with the <a href="https://huggingface.co/collections/openai/gpt-oss-68911959590a1634ba11c7a4" rel="nofollow">OpenAI GPT OSS models</a>. It extends the conversational format by adding richer structure for reasoning, function calls, and metadata about the model’s behavior. 
Key features include:</p> <ul data-svelte-h="svelte-36l94o"><li><p><strong>Developer role</strong> – Provides high-level instructions (similar to a system prompt) and lists available tools.</p></li> <li><p><strong>Channels</strong> – Separate types of assistant output into distinct streams:</p> <ul><li><code>analysis</code> – for internal reasoning, from the key <code>"thinking"</code></li> <li><code>final</code> – for the user-facing answer, from the key <code>"content"</code></li> <li><code>commentary</code> – for tool calls or meta notes</li></ul></li> <li><p><strong>Reasoning effort</strong> – Signals how much thinking the model should show (e.g., <code>"low"</code>, <code>"medium"</code>, <code>"high"</code>).</p></li> <li><p><strong>Model identity</strong> – Explicitly defines the assistant’s persona.</p></li></ul> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> 
Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"openai/gpt-oss-20b"</span>) | |
| messages = [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"developer"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Use a friendly tone."</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What is the meaning of life?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"thinking"</span>: <span class="hljs-string">"Deep reflection..."</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"The final answer is..."</span>}, | |
| ] | |
| <span class="hljs-built_in">print</span>( | |
| tokenizer.apply_chat_template( | |
| messages, | |
| tokenize=<span class="hljs-literal">False</span>, | |
| reasoning_effort=<span class="hljs-string">"low"</span>, | |
| model_identity=<span class="hljs-string">"You are HuggingGPT, a large language model trained by Hugging Face."</span>, | |
| ) | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-5e8pi3">This produces:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><|start|>system<|message|>You are HuggingGPT, a large language model trained by Hugging Face. | |
| Knowledge cutoff: 2024-06 | |
| Current date: 2025-08-03 | |
| Reasoning: low | |
| # Valid channels: analysis, commentary, final. Channel must be included for every message.<|end|><|start|>developer<|message|># Instructions | |
| Use a friendly tone.<|end|><|start|>user<|message|>What is the meaning of life?<|end|><|start|>assistant<|channel|>analysis<|message|>Deep reflection...<|end|><|start|>assistant<|channel|>final<|message|>The final answer is...<|return|><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9o2fvs">For full details on message structure, supported fields, and advanced usage, see the <a href="https://cookbook.openai.com/articles/openai-harmony" rel="nofollow">Harmony documentation</a>.</p> <h3 class="relative group"><a id="types" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#types"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Types</span></h3> <h4 class="relative group"><a id="language-modeling" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#language-modeling"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 
88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Language modeling</span></h4> <p data-svelte-h="svelte-zfzona">A language modeling dataset consists of a column <code>"text"</code> (or <code>"messages"</code> for conversational datasets) containing a full sequence of text.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span 
class="hljs-comment"># Standard format</span> | |
| language_modeling_example = {<span class="hljs-string">"text"</span>: <span class="hljs-string">"The sky is blue."</span>} | |
| <span class="hljs-comment"># Conversational format</span> | |
| language_modeling_example = {<span class="hljs-string">"messages"</span>: [ | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>} | |
| ]}<!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="prompt-only" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prompt-only"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Prompt-only</span></h4> <p data-svelte-h="svelte-10d3tml">In a prompt-only dataset, only the initial prompt (the question or partial sentence) is provided under the key <code>"prompt"</code>. 
The training typically involves generating a completion based on this prompt, where the model learns to continue or complete the given input.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Standard format</span> | |
| prompt_only_example = {<span class="hljs-string">"prompt"</span>: <span class="hljs-string">"The sky is"</span>} | |
| <span class="hljs-comment"># Conversational format</span> | |
| prompt_only_example = {<span class="hljs-string">"prompt"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}]}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19lc5cw">For examples of prompt-only datasets, refer to the <a href="https://huggingface.co/collections/trl-lib/prompt-only-datasets-677ea25245d20252cea00368" rel="nofollow">Prompt-only datasets collection</a>.</p> <blockquote class="tip"><p data-svelte-h="svelte-1k9p4sz">Although the prompt-only and language modeling types are similar, they differ in how the input is handled. In the prompt-only type, the prompt is a partial input that the model is expected to complete or continue, whereas in the language modeling type, the input is treated as a complete sentence or sequence. These two types are processed differently by TRL. Below is an example showing the difference in the output of the <code>apply_chat_template</code> function for each type:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform 
-translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> apply_chat_template | |
| tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"microsoft/Phi-3-mini-128k-instruct"</span>) | |
| <span class="hljs-comment"># Example for prompt-only type</span> | |
| prompt_only_example = {<span class="hljs-string">"prompt"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}]} | |
| apply_chat_template(prompt_only_example, tokenizer) | |
| <span class="hljs-comment"># Output: {'prompt': '<|user|>\nWhat color is the sky?<|end|>\n<|assistant|>\n'}</span> | |
| <span class="hljs-comment"># Example for language modeling type</span> | |
| lm_example = {<span class="hljs-string">"messages"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}]} | |
| apply_chat_template(lm_example, tokenizer) | |
| <span class="hljs-comment"># Output: {'text': '<|user|>\nWhat color is the sky?<|end|>\n<|endoftext|>'}</span><!-- HTML_TAG_END --></pre></div> <ul data-svelte-h="svelte-17mqw6t"><li>The prompt-only output includes a <code>'<|assistant|>\n'</code>, indicating the beginning of the assistant’s turn and expecting the model to generate a completion.</li> <li>In contrast, the language modeling output treats the input as a complete sequence and terminates it with <code>'<|endoftext|>'</code>, signaling the end of the text and not expecting any additional content.</li></ul></blockquote> <h4 class="relative group"><a id="prompt-completion" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prompt-completion"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Prompt-completion</span></h4> <p data-svelte-h="svelte-1qh8h3u">A prompt-completion dataset includes a <code>"prompt"</code> and a <code>"completion"</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out 
opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Standard format</span> | |
| prompt_completion_example = {<span class="hljs-string">"prompt"</span>: <span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"completion"</span>: <span class="hljs-string">" blue."</span>} | |
| <span class="hljs-comment"># Conversational format</span> | |
| prompt_completion_example = {<span class="hljs-string">"prompt"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}], | |
| <span class="hljs-string">"completion"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}]}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-td5nru">For examples of prompt-completion datasets, refer to the <a href="https://huggingface.co/collections/trl-lib/prompt-completion-datasets-677ea2bb20bbb6bdccada216" rel="nofollow">Prompt-completion datasets collection</a>.</p> <h4 class="relative group"><a id="preference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#preference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Preference</span></h4> <p data-svelte-h="svelte-1aekpwq">A preference dataset is used for tasks where the model is trained to choose between two or more possible completions to the same prompt. This dataset includes a <code>"prompt"</code>, a <code>"chosen"</code> completion, and a <code>"rejected"</code> completion. The model is trained to select the <code>"chosen"</code> response over the <code>"rejected"</code> response. | |
| Some datasets may not include the <code>"prompt"</code> column, in which case the prompt is implicit and directly included in the <code>"chosen"</code> and <code>"rejected"</code> completions. We recommend using explicit prompts whenever possible.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Standard format</span> | |
| <span class="hljs-comment">## Explicit prompt (recommended)</span> | |
| preference_example = {<span class="hljs-string">"prompt"</span>: <span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"chosen"</span>: <span class="hljs-string">" blue."</span>, <span class="hljs-string">"rejected"</span>: <span class="hljs-string">" green."</span>} | |
| <span class="hljs-comment">## Implicit prompt</span> | |
| preference_example = {<span class="hljs-string">"chosen"</span>: <span class="hljs-string">"The sky is blue."</span>, <span class="hljs-string">"rejected"</span>: <span class="hljs-string">"The sky is green."</span>} | |
| <span class="hljs-comment"># Conversational format</span> | |
| <span class="hljs-comment">## Explicit prompt (recommended)</span> | |
| preference_example = {<span class="hljs-string">"prompt"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}], | |
| <span class="hljs-string">"chosen"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}], | |
| <span class="hljs-string">"rejected"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is green."</span>}]} | |
| <span class="hljs-comment">## Implicit prompt</span> | |
| preference_example = {<span class="hljs-string">"chosen"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}], | |
| <span class="hljs-string">"rejected"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, | |
| {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is green."</span>}]}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-183i8gm">For examples of preference datasets, refer to the <a href="https://huggingface.co/collections/trl-lib/preference-datasets-677e99b581018fcad9abd82c" rel="nofollow">Preference datasets collection</a>.</p> <p data-svelte-h="svelte-144ttmo">Some preference datasets can be found with <a href="https://huggingface.co/datasets?other=dpo" rel="nofollow">the tag <code>dpo</code> on Hugging Face Hub</a>. You can also explore the <a href="https://huggingface.co/collections/librarian-bots/direct-preference-optimization-datasets-66964b12835f46289b6ef2fc" rel="nofollow">librarian-bots’ DPO Collections</a> to identify preference datasets.</p> <h4 class="relative group"><a id="unpaired-preference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#unpaired-preference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Unpaired preference</span></h4> <p data-svelte-h="svelte-ds3631">An unpaired 
preference dataset is similar to a preference dataset but instead of having <code>"chosen"</code> and <code>"rejected"</code> completions for the same prompt, it includes a single <code>"completion"</code> and a <code>"label"</code> indicating whether the completion is preferred or not.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Standard format</span> | |
| unpaired_preference_example = {<span class="hljs-string">"prompt"</span>: <span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"completion"</span>: <span class="hljs-string">" blue."</span>, <span class="hljs-string">"label"</span>: <span class="hljs-literal">True</span>} | |
| <span class="hljs-comment"># Conversational format</span> | |
| unpaired_preference_example = {<span class="hljs-string">"prompt"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}], | |
| <span class="hljs-string">"completion"</span>: [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}], | |
| <span class="hljs-string">"label"</span>: <span class="hljs-literal">True</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-2vlprm">For examples of unpaired preference datasets, refer to the <a href="https://huggingface.co/collections/trl-lib/unpaired-preference-datasets-677ea22bf5f528c125b0bcdf" rel="nofollow">Unpaired preference datasets collection</a>.</p> <h4 class="relative group"><a id="stepwise-supervision" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stepwise-supervision"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Stepwise supervision</span></h4> <p data-svelte-h="svelte-wwnrua">A stepwise (or process) supervision dataset is similar to an <a href="#unpaired-preference">unpaired preference</a> dataset but includes multiple steps of completions, each with its own label. This structure is useful for tasks that need detailed, step-by-step labeling, such as reasoning tasks. 
By evaluating each step separately and providing targeted labels, this approach helps identify precisely where the reasoning is correct and where errors occur, allowing for targeted feedback on each part of the reasoning process.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->stepwise_example = { | |
| <span class="hljs-string">"prompt"</span>: <span class="hljs-string">"Which number is larger, 9.8 or 9.11?"</span>, | |
| <span class="hljs-string">"completions"</span>: [<span class="hljs-string">"The fractional part of 9.8 is 0.8, while the fractional part of 9.11 is 0.11."</span>, <span class="hljs-string">"Since 0.11 is greater than 0.8, the number 9.11 is larger than 9.8."</span>], | |
| <span class="hljs-string">"labels"</span>: [<span class="hljs-literal">True</span>, <span class="hljs-literal">False</span>] | |
| }<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1r34it7">For examples of stepwise supervision datasets, refer to the <a href="https://huggingface.co/collections/trl-lib/stepwise-supervision-datasets-677ea27fd4c5941beed7a96e" rel="nofollow">Stepwise supervision datasets collection</a>.</p> <h2 class="relative group"><a id="which-dataset-type-to-use" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#which-dataset-type-to-use"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Which dataset type to use?</span></h2> <p data-svelte-h="svelte-vrn817">Choosing the right dataset type depends on the task you are working on and the specific requirements of the TRL trainer you are using. 
Below is a brief overview of the dataset types supported by each TRL trainer.</p> <table data-svelte-h="svelte-12ig5ir"><thead><tr><th>Trainer</th> <th>Expected dataset type</th></tr></thead> <tbody><tr><td><a href="/docs/trl/pr_5607/en/bema_for_reference_model#trl.DPOTrainer">DPOTrainer</a></td> <td><a href="#preference">Preference (explicit prompt recommended)</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/gspo_token#trl.GRPOTrainer">GRPOTrainer</a></td> <td><a href="#prompt-only">Prompt-only</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/reward_trainer#trl.RewardTrainer">RewardTrainer</a></td> <td><a href="#preference">Preference (implicit prompt recommended)</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/rloo_trainer#trl.RLOOTrainer">RLOOTrainer</a></td> <td><a href="#prompt-only">Prompt-only</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/sft_trainer#trl.SFTTrainer">SFTTrainer</a></td> <td><a href="#language-modeling">Language modeling</a> or <a href="#prompt-completion">Prompt-completion</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/bco_trainer#trl.experimental.bco.BCOTrainer">experimental.bco.BCOTrainer</a></td> <td><a href="#unpaired-preference">Unpaired preference</a> or <a href="#preference">Preference (explicit prompt recommended)</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/cpo_trainer#trl.experimental.cpo.CPOTrainer">experimental.cpo.CPOTrainer</a></td> <td><a href="#preference">Preference (explicit prompt recommended)</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/gkd_trainer#trl.experimental.gkd.GKDTrainer">experimental.gkd.GKDTrainer</a></td> <td><a href="#prompt-completion">Prompt-completion</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/kto_trainer#trl.KTOTrainer">experimental.kto.KTOTrainer</a></td> <td><a href="#unpaired-preference">Unpaired preference</a> or <a href="#preference">Preference (explicit prompt recommended)</a></td></tr> <tr><td><a 
href="/docs/trl/pr_5607/en/nash_md_trainer#trl.experimental.nash_md.NashMDTrainer">experimental.nash_md.NashMDTrainer</a></td> <td><a href="#prompt-only">Prompt-only</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/online_dpo_trainer#trl.experimental.online_dpo.OnlineDPOTrainer">experimental.online_dpo.OnlineDPOTrainer</a></td> <td><a href="#prompt-only">Prompt-only</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/orpo_trainer#trl.experimental.orpo.ORPOTrainer">experimental.orpo.ORPOTrainer</a></td> <td><a href="#preference">Preference (explicit prompt recommended)</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/ppo_trainer#trl.experimental.ppo.PPOTrainer">experimental.ppo.PPOTrainer</a></td> <td>Tokenized language modeling</td></tr> <tr><td><a href="/docs/trl/pr_5607/en/prm_trainer#trl.experimental.prm.PRMTrainer">experimental.prm.PRMTrainer</a></td> <td><a href="#stepwise-supervision">Stepwise supervision</a></td></tr> <tr><td><a href="/docs/trl/pr_5607/en/xpo_trainer#trl.experimental.xpo.XPOTrainer">experimental.xpo.XPOTrainer</a></td> <td><a href="#prompt-only">Prompt-only</a></td></tr></tbody></table> <h2 class="relative group"><a id="using-any-dataset-with-trl-preprocessing-and-conversion" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-any-dataset-with-trl-preprocessing-and-conversion"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 
56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using any dataset with TRL: preprocessing and conversion</span></h2> <p data-svelte-h="svelte-n0cp2u">Many datasets come in formats tailored to specific tasks, which might not be directly compatible with TRL. To use such datasets with TRL, you may need to preprocess and convert them into the required format.</p> <p data-svelte-h="svelte-wbjjp8">To make this easier, we provide a set of <a href="https://github.com/huggingface/trl/tree/main/examples/datasets" rel="nofollow">example scripts</a> that cover common dataset conversions.</p> <h3 class="relative group"><a id="example-ultrafeedback-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-ultrafeedback-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example: UltraFeedback dataset</span></h3> <p data-svelte-h="svelte-dr0jz6">Let’s take the <a href="https://huggingface.co/datasets/openbmb/UltraFeedback" 
rel="nofollow">UltraFeedback dataset</a> as an example. Here’s a preview of the dataset:</p> <iframe src="https://huggingface.co/datasets/openbmb/UltraFeedback/embed/viewer/default/train" frameborder="0" width="100%" height="560px"></iframe> <p data-svelte-h="svelte-1f6n8q4">As shown above, the dataset format does not match the expected structure. It’s not in a conversational format, the column names differ, and the results pertain to different models (e.g., Bard, GPT-4) and aspects (e.g., “helpfulness”, “honesty”).</p> <p data-svelte-h="svelte-17l9hcf">By using the provided conversion script <a href="https://github.com/huggingface/trl/tree/main/examples/datasets/ultrafeedback.py" rel="nofollow"><code>examples/datasets/ultrafeedback.py</code></a>, you can transform this dataset into an unpaired preference type, and push it to the Hub:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> 
Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->python examples/datasets/ultrafeedback.py --push_to_hub --repo_id trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-hplhuu">Once converted, the dataset will look like this:</p> <iframe src="https://huggingface.co/datasets/trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness/embed/viewer/default/train?row=0" frameborder="0" width="100%" height="560px"></iframe> <p data-svelte-h="svelte-jt9cd3">Now, you can use this dataset with TRL!</p> <p data-svelte-h="svelte-1rvlfj0">By adapting the provided scripts or creating your own, you can convert any dataset into a format compatible with TRL.</p> <h2 class="relative group"><a id="utilities-for-converting-dataset-types" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#utilities-for-converting-dataset-types"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Utilities for converting dataset types</span></h2> <p data-svelte-h="svelte-1ttdd1s">This section provides example code to help you convert between different dataset types. 
While some conversions can be performed after applying the chat template (i.e., in the standard format), we recommend performing the conversion before applying the chat template to ensure it works consistently.</p> <p data-svelte-h="svelte-17l70ga">For simplicity, some of the examples below do not follow this recommendation and use the standard format. However, the conversions can be applied directly to the conversational format without modification.</p> <table data-svelte-h="svelte-byyzej"><thead><tr><th>From \ To</th> <th>Language modeling</th> <th>Prompt-completion</th> <th>Prompt-only</th> <th>Preference with implicit prompt</th> <th>Preference</th> <th>Unpaired preference</th> <th>Stepwise supervision</th></tr></thead> <tbody><tr><td>Language modeling</td> <td>N/A</td> <td>N/A</td> <td>N/A</td> <td>N/A</td> <td>N/A</td> <td>N/A</td> <td>N/A</td></tr> <tr><td>Prompt-completion</td> <td><a href="#from-prompt-completion-to-language-modeling-dataset">🔗</a></td> <td>N/A</td> <td><a href="#from-prompt-completion-to-prompt-only-dataset">🔗</a></td> <td>N/A</td> <td>N/A</td> <td>N/A</td> <td>N/A</td></tr> <tr><td>Prompt-only</td> <td>N/A</td> <td>N/A</td> <td>N/A</td> <td>N/A</td> <td>N/A</td> <td>N/A</td> <td>N/A</td></tr> <tr><td>Preference with implicit prompt</td> <td><a href="#from-preference-with-implicit-prompt-to-language-modeling-dataset">🔗</a></td> <td><a href="#from-preference-with-implicit-prompt-to-prompt-completion-dataset">🔗</a></td> <td><a href="#from-preference-with-implicit-prompt-to-prompt-only-dataset">🔗</a></td> <td>N/A</td> <td><a href="#from-implicit-to-explicit-prompt-preference-dataset">🔗</a></td> <td><a href="#from-preference-with-implicit-prompt-to-unpaired-preference-dataset">🔗</a></td> <td>N/A</td></tr> <tr><td>Preference</td> <td><a href="#from-preference-to-language-modeling-dataset">🔗</a></td> <td><a href="#from-preference-to-prompt-completion-dataset">🔗</a></td> <td><a href="#from-preference-to-prompt-only-dataset">🔗</a></td> <td><a 
href="#from-explicit-to-implicit-prompt-preference-dataset">🔗</a></td> <td>N/A</td> <td><a href="#from-preference-to-unpaired-preference-dataset">🔗</a></td> <td>N/A</td></tr> <tr><td>Unpaired preference</td> <td><a href="#from-unpaired-preference-to-language-modeling-dataset">🔗</a></td> <td><a href="#from-unpaired-preference-to-prompt-completion-dataset">🔗</a></td> <td><a href="#from-unpaired-preference-to-prompt-only-dataset">🔗</a></td> <td>N/A</td> <td>N/A</td> <td>N/A</td> <td>N/A</td></tr> <tr><td>Stepwise supervision</td> <td><a href="#from-stepwise-supervision-to-language-modeling-dataset">🔗</a></td> <td><a href="#from-stepwise-supervision-to-prompt-completion-dataset">🔗</a></td> <td><a href="#from-stepwise-supervision-to-prompt-only-dataset">🔗</a></td> <td>N/A</td> <td>N/A</td> <td><a href="#from-stepwise-supervision-to-unpaired-preference-dataset">🔗</a></td> <td>N/A</td></tr></tbody></table> <h3 class="relative group"><a id="from-prompt-completion-to-language-modeling-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-prompt-completion-to-language-modeling-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>From prompt-completion to language modeling dataset</span></h3> <p data-svelte-h="svelte-q88m1s">To convert a prompt-completion dataset into a language modeling dataset, concatenate the prompt and the completion.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"The sun is"</span>], | |
| <span class="hljs-string">"completion"</span>: [<span class="hljs-string">" blue."</span>, <span class="hljs-string">" in the sky."</span>], | |
| }) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">concat_prompt_completion</span>(<span class="hljs-params">example</span>): | |
| <span class="hljs-keyword">return</span> {<span class="hljs-string">"text"</span>: example[<span class="hljs-string">"prompt"</span>] + example[<span class="hljs-string">"completion"</span>]} | |
| dataset = dataset.<span class="hljs-built_in">map</span>(concat_prompt_completion, remove_columns=[<span class="hljs-string">"prompt"</span>, <span class="hljs-string">"completion"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'text'</span>: <span class="hljs-string">'The sky is blue.'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-prompt-completion-to-prompt-only-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-prompt-completion-to-prompt-only-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From prompt-completion to prompt-only dataset</span></h3> <p data-svelte-h="svelte-dplhkb">To convert a prompt-completion dataset into a prompt-only dataset, remove the completion.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" 
transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"The sun is"</span>], | |
| <span class="hljs-string">"completion"</span>: [<span class="hljs-string">" blue."</span>, <span class="hljs-string">" in the sky."</span>], | |
| }) | |
| dataset = dataset.remove_columns(<span class="hljs-string">"completion"</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: <span class="hljs-string">'The sky is'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-preference-with-implicit-prompt-to-language-modeling-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-preference-with-implicit-prompt-to-language-modeling-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From preference with implicit prompt to language modeling dataset</span></h3> <p data-svelte-h="svelte-u8pmva">To convert a preference dataset with implicit prompt into a language modeling dataset, remove the <code>"rejected"</code> column, and rename the column <code>"chosen"</code> to <code>"text"</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid 
meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"chosen"</span>: [<span class="hljs-string">"The sky is blue."</span>, <span class="hljs-string">"The sun is in the sky."</span>], | |
| <span class="hljs-string">"rejected"</span>: [<span class="hljs-string">"The sky is green."</span>, <span class="hljs-string">"The sun is in the sea."</span>], | |
| }) | |
| dataset = dataset.rename_column(<span class="hljs-string">"chosen"</span>, <span class="hljs-string">"text"</span>).remove_columns(<span class="hljs-string">"rejected"</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'text'</span>: <span class="hljs-string">'The sky is blue.'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-preference-with-implicit-prompt-to-prompt-completion-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-preference-with-implicit-prompt-to-prompt-completion-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From preference with implicit prompt to prompt-completion dataset</span></h3> <p data-svelte-h="svelte-1b9ufx4">To convert a preference dataset with implicit prompt into a prompt-completion dataset, extract the prompt with <a href="/docs/trl/pr_5607/en/data_utils#trl.extract_prompt">extract_prompt()</a>, remove the <code>"rejected"</code> column, and rename the column <code>"chosen"</code> to <code>"completion"</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" 
aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> extract_prompt | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"chosen"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Where is the sun?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sky."</span>}], | |
| ], | |
| <span class="hljs-string">"rejected"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is green."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Where is the sun?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sea."</span>}], | |
| ], | |
| }) | |
| dataset = dataset.<span class="hljs-built_in">map</span>(extract_prompt).remove_columns(<span class="hljs-string">"rejected"</span>).rename_column(<span class="hljs-string">"chosen"</span>, <span class="hljs-string">"completion"</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'user'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'What color is the sky?'</span>}], <span class="hljs-string">'completion'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'assistant'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'It is blue.'</span>}]}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-preference-with-implicit-prompt-to-prompt-only-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-preference-with-implicit-prompt-to-prompt-only-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From preference with implicit prompt to prompt-only dataset</span></h3> <p data-svelte-h="svelte-dm99bu">To convert a preference dataset with implicit prompt into a prompt-only dataset, extract the prompt with <a href="/docs/trl/pr_5607/en/data_utils#trl.extract_prompt">extract_prompt()</a>, and remove the <code>"chosen"</code> and <code>"rejected"</code> columns.</p> <div class="code-block relative "><div class="absolute 
top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> extract_prompt | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"chosen"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Where is the sun?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sky."</span>}], | |
| ], | |
| <span class="hljs-string">"rejected"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is green."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Where is the sun?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sea."</span>}], | |
| ], | |
| }) | |
| dataset = dataset.<span class="hljs-built_in">map</span>(extract_prompt).remove_columns([<span class="hljs-string">"chosen"</span>, <span class="hljs-string">"rejected"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'user'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'What color is the sky?'</span>}]}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-implicit-to-explicit-prompt-preference-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-implicit-to-explicit-prompt-preference-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From implicit to explicit prompt preference dataset</span></h3> <p data-svelte-h="svelte-10h6f7u">To convert a preference dataset with implicit prompt into a preference dataset with explicit prompt, extract the prompt with <a href="/docs/trl/pr_5607/en/data_utils#trl.extract_prompt">extract_prompt()</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" 
xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> extract_prompt | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"chosen"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Where is the sun?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sky."</span>}], | |
| ], | |
| <span class="hljs-string">"rejected"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is green."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Where is the sun?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sea."</span>}], | |
| ], | |
| }) | |
| dataset = dataset.<span class="hljs-built_in">map</span>(extract_prompt)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'user'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'What color is the sky?'</span>}], | |
| <span class="hljs-string">'chosen'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'assistant'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'It is blue.'</span>}], | |
| <span class="hljs-string">'rejected'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'assistant'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'It is green.'</span>}]}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-preference-with-implicit-prompt-to-unpaired-preference-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-preference-with-implicit-prompt-to-unpaired-preference-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From preference with implicit prompt to unpaired preference dataset</span></h3> <p data-svelte-h="svelte-xb4due">To convert a preference dataset with implicit prompt into an unpaired preference dataset, extract the prompt with <a href="/docs/trl/pr_5607/en/data_utils#trl.extract_prompt">extract_prompt()</a>, and unpair the dataset with <a href="/docs/trl/pr_5607/en/data_utils#trl.unpair_preference_dataset">unpair_preference_dataset()</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 
cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> extract_prompt, unpair_preference_dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"chosen"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Where is the sun?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sky."</span>}], | |
| ], | |
| <span class="hljs-string">"rejected"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is green."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Where is the sun?"</span>}, {<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sea."</span>}], | |
| ], | |
| }) | |
| dataset = dataset.<span class="hljs-built_in">map</span>(extract_prompt) | |
| dataset = unpair_preference_dataset(dataset)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'user'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'What color is the sky?'</span>}], | |
| <span class="hljs-string">'completion'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'assistant'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'It is blue.'</span>}], | |
| <span class="hljs-string">'label'</span>: <span class="hljs-literal">True</span>}<!-- HTML_TAG_END --></pre></div> <blockquote class="warning" data-svelte-h="svelte-j1wemz"><p>Keep in mind that the <code>"chosen"</code> and <code>"rejected"</code> completions in a preference dataset can both be either good or bad. | |
| Before applying <a href="/docs/trl/pr_5607/en/data_utils#trl.unpair_preference_dataset">unpair_preference_dataset()</a>, please ensure that all <code>"chosen"</code> completions can be labeled as good and all <code>"rejected"</code> completions as bad. | |
| This can be ensured by checking absolute rating of each completion, e.g. from a reward model.</p></blockquote> <h3 class="relative group"><a id="from-preference-to-language-modeling-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-preference-to-language-modeling-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From preference to language modeling dataset</span></h3> <p data-svelte-h="svelte-1azgh8u">To convert a preference dataset into a language modeling dataset, remove the rejected, concatenate the prompt and the chosen into the <code>"text"</code> column.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"The sun is"</span>], | |
| <span class="hljs-string">"chosen"</span>: [<span class="hljs-string">" blue."</span>, <span class="hljs-string">" in the sky."</span>], | |
| <span class="hljs-string">"rejected"</span>: [<span class="hljs-string">" green."</span>, <span class="hljs-string">" in the sea."</span>], | |
| }) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">concat_prompt_chosen</span>(<span class="hljs-params">example</span>): | |
| <span class="hljs-keyword">return</span> {<span class="hljs-string">"text"</span>: example[<span class="hljs-string">"prompt"</span>] + example[<span class="hljs-string">"chosen"</span>]} | |
| dataset = dataset.<span class="hljs-built_in">map</span>(concat_prompt_chosen, remove_columns=[<span class="hljs-string">"prompt"</span>, <span class="hljs-string">"chosen"</span>, <span class="hljs-string">"rejected"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'text'</span>: <span class="hljs-string">'The sky is blue.'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-preference-to-prompt-completion-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-preference-to-prompt-completion-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From preference to prompt-completion dataset</span></h3> <p data-svelte-h="svelte-thhn9e">To convert a preference dataset into a prompt-completion dataset, remove the rejected, and rename the column <code>"chosen"</code> to <code>"completion"</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"The sun is"</span>], | |
| <span class="hljs-string">"chosen"</span>: [<span class="hljs-string">" blue."</span>, <span class="hljs-string">" in the sky."</span>], | |
| <span class="hljs-string">"rejected"</span>: [<span class="hljs-string">" green."</span>, <span class="hljs-string">" in the sea."</span>], | |
| }) | |
| dataset = dataset.remove_columns(<span class="hljs-string">"rejected"</span>).rename_column(<span class="hljs-string">"chosen"</span>, <span class="hljs-string">"completion"</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: <span class="hljs-string">'The sky is'</span>, <span class="hljs-string">'completion'</span>: <span class="hljs-string">' blue.'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-preference-to-prompt-only-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-preference-to-prompt-only-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From preference to prompt-only dataset</span></h3> <p data-svelte-h="svelte-yu2uu5">To convert a preference dataset into a prompt-only dataset, remove the rejected and the chosen.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"The sun is"</span>], | |
| <span class="hljs-string">"chosen"</span>: [<span class="hljs-string">" blue."</span>, <span class="hljs-string">" in the sky."</span>], | |
| <span class="hljs-string">"rejected"</span>: [<span class="hljs-string">" green."</span>, <span class="hljs-string">" in the sea."</span>], | |
| }) | |
| dataset = dataset.remove_columns([<span class="hljs-string">"chosen"</span>, <span class="hljs-string">"rejected"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: <span class="hljs-string">'The sky is'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-explicit-to-implicit-prompt-preference-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-explicit-to-implicit-prompt-preference-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From explicit to implicit prompt preference dataset</span></h3> <p data-svelte-h="svelte-slr0da">To convert a preference dataset with explicit prompt into a preference dataset with implicit prompt, concatenate the prompt to both chosen and rejected, and remove the prompt.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Where is the sun?"</span>}], | |
| ], | |
| <span class="hljs-string">"chosen"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sky."</span>}], | |
| ], | |
| <span class="hljs-string">"rejected"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is green."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sea."</span>}], | |
| ], | |
| }) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">concat_prompt_to_completions</span>(<span class="hljs-params">example</span>): | |
| <span class="hljs-keyword">return</span> {<span class="hljs-string">"chosen"</span>: example[<span class="hljs-string">"prompt"</span>] + example[<span class="hljs-string">"chosen"</span>], <span class="hljs-string">"rejected"</span>: example[<span class="hljs-string">"prompt"</span>] + example[<span class="hljs-string">"rejected"</span>]} | |
| dataset = dataset.<span class="hljs-built_in">map</span>(concat_prompt_to_completions, remove_columns=<span class="hljs-string">"prompt"</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'chosen'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'user'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'What color is the sky?'</span>}, {<span class="hljs-string">'role'</span>: <span class="hljs-string">'assistant'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'It is blue.'</span>}], | |
| <span class="hljs-string">'rejected'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'user'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'What color is the sky?'</span>}, {<span class="hljs-string">'role'</span>: <span class="hljs-string">'assistant'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'It is green.'</span>}]}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-preference-to-unpaired-preference-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-preference-to-unpaired-preference-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From preference to unpaired preference dataset</span></h3> <p data-svelte-h="svelte-99arln">To convert a preference dataset into an unpaired preference dataset, unpair the dataset with <a href="/docs/trl/pr_5607/en/data_utils#trl.unpair_preference_dataset">unpair_preference_dataset()</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none 
transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| <span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> unpair_preference_dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Where is the sun?"</span>}], | |
| ], | |
| <span class="hljs-string">"chosen"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is blue."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sky."</span>}], | |
| ], | |
| <span class="hljs-string">"rejected"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"It is green."</span>}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"In the sea."</span>}], | |
| ], | |
| }) | |
| dataset = unpair_preference_dataset(dataset)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'user'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'What color is the sky?'</span>}], | |
| <span class="hljs-string">'completion'</span>: [{<span class="hljs-string">'role'</span>: <span class="hljs-string">'assistant'</span>, <span class="hljs-string">'content'</span>: <span class="hljs-string">'It is blue.'</span>}], | |
| <span class="hljs-string">'label'</span>: <span class="hljs-literal">True</span>}<!-- HTML_TAG_END --></pre></div> <blockquote class="warning" data-svelte-h="svelte-j1wemz"><p>Keep in mind that the <code>"chosen"</code> and <code>"rejected"</code> completions in a preference dataset can both be good or both be bad. | |
| Before applying <a href="/docs/trl/pr_5607/en/data_utils#trl.unpair_preference_dataset">unpair_preference_dataset()</a>, please ensure that all <code>"chosen"</code> completions can be labeled as good and all <code>"rejected"</code> completions as bad. | |
| This can be ensured by checking absolute rating of each completion, e.g. from a reward model.</p></blockquote> <h3 class="relative group"><a id="from-unpaired-preference-to-language-modeling-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-unpaired-preference-to-language-modeling-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From unpaired preference to language modeling dataset</span></h3> <p data-svelte-h="svelte-1rtwiuz">To convert an unpaired preference dataset into a language modeling dataset, concatenate prompts with good completions into the <code>"text"</code> column, and remove the prompt, completion and label columns.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 
0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"The sun is"</span>, <span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"The sun is"</span>], | |
| <span class="hljs-string">"completion"</span>: [<span class="hljs-string">" blue."</span>, <span class="hljs-string">" in the sky."</span>, <span class="hljs-string">" green."</span>, <span class="hljs-string">" in the sea."</span>], | |
| <span class="hljs-string">"label"</span>: [<span class="hljs-literal">True</span>, <span class="hljs-literal">True</span>, <span class="hljs-literal">False</span>, <span class="hljs-literal">False</span>], | |
| }) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">concatenate_prompt_completion</span>(<span class="hljs-params">example</span>): | |
| <span class="hljs-keyword">return</span> {<span class="hljs-string">"text"</span>: example[<span class="hljs-string">"prompt"</span>] + example[<span class="hljs-string">"completion"</span>]} | |
| dataset = dataset.<span class="hljs-built_in">filter</span>(<span class="hljs-keyword">lambda</span> x: x[<span class="hljs-string">"label"</span>]).<span class="hljs-built_in">map</span>(concatenate_prompt_completion).remove_columns([<span class="hljs-string">"prompt"</span>, <span class="hljs-string">"completion"</span>, <span class="hljs-string">"label"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'text'</span>: <span class="hljs-string">'The sky is blue.'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-unpaired-preference-to-prompt-completion-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-unpaired-preference-to-prompt-completion-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From unpaired preference to prompt-completion dataset</span></h3> <p data-svelte-h="svelte-1mntuxe">To convert an unpaired preference dataset into a prompt-completion dataset, filter for good labels, then remove the label column.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"The sun is"</span>, <span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"The sun is"</span>], | |
| <span class="hljs-string">"completion"</span>: [<span class="hljs-string">" blue."</span>, <span class="hljs-string">" in the sky."</span>, <span class="hljs-string">" green."</span>, <span class="hljs-string">" in the sea."</span>], | |
| <span class="hljs-string">"label"</span>: [<span class="hljs-literal">True</span>, <span class="hljs-literal">True</span>, <span class="hljs-literal">False</span>, <span class="hljs-literal">False</span>], | |
| }) | |
| dataset = dataset.<span class="hljs-built_in">filter</span>(<span class="hljs-keyword">lambda</span> x: x[<span class="hljs-string">"label"</span>]).remove_columns([<span class="hljs-string">"label"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: <span class="hljs-string">'The sky is'</span>, <span class="hljs-string">'completion'</span>: <span class="hljs-string">' blue.'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-unpaired-preference-to-prompt-only-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-unpaired-preference-to-prompt-only-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From unpaired preference to prompt-only dataset</span></h3> <p data-svelte-h="svelte-1x58tju">To convert an unpaired preference dataset into a prompt-only dataset, remove the completion and the label columns.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 
32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"The sun is"</span>, <span class="hljs-string">"The sky is"</span>, <span class="hljs-string">"The sun is"</span>], | |
| <span class="hljs-string">"completion"</span>: [<span class="hljs-string">" blue."</span>, <span class="hljs-string">" in the sky."</span>, <span class="hljs-string">" green."</span>, <span class="hljs-string">" in the sea."</span>], | |
| <span class="hljs-string">"label"</span>: [<span class="hljs-literal">True</span>, <span class="hljs-literal">True</span>, <span class="hljs-literal">False</span>, <span class="hljs-literal">False</span>], | |
| }) | |
| dataset = dataset.remove_columns([<span class="hljs-string">"completion"</span>, <span class="hljs-string">"label"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: <span class="hljs-string">'The sky is'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-stepwise-supervision-to-language-modeling-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-stepwise-supervision-to-language-modeling-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From stepwise supervision to language modeling dataset</span></h3> <p data-svelte-h="svelte-usvjfj">To convert a stepwise supervision dataset into a language modeling dataset, concatenate prompts with good completions into the <code>"text"</code> column.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"Blue light"</span>, <span class="hljs-string">"Water"</span>], | |
| <span class="hljs-string">"completions"</span>: [[<span class="hljs-string">" scatters more in the atmosphere,"</span>, <span class="hljs-string">" so the sky is green."</span>], | |
| [<span class="hljs-string">" forms a less dense structure in ice,"</span>, <span class="hljs-string">" which causes it to expand when it freezes."</span>]], | |
| <span class="hljs-string">"labels"</span>: [[<span class="hljs-literal">True</span>, <span class="hljs-literal">False</span>], [<span class="hljs-literal">True</span>, <span class="hljs-literal">True</span>]], | |
| }) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">concatenate_prompt_completions</span>(<span class="hljs-params">example</span>): | |
| completion = <span class="hljs-string">""</span>.join(example[<span class="hljs-string">"completions"</span>]) | |
| <span class="hljs-keyword">return</span> {<span class="hljs-string">"text"</span>: example[<span class="hljs-string">"prompt"</span>] + completion} | |
| dataset = dataset.<span class="hljs-built_in">filter</span>(<span class="hljs-keyword">lambda</span> x: <span class="hljs-built_in">all</span>(x[<span class="hljs-string">"labels"</span>])).<span class="hljs-built_in">map</span>(concatenate_prompt_completions, remove_columns=[<span class="hljs-string">"prompt"</span>, <span class="hljs-string">"completions"</span>, <span class="hljs-string">"labels"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'text'</span>: <span class="hljs-string">'Water forms a less dense structure in ice, which causes it to expand when it freezes.'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-stepwise-supervision-to-prompt-completion-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-stepwise-supervision-to-prompt-completion-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From stepwise supervision to prompt-completion dataset</span></h3> <p data-svelte-h="svelte-1wvawda">To convert a stepwise supervision dataset into a prompt-completion dataset, join the good completions and remove the labels.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"Blue light"</span>, <span class="hljs-string">"Water"</span>], | |
| <span class="hljs-string">"completions"</span>: [[<span class="hljs-string">" scatters more in the atmosphere,"</span>, <span class="hljs-string">" so the sky is green."</span>], | |
| [<span class="hljs-string">" forms a less dense structure in ice,"</span>, <span class="hljs-string">" which causes it to expand when it freezes."</span>]], | |
| <span class="hljs-string">"labels"</span>: [[<span class="hljs-literal">True</span>, <span class="hljs-literal">False</span>], [<span class="hljs-literal">True</span>, <span class="hljs-literal">True</span>]], | |
| }) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">join_completions</span>(<span class="hljs-params">example</span>): | |
| completion = <span class="hljs-string">""</span>.join(example[<span class="hljs-string">"completions"</span>]) | |
| <span class="hljs-keyword">return</span> {<span class="hljs-string">"completion"</span>: completion} | |
| dataset = dataset.<span class="hljs-built_in">filter</span>(<span class="hljs-keyword">lambda</span> x: <span class="hljs-built_in">all</span>(x[<span class="hljs-string">"labels"</span>])).<span class="hljs-built_in">map</span>(join_completions, remove_columns=[<span class="hljs-string">"completions"</span>, <span class="hljs-string">"labels"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: <span class="hljs-string">'Water'</span>, <span class="hljs-string">'completion'</span>: <span class="hljs-string">' forms a less dense structure in ice, which causes it to expand when it freezes.'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-stepwise-supervision-to-prompt-only-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-stepwise-supervision-to-prompt-only-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From stepwise supervision to prompt-only dataset</span></h3> <p data-svelte-h="svelte-1fb8htv">To convert a stepwise supervision dataset into a prompt-only dataset, remove the completions and the labels.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"Blue light"</span>, <span class="hljs-string">"Water"</span>], | |
| <span class="hljs-string">"completions"</span>: [[<span class="hljs-string">" scatters more in the atmosphere,"</span>, <span class="hljs-string">" so the sky is green."</span>], | |
| [<span class="hljs-string">" forms a less dense structure in ice,"</span>, <span class="hljs-string">" which causes it to expand when it freezes."</span>]], | |
| <span class="hljs-string">"labels"</span>: [[<span class="hljs-literal">True</span>, <span class="hljs-literal">False</span>], [<span class="hljs-literal">True</span>, <span class="hljs-literal">True</span>]], | |
| }) | |
| dataset = dataset.remove_columns([<span class="hljs-string">"completions"</span>, <span class="hljs-string">"labels"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: <span class="hljs-string">'Blue light'</span>}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="from-stepwise-supervision-to-unpaired-preference-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#from-stepwise-supervision-to-unpaired-preference-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>From stepwise supervision to unpaired preference dataset</span></h3> <p data-svelte-h="svelte-1p3ncex">To convert a stepwise supervision dataset into an unpaired preference dataset, join the completions and merge the labels.</p> <p data-svelte-h="svelte-wppc03">The method for merging the labels depends on the specific task. In this example, we use the logical AND operation. 
This means that if the step labels indicate the correctness of individual steps, the resulting label is <code>True</code> only when every step is correct, so it reflects the correctness of the entire sequence.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset | |
| dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [<span class="hljs-string">"Blue light"</span>, <span class="hljs-string">"Water"</span>], | |
| <span class="hljs-string">"completions"</span>: [[<span class="hljs-string">" scatters more in the atmosphere,"</span>, <span class="hljs-string">" so the sky is green."</span>], | |
| [<span class="hljs-string">" forms a less dense structure in ice,"</span>, <span class="hljs-string">" which causes it to expand when it freezes."</span>]], | |
| <span class="hljs-string">"labels"</span>: [[<span class="hljs-literal">True</span>, <span class="hljs-literal">False</span>], [<span class="hljs-literal">True</span>, <span class="hljs-literal">True</span>]], | |
| }) | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">merge_completions_and_labels</span>(<span class="hljs-params">example</span>): | |
| <span class="hljs-keyword">return</span> {<span class="hljs-string">"prompt"</span>: example[<span class="hljs-string">"prompt"</span>], <span class="hljs-string">"completion"</span>: <span class="hljs-string">""</span>.join(example[<span class="hljs-string">"completions"</span>]), <span class="hljs-string">"label"</span>: <span class="hljs-built_in">all</span>(example[<span class="hljs-string">"labels"</span>])} | |
| dataset = dataset.<span class="hljs-built_in">map</span>(merge_completions_and_labels, remove_columns=[<span class="hljs-string">"completions"</span>, <span class="hljs-string">"labels"</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'prompt'</span>: <span class="hljs-string">'Blue light'</span>, <span class="hljs-string">'completion'</span>: <span class="hljs-string">' scatters more in the atmosphere, so the sky is green.'</span>, <span class="hljs-string">'label'</span>: <span class="hljs-literal">False</span>}<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="vision-datasets" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#vision-datasets"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Vision datasets</span></h2> <p data-svelte-h="svelte-1ue491b">Some trainers also support fine-tuning vision-language models (VLMs) using image-text pairs. 
In this scenario, it’s recommended to use a conversational format, as each model handles image placeholders in text differently.</p> <p data-svelte-h="svelte-1dkity6">A conversational vision dataset differs from a standard conversational dataset in two key ways:</p> <ol data-svelte-h="svelte-wr9qud"><li>The dataset must contain the key <code>images</code> with the image data (as lists of PIL images) or <code>image</code> with a single PIL image.</li> <li>The <code>"content"</code> field in messages must be a list of dictionaries, where each dictionary specifies the type of data: <code>"image"</code> or <code>"text"</code>.</li></ol> <p data-svelte-h="svelte-11lpom8">Example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Textual dataset:</span> | |
| <span class="hljs-string">"content"</span>: <span class="hljs-string">"What color is the sky?"</span> | |
| <span class="hljs-comment"># Vision dataset:</span> | |
| <span class="hljs-string">"content"</span>: [ | |
| {<span class="hljs-string">"type"</span>: <span class="hljs-string">"image"</span>}, | |
| {<span class="hljs-string">"type"</span>: <span class="hljs-string">"text"</span>, <span class="hljs-string">"text"</span>: <span class="hljs-string">"What color is the sky in the image?"</span>} | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11ot8l1">An example of a conversational vision dataset is the <a href="https://huggingface.co/datasets/openbmb/RLAIF-V-Dataset" rel="nofollow">openbmb/RLAIF-V-Dataset</a>. Below is an embedded view of the dataset’s training data, allowing you to explore it directly:</p> <iframe src="https://huggingface.co/datasets/trl-lib/rlaif-v/embed/viewer/default/train" frameborder="0" width="100%" height="560px"></iframe> <blockquote class="note"><p data-svelte-h="svelte-dhwe6f">Mixing text-only and vision-language data in the dataset is possible, but it requires <code>transformers</code> version 4.57.0 or later. Example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->dataset = Dataset.from_dict({ | |
| <span class="hljs-string">"prompt"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: [{<span class="hljs-string">"type"</span>: <span class="hljs-string">"image"</span>}, {<span class="hljs-string">"type"</span>: <span class="hljs-string">"text"</span>, <span class="hljs-string">"text"</span>: <span class="hljs-string">"What color is the sky in the image?"</span>}]}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: [{<span class="hljs-string">"type"</span>: <span class="hljs-string">"text"</span>, <span class="hljs-string">"text"</span>: <span class="hljs-string">"What is the capital of France?"</span>}]}], | |
| ], | |
| <span class="hljs-string">"completion"</span>: [ | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: [{<span class="hljs-string">"type"</span>: <span class="hljs-string">"text"</span>, <span class="hljs-string">"text"</span>: <span class="hljs-string">"It is blue."</span>}]}], | |
| [{<span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: [{<span class="hljs-string">"type"</span>: <span class="hljs-string">"text"</span>, <span class="hljs-string">"text"</span>: <span class="hljs-string">"Paris."</span>}]}], | |
| ], | |
| <span class="hljs-string">"images"</span>: [ | |
| [PIL.Image.<span class="hljs-built_in">open</span>(<span class="hljs-string">"path/to/sky_image1.png"</span>)], | |
| [], | |
| ], | |
| })<!-- HTML_TAG_END --></pre></div></blockquote> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/trl/blob/main/docs/source/dataset_formats.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1hqaf25 = { | |
| assets: "/docs/trl/pr_5607/en", | |
| base: "/docs/trl/pr_5607/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/trl/pr_5607/en/_app/immutable/entry/start.151d81bd.js"), | |
| import("/docs/trl/pr_5607/en/_app/immutable/entry/app.3d9a91c0.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 12], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 223 kB
- Xet hash:
- 2bbccd57360b7b66a837436f8270aae597342b86e31c7018aa63b328312c1603
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.