Buckets:

rtrm's picture
download
raw
29.5 kB
/*
 * Page module for the TRL "Data Utilities" documentation page (title taken
 * from the heading props in Ls and from the Ps metadata string).
 * NOTE(review): this looks like minified compiler output (Svelte-style
 * fragments, `data-svelte-h` hashes); it is presumably regenerated from
 * docs/source/data_utils.md (the URL passed to the `Ds` component below),
 * so hand edits here would likely be overwritten — confirm before editing.
 */
import{s as Ns,o as Zs,n as ns}from"../chunks/scheduler.7b731bd4.js";import{S as zs,i as Ss,e as T,s as r,c as M,h as Xs,a as U,d as l,b as c,f as O,g as h,j as $,k as ss,l as J,m as i,n as u,t as d,o as j,p as y}from"../chunks/index.cc268345.js";import{C as Rs,H as es,E as Ds}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.6a2cd520.js";import{D as ys}from"../chunks/Docstring.54119fd0.js";import{C as ls}from"../chunks/CodeBlock.f01cadde.js";import{E as as}from"../chunks/ExampleCodeBlock.e3c8a01b.js";
/*
 * Vs: default-slot fragment for the example block anchored at
 * "trl.is_conversational.example" (wired up in Ls via `$$slots:{default:[Vs]}`).
 * Builds a <p> whose text is "Examples:" followed by a CodeBlock (`ls`)
 * receiving a base64-looking `code` prop and pre-rendered `highlighted` HTML.
 * The returned object's c/l/m/p/i/o/d methods are presumably the
 * create/claim/mount/update/intro/outro/destroy hooks expected by the
 * imported runtime — TODO confirm against ../chunks/index.
 */
function Vs(C){let e,g="Examples:",o,a,p;return a=new ls({props:{code:"ZXhhbXBsZSUyMCUzRCUyMCU3QiUyMnByb21wdCUyMiUzQSUyMCU1QiU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBjb2xvciUyMGlzJTIwdGhlJTIwc2t5JTNGJTIyJTdEJTVEJTdEJTBBaXNfY29udmVyc2F0aW9uYWwoZXhhbXBsZSklMEElMEFleGFtcGxlJTIwJTNEJTIwJTdCJTIycHJvbXB0JTIyJTNBJTIwJTIyVGhlJTIwc2t5JTIwaXMlMjIlN0QlMEFpc19jb252ZXJzYXRpb25hbChleGFtcGxlKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>example = {<span class="hljs-string">&quot;prompt&quot;</span>: [{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What color is the sky?&quot;</span>}]}
<span class="hljs-meta">&gt;&gt;&gt; </span>is_conversational(example)
<span class="hljs-literal">True</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>example = {<span class="hljs-string">&quot;prompt&quot;</span>: <span class="hljs-string">&quot;The sky is&quot;</span>}
<span class="hljs-meta">&gt;&gt;&gt; </span>is_conversational(example)
<span class="hljs-literal">False</span>`,wrap:!1}}),{c(){e=T("p"),e.textContent=g,o=r(),M(a.$$.fragment)},l(t){e=U(t,"P",{"data-svelte-h":!0}),$(e)!=="svelte-kvfsh7"&&(e.textContent=g),o=c(t),h(a.$$.fragment,t)},m(t,m){i(t,e,m),i(t,o,m),u(a,t,m),p=!0},p:ns,i(t){p||(d(a.$$.fragment,t),p=!0)},o(t){j(a.$$.fragment,t),p=!1},d(t){t&&(l(e),l(o)),y(a,t)}}}
/*
 * Fs: default-slot fragment for the example block anchored at
 * "trl.maybe_convert_to_chatml.example". Same shape as Vs: a <p> with the
 * text "Example:" followed by a CodeBlock (`ls`) showing the
 * maybe_convert_to_chatml snippet.
 */
function Fs(C){let e,g="Example:",o,a,p;return a=new ls({props:{code:"ZnJvbSUyMHRybCUyMGltcG9ydCUyMG1heWJlX2NvbnZlcnRfdG9fY2hhdG1sJTBBJTBBZXhhbXBsZSUyMCUzRCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMmNvbnZlcnNhdGlvbnMlMjIlM0ElMjAlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJmcm9tJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMnZhbHVlJTIyJTNBJTIwJTIyV2hhdCUyMGNvbG9yJTIwaXMlMjB0aGUlMjBza3klM0YlMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJmcm9tJTIyJTNBJTIwJTIyYXNzaXN0YW50JTIyJTJDJTIwJTIydmFsdWUlMjIlM0ElMjAlMjJJdCUyMGlzJTIwYmx1ZS4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlNUQlMEElN0QlMEFtYXliZV9jb252ZXJ0X3RvX2NoYXRtbChleGFtcGxlKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> maybe_convert_to_chatml
<span class="hljs-meta">&gt;&gt;&gt; </span>example = {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;conversations&quot;</span>: [
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;from&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;value&quot;</span>: <span class="hljs-string">&quot;What color is the sky?&quot;</span>},
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;from&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;value&quot;</span>: <span class="hljs-string">&quot;It is blue.&quot;</span>},
<span class="hljs-meta">... </span> ]
<span class="hljs-meta">... </span>}
<span class="hljs-meta">&gt;&gt;&gt; </span>maybe_convert_to_chatml(example)
{<span class="hljs-string">&#x27;messages&#x27;</span>: [{<span class="hljs-string">&#x27;role&#x27;</span>: <span class="hljs-string">&#x27;user&#x27;</span>, <span class="hljs-string">&#x27;content&#x27;</span>: <span class="hljs-string">&#x27;What color is the sky?&#x27;</span>},
{<span class="hljs-string">&#x27;role&#x27;</span>: <span class="hljs-string">&#x27;assistant&#x27;</span>, <span class="hljs-string">&#x27;content&#x27;</span>: <span class="hljs-string">&#x27;It is blue.&#x27;</span>}]}`,wrap:!1}}),{c(){e=T("p"),e.textContent=g,o=r(),M(a.$$.fragment)},l(t){e=U(t,"P",{"data-svelte-h":!0}),$(e)!=="svelte-11lpom8"&&(e.textContent=g),o=c(t),h(a.$$.fragment,t)},m(t,m){i(t,e,m),i(t,o,m),u(a,t,m),p=!0},p:ns,i(t){p||(d(a.$$.fragment,t),p=!0)},o(t){j(a.$$.fragment,t),p=!1},d(t){t&&(l(e),l(o)),y(a,t)}}}
/*
 * Ys: default-slot fragment for the example block anchored at
 * "trl.extract_prompt.example". A <p> with the text "Examples:" followed by
 * a CodeBlock (`ls`) showing extract_prompt applied to a single example.
 */
function Ys(C){let e,g="Examples:",o,a,p;return a=new ls({props:{code:"ZXhhbXBsZSUyMCUzRCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMmNob3NlbiUyMiUzQSUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBjb2xvciUyMGlzJTIwdGhlJTIwc2t5JTNGJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMmFzc2lzdGFudCUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJJdCUyMGlzJTIwYmx1ZS4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjJyZWplY3RlZCUyMiUzQSUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBjb2xvciUyMGlzJTIwdGhlJTIwc2t5JTNGJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMmFzc2lzdGFudCUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJJdCUyMGlzJTIwZ3JlZW4uJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTdEJTBBZXh0cmFjdF9wcm9tcHQoZXhhbXBsZSk=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>example = {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;chosen&quot;</span>: [
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What color is the sky?&quot;</span>},
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;It is blue.&quot;</span>},
<span class="hljs-meta">... </span> ],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;rejected&quot;</span>: [
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What color is the sky?&quot;</span>},
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;It is green.&quot;</span>},
<span class="hljs-meta">... </span> ],
<span class="hljs-meta">... </span>}
<span class="hljs-meta">&gt;&gt;&gt; </span>extract_prompt(example)
{<span class="hljs-string">&#x27;prompt&#x27;</span>: [{<span class="hljs-string">&#x27;role&#x27;</span>: <span class="hljs-string">&#x27;user&#x27;</span>, <span class="hljs-string">&#x27;content&#x27;</span>: <span class="hljs-string">&#x27;What color is the sky?&#x27;</span>}],
<span class="hljs-string">&#x27;chosen&#x27;</span>: [{<span class="hljs-string">&#x27;role&#x27;</span>: <span class="hljs-string">&#x27;assistant&#x27;</span>, <span class="hljs-string">&#x27;content&#x27;</span>: <span class="hljs-string">&#x27;It is blue.&#x27;</span>}],
<span class="hljs-string">&#x27;rejected&#x27;</span>: [{<span class="hljs-string">&#x27;role&#x27;</span>: <span class="hljs-string">&#x27;assistant&#x27;</span>, <span class="hljs-string">&#x27;content&#x27;</span>: <span class="hljs-string">&#x27;It is green.&#x27;</span>}]}`,wrap:!1}}),{c(){e=T("p"),e.textContent=g,o=r(),M(a.$$.fragment)},l(t){e=U(t,"P",{"data-svelte-h":!0}),$(e)!=="svelte-kvfsh7"&&(e.textContent=g),o=c(t),h(a.$$.fragment,t)},m(t,m){i(t,e,m),i(t,o,m),u(a,t,m),p=!0},p:ns,i(t){p||(d(a.$$.fragment,t),p=!0)},o(t){j(a.$$.fragment,t),p=!1},d(t){t&&(l(e),l(o)),y(a,t)}}}
/*
 * Ws: default-slot fragment for the example block anchored at
 * "trl.extract_prompt.example-2". Unlike the sibling fragments, the intro
 * paragraph is assigned through innerHTML because its text contains markup
 * (a <code> tag and a link to the Dataset docs); the string is a literal
 * defined here, not external input. Followed by a CodeBlock (`ls`) showing
 * extract_prompt used with Dataset.map.
 */
function Ws(C){let e,g='Or, with the <code>map</code> method of <a href="https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset" rel="nofollow">Dataset</a>:',o,a,p;return a=new ls({props:{code:"ZnJvbSUyMHRybCUyMGltcG9ydCUyMGV4dHJhY3RfcHJvbXB0JTBBZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUwQSUwQWRhdGFzZXRfZGljdCUyMCUzRCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMmNob3NlbiUyMiUzQSUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBjb2xvciUyMGlzJTIwdGhlJTIwc2t5JTNGJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMmFzc2lzdGFudCUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJJdCUyMGlzJTIwYmx1ZS4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlNUIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0IlMjJyb2xlJTIyJTNBJTIwJTIydXNlciUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJXaGVyZSUyMGlzJTIwdGhlJTIwc3VuJTNGJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMmFzc2lzdGFudCUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJJbiUyMHRoZSUyMHNreS4lMjIlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjAlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjJyZWplY3RlZCUyMiUzQSUyMCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU1QiUwQSUyMCUyMCUyMC
UyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJ1c2VyJTIyJTJDJTIwJTIyY29udGVudCUyMiUzQSUyMCUyMldoYXQlMjBjb2xvciUyMGlzJTIwdGhlJTIwc2t5JTNGJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMmFzc2lzdGFudCUyMiUyQyUyMCUyMmNvbnRlbnQlMjIlM0ElMjAlMjJJdCUyMGlzJTIwZ3JlZW4uJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdCJTIycm9sZSUyMiUzQSUyMCUyMnVzZXIlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIyV2hlcmUlMjBpcyUyMHRoZSUyMHN1biUzRiUyMiU3RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QiUyMnJvbGUlMjIlM0ElMjAlMjJhc3Npc3RhbnQlMjIlMkMlMjAlMjJjb250ZW50JTIyJTNBJTIwJTIySW4lMjB0aGUlMjBzZWEuJTIyJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTdEJTBBZGF0YXNldCUyMCUzRCUyMERhdGFzZXQuZnJvbV9kaWN0KGRhdGFzZXRfZGljdCklMEFkYXRhc2V0JTIwJTNEJTIwZGF0YXNldC5tYXAoZXh0cmFjdF9wcm9tcHQpJTBBZGF0YXNldCU1QjAlNUQ=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> trl <span class="hljs-keyword">import</span> extract_prompt
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset_dict = {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;chosen&quot;</span>: [
<span class="hljs-meta">... </span> [
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What color is the sky?&quot;</span>},
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;It is blue.&quot;</span>},
<span class="hljs-meta">... </span> ],
<span class="hljs-meta">... </span> [
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Where is the sun?&quot;</span>},
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;In the sky.&quot;</span>},
<span class="hljs-meta">... </span> ],
<span class="hljs-meta">... </span> ],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;rejected&quot;</span>: [
<span class="hljs-meta">... </span> [
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;What color is the sky?&quot;</span>},
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;It is green.&quot;</span>},
<span class="hljs-meta">... </span> ],
<span class="hljs-meta">... </span> [
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Where is the sun?&quot;</span>},
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;In the sea.&quot;</span>},
<span class="hljs-meta">... </span> ],
<span class="hljs-meta">... </span> ],
<span class="hljs-meta">... </span>}
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = Dataset.from_dict(dataset_dict)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = dataset.<span class="hljs-built_in">map</span>(extract_prompt)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>]
{<span class="hljs-string">&#x27;prompt&#x27;</span>: [{<span class="hljs-string">&#x27;role&#x27;</span>: <span class="hljs-string">&#x27;user&#x27;</span>, <span class="hljs-string">&#x27;content&#x27;</span>: <span class="hljs-string">&#x27;What color is the sky?&#x27;</span>}],
<span class="hljs-string">&#x27;chosen&#x27;</span>: [{<span class="hljs-string">&#x27;role&#x27;</span>: <span class="hljs-string">&#x27;assistant&#x27;</span>, <span class="hljs-string">&#x27;content&#x27;</span>: <span class="hljs-string">&#x27;It is blue.&#x27;</span>}],
<span class="hljs-string">&#x27;rejected&#x27;</span>: [{<span class="hljs-string">&#x27;role&#x27;</span>: <span class="hljs-string">&#x27;assistant&#x27;</span>, <span class="hljs-string">&#x27;content&#x27;</span>: <span class="hljs-string">&#x27;It is green.&#x27;</span>}]}`,wrap:!1}}),{c(){e=T("p"),e.innerHTML=g,o=r(),M(a.$$.fragment)},l(t){e=U(t,"P",{"data-svelte-h":!0}),$(e)!=="svelte-12uxrnv"&&(e.innerHTML=g),o=c(t),h(a.$$.fragment,t)},m(t,m){i(t,e,m),i(t,o,m),u(a,t,m),p=!0},p:ns,i(t){p||(d(a.$$.fragment,t),p=!0)},o(t){j(a.$$.fragment,t),p=!1},d(t){t&&(l(e),l(o)),y(a,t)}}}
/*
 * Hs: default-slot fragment for the example block anchored at
 * "trl.unpair_preference_dataset.example". A <p> with the text "Example:"
 * followed by a CodeBlock (`ls`) showing unpair_preference_dataset.
 */
function Hs(C){let e,g="Example:",o,a,p;return a=new ls({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwRGF0YXNldCUwQSUwQWRhdGFzZXRfZGljdCUyMCUzRCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMnByb21wdCUyMiUzQSUyMCU1QiUyMlRoZSUyMHNreSUyMGlzJTIyJTJDJTIwJTIyVGhlJTIwc3VuJTIwaXMlMjIlNUQlMkMlMEElMjAlMjAlMjAlMjAlMjJjaG9zZW4lMjIlM0ElMjAlNUIlMjIlMjBibHVlLiUyMiUyQyUyMCUyMmluJTIwdGhlJTIwc2t5LiUyMiU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMnJlamVjdGVkJTIyJTNBJTIwJTVCJTIyJTIwZ3JlZW4uJTIyJTJDJTIwJTIyJTIwaW4lMjB0aGUlMjBzZWEuJTIyJTVEJTJDJTBBJTdEJTBBZGF0YXNldCUyMCUzRCUyMERhdGFzZXQuZnJvbV9kaWN0KGRhdGFzZXRfZGljdCklMEFkYXRhc2V0JTIwJTNEJTIwdW5wYWlyX3ByZWZlcmVuY2VfZGF0YXNldChkYXRhc2V0KSUwQWRhdGFzZXQlMEElMEFkYXRhc2V0JTVCMCU1RA==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset_dict = {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;prompt&quot;</span>: [<span class="hljs-string">&quot;The sky is&quot;</span>, <span class="hljs-string">&quot;The sun is&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;chosen&quot;</span>: [<span class="hljs-string">&quot; blue.&quot;</span>, <span class="hljs-string">&quot;in the sky.&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;rejected&quot;</span>: [<span class="hljs-string">&quot; green.&quot;</span>, <span class="hljs-string">&quot; in the sea.&quot;</span>],
<span class="hljs-meta">... </span>}
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = Dataset.from_dict(dataset_dict)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = unpair_preference_dataset(dataset)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset
Dataset({
features: [<span class="hljs-string">&#x27;prompt&#x27;</span>, <span class="hljs-string">&#x27;completion&#x27;</span>, <span class="hljs-string">&#x27;label&#x27;</span>],
num_rows: <span class="hljs-number">4</span>
})
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>]
{<span class="hljs-string">&#x27;prompt&#x27;</span>: <span class="hljs-string">&#x27;The sky is&#x27;</span>, <span class="hljs-string">&#x27;completion&#x27;</span>: <span class="hljs-string">&#x27; blue.&#x27;</span>, <span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-literal">True</span>}`,wrap:!1}}),{c(){e=T("p"),e.textContent=g,o=r(),M(a.$$.fragment)},l(t){e=U(t,"P",{"data-svelte-h":!0}),$(e)!=="svelte-11lpom8"&&(e.textContent=g),o=c(t),h(a.$$.fragment,t)},m(t,m){i(t,e,m),i(t,o,m),u(a,t,m),p=!0},p:ns,i(t){p||(d(a.$$.fragment,t),p=!0)},o(t){j(a.$$.fragment,t),p=!1},d(t){t&&(l(e),l(o)),y(a,t)}}}
/*
 * Ls: top-level page fragment. Instantiates, in document order:
 *   - `Rs` with a floating container style, then the h1 heading
 *     "Data Utilities" (`es`);
 *   - for each of is_conversational, maybe_convert_to_chatml,
 *     extract_prompt and unpair_preference_dataset: an h2 heading (`es`),
 *     a Docstring (`ys`) with signature/parameter/return metadata, the
 *     descriptive paragraphs, and one or two ExampleCodeBlock (`as`)
 *     instances whose default slots are Vs / Fs / Ys + Ws / Hs;
 *   - `Ds` with the link to docs/source/data_utils.md.
 * h() sets the `hf:doc:metadata` name and the Ps content on the <meta>
 * element and the shared "docstring ..." class on the four wrapper <div>s.
 * p() only forwards `$$scope` (dirty bit 2) to the five example
 * components A, Q, E, G and B; nothing else is updated after creation.
 * NOTE(review): the `<script context="module">...<\/script>` prefix inside
 * the returnDescription/returnType strings looks like a generator artifact
 * — confirm upstream before touching it here.
 */
function Ls(C){let e,g,o,a,p,t,m,ps,k,os,q,N,Js,F,_s="Check if the example is in a conversational format.",gs,A,rs,Z,cs,f,z,Ts,Y,vs="Convert a conversational dataset with fields <code>from</code> and <code>value</code> to ChatML format.",Us,W,As="This function modifies conversational data to align with OpenAI’s ChatML format:",fs,H,Qs="<li>Replaces the key <code>&quot;from&quot;</code> with <code>&quot;role&quot;</code> in message dictionaries.</li> <li>Replaces the key <code>&quot;value&quot;</code> with <code>&quot;content&quot;</code> in message dictionaries.</li> <li>Renames <code>&quot;conversations&quot;</code> to <code>&quot;messages&quot;</code> for consistency with ChatML.</li>",Is,Q,is,S,ms,I,X,ws,L,Es=`Extracts the shared prompt from a preference data example, where the prompt is implicit within both the chosen and
rejected completions.`,xs,P,Gs=`The function identifies the longest common sequence (prefix) of conversation turns between the “chosen” and
“rejected” completions and extracts this as the prompt. It then removes this prompt from the respective “chosen”
and “rejected” completions.`,Cs,E,$s,G,Ms,R,hs,b,D,qs,K,Bs="Unpair a preference dataset.",bs,B,us,V,ds,ts,js;return p=new Rs({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),m=new es({props:{title:"Data Utilities",local:"data-utilities",headingTag:"h1"}}),k=new es({props:{title:"is_conversational",local:"trl.is_conversational",headingTag:"h2"}}),N=new ys({props:{name:"trl.is_conversational",anchor:"trl.is_conversational",parameters:[{name:"example",val:": dict"}],parametersDescription:[{anchor:"trl.is_conversational.example",description:`<strong>example</strong> (<code>dict[str, Any]</code>) &#x2014;
A single data entry of a dataset. The example can have different keys depending on the dataset type.`,name:"example"}],source:"https://github.com/huggingface/trl/blob/vr_5321/trl/data_utils.py#L159",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>True</code> if the data is in a conversational format, <code>False</code> otherwise.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>bool</code></p>
`}}),A=new as({props:{anchor:"trl.is_conversational.example",$$slots:{default:[Vs]},$$scope:{ctx:C}}}),Z=new es({props:{title:"maybe_convert_to_chatml",local:"trl.maybe_convert_to_chatml",headingTag:"h2"}}),z=new ys({props:{name:"trl.maybe_convert_to_chatml",anchor:"trl.maybe_convert_to_chatml",parameters:[{name:"example",val:": dict"}],parametersDescription:[{anchor:"trl.maybe_convert_to_chatml.example",description:`<strong>example</strong> (<code>dict[str, list]</code>) &#x2014;
A single data entry containing a list of messages.`,name:"example"}],source:"https://github.com/huggingface/trl/blob/vr_5321/trl/data_utils.py#L984",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>Example reformatted to ChatML style.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>dict[str, list]</code></p>
`}}),Q=new as({props:{anchor:"trl.maybe_convert_to_chatml.example",$$slots:{default:[Fs]},$$scope:{ctx:C}}}),S=new es({props:{title:"extract_prompt",local:"trl.extract_prompt",headingTag:"h2"}}),X=new ys({props:{name:"trl.extract_prompt",anchor:"trl.extract_prompt",parameters:[{name:"example",val:": dict"}],parametersDescription:[{anchor:"trl.extract_prompt.example",description:`<strong>example</strong> (<code>dict[str, list]</code>) &#x2014;
A dictionary representing a single data entry in the preference dataset. It must contain the keys
<code>&quot;chosen&quot;</code> and <code>&quot;rejected&quot;</code>, where each value is either conversational or standard (<code>str</code>).`,name:"example"}],source:"https://github.com/huggingface/trl/blob/vr_5321/trl/data_utils.py#L502",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>A dictionary containing:</p>
<ul>
<li><code>"prompt"</code>: The longest common prefix between the “chosen” and “rejected” completions.</li>
<li><code>"chosen"</code>: The remainder of the “chosen” completion, with the prompt removed.</li>
<li><code>"rejected"</code>: The remainder of the “rejected” completion, with the prompt removed.</li>
</ul>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>dict[str, list]</code></p>
`}}),E=new as({props:{anchor:"trl.extract_prompt.example",$$slots:{default:[Ys]},$$scope:{ctx:C}}}),G=new as({props:{anchor:"trl.extract_prompt.example-2",$$slots:{default:[Ws]},$$scope:{ctx:C}}}),R=new es({props:{title:"unpair_preference_dataset",local:"trl.unpair_preference_dataset",headingTag:"h2"}}),D=new ys({props:{name:"trl.unpair_preference_dataset",anchor:"trl.unpair_preference_dataset",parameters:[{name:"dataset",val:": ~DatasetType"},{name:"num_proc",val:": int | None = None"},{name:"desc",val:": str | None = None"}],parametersDescription:[{anchor:"trl.unpair_preference_dataset.dataset",description:`<strong>dataset</strong> (<a href="https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset" rel="nofollow">Dataset</a> or <a href="https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.DatasetDict" rel="nofollow">DatasetDict</a>) &#x2014;
Preference dataset to unpair. The dataset must have columns <code>&quot;chosen&quot;</code>, <code>&quot;rejected&quot;</code> and optionally
<code>&quot;prompt&quot;</code>.`,name:"dataset"},{anchor:"trl.unpair_preference_dataset.num_proc",description:`<strong>num_proc</strong> (<code>int</code>, <em>optional</em>) &#x2014;
Number of processes to use for processing the dataset.`,name:"num_proc"},{anchor:"trl.unpair_preference_dataset.desc",description:`<strong>desc</strong> (<code>str</code>, <em>optional</em>) &#x2014;
Meaningful description to be displayed alongside with the progress bar while mapping examples.`,name:"desc"}],source:"https://github.com/huggingface/trl/blob/vr_5321/trl/data_utils.py#L408",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The unpaired preference dataset.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><a
href="https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset"
rel="nofollow"
>Dataset</a></p>
`}}),B=new as({props:{anchor:"trl.unpair_preference_dataset.example",$$slots:{default:[Hs]},$$scope:{ctx:C}}}),V=new Ds({props:{source:"https://github.com/huggingface/trl/blob/main/docs/source/data_utils.md"}}),{c(){e=T("meta"),g=r(),o=T("p"),a=r(),M(p.$$.fragment),t=r(),M(m.$$.fragment),ps=r(),M(k.$$.fragment),os=r(),q=T("div"),M(N.$$.fragment),Js=r(),F=T("p"),F.textContent=_s,gs=r(),M(A.$$.fragment),rs=r(),M(Z.$$.fragment),cs=r(),f=T("div"),M(z.$$.fragment),Ts=r(),Y=T("p"),Y.innerHTML=vs,Us=r(),W=T("p"),W.textContent=As,fs=r(),H=T("ul"),H.innerHTML=Qs,Is=r(),M(Q.$$.fragment),is=r(),M(S.$$.fragment),ms=r(),I=T("div"),M(X.$$.fragment),ws=r(),L=T("p"),L.textContent=Es,xs=r(),P=T("p"),P.textContent=Gs,Cs=r(),M(E.$$.fragment),$s=r(),M(G.$$.fragment),Ms=r(),M(R.$$.fragment),hs=r(),b=T("div"),M(D.$$.fragment),qs=r(),K=T("p"),K.textContent=Bs,bs=r(),M(B.$$.fragment),us=r(),M(V.$$.fragment),ds=r(),ts=T("p"),this.h()},l(s){const n=Xs("svelte-u9bgzb",document.head);e=U(n,"META",{name:!0,content:!0}),n.forEach(l),g=c(s),o=U(s,"P",{}),O(o).forEach(l),a=c(s),h(p.$$.fragment,s),t=c(s),h(m.$$.fragment,s),ps=c(s),h(k.$$.fragment,s),os=c(s),q=U(s,"DIV",{class:!0});var _=O(q);h(N.$$.fragment,_),Js=c(_),F=U(_,"P",{"data-svelte-h":!0}),$(F)!=="svelte-q00sz7"&&(F.textContent=_s),gs=c(_),h(A.$$.fragment,_),_.forEach(l),rs=c(s),h(Z.$$.fragment,s),cs=c(s),f=U(s,"DIV",{class:!0});var w=O(f);h(z.$$.fragment,w),Ts=c(w),Y=U(w,"P",{"data-svelte-h":!0}),$(Y)!=="svelte-1fnifvu"&&(Y.innerHTML=vs),Us=c(w),W=U(w,"P",{"data-svelte-h":!0}),$(W)!=="svelte-1ibbdsi"&&(W.textContent=As),fs=c(w),H=U(w,"UL",{"data-svelte-h":!0}),$(H)!=="svelte-mbqtod"&&(H.innerHTML=Qs),Is=c(w),h(Q.$$.fragment,w),w.forEach(l),is=c(s),h(S.$$.fragment,s),ms=c(s),I=U(s,"DIV",{class:!0});var 
x=O(I);h(X.$$.fragment,x),ws=c(x),L=U(x,"P",{"data-svelte-h":!0}),$(L)!=="svelte-1stxkbe"&&(L.textContent=Es),xs=c(x),P=U(x,"P",{"data-svelte-h":!0}),$(P)!=="svelte-13vngma"&&(P.textContent=Gs),Cs=c(x),h(E.$$.fragment,x),$s=c(x),h(G.$$.fragment,x),x.forEach(l),Ms=c(s),h(R.$$.fragment,s),hs=c(s),b=U(s,"DIV",{class:!0});var v=O(b);h(D.$$.fragment,v),qs=c(v),K=U(v,"P",{"data-svelte-h":!0}),$(K)!=="svelte-xsd4cz"&&(K.textContent=Bs),bs=c(v),h(B.$$.fragment,v),v.forEach(l),us=c(s),h(V.$$.fragment,s),ds=c(s),ts=U(s,"P",{}),O(ts).forEach(l),this.h()},h(){ss(e,"name","hf:doc:metadata"),ss(e,"content",Ps),ss(q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),ss(f,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),ss(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),ss(b,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(s,n){J(document.head,e),i(s,g,n),i(s,o,n),i(s,a,n),u(p,s,n),i(s,t,n),u(m,s,n),i(s,ps,n),u(k,s,n),i(s,os,n),i(s,q,n),u(N,q,null),J(q,Js),J(q,F),J(q,gs),u(A,q,null),i(s,rs,n),u(Z,s,n),i(s,cs,n),i(s,f,n),u(z,f,null),J(f,Ts),J(f,Y),J(f,Us),J(f,W),J(f,fs),J(f,H),J(f,Is),u(Q,f,null),i(s,is,n),u(S,s,n),i(s,ms,n),i(s,I,n),u(X,I,null),J(I,ws),J(I,L),J(I,xs),J(I,P),J(I,Cs),u(E,I,null),J(I,$s),u(G,I,null),i(s,Ms,n),u(R,s,n),i(s,hs,n),i(s,b,n),u(D,b,null),J(b,qs),J(b,K),J(b,bs),u(B,b,null),i(s,us,n),u(V,s,n),i(s,ds,n),i(s,ts,n),js=!0},p(s,[n]){const _={};n&2&&(_.$$scope={dirty:n,ctx:s}),A.$set(_);const w={};n&2&&(w.$$scope={dirty:n,ctx:s}),Q.$set(w);const x={};n&2&&(x.$$scope={dirty:n,ctx:s}),E.$set(x);const v={};n&2&&(v.$$scope={dirty:n,ctx:s}),G.$set(v);const 
ks={};n&2&&(ks.$$scope={dirty:n,ctx:s}),B.$set(ks)},i(s){js||(d(p.$$.fragment,s),d(m.$$.fragment,s),d(k.$$.fragment,s),d(N.$$.fragment,s),d(A.$$.fragment,s),d(Z.$$.fragment,s),d(z.$$.fragment,s),d(Q.$$.fragment,s),d(S.$$.fragment,s),d(X.$$.fragment,s),d(E.$$.fragment,s),d(G.$$.fragment,s),d(R.$$.fragment,s),d(D.$$.fragment,s),d(B.$$.fragment,s),d(V.$$.fragment,s),js=!0)},o(s){j(p.$$.fragment,s),j(m.$$.fragment,s),j(k.$$.fragment,s),j(N.$$.fragment,s),j(A.$$.fragment,s),j(Z.$$.fragment,s),j(z.$$.fragment,s),j(Q.$$.fragment,s),j(S.$$.fragment,s),j(X.$$.fragment,s),j(E.$$.fragment,s),j(G.$$.fragment,s),j(R.$$.fragment,s),j(D.$$.fragment,s),j(B.$$.fragment,s),j(V.$$.fragment,s),js=!1},d(s){s&&(l(g),l(o),l(a),l(t),l(ps),l(os),l(q),l(rs),l(cs),l(f),l(is),l(ms),l(I),l(Ms),l(hs),l(b),l(us),l(ds),l(ts)),l(e),y(p,s),y(m,s),y(k,s),y(N),y(A),y(Z,s),y(z),y(Q),y(S,s),y(X),y(E),y(G),y(R,s),y(D),y(B),y(V,s)}}}
/*
 * Ps: JSON-encoded table of contents for this page (title, local anchor,
 * nested sections with depth). Ls.h() stores it in the `content` attribute
 * of the <meta name="hf:doc:metadata"> element it appends to document.head.
 */
const Ps='{"title":"Data Utilities","local":"data-utilities","sections":[{"title":"is_conversational","local":"trl.is_conversational","sections":[],"depth":2},{"title":"maybe_convert_to_chatml","local":"trl.maybe_convert_to_chatml","sections":[],"depth":2},{"title":"extract_prompt","local":"trl.extract_prompt","sections":[],"depth":2},{"title":"unpair_preference_dataset","local":"trl.unpair_preference_dataset","sections":[],"depth":2}],"depth":1}';
/*
 * Ks: instance function handed to Ss in the lt constructor. Registers a
 * callback through Zs (presumably an on-mount hook — TODO confirm against
 * ../chunks/scheduler) that reads the `fw` query parameter from
 * window.location.search and discards the value, then returns an empty
 * array (no instance context).
 */
function Ks(C){return Zs(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/*
 * lt: the page component, exported under the name `component`. Its
 * constructor forwards the options object `e` together with Ks (instance
 * function), Ls (fragment factory), Ns and an empty props map to Ss.
 */
class lt extends zs{constructor(e){super(),Ss(this,e,Ks,Ls,Ns,{})}}export{lt as component};

Xet Storage Details

Size:
29.5 kB
·
Xet hash:
8e32a88393dd9dcdec7cd93a432cf83c8d83175f1e8c660fad3f58241be4162d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.