Buckets:

rtrm's picture
download
raw
40.2 kB
import{s as En,o as In,n as O}from"../chunks/scheduler.7c59faff.js";import{S as Zn,i as Dn,e as d,s,c as g,h as Ln,a as m,d as i,b as l,f as W,g as h,j as C,k as a,l as _,m as $,t as v,n as b,o as k,p as y}from"../chunks/index.09bb5655.js";import{C as Vn,H as dt,E as qn}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.e401c7fc.js";import{D as J}from"../chunks/Docstring.aa4c9b58.js";import{C as Be}from"../chunks/CodeBlock.48b6767c.js";import{T as An,M as ln}from"../chunks/TokenizersLanguageContent.0fc17a7a.js";import{E as Je}from"../chunks/ExampleCodeBlock.37eeb49e.js";function Qn(M){let e,p="Example:",o,r,c;return r=new Be({props:{code:"ZnJvbSUyMHRva2VuaXplcnMubW9kZWxzJTIwaW1wb3J0JTIwQlBFJTBBJTIzJTIwQnVpbGQlMjBhbiUyMGVtcHR5JTIwbW9kZWwlMjAodG8lMjBiZSUyMHRyYWluZWQpJTBBbW9kZWwlMjAlM0QlMjBCUEUodW5rX3Rva2VuJTNEJTIyJTNDdW5rJTNFJTIyKSUwQSUyMyUyMExvYWQlMjBmcm9tJTIwdm9jYWJ1bGFyeSUyMGFuZCUyMG1lcmdlcyUyMGZpbGVzJTBBbW9kZWwlMjAlM0QlMjBCUEUuZnJvbV9maWxlKCUyMnZvY2FiLmpzb24lMjIlMkMlMjAlMjJtZXJnZXMudHh0JTIyKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> tokenizers.models <span class="hljs-keyword">import</span> BPE
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Build an empty model (to be trained)</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>model = BPE(unk_token=<span class="hljs-string">&quot;&lt;unk&gt;&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Load from vocabulary and merges files</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>model = BPE.from_file(<span class="hljs-string">&quot;vocab.json&quot;</span>, <span class="hljs-string">&quot;merges.txt&quot;</span>)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-11lpom8"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Hn(M){let e,p="This method is roughly equivalent to doing:",o,r,c;return r=new Be({props:{code:"dm9jYWIlMkMlMjBtZXJnZXMlMjAlM0QlMjBCUEUucmVhZF9maWxlKHZvY2FiX2ZpbGVuYW1lJTJDJTIwbWVyZ2VzX2ZpbGVuYW1lKSUwQWJwZSUyMCUzRCUyMEJQRSh2b2NhYiUyQyUyMG1lcmdlcyk=",highlighted:`vocab, merges = BPE.read_file(vocab_filename, merges_filename)
bpe = BPE(vocab, merges)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-1d0d6oj"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Sn(M){let e,p="Example:",o,r,c;return r=new Be({props:{code:"ZnJvbSUyMHRva2VuaXplcnMubW9kZWxzJTIwaW1wb3J0JTIwVW5pZ3JhbSUwQSUyMyUyMEJ1aWxkJTIwYW4lMjBlbXB0eSUyMG1vZGVsJTIwKHRvJTIwYmUlMjB0cmFpbmVkKSUwQW1vZGVsJTIwJTNEJTIwVW5pZ3JhbSgpJTBBJTIzJTIwQnVpbGQlMjBmcm9tJTIwYSUyMHZvY2FidWxhcnklMjBsaXN0JTBBdm9jYWIlMjAlM0QlMjAlNUIoJTIyJTNDdW5rJTNFJTIyJTJDJTIwMC4wKSUyQyUyMCglMjJoZWxsbyUyMiUyQyUyMC0xLjApJTJDJTIwKCUyMndvcmxkJTIyJTJDJTIwLTEuNSklNUQlMEFtb2RlbCUyMCUzRCUyMFVuaWdyYW0odm9jYWIlM0R2b2NhYiUyQyUyMHVua19pZCUzRDAp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> tokenizers.models <span class="hljs-keyword">import</span> Unigram
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Build an empty model (to be trained)</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>model = Unigram()
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Build from a vocabulary list</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>vocab = [(<span class="hljs-string">&quot;&lt;unk&gt;&quot;</span>, <span class="hljs-number">0.0</span>), (<span class="hljs-string">&quot;hello&quot;</span>, -<span class="hljs-number">1.0</span>), (<span class="hljs-string">&quot;world&quot;</span>, -<span class="hljs-number">1.5</span>)]
<span class="hljs-meta">&gt;&gt;&gt; </span>model = Unigram(vocab=vocab, unk_id=<span class="hljs-number">0</span>)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-11lpom8"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Yn(M){let e,p="Example:",o,r,c;return r=new Be({props:{code:"ZnJvbSUyMHRva2VuaXplcnMubW9kZWxzJTIwaW1wb3J0JTIwV29yZExldmVsJTBBJTIzJTIwQnVpbGQlMjBmcm9tJTIwYSUyMHZvY2FidWxhcnklMjBkaWN0aW9uYXJ5JTBBdm9jYWIlMjAlM0QlMjAlN0IlMjJoZWxsbyUyMiUzQSUyMDAlMkMlMjAlMjJ3b3JsZCUyMiUzQSUyMDElMkMlMjAlMjIlM0N1bmslM0UlMjIlM0ElMjAyJTdEJTBBbW9kZWwlMjAlM0QlMjBXb3JkTGV2ZWwodm9jYWIlM0R2b2NhYiUyQyUyMHVua190b2tlbiUzRCUyMiUzQ3VuayUzRSUyMiklMEElMjMlMjBMb2FkJTIwZnJvbSUyMGZpbGUlMEFtb2RlbCUyMCUzRCUyMFdvcmRMZXZlbC5mcm9tX2ZpbGUoJTIydm9jYWIuanNvbiUyMiUyQyUyMHVua190b2tlbiUzRCUyMiUzQ3VuayUzRSUyMik=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> tokenizers.models <span class="hljs-keyword">import</span> WordLevel
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Build from a vocabulary dictionary</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>vocab = {<span class="hljs-string">&quot;hello&quot;</span>: <span class="hljs-number">0</span>, <span class="hljs-string">&quot;world&quot;</span>: <span class="hljs-number">1</span>, <span class="hljs-string">&quot;&lt;unk&gt;&quot;</span>: <span class="hljs-number">2</span>}
<span class="hljs-meta">&gt;&gt;&gt; </span>model = WordLevel(vocab=vocab, unk_token=<span class="hljs-string">&quot;&lt;unk&gt;&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Load from file</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>model = WordLevel.from_file(<span class="hljs-string">&quot;vocab.json&quot;</span>, unk_token=<span class="hljs-string">&quot;&lt;unk&gt;&quot;</span>)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-11lpom8"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Rn(M){let e,p="This method is roughly equivalent to doing:",o,r,c;return r=new Be({props:{code:"dm9jYWIlMjAlM0QlMjBXb3JkTGV2ZWwucmVhZF9maWxlKHZvY2FiX2ZpbGVuYW1lKSUwQXdvcmRsZXZlbCUyMCUzRCUyMFdvcmRMZXZlbCh2b2NhYik=",highlighted:`vocab = WordLevel.read_file(vocab_filename)
wordlevel = WordLevel(vocab)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-1d0d6oj"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Nn(M){let e,p="Example:",o,r,c;return r=new Be({props:{code:"ZnJvbSUyMHRva2VuaXplcnMubW9kZWxzJTIwaW1wb3J0JTIwV29yZFBpZWNlJTBBJTIzJTIwQnVpbGQlMjBhbiUyMGVtcHR5JTIwbW9kZWwlMjAodG8lMjBiZSUyMHRyYWluZWQpJTBBbW9kZWwlMjAlM0QlMjBXb3JkUGllY2UodW5rX3Rva2VuJTNEJTIyJTVCVU5LJTVEJTIyKSUwQSUyMyUyMExvYWQlMjBmcm9tJTIwYSUyMHZvY2FidWxhcnklMjBmaWxlJTBBbW9kZWwlMjAlM0QlMjBXb3JkUGllY2UuZnJvbV9maWxlKCUyMnZvY2FiLnR4dCUyMik=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> tokenizers.models <span class="hljs-keyword">import</span> WordPiece
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Build an empty model (to be trained)</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>model = WordPiece(unk_token=<span class="hljs-string">&quot;[UNK]&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Load from a vocabulary file</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>model = WordPiece.from_file(<span class="hljs-string">&quot;vocab.txt&quot;</span>)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-11lpom8"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Gn(M){let e,p="This method is roughly equivalent to doing:",o,r,c;return r=new Be({props:{code:"dm9jYWIlMjAlM0QlMjBXb3JkUGllY2UucmVhZF9maWxlKHZvY2FiX2ZpbGVuYW1lKSUwQXdvcmRwaWVjZSUyMCUzRCUyMFdvcmRQaWVjZSh2b2NhYik=",highlighted:`vocab = WordPiece.read_file(vocab_filename)
wordpiece = WordPiece(vocab)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-1d0d6oj"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Fn(M){let e,p,o,r,c,t,x="An implementation of the BPE (Byte-Pair Encoding) algorithm",ee,P,te,j,I,G,V,f="Instantiate a BPE model from the given files.",T,q,kt,Ee,dn=`If you don’t need to keep the <code>vocab, merges</code> values lying around,
this method is more optimized than manually calling
<code>read_file()</code> to initialize a <a href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.BPE">BPE</a>`,xt,Q,fe,_t,Ie,mn="Read a <code>vocab.json</code> and a <code>merges.txt</code> files",yt,Ze,cn=`This method provides a way to read and parse the content of these files,
returning the relevant data structures. If you want to instantiate some BPE models
from memory, this method gives you the expected input from the standard files.`,mt,ue,ct,w,ge,Mt,De,pn="Base class for all models",wt,Le,fn=`The model represents the actual tokenization algorithm. This is the part that
will contain and manage the learned vocabulary.`,Tt,Ve,un="This class cannot be constructed directly. Please use one of the concrete models.",zt,H,he,jt,qe,gn="Get the associated <code>Trainer</code>",Wt,Ae,hn=`Retrieve the <code>Trainer</code> associated to this
<a href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.Model">Model</a>.`,Ct,ne,$e,Pt,Qe,$n="Get the token associated to an ID",Ut,S,ve,Jt,He,vn="Save the current model",Bt,Se,bn=`Save the current model in the given folder, using the given prefix for the various
files that will get created.
Any file with the same name that already exists in this folder will be overwritten.`,Et,oe,be,It,Ye,kn="Get the ID associated to a token",Zt,re,ke,Dt,Re,xn="Tokenize a sequence",pt,xe,ft,E,_e,Lt,Ne,_n="An implementation of the Unigram algorithm",Vt,Ge,yn=`The Unigram algorithm is a subword tokenization algorithm based on unigram language
models, as used in SentencePiece. It learns a vocabulary by starting with a large
initial vocabulary and iteratively pruning it using the EM algorithm.`,qt,ae,ut,ye,gt,U,Me,At,Fe,Mn="An implementation of the WordLevel algorithm",Qt,Xe,wn="Most simple tokenizer model based on mapping tokens to their corresponding id.",Ht,se,St,Z,we,Yt,Ke,Tn="Instantiate a WordLevel model from the given file",Rt,le,Nt,Oe,zn=`If you don’t need to keep the <code>vocab</code> values lying around, this method is
more optimized than manually calling <code>read_file()</code> to
initialize a <a href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.WordLevel">WordLevel</a>`,Gt,Y,Te,Ft,et,jn="Read a <code>vocab.json</code>",Xt,tt,Wn=`This method provides a way to read and parse the content of a vocabulary file,
returning the relevant data structures. If you want to instantiate some WordLevel models
from memory, this method gives you the expected input from the standard files.`,ht,ze,$t,B,je,Kt,nt,Cn="An implementation of the WordPiece algorithm",Ot,ie,en,D,We,tn,ot,Pn="Instantiate a WordPiece model from the given file",nn,de,on,rt,Un=`If you don’t need to keep the <code>vocab</code> values lying around, this method is
more optimized than manually calling <code>read_file()</code> to
initialize a <a href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.WordPiece">WordPiece</a>`,rn,R,Ce,an,at,Jn="Read a <code>vocab.txt</code> file",sn,st,Bn=`This method provides a way to read and parse the content of a standard <em>vocab.txt</em>
file as used by the WordPiece Model, returning the relevant data structures. If you
want to instantiate some WordPiece models from memory, this method gives you the
expected input from the standard files.`,vt;return e=new dt({props:{title:"BPE",local:"tokenizers.models.BPE",headingTag:"h2"}}),r=new J({props:{name:"class tokenizers.models.BPE",anchor:"tokenizers.models.BPE",parameters:[{name:"vocab",val:" = None"},{name:"merges",val:" = None"},{name:"cache_capacity",val:" = None"},{name:"dropout",val:" = None"},{name:"unk_token",val:" = None"},{name:"continuing_subword_prefix",val:" = None"},{name:"end_of_word_suffix",val:" = None"},{name:"fuse_unk",val:" = None"},{name:"byte_fallback",val:" = False"},{name:"ignore_merges",val:" = False"}],parametersDescription:[{anchor:"tokenizers.models.BPE.vocab",description:`<strong>vocab</strong> (<code>Dict[str, int]</code>, <em>optional</em>) &#x2014;
A dictionary of string keys and their ids <code>{&quot;am&quot;: 0,...}</code>`,name:"vocab"},{anchor:"tokenizers.models.BPE.merges",description:`<strong>merges</strong> (<code>List[Tuple[str, str]]</code>, <em>optional</em>) &#x2014;
A list of pairs of tokens (<code>Tuple[str, str]</code>) <code>[(&quot;a&quot;, &quot;b&quot;),...]</code>`,name:"merges"},{anchor:"tokenizers.models.BPE.cache_capacity",description:`<strong>cache_capacity</strong> (<code>int</code>, <em>optional</em>) &#x2014;
The number of words that the BPE cache can contain. The cache allows
to speed-up the process by keeping the result of the merge operations
for a number of words.`,name:"cache_capacity"},{anchor:"tokenizers.models.BPE.dropout",description:`<strong>dropout</strong> (<code>float</code>, <em>optional</em>) &#x2014;
A float between 0 and 1 that represents the BPE dropout to use.`,name:"dropout"},{anchor:"tokenizers.models.BPE.unk_token",description:`<strong>unk_token</strong> (<code>str</code>, <em>optional</em>) &#x2014;
The unknown token to be used by the model.`,name:"unk_token"},{anchor:"tokenizers.models.BPE.continuing_subword_prefix",description:`<strong>continuing_subword_prefix</strong> (<code>str</code>, <em>optional</em>) &#x2014;
The prefix to attach to subword units that don&#x2019;t represent a beginning of word.`,name:"continuing_subword_prefix"},{anchor:"tokenizers.models.BPE.end_of_word_suffix",description:`<strong>end_of_word_suffix</strong> (<code>str</code>, <em>optional</em>) &#x2014;
The suffix to attach to subword units that represent an end of word.`,name:"end_of_word_suffix"},{anchor:"tokenizers.models.BPE.fuse_unk",description:`<strong>fuse_unk</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Whether to fuse any subsequent unknown tokens into a single one`,name:"fuse_unk"},{anchor:"tokenizers.models.BPE.byte_fallback",description:`<strong>byte_fallback</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Whether to use spm byte-fallback trick (defaults to False)`,name:"byte_fallback"},{anchor:"tokenizers.models.BPE.ignore_merges",description:`<strong>ignore_merges</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Whether or not to match tokens with the vocab before using merges.`,name:"ignore_merges"}]}}),P=new Je({props:{anchor:"tokenizers.models.BPE.example",$$slots:{default:[Qn]},$$scope:{ctx:M}}}),I=new J({props:{name:"from_file",anchor:"tokenizers.models.BPE.from_file",parameters:[{name:"vocab",val:""},{name:"merges",val:""},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"tokenizers.models.BPE.from_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) &#x2014;
The path to a <code>vocab.json</code> file`,name:"vocab"},{anchor:"tokenizers.models.BPE.from_file.merges",description:`<strong>merges</strong> (<code>str</code>) &#x2014;
The path to a <code>merges.txt</code> file`,name:"merges"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>An instance of BPE loaded from these files</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><a href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.BPE">BPE</a></p>
`}}),q=new Je({props:{anchor:"tokenizers.models.BPE.from_file.example",$$slots:{default:[Hn]},$$scope:{ctx:M}}}),fe=new J({props:{name:"read_file",anchor:"tokenizers.models.BPE.read_file",parameters:[{name:"vocab",val:""},{name:"merges",val:""}],parametersDescription:[{anchor:"tokenizers.models.BPE.read_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) &#x2014;
The path to a <code>vocab.json</code> file`,name:"vocab"},{anchor:"tokenizers.models.BPE.read_file.merges",description:`<strong>merges</strong> (<code>str</code>) &#x2014;
The path to a <code>merges.txt</code> file`,name:"merges"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The vocabulary and merges loaded into memory</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p>A <code>Tuple</code> with the vocab and the merges</p>
`}}),ue=new dt({props:{title:"Model",local:"tokenizers.models.Model",headingTag:"h2"}}),ge=new J({props:{name:"class tokenizers.models.Model",anchor:"tokenizers.models.Model",parameters:[]}}),he=new J({props:{name:"get_trainer",anchor:"tokenizers.models.Model.get_trainer",parameters:[],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The Trainer used to train this model</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>Trainer</code></p>
`}}),$e=new J({props:{name:"id_to_token",anchor:"tokenizers.models.Model.id_to_token",parameters:[{name:"id",val:""}],parametersDescription:[{anchor:"tokenizers.models.Model.id_to_token.id",description:`<strong>id</strong> (<code>int</code>) &#x2014;
An ID to convert to a token`,name:"id"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The token associated to the ID</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>str</code></p>
`}}),ve=new J({props:{name:"save",anchor:"tokenizers.models.Model.save",parameters:[{name:"folder",val:""},{name:"prefix",val:""}],parametersDescription:[{anchor:"tokenizers.models.Model.save.folder",description:`<strong>folder</strong> (<code>str</code>) &#x2014;
The path to the target folder in which to save the various files`,name:"folder"},{anchor:"tokenizers.models.Model.save.prefix",description:`<strong>prefix</strong> (<code>str</code>, <em>optional</em>) &#x2014;
An optional prefix, used to prefix each file name`,name:"prefix"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The list of saved files</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>List[str]</code></p>
`}}),be=new J({props:{name:"token_to_id",anchor:"tokenizers.models.Model.token_to_id",parameters:[{name:"tokens",val:""}],parametersDescription:[{anchor:"tokenizers.models.Model.token_to_id.token",description:`<strong>token</strong> (<code>str</code>) &#x2014;
A token to convert to an ID`,name:"token"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The ID associated to the token</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>int</code></p>
`}}),ke=new J({props:{name:"tokenize",anchor:"tokenizers.models.Model.tokenize",parameters:[{name:"sequence",val:""}],parametersDescription:[{anchor:"tokenizers.models.Model.tokenize.sequence",description:`<strong>sequence</strong> (<code>str</code>) &#x2014;
A sequence to tokenize`,name:"sequence"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The generated tokens</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p>A <code>List</code> of <code>Token</code></p>
`}}),xe=new dt({props:{title:"Unigram",local:"tokenizers.models.Unigram",headingTag:"h2"}}),_e=new J({props:{name:"class tokenizers.models.Unigram",anchor:"tokenizers.models.Unigram",parameters:[{name:"vocab",val:" = None"},{name:"unk_id",val:" = None"},{name:"byte_fallback",val:" = None"}],parametersDescription:[{anchor:"tokenizers.models.Unigram.vocab",description:`<strong>vocab</strong> (<code>List[Tuple[str, float]]</code>, <em>optional</em>) &#x2014;
A list of vocabulary items and their log-probability scores,
e.g. <code>[(&quot;am&quot;, -0.2442), ...]</code>. If not provided, an empty model is created.`,name:"vocab"},{anchor:"tokenizers.models.Unigram.unk_id",description:`<strong>unk_id</strong> (<code>int</code>, <em>optional</em>) &#x2014;
The index of the unknown token in the vocabulary list.`,name:"unk_id"},{anchor:"tokenizers.models.Unigram.byte_fallback",description:`<strong>byte_fallback</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether to use SentencePiece byte fallback for characters not in the vocabulary.`,name:"byte_fallback"}]}}),ae=new Je({props:{anchor:"tokenizers.models.Unigram.example",$$slots:{default:[Sn]},$$scope:{ctx:M}}}),ye=new dt({props:{title:"WordLevel",local:"tokenizers.models.WordLevel",headingTag:"h2"}}),Me=new J({props:{name:"class tokenizers.models.WordLevel",anchor:"tokenizers.models.WordLevel",parameters:[{name:"vocab",val:" = None"},{name:"unk_token",val:" = None"}],parametersDescription:[{anchor:"tokenizers.models.WordLevel.vocab",description:`<strong>vocab</strong> (<code>str</code>, <em>optional</em>) &#x2014;
A dictionary of string keys and their ids <code>{&quot;am&quot;: 0,...}</code>`,name:"vocab"},{anchor:"tokenizers.models.WordLevel.unk_token",description:`<strong>unk_token</strong> (<code>str</code>, <em>optional</em>) &#x2014;
The unknown token to be used by the model.`,name:"unk_token"}]}}),se=new Je({props:{anchor:"tokenizers.models.WordLevel.example",$$slots:{default:[Yn]},$$scope:{ctx:M}}}),we=new J({props:{name:"from_file",anchor:"tokenizers.models.WordLevel.from_file",parameters:[{name:"vocab",val:""},{name:"unk_token",val:" = None"}],parametersDescription:[{anchor:"tokenizers.models.WordLevel.from_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) &#x2014;
The path to a <code>vocab.json</code> file`,name:"vocab"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>An instance of WordLevel loaded from file</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><a
href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.WordLevel"
>WordLevel</a></p>
`}}),le=new Je({props:{anchor:"tokenizers.models.WordLevel.from_file.example",$$slots:{default:[Rn]},$$scope:{ctx:M}}}),Te=new J({props:{name:"read_file",anchor:"tokenizers.models.WordLevel.read_file",parameters:[{name:"vocab",val:""}],parametersDescription:[{anchor:"tokenizers.models.WordLevel.read_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) &#x2014;
The path to a <code>vocab.json</code> file`,name:"vocab"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The vocabulary as a <code>dict</code></p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>Dict[str, int]</code></p>
`}}),ze=new dt({props:{title:"WordPiece",local:"tokenizers.models.WordPiece",headingTag:"h2"}}),je=new J({props:{name:"class tokenizers.models.WordPiece",anchor:"tokenizers.models.WordPiece",parameters:[{name:"vocab",val:" = None"},{name:"unk_token",val:" = '[UNK]'"},{name:"max_input_chars_per_word",val:" = 100"},{name:"continuing_subword_prefix",val:" = '##'"}],parametersDescription:[{anchor:"tokenizers.models.WordPiece.vocab",description:`<strong>vocab</strong> (<code>Dict[str, int]</code>, <em>optional</em>) &#x2014;
A dictionary of string keys and their ids <code>{&quot;am&quot;: 0,...}</code>`,name:"vocab"},{anchor:"tokenizers.models.WordPiece.unk_token",description:`<strong>unk_token</strong> (<code>str</code>, <em>optional</em>) &#x2014;
The unknown token to be used by the model.`,name:"unk_token"},{anchor:"tokenizers.models.WordPiece.max_input_chars_per_word",description:`<strong>max_input_chars_per_word</strong> (<code>int</code>, <em>optional</em>) &#x2014;
The maximum number of characters to authorize in a single word.`,name:"max_input_chars_per_word"}]}}),ie=new Je({props:{anchor:"tokenizers.models.WordPiece.example",$$slots:{default:[Nn]},$$scope:{ctx:M}}}),We=new J({props:{name:"from_file",anchor:"tokenizers.models.WordPiece.from_file",parameters:[{name:"vocab",val:""},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"tokenizers.models.WordPiece.from_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) &#x2014;
The path to a <code>vocab.txt</code> file`,name:"vocab"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>An instance of WordPiece loaded from file</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><a
href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.WordPiece"
>WordPiece</a></p>
`}}),de=new Je({props:{anchor:"tokenizers.models.WordPiece.from_file.example",$$slots:{default:[Gn]},$$scope:{ctx:M}}}),Ce=new J({props:{name:"read_file",anchor:"tokenizers.models.WordPiece.read_file",parameters:[{name:"vocab",val:""}],parametersDescription:[{anchor:"tokenizers.models.WordPiece.read_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) &#x2014;
The path to a <code>vocab.txt</code> file`,name:"vocab"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>The vocabulary as a <code>dict</code></p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>Dict[str, int]</code></p>
`}}),{c(){g(e.$$.fragment),p=s(),o=d("div"),g(r.$$.fragment),c=s(),t=d("p"),t.textContent=x,ee=s(),g(P.$$.fragment),te=s(),j=d("div"),g(I.$$.fragment),G=s(),V=d("p"),V.textContent=f,T=s(),g(q.$$.fragment),kt=s(),Ee=d("p"),Ee.innerHTML=dn,xt=s(),Q=d("div"),g(fe.$$.fragment),_t=s(),Ie=d("p"),Ie.innerHTML=mn,yt=s(),Ze=d("p"),Ze.textContent=cn,mt=s(),g(ue.$$.fragment),ct=s(),w=d("div"),g(ge.$$.fragment),Mt=s(),De=d("p"),De.textContent=pn,wt=s(),Le=d("p"),Le.textContent=fn,Tt=s(),Ve=d("p"),Ve.textContent=un,zt=s(),H=d("div"),g(he.$$.fragment),jt=s(),qe=d("p"),qe.innerHTML=gn,Wt=s(),Ae=d("p"),Ae.innerHTML=hn,Ct=s(),ne=d("div"),g($e.$$.fragment),Pt=s(),Qe=d("p"),Qe.textContent=$n,Ut=s(),S=d("div"),g(ve.$$.fragment),Jt=s(),He=d("p"),He.textContent=vn,Bt=s(),Se=d("p"),Se.textContent=bn,Et=s(),oe=d("div"),g(be.$$.fragment),It=s(),Ye=d("p"),Ye.textContent=kn,Zt=s(),re=d("div"),g(ke.$$.fragment),Dt=s(),Re=d("p"),Re.textContent=xn,pt=s(),g(xe.$$.fragment),ft=s(),E=d("div"),g(_e.$$.fragment),Lt=s(),Ne=d("p"),Ne.textContent=_n,Vt=s(),Ge=d("p"),Ge.textContent=yn,qt=s(),g(ae.$$.fragment),ut=s(),g(ye.$$.fragment),gt=s(),U=d("div"),g(Me.$$.fragment),At=s(),Fe=d("p"),Fe.textContent=Mn,Qt=s(),Xe=d("p"),Xe.textContent=wn,Ht=s(),g(se.$$.fragment),St=s(),Z=d("div"),g(we.$$.fragment),Yt=s(),Ke=d("p"),Ke.textContent=Tn,Rt=s(),g(le.$$.fragment),Nt=s(),Oe=d("p"),Oe.innerHTML=zn,Gt=s(),Y=d("div"),g(Te.$$.fragment),Ft=s(),et=d("p"),et.innerHTML=jn,Xt=s(),tt=d("p"),tt.textContent=Wn,ht=s(),g(ze.$$.fragment),$t=s(),B=d("div"),g(je.$$.fragment),Kt=s(),nt=d("p"),nt.textContent=Cn,Ot=s(),g(ie.$$.fragment),en=s(),D=d("div"),g(We.$$.fragment),tn=s(),ot=d("p"),ot.textContent=Pn,nn=s(),g(de.$$.fragment),on=s(),rt=d("p"),rt.innerHTML=Un,rn=s(),R=d("div"),g(Ce.$$.fragment),an=s(),at=d("p"),at.innerHTML=Jn,sn=s(),st=d("p"),st.innerHTML=Bn,this.h()},l(n){h(e.$$.fragment,n),p=l(n),o=m(n,"DIV",{class:!0});var u=W(o);h(r.$$.fragment,u),c=l(u),t=m(u,"P",{"data-svelte-h":!0}),y(t)!=="svelte-jqu2by"&&(t.textContent=x),ee=l(u),h(P.$$.fragment,u),te=l(u),j=m(u,"DIV",{class:!0});var A=W(j);h(I.$$.fragment,A),G=l(A),V=m(A,"P",{"data-svelte-h":!0}),y(V)!=="svelte-16lw8hq"&&(V.textContent=f),T=l(A),h(q.$$.fragment,A),kt=l(A),Ee=m(A,"P",{"data-svelte-h":!0}),y(Ee)!=="svelte-1dxoty"&&(Ee.innerHTML=dn),A.forEach(i),xt=l(u),Q=m(u,"DIV",{class:!0});var F=W(Q);h(fe.$$.fragment,F),_t=l(F),Ie=m(F,"P",{"data-svelte-h":!0}),y(Ie)!=="svelte-1ij48x8"&&(Ie.innerHTML=mn),yt=l(F),Ze=m(F,"P",{"data-svelte-h":!0}),y(Ze)!=="svelte-1pzh26o"&&(Ze.textContent=cn),F.forEach(i),u.forEach(i),mt=l(n),h(ue.$$.fragment,n),ct=l(n),w=m(n,"DIV",{class:!0});var z=W(w);h(ge.$$.fragment,z),Mt=l(z),De=m(z,"P",{"data-svelte-h":!0}),y(De)!=="svelte-1gdk29l"&&(De.textContent=pn),wt=l(z),Le=m(z,"P",{"data-svelte-h":!0}),y(Le)!=="svelte-12e0a03"&&(Le.textContent=fn),Tt=l(z),Ve=m(z,"P",{"data-svelte-h":!0}),y(Ve)!=="svelte-1mzdlb8"&&(Ve.textContent=un),zt=l(z),H=m(z,"DIV",{class:!0});var X=W(H);h(he.$$.fragment,X),jt=l(X),qe=m(X,"P",{"data-svelte-h":!0}),y(qe)!=="svelte-1qaioy1"&&(qe.innerHTML=gn),Wt=l(X),Ae=m(X,"P",{"data-svelte-h":!0}),y(Ae)!=="svelte-xkw4ns"&&(Ae.innerHTML=hn),X.forEach(i),Ct=l(z),ne=m(z,"DIV",{class:!0});var Pe=W(ne);h($e.$$.fragment,Pe),Pt=l(Pe),Qe=m(Pe,"P",{"data-svelte-h":!0}),y(Qe)!=="svelte-az7nm5"&&(Qe.textContent=$n),Pe.forEach(i),Ut=l(z),S=m(z,"DIV",{class:!0});var K=W(S);h(ve.$$.fragment,K),Jt=l(K),He=m(K,"P",{"data-svelte-h":!0}),y(He)!=="svelte-jcck96"&&(He.textContent=vn),Bt=l(K),Se=m(K,"P",{"data-svelte-h":!0}),y(Se)!=="svelte-nzj26u"&&(Se.textContent=bn),K.forEach(i),Et=l(z),oe=m(z,"DIV",{class:!0});var Ue=W(oe);h(be.$$.fragment,Ue),It=l(Ue),Ye=m(Ue,"P",{"data-svelte-h":!0}),y(Ye)!=="svelte-sshy57"&&(Ye.textContent=kn),Ue.forEach(i),Zt=l(z),re=m(z,"DIV",{class:!0});var bt=W(re);h(ke.$$.fragment,bt),Dt=l(bt),Re=m(bt,"P",{"data-svelte-h":!0}),y(Re)!=="svelte-1oba4lj"&&(Re.textContent=xn),bt.forEach(i),z.forEach(i),pt=l(n),h(xe.$$.fragment,n),ft=l(n),E=m(n,"DIV",{class:!0});var me=W(E);h(_e.$$.fragment,me),Lt=l(me),Ne=m(me,"P",{"data-svelte-h":!0}),y(Ne)!=="svelte-17w8a7l"&&(Ne.textContent=_n),Vt=l(me),Ge=m(me,"P",{"data-svelte-h":!0}),y(Ge)!=="svelte-11dc2z1"&&(Ge.textContent=yn),qt=l(me),h(ae.$$.fragment,me),me.forEach(i),ut=l(n),h(ye.$$.fragment,n),gt=l(n),U=m(n,"DIV",{class:!0});var L=W(U);h(Me.$$.fragment,L),At=l(L),Fe=m(L,"P",{"data-svelte-h":!0}),y(Fe)!=="svelte-182qy7o"&&(Fe.textContent=Mn),Qt=l(L),Xe=m(L,"P",{"data-svelte-h":!0}),y(Xe)!=="svelte-lmtfje"&&(Xe.textContent=wn),Ht=l(L),h(se.$$.fragment,L),St=l(L),Z=m(L,"DIV",{class:!0});var ce=W(Z);h(we.$$.fragment,ce),Yt=l(ce),Ke=m(ce,"P",{"data-svelte-h":!0}),y(Ke)!=="svelte-161r26o"&&(Ke.textContent=Tn),Rt=l(ce),h(le.$$.fragment,ce),Nt=l(ce),Oe=m(ce,"P",{"data-svelte-h":!0}),y(Oe)!=="svelte-17l30mf"&&(Oe.innerHTML=zn),ce.forEach(i),Gt=l(L),Y=m(L,"DIV",{class:!0});var lt=W(Y);h(Te.$$.fragment,lt),Ft=l(lt),et=m(lt,"P",{"data-svelte-h":!0}),y(et)!=="svelte-136ms8v"&&(et.innerHTML=jn),Xt=l(lt),tt=m(lt,"P",{"data-svelte-h":!0}),y(tt)!=="svelte-vd1o34"&&(tt.textContent=Wn),lt.forEach(i),L.forEach(i),ht=l(n),h(ze.$$.fragment,n),$t=l(n),B=m(n,"DIV",{class:!0});var N=W(B);h(je.$$.fragment,N),Kt=l(N),nt=m(N,"P",{"data-svelte-h":!0}),y(nt)!=="svelte-14ugsd0"&&(nt.textContent=Cn),Ot=l(N),h(ie.$$.fragment,N),en=l(N),D=m(N,"DIV",{class:!0});var pe=W(D);h(We.$$.fragment,pe),tn=l(pe),ot=m(pe,"P",{"data-svelte-h":!0}),y(ot)!=="svelte-zpd6cg"&&(ot.textContent=Pn),nn=l(pe),h(de.$$.fragment,pe),on=l(pe),rt=m(pe,"P",{"data-svelte-h":!0}),y(rt)!=="svelte-6vti13"&&(rt.innerHTML=Un),pe.forEach(i),rn=l(N),R=m(N,"DIV",{class:!0});var it=W(R);h(Ce.$$.fragment,it),an=l(it),at=m(it,"P",{"data-svelte-h":!0}),y(at)!=="svelte-bml31z"&&(at.innerHTML=Jn),sn=l(it),st=m(it,"P",{"data-svelte-h":!0}),y(st)!=="svelte-1iix2ev"&&(st.innerHTML=Bn),it.forEach(i),N.forEach(i),this.h()},h(){C(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(Q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(o,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(ne,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(oe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(re,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(w,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(E,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(Z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(Y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(n,u){$(e,n,u),_(n,p,u),_(n,o,u),$(r,o,null),a(o,c),a(o,t),a(o,ee),$(P,o,null),a(o,te),a(o,j),$(I,j,null),a(j,G),a(j,V),a(j,T),$(q,j,null),a(j,kt),a(j,Ee),a(o,xt),a(o,Q),$(fe,Q,null),a(Q,_t),a(Q,Ie),a(Q,yt),a(Q,Ze),_(n,mt,u),$(ue,n,u),_(n,ct,u),_(n,w,u),$(ge,w,null),a(w,Mt),a(w,De),a(w,wt),a(w,Le),a(w,Tt),a(w,Ve),a(w,zt),a(w,H),$(he,H,null),a(H,jt),a(H,qe),a(H,Wt),a(H,Ae),a(w,Ct),a(w,ne),$($e,ne,null),a(ne,Pt),a(ne,Qe),a(w,Ut),a(w,S),$(ve,S,null),a(S,Jt),a(S,He),a(S,Bt),a(S,Se),a(w,Et),a(w,oe),$(be,oe,null),a(oe,It),a(oe,Ye),a(w,Zt),a(w,re),$(ke,re,null),a(re,Dt),a(re,Re),_(n,pt,u),$(xe,n,u),_(n,ft,u),_(n,E,u),$(_e,E,null),a(E,Lt),a(E,Ne),a(E,Vt),a(E,Ge),a(E,qt),$(ae,E,null),_(n,ut,u),$(ye,n,u),_(n,gt,u),_(n,U,u),$(Me,U,null),a(U,At),a(U,Fe),a(U,Qt),a(U,Xe),a(U,Ht),$(se,U,null),a(U,St),a(U,Z),$(we,Z,null),a(Z,Yt),a(Z,Ke),a(Z,Rt),$(le,Z,null),a(Z,Nt),a(Z,Oe),a(U,Gt),a(U,Y),$(Te,Y,null),a(Y,Ft),a(Y,et),a(Y,Xt),a(Y,tt),_(n,ht,u),$(ze,n,u),_(n,$t,u),_(n,B,u),$(je,B,null),a(B,Kt),a(B,nt),a(B,Ot),$(ie,B,null),a(B,en),a(B,D),$(We,D,null),a(D,tn),a(D,ot),a(D,nn),$(de,D,null),a(D,on),a(D,rt),a(B,rn),a(B,R),$(Ce,R,null),a(R,an),a(R,at),a(R,sn),a(R,st),vt=!0},p(n,u){const A={};u&2&&(A.$$scope={dirty:u,ctx:n}),P.$set(A);const F={};u&2&&(F.$$scope={dirty:u,ctx:n}),q.$set(F);const z={};u&2&&(z.$$scope={dirty:u,ctx:n}),ae.$set(z);const X={};u&2&&(X.$$scope={dirty:u,ctx:n}),se.$set(X);const Pe={};u&2&&(Pe.$$scope={dirty:u,ctx:n}),le.$set(Pe);const K={};u&2&&(K.$$scope={dirty:u,ctx:n}),ie.$set(K);const Ue={};u&2&&(Ue.$$scope={dirty:u,ctx:n}),de.$set(Ue)},i(n){vt||(v(e.$$.fragment,n),v(r.$$.fragment,n),v(P.$$.fragment,n),v(I.$$.fragment,n),v(q.$$.fragment,n),v(fe.$$.fragment,n),v(ue.$$.fragment,n),v(ge.$$.fragment,n),v(he.$$.fragment,n),v($e.$$.fragment,n),v(ve.$$.fragment,n),v(be.$$.fragment,n),v(ke.$$.fragment,n),v(xe.$$.fragment,n),v(_e.$$.fragment,n),v(ae.$$.fragment,n),v(ye.$$.fragment,n),v(Me.$$.fragment,n),v(se.$$.fragment,n),v(we.$$.fragment,n),v(le.$$.fragment,n),v(Te.$$.fragment,n),v(ze.$$.fragment,n),v(je.$$.fragment,n),v(ie.$$.fragment,n),v(We.$$.fragment,n),v(de.$$.fragment,n),v(Ce.$$.fragment,n),vt=!0)},o(n){b(e.$$.fragment,n),b(r.$$.fragment,n),b(P.$$.fragment,n),b(I.$$.fragment,n),b(q.$$.fragment,n),b(fe.$$.fragment,n),b(ue.$$.fragment,n),b(ge.$$.fragment,n),b(he.$$.fragment,n),b($e.$$.fragment,n),b(ve.$$.fragment,n),b(be.$$.fragment,n),b(ke.$$.fragment,n),b(xe.$$.fragment,n),b(_e.$$.fragment,n),b(ae.$$.fragment,n),b(ye.$$.fragment,n),b(Me.$$.fragment,n),b(se.$$.fragment,n),b(we.$$.fragment,n),b(le.$$.fragment,n),b(Te.$$.fragment,n),b(ze.$$.fragment,n),b(je.$$.fragment,n),b(ie.$$.fragment,n),b(We.$$.fragment,n),b(de.$$.fragment,n),b(Ce.$$.fragment,n),vt=!1},d(n){n&&(i(p),i(o),i(mt),i(ct),i(w),i(pt),i(ft),i(E),i(ut),i(gt),i(U),i(ht),i($t),i(B)),k(e,n),k(r),k(P),k(I),k(q),k(fe),k(ue,n),k(ge),k(he),k($e),k(ve),k(be),k(ke),k(xe,n),k(_e),k(ae),k(ye,n),k(Me),k(se),k(we),k(le),k(Te),k(ze,n),k(je),k(ie),k(We),k(de),k(Ce)}}}function Xn(M){let e,p;return e=new ln({props:{$$slots:{default:[Fn]},$$scope:{ctx:M}}}),{c(){g(e.$$.fragment)},l(o){h(e.$$.fragment,o)},m(o,r){$(e,o,r),p=!0},p(o,r){const c={};r&2&&(c.$$scope={dirty:r,ctx:o}),e.$set(c)},i(o){p||(v(e.$$.fragment,o),p=!0)},o(o){b(e.$$.fragment,o),p=!1},d(o){k(e,o)}}}function Kn(M){let e,p='The Rust API Reference is available directly on the <a href="https://docs.rs/tokenizers/latest/tokenizers/" rel="nofollow">Docs.rs</a> website.';return{c(){e=d("p"),e.innerHTML=p},l(o){e=m(o,"P",{"data-svelte-h":!0}),y(e)!=="svelte-4ytcyb"&&(e.innerHTML=p)},m(o,r){_(o,e,r)},p:O,d(o){o&&i(e)}}}function On(M){let e,p;return e=new ln({props:{$$slots:{default:[Kn]},$$scope:{ctx:M}}}),{c(){g(e.$$.fragment)},l(o){h(e.$$.fragment,o)},m(o,r){$(e,o,r),p=!0},p(o,r){const c={};r&2&&(c.$$scope={dirty:r,ctx:o}),e.$set(c)},i(o){p||(v(e.$$.fragment,o),p=!0)},o(o){b(e.$$.fragment,o),p=!1},d(o){k(e,o)}}}function eo(M){let e,p="The node API has not been documented yet.";return{c(){e=d("p"),e.textContent=p},l(o){e=m(o,"P",{"data-svelte-h":!0}),y(e)!=="svelte-1mrchm6"&&(e.textContent=p)},m(o,r){_(o,e,r)},p:O,d(o){o&&i(e)}}}function to(M){let e,p;return e=new ln({props:{$$slots:{default:[eo]},$$scope:{ctx:M}}}),{c(){g(e.$$.fragment)},l(o){h(e.$$.fragment,o)},m(o,r){$(e,o,r),p=!0},p(o,r){const c={};r&2&&(c.$$scope={dirty:r,ctx:o}),e.$set(c)},i(o){p||(v(e.$$.fragment,o),p=!0)},o(o){b(e.$$.fragment,o),p=!1},d(o){k(e,o)}}}function no(M){let e,p,o,r,c,t,x,ee,P,te,j,I,G,V;return c=new Vn({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),x=new dt({props:{title:"Models",local:"models",headingTag:"h1"}}),P=new An({props:{python:!0,rust:!0,node:!0,$$slots:{node:[to],rust:[On],python:[Xn]},$$scope:{ctx:M}}}),j=new qn({props:{source:"https://github.com/huggingface/tokenizers/blob/main/docs/source-doc-builder/api/models.mdx"}}),{c(){e=d("meta"),p=s(),o=d("p"),r=s(),g(c.$$.fragment),t=s(),g(x.$$.fragment),ee=s(),g(P.$$.fragment),te=s(),g(j.$$.fragment),I=s(),G=d("p"),this.h()},l(f){const T=Ln("svelte-u9bgzb",document.head);e=m(T,"META",{name:!0,content:!0}),T.forEach(i),p=l(f),o=m(f,"P",{}),W(o).forEach(i),r=l(f),h(c.$$.fragment,f),t=l(f),h(x.$$.fragment,f),ee=l(f),h(P.$$.fragment,f),te=l(f),h(j.$$.fragment,f),I=l(f),G=m(f,"P",{}),W(G).forEach(i),this.h()},h(){C(e,"name","hf:doc:metadata"),C(e,"content",oo)},m(f,T){a(document.head,e),_(f,p,T),_(f,o,T),_(f,r,T),$(c,f,T),_(f,t,T),$(x,f,T),_(f,ee,T),$(P,f,T),_(f,te,T),$(j,f,T),_(f,I,T),_(f,G,T),V=!0},p(f,[T]){const q={};T&2&&(q.$$scope={dirty:T,ctx:f}),P.$set(q)},i(f){V||(v(c.$$.fragment,f),v(x.$$.fragment,f),v(P.$$.fragment,f),v(j.$$.fragment,f),V=!0)},o(f){b(c.$$.fragment,f),b(x.$$.fragment,f),b(P.$$.fragment,f),b(j.$$.fragment,f),V=!1},d(f){f&&(i(p),i(o),i(r),i(t),i(ee),i(te),i(I),i(G)),i(e),k(c,f),k(x,f),k(P,f),k(j,f)}}}const oo='{"title":"Models","local":"models","sections":[{"title":"BPE","local":"tokenizers.models.BPE","sections":[],"depth":2},{"title":"Model","local":"tokenizers.models.Model","sections":[],"depth":2},{"title":"Unigram","local":"tokenizers.models.Unigram","sections":[],"depth":2},{"title":"WordLevel","local":"tokenizers.models.WordLevel","sections":[],"depth":2},{"title":"WordPiece","local":"tokenizers.models.WordPiece","sections":[],"depth":2}],"depth":1}';function ro(M){return In(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class fo extends Zn{constructor(e){super(),Dn(this,e,ro,no,En,{})}}export{fo as component};

Xet Storage Details

Size:
40.2 kB
·
Xet hash:
27e941efbaf86fd691455b7b08354fe28fcdbbe0ccce2f7f22413a0e9fa56bde

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.