Buckets:
| import{s as En,o as In,n as O}from"../chunks/scheduler.7c59faff.js";import{S as Zn,i as Dn,e as d,s,c as g,h as Ln,a as m,d as i,b as l,f as W,g as h,j as C,k as a,l as _,m as $,t as v,n as b,o as k,p as y}from"../chunks/index.09bb5655.js";import{C as Vn,H as dt,E as qn}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.e401c7fc.js";import{D as J}from"../chunks/Docstring.aa4c9b58.js";import{C as Be}from"../chunks/CodeBlock.48b6767c.js";import{T as An,M as ln}from"../chunks/TokenizersLanguageContent.0fc17a7a.js";import{E as Je}from"../chunks/ExampleCodeBlock.37eeb49e.js";function Qn(M){let e,p="Example:",o,r,c;return r=new Be({props:{code:"ZnJvbSUyMHRva2VuaXplcnMubW9kZWxzJTIwaW1wb3J0JTIwQlBFJTBBJTIzJTIwQnVpbGQlMjBhbiUyMGVtcHR5JTIwbW9kZWwlMjAodG8lMjBiZSUyMHRyYWluZWQpJTBBbW9kZWwlMjAlM0QlMjBCUEUodW5rX3Rva2VuJTNEJTIyJTNDdW5rJTNFJTIyKSUwQSUyMyUyMExvYWQlMjBmcm9tJTIwdm9jYWJ1bGFyeSUyMGFuZCUyMG1lcmdlcyUyMGZpbGVzJTBBbW9kZWwlMjAlM0QlMjBCUEUuZnJvbV9maWxlKCUyMnZvY2FiLmpzb24lMjIlMkMlMjAlMjJtZXJnZXMudHh0JTIyKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> tokenizers.models <span class="hljs-keyword">import</span> BPE | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Build an empty model (to be trained)</span> | |
| <span class="hljs-meta">>>> </span>model = BPE(unk_token=<span class="hljs-string">"<unk>"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Load from vocabulary and merges files</span> | |
| <span class="hljs-meta">>>> </span>model = BPE.from_file(<span class="hljs-string">"vocab.json"</span>, <span class="hljs-string">"merges.txt"</span>)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-11lpom8"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Hn(M){let e,p="This method is roughly equivalent to doing:",o,r,c;return r=new Be({props:{code:"dm9jYWIlMkMlMjBtZXJnZXMlMjAlM0QlMjBCUEUucmVhZF9maWxlKHZvY2FiX2ZpbGVuYW1lJTJDJTIwbWVyZ2VzX2ZpbGVuYW1lKSUwQWJwZSUyMCUzRCUyMEJQRSh2b2NhYiUyQyUyMG1lcmdlcyk=",highlighted:`vocab, merges = BPE.read_file(vocab_filename, merges_filename) | |
| bpe = BPE(vocab, merges)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-1d0d6oj"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Sn(M){let e,p="Example:",o,r,c;return r=new Be({props:{code:"ZnJvbSUyMHRva2VuaXplcnMubW9kZWxzJTIwaW1wb3J0JTIwVW5pZ3JhbSUwQSUyMyUyMEJ1aWxkJTIwYW4lMjBlbXB0eSUyMG1vZGVsJTIwKHRvJTIwYmUlMjB0cmFpbmVkKSUwQW1vZGVsJTIwJTNEJTIwVW5pZ3JhbSgpJTBBJTIzJTIwQnVpbGQlMjBmcm9tJTIwYSUyMHZvY2FidWxhcnklMjBsaXN0JTBBdm9jYWIlMjAlM0QlMjAlNUIoJTIyJTNDdW5rJTNFJTIyJTJDJTIwMC4wKSUyQyUyMCglMjJoZWxsbyUyMiUyQyUyMC0xLjApJTJDJTIwKCUyMndvcmxkJTIyJTJDJTIwLTEuNSklNUQlMEFtb2RlbCUyMCUzRCUyMFVuaWdyYW0odm9jYWIlM0R2b2NhYiUyQyUyMHVua19pZCUzRDAp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> tokenizers.models <span class="hljs-keyword">import</span> Unigram | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Build an empty model (to be trained)</span> | |
| <span class="hljs-meta">>>> </span>model = Unigram() | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Build from a vocabulary list</span> | |
| <span class="hljs-meta">>>> </span>vocab = [(<span class="hljs-string">"<unk>"</span>, <span class="hljs-number">0.0</span>), (<span class="hljs-string">"hello"</span>, -<span class="hljs-number">1.0</span>), (<span class="hljs-string">"world"</span>, -<span class="hljs-number">1.5</span>)] | |
| <span class="hljs-meta">>>> </span>model = Unigram(vocab=vocab, unk_id=<span class="hljs-number">0</span>)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-11lpom8"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Yn(M){let e,p="Example:",o,r,c;return r=new Be({props:{code:"ZnJvbSUyMHRva2VuaXplcnMubW9kZWxzJTIwaW1wb3J0JTIwV29yZExldmVsJTBBJTIzJTIwQnVpbGQlMjBmcm9tJTIwYSUyMHZvY2FidWxhcnklMjBkaWN0aW9uYXJ5JTBBdm9jYWIlMjAlM0QlMjAlN0IlMjJoZWxsbyUyMiUzQSUyMDAlMkMlMjAlMjJ3b3JsZCUyMiUzQSUyMDElMkMlMjAlMjIlM0N1bmslM0UlMjIlM0ElMjAyJTdEJTBBbW9kZWwlMjAlM0QlMjBXb3JkTGV2ZWwodm9jYWIlM0R2b2NhYiUyQyUyMHVua190b2tlbiUzRCUyMiUzQ3VuayUzRSUyMiklMEElMjMlMjBMb2FkJTIwZnJvbSUyMGZpbGUlMEFtb2RlbCUyMCUzRCUyMFdvcmRMZXZlbC5mcm9tX2ZpbGUoJTIydm9jYWIuanNvbiUyMiUyQyUyMHVua190b2tlbiUzRCUyMiUzQ3VuayUzRSUyMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> tokenizers.models <span class="hljs-keyword">import</span> WordLevel | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Build from a vocabulary dictionary</span> | |
| <span class="hljs-meta">>>> </span>vocab = {<span class="hljs-string">"hello"</span>: <span class="hljs-number">0</span>, <span class="hljs-string">"world"</span>: <span class="hljs-number">1</span>, <span class="hljs-string">"<unk>"</span>: <span class="hljs-number">2</span>} | |
| <span class="hljs-meta">>>> </span>model = WordLevel(vocab=vocab, unk_token=<span class="hljs-string">"<unk>"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Load from file</span> | |
| <span class="hljs-meta">>>> </span>model = WordLevel.from_file(<span class="hljs-string">"vocab.json"</span>, unk_token=<span class="hljs-string">"<unk>"</span>)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-11lpom8"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Rn(M){let e,p="This method is roughly equivalent to doing:",o,r,c;return r=new Be({props:{code:"dm9jYWIlMjAlM0QlMjBXb3JkTGV2ZWwucmVhZF9maWxlKHZvY2FiX2ZpbGVuYW1lKSUwQXdvcmRsZXZlbCUyMCUzRCUyMFdvcmRMZXZlbCh2b2NhYik=",highlighted:`vocab = WordLevel.read_file(vocab_filename) | |
| wordlevel = WordLevel(vocab)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-1d0d6oj"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Nn(M){let e,p="Example:",o,r,c;return r=new Be({props:{code:"ZnJvbSUyMHRva2VuaXplcnMubW9kZWxzJTIwaW1wb3J0JTIwV29yZFBpZWNlJTBBJTIzJTIwQnVpbGQlMjBhbiUyMGVtcHR5JTIwbW9kZWwlMjAodG8lMjBiZSUyMHRyYWluZWQpJTBBbW9kZWwlMjAlM0QlMjBXb3JkUGllY2UodW5rX3Rva2VuJTNEJTIyJTVCVU5LJTVEJTIyKSUwQSUyMyUyMExvYWQlMjBmcm9tJTIwYSUyMHZvY2FidWxhcnklMjBmaWxlJTBBbW9kZWwlMjAlM0QlMjBXb3JkUGllY2UuZnJvbV9maWxlKCUyMnZvY2FiLnR4dCUyMik=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> tokenizers.models <span class="hljs-keyword">import</span> WordPiece | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Build an empty model (to be trained)</span> | |
| <span class="hljs-meta">>>> </span>model = WordPiece(unk_token=<span class="hljs-string">"[UNK]"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Load from a vocabulary file</span> | |
| <span class="hljs-meta">>>> </span>model = WordPiece.from_file(<span class="hljs-string">"vocab.txt"</span>)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-11lpom8"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Gn(M){let e,p="This method is roughly equivalent to doing:",o,r,c;return r=new Be({props:{code:"dm9jYWIlMjAlM0QlMjBXb3JkUGllY2UucmVhZF9maWxlKHZvY2FiX2ZpbGVuYW1lKSUwQXdvcmRwaWVjZSUyMCUzRCUyMFdvcmRQaWVjZSh2b2NhYik=",highlighted:`vocab = WordPiece.read_file(vocab_filename) | |
| wordpiece = WordPiece(vocab)`,wrap:!1}}),{c(){e=d("p"),e.textContent=p,o=s(),g(r.$$.fragment)},l(t){e=m(t,"P",{"data-svelte-h":!0}),y(e)!=="svelte-1d0d6oj"&&(e.textContent=p),o=l(t),h(r.$$.fragment,t)},m(t,x){_(t,e,x),_(t,o,x),$(r,t,x),c=!0},p:O,i(t){c||(v(r.$$.fragment,t),c=!0)},o(t){b(r.$$.fragment,t),c=!1},d(t){t&&(i(e),i(o)),k(r,t)}}}function Fn(M){let e,p,o,r,c,t,x="An implementation of the BPE (Byte-Pair Encoding) algorithm",ee,P,te,j,I,G,V,f="Instantiate a BPE model from the given files.",T,q,kt,Ee,dn=`If you don’t need to keep the <code>vocab, merges</code> values lying around, | |
| this method is more optimized than manually calling | |
| <code>read_file()</code> to initialize a <a href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.BPE">BPE</a>`,xt,Q,fe,_t,Ie,mn="Read a <code>vocab.json</code> and a <code>merges.txt</code> files",yt,Ze,cn=`This method provides a way to read and parse the content of these files, | |
| returning the relevant data structures. If you want to instantiate some BPE models | |
| from memory, this method gives you the expected input from the standard files.`,mt,ue,ct,w,ge,Mt,De,pn="Base class for all models",wt,Le,fn=`The model represents the actual tokenization algorithm. This is the part that | |
| will contain and manage the learned vocabulary.`,Tt,Ve,un="This class cannot be constructed directly. Please use one of the concrete models.",zt,H,he,jt,qe,gn="Get the associated <code>Trainer</code>",Wt,Ae,hn=`Retrieve the <code>Trainer</code> associated to this | |
| <a href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.Model">Model</a>.`,Ct,ne,$e,Pt,Qe,$n="Get the token associated to an ID",Ut,S,ve,Jt,He,vn="Save the current model",Bt,Se,bn=`Save the current model in the given folder, using the given prefix for the various | |
| files that will get created. | |
| Any file with the same name that already exists in this folder will be overwritten.`,Et,oe,be,It,Ye,kn="Get the ID associated to a token",Zt,re,ke,Dt,Re,xn="Tokenize a sequence",pt,xe,ft,E,_e,Lt,Ne,_n="An implementation of the Unigram algorithm",Vt,Ge,yn=`The Unigram algorithm is a subword tokenization algorithm based on unigram language | |
| models, as used in SentencePiece. It learns a vocabulary by starting with a large | |
| initial vocabulary and iteratively pruning it using the EM algorithm.`,qt,ae,ut,ye,gt,U,Me,At,Fe,Mn="An implementation of the WordLevel algorithm",Qt,Xe,wn="Most simple tokenizer model based on mapping tokens to their corresponding id.",Ht,se,St,Z,we,Yt,Ke,Tn="Instantiate a WordLevel model from the given file",Rt,le,Nt,Oe,zn=`If you don’t need to keep the <code>vocab</code> values lying around, this method is | |
| more optimized than manually calling <code>read_file()</code> to | |
| initialize a <a href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.WordLevel">WordLevel</a>`,Gt,Y,Te,Ft,et,jn="Read a <code>vocab.json</code>",Xt,tt,Wn=`This method provides a way to read and parse the content of a vocabulary file, | |
| returning the relevant data structures. If you want to instantiate some WordLevel models | |
| from memory, this method gives you the expected input from the standard files.`,ht,ze,$t,B,je,Kt,nt,Cn="An implementation of the WordPiece algorithm",Ot,ie,en,D,We,tn,ot,Pn="Instantiate a WordPiece model from the given file",nn,de,on,rt,Un=`If you don’t need to keep the <code>vocab</code> values lying around, this method is | |
| more optimized than manually calling <code>read_file()</code> to | |
| initialize a <a href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.WordPiece">WordPiece</a>`,rn,R,Ce,an,at,Jn="Read a <code>vocab.txt</code> file",sn,st,Bn=`This method provides a way to read and parse the content of a standard <em>vocab.txt</em> | |
| file as used by the WordPiece Model, returning the relevant data structures. If you | |
| want to instantiate some WordPiece models from memory, this method gives you the | |
| expected input from the standard files.`,vt;return e=new dt({props:{title:"BPE",local:"tokenizers.models.BPE",headingTag:"h2"}}),r=new J({props:{name:"class tokenizers.models.BPE",anchor:"tokenizers.models.BPE",parameters:[{name:"vocab",val:" = None"},{name:"merges",val:" = None"},{name:"cache_capacity",val:" = None"},{name:"dropout",val:" = None"},{name:"unk_token",val:" = None"},{name:"continuing_subword_prefix",val:" = None"},{name:"end_of_word_suffix",val:" = None"},{name:"fuse_unk",val:" = None"},{name:"byte_fallback",val:" = False"},{name:"ignore_merges",val:" = False"}],parametersDescription:[{anchor:"tokenizers.models.BPE.vocab",description:`<strong>vocab</strong> (<code>Dict[str, int]</code>, <em>optional</em>) — | |
| A dictionary of string keys and their ids <code>{"am": 0,...}</code>`,name:"vocab"},{anchor:"tokenizers.models.BPE.merges",description:`<strong>merges</strong> (<code>List[Tuple[str, str]]</code>, <em>optional</em>) — | |
| A list of pairs of tokens (<code>Tuple[str, str]</code>) <code>[("a", "b"),...]</code>`,name:"merges"},{anchor:"tokenizers.models.BPE.cache_capacity",description:`<strong>cache_capacity</strong> (<code>int</code>, <em>optional</em>) — | |
| The number of words that the BPE cache can contain. The cache allows | |
| to speed-up the process by keeping the result of the merge operations | |
| for a number of words.`,name:"cache_capacity"},{anchor:"tokenizers.models.BPE.dropout",description:`<strong>dropout</strong> (<code>float</code>, <em>optional</em>) — | |
| A float between 0 and 1 that represents the BPE dropout to use.`,name:"dropout"},{anchor:"tokenizers.models.BPE.unk_token",description:`<strong>unk_token</strong> (<code>str</code>, <em>optional</em>) — | |
| The unknown token to be used by the model.`,name:"unk_token"},{anchor:"tokenizers.models.BPE.continuing_subword_prefix",description:`<strong>continuing_subword_prefix</strong> (<code>str</code>, <em>optional</em>) — | |
| The prefix to attach to subword units that don’t represent a beginning of word.`,name:"continuing_subword_prefix"},{anchor:"tokenizers.models.BPE.end_of_word_suffix",description:`<strong>end_of_word_suffix</strong> (<code>str</code>, <em>optional</em>) — | |
| The suffix to attach to subword units that represent an end of word.`,name:"end_of_word_suffix"},{anchor:"tokenizers.models.BPE.fuse_unk",description:`<strong>fuse_unk</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether to fuse any subsequent unknown tokens into a single one`,name:"fuse_unk"},{anchor:"tokenizers.models.BPE.byte_fallback",description:`<strong>byte_fallback</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether to use spm byte-fallback trick (defaults to False)`,name:"byte_fallback"},{anchor:"tokenizers.models.BPE.ignore_merges",description:`<strong>ignore_merges</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to match tokens with the vocab before using merges.`,name:"ignore_merges"}]}}),P=new Je({props:{anchor:"tokenizers.models.BPE.example",$$slots:{default:[Qn]},$$scope:{ctx:M}}}),I=new J({props:{name:"from_file",anchor:"tokenizers.models.BPE.from_file",parameters:[{name:"vocab",val:""},{name:"merges",val:""},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"tokenizers.models.BPE.from_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) — | |
| The path to a <code>vocab.json</code> file`,name:"vocab"},{anchor:"tokenizers.models.BPE.from_file.merges",description:`<strong>merges</strong> (<code>str</code>) — | |
| The path to a <code>merges.txt</code> file`,name:"merges"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>An instance of BPE loaded from these files</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.BPE">BPE</a></p> | |
| `}}),q=new Je({props:{anchor:"tokenizers.models.BPE.from_file.example",$$slots:{default:[Hn]},$$scope:{ctx:M}}}),fe=new J({props:{name:"read_file",anchor:"tokenizers.models.BPE.read_file",parameters:[{name:"vocab",val:""},{name:"merges",val:""}],parametersDescription:[{anchor:"tokenizers.models.BPE.read_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) — | |
| The path to a <code>vocab.json</code> file`,name:"vocab"},{anchor:"tokenizers.models.BPE.read_file.merges",description:`<strong>merges</strong> (<code>str</code>) — | |
| The path to a <code>merges.txt</code> file`,name:"merges"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The vocabulary and merges loaded into memory</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <code>Tuple</code> with the vocab and the merges</p> | |
| `}}),ue=new dt({props:{title:"Model",local:"tokenizers.models.Model",headingTag:"h2"}}),ge=new J({props:{name:"class tokenizers.models.Model",anchor:"tokenizers.models.Model",parameters:[]}}),he=new J({props:{name:"get_trainer",anchor:"tokenizers.models.Model.get_trainer",parameters:[],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The Trainer used to train this model</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>Trainer</code></p> | |
| `}}),$e=new J({props:{name:"id_to_token",anchor:"tokenizers.models.Model.id_to_token",parameters:[{name:"id",val:""}],parametersDescription:[{anchor:"tokenizers.models.Model.id_to_token.id",description:`<strong>id</strong> (<code>int</code>) — | |
| An ID to convert to a token`,name:"id"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The token associated to the ID</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>str</code></p> | |
| `}}),ve=new J({props:{name:"save",anchor:"tokenizers.models.Model.save",parameters:[{name:"folder",val:""},{name:"prefix",val:""}],parametersDescription:[{anchor:"tokenizers.models.Model.save.folder",description:`<strong>folder</strong> (<code>str</code>) — | |
| The path to the target folder in which to save the various files`,name:"folder"},{anchor:"tokenizers.models.Model.save.prefix",description:`<strong>prefix</strong> (<code>str</code>, <em>optional</em>) — | |
| An optional prefix, used to prefix each file name`,name:"prefix"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The list of saved files</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[str]</code></p> | |
| `}}),be=new J({props:{name:"token_to_id",anchor:"tokenizers.models.Model.token_to_id",parameters:[{name:"tokens",val:""}],parametersDescription:[{anchor:"tokenizers.models.Model.token_to_id.token",description:`<strong>token</strong> (<code>str</code>) — | |
| A token to convert to an ID`,name:"token"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The ID associated to the token</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>int</code></p> | |
| `}}),ke=new J({props:{name:"tokenize",anchor:"tokenizers.models.Model.tokenize",parameters:[{name:"sequence",val:""}],parametersDescription:[{anchor:"tokenizers.models.Model.tokenize.sequence",description:`<strong>sequence</strong> (<code>str</code>) — | |
| A sequence to tokenize`,name:"sequence"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The generated tokens</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <code>List</code> of <code>Token</code></p> | |
| `}}),xe=new dt({props:{title:"Unigram",local:"tokenizers.models.Unigram",headingTag:"h2"}}),_e=new J({props:{name:"class tokenizers.models.Unigram",anchor:"tokenizers.models.Unigram",parameters:[{name:"vocab",val:" = None"},{name:"unk_id",val:" = None"},{name:"byte_fallback",val:" = None"}],parametersDescription:[{anchor:"tokenizers.models.Unigram.vocab",description:`<strong>vocab</strong> (<code>List[Tuple[str, float]]</code>, <em>optional</em>) — | |
| A list of vocabulary items and their log-probability scores, | |
| e.g. <code>[("am", -0.2442), ...]</code>. If not provided, an empty model is created.`,name:"vocab"},{anchor:"tokenizers.models.Unigram.unk_id",description:`<strong>unk_id</strong> (<code>int</code>, <em>optional</em>) — | |
| The index of the unknown token in the vocabulary list.`,name:"unk_id"},{anchor:"tokenizers.models.Unigram.byte_fallback",description:`<strong>byte_fallback</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to use SentencePiece byte fallback for characters not in the vocabulary.`,name:"byte_fallback"}]}}),ae=new Je({props:{anchor:"tokenizers.models.Unigram.example",$$slots:{default:[Sn]},$$scope:{ctx:M}}}),ye=new dt({props:{title:"WordLevel",local:"tokenizers.models.WordLevel",headingTag:"h2"}}),Me=new J({props:{name:"class tokenizers.models.WordLevel",anchor:"tokenizers.models.WordLevel",parameters:[{name:"vocab",val:" = None"},{name:"unk_token",val:" = None"}],parametersDescription:[{anchor:"tokenizers.models.WordLevel.vocab",description:`<strong>vocab</strong> (<code>str</code>, <em>optional</em>) — | |
| A dictionary of string keys and their ids <code>{"am": 0,...}</code>`,name:"vocab"},{anchor:"tokenizers.models.WordLevel.unk_token",description:`<strong>unk_token</strong> (<code>str</code>, <em>optional</em>) — | |
| The unknown token to be used by the model.`,name:"unk_token"}]}}),se=new Je({props:{anchor:"tokenizers.models.WordLevel.example",$$slots:{default:[Yn]},$$scope:{ctx:M}}}),we=new J({props:{name:"from_file",anchor:"tokenizers.models.WordLevel.from_file",parameters:[{name:"vocab",val:""},{name:"unk_token",val:" = None"}],parametersDescription:[{anchor:"tokenizers.models.WordLevel.from_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) — | |
| The path to a <code>vocab.json</code> file`,name:"vocab"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>An instance of WordLevel loaded from file</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.WordLevel" | |
| >WordLevel</a></p> | |
| `}}),le=new Je({props:{anchor:"tokenizers.models.WordLevel.from_file.example",$$slots:{default:[Rn]},$$scope:{ctx:M}}}),Te=new J({props:{name:"read_file",anchor:"tokenizers.models.WordLevel.read_file",parameters:[{name:"vocab",val:""}],parametersDescription:[{anchor:"tokenizers.models.WordLevel.read_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) — | |
| The path to a <code>vocab.json</code> file`,name:"vocab"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The vocabulary as a <code>dict</code></p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>Dict[str, int]</code></p> | |
| `}}),ze=new dt({props:{title:"WordPiece",local:"tokenizers.models.WordPiece",headingTag:"h2"}}),je=new J({props:{name:"class tokenizers.models.WordPiece",anchor:"tokenizers.models.WordPiece",parameters:[{name:"vocab",val:" = None"},{name:"unk_token",val:" = '[UNK]'"},{name:"max_input_chars_per_word",val:" = 100"},{name:"continuing_subword_prefix",val:" = '##'"}],parametersDescription:[{anchor:"tokenizers.models.WordPiece.vocab",description:`<strong>vocab</strong> (<code>Dict[str, int]</code>, <em>optional</em>) — | |
| A dictionary of string keys and their ids <code>{"am": 0,...}</code>`,name:"vocab"},{anchor:"tokenizers.models.WordPiece.unk_token",description:`<strong>unk_token</strong> (<code>str</code>, <em>optional</em>) — | |
| The unknown token to be used by the model.`,name:"unk_token"},{anchor:"tokenizers.models.WordPiece.max_input_chars_per_word",description:`<strong>max_input_chars_per_word</strong> (<code>int</code>, <em>optional</em>) — | |
| The maximum number of characters to authorize in a single word.`,name:"max_input_chars_per_word"}]}}),ie=new Je({props:{anchor:"tokenizers.models.WordPiece.example",$$slots:{default:[Nn]},$$scope:{ctx:M}}}),We=new J({props:{name:"from_file",anchor:"tokenizers.models.WordPiece.from_file",parameters:[{name:"vocab",val:""},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"tokenizers.models.WordPiece.from_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) — | |
| The path to a <code>vocab.txt</code> file`,name:"vocab"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>An instance of WordPiece loaded from file</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/tokenizers/pr_2001/en/api/models#tokenizers.models.WordPiece" | |
| >WordPiece</a></p> | |
| `}}),de=new Je({props:{anchor:"tokenizers.models.WordPiece.from_file.example",$$slots:{default:[Gn]},$$scope:{ctx:M}}}),Ce=new J({props:{name:"read_file",anchor:"tokenizers.models.WordPiece.read_file",parameters:[{name:"vocab",val:""}],parametersDescription:[{anchor:"tokenizers.models.WordPiece.read_file.vocab",description:`<strong>vocab</strong> (<code>str</code>) — | |
| The path to a <code>vocab.txt</code> file`,name:"vocab"}],returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>The vocabulary as a <code>dict</code></p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>Dict[str, int]</code></p> | |
| `}}),{c(){g(e.$$.fragment),p=s(),o=d("div"),g(r.$$.fragment),c=s(),t=d("p"),t.textContent=x,ee=s(),g(P.$$.fragment),te=s(),j=d("div"),g(I.$$.fragment),G=s(),V=d("p"),V.textContent=f,T=s(),g(q.$$.fragment),kt=s(),Ee=d("p"),Ee.innerHTML=dn,xt=s(),Q=d("div"),g(fe.$$.fragment),_t=s(),Ie=d("p"),Ie.innerHTML=mn,yt=s(),Ze=d("p"),Ze.textContent=cn,mt=s(),g(ue.$$.fragment),ct=s(),w=d("div"),g(ge.$$.fragment),Mt=s(),De=d("p"),De.textContent=pn,wt=s(),Le=d("p"),Le.textContent=fn,Tt=s(),Ve=d("p"),Ve.textContent=un,zt=s(),H=d("div"),g(he.$$.fragment),jt=s(),qe=d("p"),qe.innerHTML=gn,Wt=s(),Ae=d("p"),Ae.innerHTML=hn,Ct=s(),ne=d("div"),g($e.$$.fragment),Pt=s(),Qe=d("p"),Qe.textContent=$n,Ut=s(),S=d("div"),g(ve.$$.fragment),Jt=s(),He=d("p"),He.textContent=vn,Bt=s(),Se=d("p"),Se.textContent=bn,Et=s(),oe=d("div"),g(be.$$.fragment),It=s(),Ye=d("p"),Ye.textContent=kn,Zt=s(),re=d("div"),g(ke.$$.fragment),Dt=s(),Re=d("p"),Re.textContent=xn,pt=s(),g(xe.$$.fragment),ft=s(),E=d("div"),g(_e.$$.fragment),Lt=s(),Ne=d("p"),Ne.textContent=_n,Vt=s(),Ge=d("p"),Ge.textContent=yn,qt=s(),g(ae.$$.fragment),ut=s(),g(ye.$$.fragment),gt=s(),U=d("div"),g(Me.$$.fragment),At=s(),Fe=d("p"),Fe.textContent=Mn,Qt=s(),Xe=d("p"),Xe.textContent=wn,Ht=s(),g(se.$$.fragment),St=s(),Z=d("div"),g(we.$$.fragment),Yt=s(),Ke=d("p"),Ke.textContent=Tn,Rt=s(),g(le.$$.fragment),Nt=s(),Oe=d("p"),Oe.innerHTML=zn,Gt=s(),Y=d("div"),g(Te.$$.fragment),Ft=s(),et=d("p"),et.innerHTML=jn,Xt=s(),tt=d("p"),tt.textContent=Wn,ht=s(),g(ze.$$.fragment),$t=s(),B=d("div"),g(je.$$.fragment),Kt=s(),nt=d("p"),nt.textContent=Cn,Ot=s(),g(ie.$$.fragment),en=s(),D=d("div"),g(We.$$.fragment),tn=s(),ot=d("p"),ot.textContent=Pn,nn=s(),g(de.$$.fragment),on=s(),rt=d("p"),rt.innerHTML=Un,rn=s(),R=d("div"),g(Ce.$$.fragment),an=s(),at=d("p"),at.innerHTML=Jn,sn=s(),st=d("p"),st.innerHTML=Bn,this.h()},l(n){h(e.$$.fragment,n),p=l(n),o=m(n,"DIV",{class:!0});var 
u=W(o);h(r.$$.fragment,u),c=l(u),t=m(u,"P",{"data-svelte-h":!0}),y(t)!=="svelte-jqu2by"&&(t.textContent=x),ee=l(u),h(P.$$.fragment,u),te=l(u),j=m(u,"DIV",{class:!0});var A=W(j);h(I.$$.fragment,A),G=l(A),V=m(A,"P",{"data-svelte-h":!0}),y(V)!=="svelte-16lw8hq"&&(V.textContent=f),T=l(A),h(q.$$.fragment,A),kt=l(A),Ee=m(A,"P",{"data-svelte-h":!0}),y(Ee)!=="svelte-1dxoty"&&(Ee.innerHTML=dn),A.forEach(i),xt=l(u),Q=m(u,"DIV",{class:!0});var F=W(Q);h(fe.$$.fragment,F),_t=l(F),Ie=m(F,"P",{"data-svelte-h":!0}),y(Ie)!=="svelte-1ij48x8"&&(Ie.innerHTML=mn),yt=l(F),Ze=m(F,"P",{"data-svelte-h":!0}),y(Ze)!=="svelte-1pzh26o"&&(Ze.textContent=cn),F.forEach(i),u.forEach(i),mt=l(n),h(ue.$$.fragment,n),ct=l(n),w=m(n,"DIV",{class:!0});var z=W(w);h(ge.$$.fragment,z),Mt=l(z),De=m(z,"P",{"data-svelte-h":!0}),y(De)!=="svelte-1gdk29l"&&(De.textContent=pn),wt=l(z),Le=m(z,"P",{"data-svelte-h":!0}),y(Le)!=="svelte-12e0a03"&&(Le.textContent=fn),Tt=l(z),Ve=m(z,"P",{"data-svelte-h":!0}),y(Ve)!=="svelte-1mzdlb8"&&(Ve.textContent=un),zt=l(z),H=m(z,"DIV",{class:!0});var X=W(H);h(he.$$.fragment,X),jt=l(X),qe=m(X,"P",{"data-svelte-h":!0}),y(qe)!=="svelte-1qaioy1"&&(qe.innerHTML=gn),Wt=l(X),Ae=m(X,"P",{"data-svelte-h":!0}),y(Ae)!=="svelte-xkw4ns"&&(Ae.innerHTML=hn),X.forEach(i),Ct=l(z),ne=m(z,"DIV",{class:!0});var Pe=W(ne);h($e.$$.fragment,Pe),Pt=l(Pe),Qe=m(Pe,"P",{"data-svelte-h":!0}),y(Qe)!=="svelte-az7nm5"&&(Qe.textContent=$n),Pe.forEach(i),Ut=l(z),S=m(z,"DIV",{class:!0});var K=W(S);h(ve.$$.fragment,K),Jt=l(K),He=m(K,"P",{"data-svelte-h":!0}),y(He)!=="svelte-jcck96"&&(He.textContent=vn),Bt=l(K),Se=m(K,"P",{"data-svelte-h":!0}),y(Se)!=="svelte-nzj26u"&&(Se.textContent=bn),K.forEach(i),Et=l(z),oe=m(z,"DIV",{class:!0});var Ue=W(oe);h(be.$$.fragment,Ue),It=l(Ue),Ye=m(Ue,"P",{"data-svelte-h":!0}),y(Ye)!=="svelte-sshy57"&&(Ye.textContent=kn),Ue.forEach(i),Zt=l(z),re=m(z,"DIV",{class:!0});var 
bt=W(re);h(ke.$$.fragment,bt),Dt=l(bt),Re=m(bt,"P",{"data-svelte-h":!0}),y(Re)!=="svelte-1oba4lj"&&(Re.textContent=xn),bt.forEach(i),z.forEach(i),pt=l(n),h(xe.$$.fragment,n),ft=l(n),E=m(n,"DIV",{class:!0});var me=W(E);h(_e.$$.fragment,me),Lt=l(me),Ne=m(me,"P",{"data-svelte-h":!0}),y(Ne)!=="svelte-17w8a7l"&&(Ne.textContent=_n),Vt=l(me),Ge=m(me,"P",{"data-svelte-h":!0}),y(Ge)!=="svelte-11dc2z1"&&(Ge.textContent=yn),qt=l(me),h(ae.$$.fragment,me),me.forEach(i),ut=l(n),h(ye.$$.fragment,n),gt=l(n),U=m(n,"DIV",{class:!0});var L=W(U);h(Me.$$.fragment,L),At=l(L),Fe=m(L,"P",{"data-svelte-h":!0}),y(Fe)!=="svelte-182qy7o"&&(Fe.textContent=Mn),Qt=l(L),Xe=m(L,"P",{"data-svelte-h":!0}),y(Xe)!=="svelte-lmtfje"&&(Xe.textContent=wn),Ht=l(L),h(se.$$.fragment,L),St=l(L),Z=m(L,"DIV",{class:!0});var ce=W(Z);h(we.$$.fragment,ce),Yt=l(ce),Ke=m(ce,"P",{"data-svelte-h":!0}),y(Ke)!=="svelte-161r26o"&&(Ke.textContent=Tn),Rt=l(ce),h(le.$$.fragment,ce),Nt=l(ce),Oe=m(ce,"P",{"data-svelte-h":!0}),y(Oe)!=="svelte-17l30mf"&&(Oe.innerHTML=zn),ce.forEach(i),Gt=l(L),Y=m(L,"DIV",{class:!0});var lt=W(Y);h(Te.$$.fragment,lt),Ft=l(lt),et=m(lt,"P",{"data-svelte-h":!0}),y(et)!=="svelte-136ms8v"&&(et.innerHTML=jn),Xt=l(lt),tt=m(lt,"P",{"data-svelte-h":!0}),y(tt)!=="svelte-vd1o34"&&(tt.textContent=Wn),lt.forEach(i),L.forEach(i),ht=l(n),h(ze.$$.fragment,n),$t=l(n),B=m(n,"DIV",{class:!0});var N=W(B);h(je.$$.fragment,N),Kt=l(N),nt=m(N,"P",{"data-svelte-h":!0}),y(nt)!=="svelte-14ugsd0"&&(nt.textContent=Cn),Ot=l(N),h(ie.$$.fragment,N),en=l(N),D=m(N,"DIV",{class:!0});var pe=W(D);h(We.$$.fragment,pe),tn=l(pe),ot=m(pe,"P",{"data-svelte-h":!0}),y(ot)!=="svelte-zpd6cg"&&(ot.textContent=Pn),nn=l(pe),h(de.$$.fragment,pe),on=l(pe),rt=m(pe,"P",{"data-svelte-h":!0}),y(rt)!=="svelte-6vti13"&&(rt.innerHTML=Un),pe.forEach(i),rn=l(N),R=m(N,"DIV",{class:!0});var 
it=W(R);h(Ce.$$.fragment,it),an=l(it),at=m(it,"P",{"data-svelte-h":!0}),y(at)!=="svelte-bml31z"&&(at.innerHTML=Jn),sn=l(it),st=m(it,"P",{"data-svelte-h":!0}),y(st)!=="svelte-1iix2ev"&&(st.innerHTML=Bn),it.forEach(i),N.forEach(i),this.h()},h(){C(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(Q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(o,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(ne,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(oe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(re,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(w,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(E,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(Z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(Y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),C(B,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(n,u){$(e,n,u),_(n,p,u),_(n,o,u),$(r,o,null),a(o,c),a(o,t),a(o,ee),$(P,o,null),a(o,te),a(o,j),$(I,j,null),a(j,G),a(j,V),a(j,T),$(q,j,null),a(j,kt),a(j,Ee),a(o,xt),a(o,Q),$(fe,Q,null),a(Q,_t),a(Q,Ie),a(Q,yt),a(Q,Ze),_(n,mt,u),$(ue,n,u),_(n,ct,u),_(n,w,u),$(ge,w,null),a(w,Mt),a(w,De),a(w,wt),a(w,Le),a(w,Tt),a(w,Ve),a(w,zt),a(w,H),$(he,H,null),a(H,jt),a(H,qe),a(H,Wt),a(H,Ae),a(w,Ct),a(w,ne),$($e,ne,null),a(ne,Pt),a(ne,Qe),a(w,Ut),a(w,S),$(ve,S,null),a(S,Jt),a(S,He),a(S,Bt),a(S,Se),a(w,Et),a(w,oe),$(be,oe,null),a(oe,It),a(oe,Ye),a(w,Zt),a(w,re),$(ke,re,null),a(re,Dt),a(re,Re),_(n,pt,u),$(xe,n,u),_(n,ft,u),_(n,E,u),$(_e,E,null),a(E,Lt),a(E,Ne),a(E,Vt),a(E,Ge),a(E,qt),$(ae,E,null),_(n,ut,u),$(ye,n,u),_(n,gt,u),_(n,U,u),$(Me,U,null),a(U,At),a(U,Fe),a(U,Qt),a(U,Xe),a(U,Ht),$(se,U,null),a(U,St),a(U,Z),$(we,Z,null),a(Z,Yt),a(Z,Ke),a(Z,Rt),$(le,Z,null),a(Z,Nt),a(Z,Oe),a(U,Gt),a(U,Y),$(Te,Y,null),a(Y,Ft),a(Y,et),a(Y,Xt),a(Y,tt),_(n,ht,u),$(ze,n,u),_(n,$t,u),_(n,B,u),$(je,B,null),a(B,Kt),a(B,nt),a(B,Ot),$(ie,B,null),a(B,en),a(B,D),$(We,D,null),a(D,tn),a(D,ot),a(D,nn),$(de,D,null),a(D,on),a(D,rt),a(B,rn),a(B,R),$(Ce,R,null),a(R,an),a(R,at),a(R,sn),a(R,st),vt=!0},p(n,u){const A={};u&2&&(A.$$scope={dirty:u,ctx:n}),P.$set(A);const F={};u&2&&(F.$$scope={dirty:u,ctx:n}),q.$set(F);const z={};u&2&&(z.$$scope={dirty:u,ctx:n}),ae.$set(z);const X={};u&2&&(X.$$scope={dirty:u,ctx:n}),se.$set(X);const Pe={};u&2&&(Pe.$$scope={dirty:u,ctx:n}),le.$set(Pe);const K={};u&2&&(K.$$scope={dirty:u,ctx:n}),ie.$set(K);const 
Ue={};u&2&&(Ue.$$scope={dirty:u,ctx:n}),de.$set(Ue)},i(n){vt||(v(e.$$.fragment,n),v(r.$$.fragment,n),v(P.$$.fragment,n),v(I.$$.fragment,n),v(q.$$.fragment,n),v(fe.$$.fragment,n),v(ue.$$.fragment,n),v(ge.$$.fragment,n),v(he.$$.fragment,n),v($e.$$.fragment,n),v(ve.$$.fragment,n),v(be.$$.fragment,n),v(ke.$$.fragment,n),v(xe.$$.fragment,n),v(_e.$$.fragment,n),v(ae.$$.fragment,n),v(ye.$$.fragment,n),v(Me.$$.fragment,n),v(se.$$.fragment,n),v(we.$$.fragment,n),v(le.$$.fragment,n),v(Te.$$.fragment,n),v(ze.$$.fragment,n),v(je.$$.fragment,n),v(ie.$$.fragment,n),v(We.$$.fragment,n),v(de.$$.fragment,n),v(Ce.$$.fragment,n),vt=!0)},o(n){b(e.$$.fragment,n),b(r.$$.fragment,n),b(P.$$.fragment,n),b(I.$$.fragment,n),b(q.$$.fragment,n),b(fe.$$.fragment,n),b(ue.$$.fragment,n),b(ge.$$.fragment,n),b(he.$$.fragment,n),b($e.$$.fragment,n),b(ve.$$.fragment,n),b(be.$$.fragment,n),b(ke.$$.fragment,n),b(xe.$$.fragment,n),b(_e.$$.fragment,n),b(ae.$$.fragment,n),b(ye.$$.fragment,n),b(Me.$$.fragment,n),b(se.$$.fragment,n),b(we.$$.fragment,n),b(le.$$.fragment,n),b(Te.$$.fragment,n),b(ze.$$.fragment,n),b(je.$$.fragment,n),b(ie.$$.fragment,n),b(We.$$.fragment,n),b(de.$$.fragment,n),b(Ce.$$.fragment,n),vt=!1},d(n){n&&(i(p),i(o),i(mt),i(ct),i(w),i(pt),i(ft),i(E),i(ut),i(gt),i(U),i(ht),i($t),i(B)),k(e,n),k(r),k(P),k(I),k(q),k(fe),k(ue,n),k(ge),k(he),k($e),k(ve),k(be),k(ke),k(xe,n),k(_e),k(ae),k(ye,n),k(Me),k(se),k(we),k(le),k(Te),k(ze,n),k(je),k(ie),k(We),k(de),k(Ce)}}}
/**
 * Xn(M): fragment factory used as the `python` slot of the `An` component
 * created in `no`. It instantiates `ln` with `Fn` as its default slot and `M`
 * as the slot-scope context, then returns an object of hooks c/l/m/p/i/o/d
 * (presumably Svelte's create / claim / mount / update / intro / outro /
 * destroy lifecycle -- confirm against the runtime chunk).
 * The `p` hook forwards a fresh `$$scope` to `ln` only when `dirty & 2` is
 * non-zero; the local flag `p` is set to true in `m`/`i` and false in `o`.
 */
function Xn(M){let e,p;return e=new ln({props:{$$slots:{default:[Fn]},$$scope:{ctx:M}}}),{c(){g(e.$$.fragment)},l(o){h(e.$$.fragment,o)},m(o,r){$(e,o,r),p=!0},p(o,r){const c={};r&2&&(c.$$scope={dirty:r,ctx:o}),e.$set(c)},i(o){p||(v(e.$$.fragment,o),p=!0)},o(o){b(e.$$.fragment,o),p=!1},d(o){k(e,o)}}}
/**
 * Kn(M): fragment holding one static paragraph whose HTML is the constant
 * string `p` (a sentence linking to the Rust API reference on Docs.rs).
 * `c` builds the element with d("p") and assigns innerHTML; `l` obtains it
 * with m(o,"P",...) and rewrites innerHTML only when y(e) is not
 * "svelte-4ytcyb" (NOTE(review): looks like a hydration hash check --
 * confirm). The update hook is the imported `O` (presumably a no-op).
 * `M` is not read.
 */
function Kn(M){let e,p='The Rust API Reference is available directly on the <a href="https://docs.rs/tokenizers/latest/tokenizers/" rel="nofollow">Docs.rs</a> 
website.';return{c(){e=d("p"),e.innerHTML=p},l(o){e=m(o,"P",{"data-svelte-h":!0}),y(e)!=="svelte-4ytcyb"&&(e.innerHTML=p)},m(o,r){_(o,e,r)},p:O,d(o){o&&i(e)}}}
/**
 * On(M): same shape as `Xn`, but the default slot handed to `ln` is `Kn`.
 * Used as the `rust` slot of the `An` component created in `no`.
 */
function On(M){let e,p;return e=new ln({props:{$$slots:{default:[Kn]},$$scope:{ctx:M}}}),{c(){g(e.$$.fragment)},l(o){h(e.$$.fragment,o)},m(o,r){$(e,o,r),p=!0},p(o,r){const c={};r&2&&(c.$$scope={dirty:r,ctx:o}),e.$set(c)},i(o){p||(v(e.$$.fragment,o),p=!0)},o(o){b(e.$$.fragment,o),p=!1},d(o){k(e,o)}}}
/**
 * eo(M): fragment holding one static paragraph with the plain text
 * "The node API has not been documented yet." (assigned via textContent,
 * not innerHTML). `l` rewrites the text only when y(e) is not
 * "svelte-1mrchm6". `M` is not read.
 */
function eo(M){let e,p="The node API has not been documented yet.";return{c(){e=d("p"),e.textContent=p},l(o){e=m(o,"P",{"data-svelte-h":!0}),y(e)!=="svelte-1mrchm6"&&(e.textContent=p)},m(o,r){_(o,e,r)},p:O,d(o){o&&i(e)}}}
/**
 * to(M): same shape as `Xn`, but the default slot handed to `ln` is `eo`.
 * Used as the `node` slot of the `An` component created in `no`.
 */
function to(M){let e,p;return e=new ln({props:{$$slots:{default:[eo]},$$scope:{ctx:M}}}),{c(){g(e.$$.fragment)},l(o){h(e.$$.fragment,o)},m(o,r){$(e,o,r),p=!0},p(o,r){const c={};r&2&&(c.$$scope={dirty:r,ctx:o}),e.$set(c)},i(o){p||(v(e.$$.fragment,o),p=!0)},o(o){b(e.$$.fragment,o),p=!1},d(o){k(e,o)}}}
/**
 * no(M): top-level fragment; it is the function passed to `Dn` by `fo`.
 * Components created up front:
 *   c = Vn  with a fixed `containerStyle` string,
 *   x = dt  with title "Models", local "models", headingTag "h1",
 *   P = An  with python/rust/node all true and slots node=[to], rust=[On],
 *           python=[Xn],
 *   j = qn  with `source` set to the models.mdx URL on GitHub.
 * It also creates a <meta> element `e`; the `h` hook sets its name to
 * "hf:doc:metadata" and its content to the module constant `oo`, and `m`
 * attaches it to document.head via a(document.head,e). Everything else is
 * inserted with `_` / `$` using target `f` and anchor `T`.
 * In `d`, i(e) is called unconditionally while the other nodes are passed
 * to `i` only when `f` is truthy. The update hook destructures the first
 * entry of its second argument and refreshes only `P`.
 */
function no(M){let e,p,o,r,c,t,x,ee,P,te,j,I,G,V;return c=new Vn({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),x=new dt({props:{title:"Models",local:"models",headingTag:"h1"}}),P=new An({props:{python:!0,rust:!0,node:!0,$$slots:{node:[to],rust:[On],python:[Xn]},$$scope:{ctx:M}}}),j=new qn({props:{source:"https://github.com/huggingface/tokenizers/blob/main/docs/source-doc-builder/api/models.mdx"}}),{c(){e=d("meta"),p=s(),o=d("p"),r=s(),g(c.$$.fragment),t=s(),g(x.$$.fragment),ee=s(),g(P.$$.fragment),te=s(),g(j.$$.fragment),I=s(),G=d("p"),this.h()},l(f){const 
T=Ln("svelte-u9bgzb",document.head);e=m(T,"META",{name:!0,content:!0}),T.forEach(i),p=l(f),o=m(f,"P",{}),W(o).forEach(i),r=l(f),h(c.$$.fragment,f),t=l(f),h(x.$$.fragment,f),ee=l(f),h(P.$$.fragment,f),te=l(f),h(j.$$.fragment,f),I=l(f),G=m(f,"P",{}),W(G).forEach(i),this.h()},h(){C(e,"name","hf:doc:metadata"),C(e,"content",oo)},m(f,T){a(document.head,e),_(f,p,T),_(f,o,T),_(f,r,T),$(c,f,T),_(f,t,T),$(x,f,T),_(f,ee,T),$(P,f,T),_(f,te,T),$(j,f,T),_(f,I,T),_(f,G,T),V=!0},p(f,[T]){const q={};T&2&&(q.$$scope={dirty:T,ctx:f}),P.$set(q)},i(f){V||(v(c.$$.fragment,f),v(x.$$.fragment,f),v(P.$$.fragment,f),v(j.$$.fragment,f),V=!0)},o(f){b(c.$$.fragment,f),b(x.$$.fragment,f),b(P.$$.fragment,f),b(j.$$.fragment,f),V=!1},d(f){f&&(i(p),i(o),i(r),i(t),i(ee),i(te),i(I),i(G)),i(e),k(c,f),k(x,f),k(P,f),k(j,f)}}}
/**
 * oo: JSON text used as the `content` attribute of the "hf:doc:metadata"
 * meta element in `no`. It describes a depth-1 entry titled "Models" with
 * five depth-2 sections: BPE, Model, Unigram, WordLevel and WordPiece.
 * It is stored as a string and is not parsed anywhere in the visible code.
 */
const oo='{"title":"Models","local":"models","sections":[{"title":"BPE","local":"tokenizers.models.BPE","sections":[],"depth":2},{"title":"Model","local":"tokenizers.models.Model","sections":[],"depth":2},{"title":"Unigram","local":"tokenizers.models.Unigram","sections":[],"depth":2},{"title":"WordLevel","local":"tokenizers.models.WordLevel","sections":[],"depth":2},{"title":"WordPiece","local":"tokenizers.models.WordPiece","sections":[],"depth":2}],"depth":1}';
/**
 * ro(M): third argument given to `Dn` by `fo`. Hands `In` a callback that
 * reads the "fw" query parameter from window.location.search and discards
 * the value, then returns an empty array (the comma expression evaluates to
 * `[]`). `M` is not read.
 * NOTE(review): `In` is presumably an on-mount hook -- confirm in the
 * scheduler chunk.
 */
function ro(M){return In(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * fo: component class for this page, exported under the name `component`.
 * Extends `Zn`; the constructor calls Dn(this, e, ro, no, En, {}) with the
 * received options object `e`.
 */
class fo extends Zn{constructor(e){super(),Dn(this,e,ro,no,En,{})}}
export{fo as component}; | |
Xet Storage Details
- Size:
- 40.2 kB
- Xet hash:
- 27e941efbaf86fd691455b7b08354fe28fcdbbe0ccce2f7f22413a0e9fa56bde
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.