Buckets:
| import{s as ns,o as ss,n as d}from"../chunks/scheduler.7c59faff.js";import{S as ls,i as as,e as T,s as y,c as u,h as rs,a as w,d as M,b as g,f as es,g as p,p as j,j as ts,k as os,l as m,m as i,t as c,n as $,o as f}from"../chunks/index.09bb5655.js";import{T as us}from"../chunks/Tip.25c348e8.js";import{C as ps,H as A,E as is}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.e176492a.js";import{C as J}from"../chunks/CodeBlock.6879f932.js";import{T as _,M as h}from"../chunks/TokenizersLanguageContent.0fc17a7a.js";function cs(a){let t,n;return t=new J({props:{code:"ZnJvbSUyMHRva2VuaXplcnMlMjBpbXBvcnQlMjBUb2tlbml6ZXIlMEFmcm9tJTIwdG9rZW5pemVycy5tb2RlbHMlMjBpbXBvcnQlMjBCUEUlMEF0b2tlbml6ZXIlMjAlM0QlMjBUb2tlbml6ZXIoQlBFKHVua190b2tlbiUzRCUyMiU1QlVOSyU1RCUyMikp",highlighted:`<span class="hljs-keyword">from</span> tokenizers <span class="hljs-keyword">import</span> Tokenizer | |
| <span class="hljs-keyword">from</span> tokenizers.models <span class="hljs-keyword">import</span> BPE | |
| tokenizer = Tokenizer(BPE(unk_token=<span class="hljs-string">"[UNK]"</span>))`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function $s(a){let t,n;return t=new h({props:{$$slots:{default:[cs]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function fs(a){let t,n;return t=new J({props:{code:"dXNlJTIwdG9rZW5pemVycyUzQSUzQW1vZGVscyUzQSUzQWJwZSUzQSUzQUJQRSUzQiUwQWxldCUyMG11dCUyMHRva2VuaXplciUzQSUyMFRva2VuaXplckltcGwlM0MlMEElMjAlMjAlMjAlMjBCUEUlMkMlMEElMjAlMjAlMjAlMjBOb3JtYWxpemVyV3JhcHBlciUyQyUwQSUyMCUyMCUyMCUyMFByZVRva2VuaXplcldyYXBwZXIlMkMlMEElMjAlMjAlMjAlMjBQb3N0UHJvY2Vzc29yV3JhcHBlciUyQyUwQSUyMCUyMCUyMCUyMERlY29kZXJXcmFwcGVyJTJDJTBBJTNFJTIwJTNEJTIwVG9rZW5pemVySW1wbCUzQSUzQW5ldyglMEElMjAlMjAlMjAlMjBCUEUlM0ElM0FidWlsZGVyKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAudW5rX3Rva2VuKCUyMiU1QlVOSyU1RCUyMi50b19zdHJpbmcoKSklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAuYnVpbGQoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMC51bndyYXAoKSUyQyUwQSklM0I=",highlighted:`<span class="hljs-keyword">use</span> tokenizers::models::bpe::BPE; | |
| <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut </span><span class="hljs-variable">tokenizer</span>: TokenizerImpl< | |
| BPE, | |
| NormalizerWrapper, | |
| PreTokenizerWrapper, | |
| PostProcessorWrapper, | |
| DecoderWrapper, | |
| > = TokenizerImpl::<span class="hljs-title function_ invoke__">new</span>( | |
| BPE::<span class="hljs-title function_ invoke__">builder</span>() | |
| .<span class="hljs-title function_ invoke__">unk_token</span>(<span class="hljs-string">"[UNK]"</span>.<span class="hljs-title function_ invoke__">to_string</span>()) | |
| .<span class="hljs-title function_ invoke__">build</span>() | |
| .<span class="hljs-title function_ invoke__">unwrap</span>(), | |
| );`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ms(a){let t,n;return t=new h({props:{$$slots:{default:[fs]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ms(a){let t,n;return t=new J({props:{code:"JTdCJTIwVG9rZW5pemVyJTIwJTdEJTIwJTNEJTIwcmVxdWlyZSgndG9rZW5pemVycycpJTBBJTdCJTIwQlBFJTIwJTdEJTIwJTNEJTIwcmVxdWlyZSgndG9rZW5pemVycycpJTBBdG9rZW5pemVyJTIwJTNEJTIwbmV3JTIwVG9rZW5pemVyKEJQRS5pbml0KCU3QiU3RCUyQyUyMCU1QiU1RCUyQyUyMCU3QiUyMHVua1Rva2VuJTNBJTIwJyU1QlVOSyU1RCclMjAlN0QpKQ==",highlighted:`{ <span class="hljs-title class_">Tokenizer</span> } = <span class="hljs-title function_">require</span>(<span class="hljs-params"><span class="hljs-string">'tokenizers'</span></span>) | |
| { <span class="hljs-variable constant_">BPE</span> } = <span class="hljs-built_in">require</span>(<span class="hljs-string">'tokenizers'</span>) | |
| tokenizer = <span class="hljs-keyword">new</span> <span class="hljs-title class_">Tokenizer</span>(<span class="hljs-variable constant_">BPE</span>.<span class="hljs-title function_">init</span>({}, [], { <span class="hljs-attr">unkToken</span>: <span class="hljs-string">'[UNK]'</span> }))`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ys(a){let t,n;return t=new h({props:{$$slots:{default:[ms]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function gs(a){let t,n;return t=new J({props:{code:"ZnJvbSUyMHRva2VuaXplcnMudHJhaW5lcnMlMjBpbXBvcnQlMjBCcGVUcmFpbmVyJTBBdHJhaW5lciUyMCUzRCUyMEJwZVRyYWluZXIoc3BlY2lhbF90b2tlbnMlM0QlNUIlMjIlNUJVTkslNUQlMjIlMkMlMjAlMjIlNUJDTFMlNUQlMjIlMkMlMjAlMjIlNUJTRVAlNUQlMjIlMkMlMjAlMjIlNUJQQUQlNUQlMjIlMkMlMjAlMjIlNUJNQVNLJTVEJTIyJTVEKQ==",highlighted:`<span class="hljs-keyword">from</span> tokenizers.trainers <span class="hljs-keyword">import</span> BpeTrainer | |
| trainer = BpeTrainer(special_tokens=[<span class="hljs-string">"[UNK]"</span>, <span class="hljs-string">"[CLS]"</span>, <span class="hljs-string">"[SEP]"</span>, <span class="hljs-string">"[PAD]"</span>, <span class="hljs-string">"[MASK]"</span>])`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Js(a){let t,n;return t=new h({props:{$$slots:{default:[gs]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ds(a){let t,n;return t=new J({props:{code:"dXNlJTIwdG9rZW5pemVycyUzQSUzQW1vZGVscyUzQSUzQWJwZSUzQSUzQUJwZVRyYWluZXIlM0IlMEFsZXQlMjBtdXQlMjB0cmFpbmVyJTIwJTNEJTIwQnBlVHJhaW5lciUzQSUzQWJ1aWxkZXIoKSUwQSUyMCUyMCUyMCUyMC5zcGVjaWFsX3Rva2Vucyh2ZWMhJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwQWRkZWRUb2tlbiUzQSUzQWZyb20oJTIyJTVCVU5LJTVEJTIyJTJDJTIwdHJ1ZSklMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBBZGRlZFRva2VuJTNBJTNBZnJvbSglMjIlNUJDTFMlNUQlMjIlMkMlMjB0cnVlKSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMEFkZGVkVG9rZW4lM0ElM0Fmcm9tKCUyMiU1QlNFUCU1RCUyMiUyQyUyMHRydWUpJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwQWRkZWRUb2tlbiUzQSUzQWZyb20oJTIyJTVCUEFEJTVEJTIyJTJDJTIwdHJ1ZSklMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBBZGRlZFRva2VuJTNBJTNBZnJvbSglMjIlNUJNQVNLJTVEJTIyJTJDJTIwdHJ1ZSklMkMlMEElMjAlMjAlMjAlMjAlNUQpJTBBJTIwJTIwJTIwJTIwLmJ1aWxkKCklM0I=",highlighted:`<span class="hljs-keyword">use</span> tokenizers::models::bpe::BpeTrainer; | |
| <span class="hljs-keyword">let</span> <span class="hljs-keyword">mut </span><span class="hljs-variable">trainer</span> = BpeTrainer::<span class="hljs-title function_ invoke__">builder</span>() | |
| .<span class="hljs-title function_ invoke__">special_tokens</span>(<span class="hljs-built_in">vec!</span>[ | |
| AddedToken::<span class="hljs-title function_ invoke__">from</span>(<span class="hljs-string">"[UNK]"</span>, <span class="hljs-literal">true</span>), | |
| AddedToken::<span class="hljs-title function_ invoke__">from</span>(<span class="hljs-string">"[CLS]"</span>, <span class="hljs-literal">true</span>), | |
| AddedToken::<span class="hljs-title function_ invoke__">from</span>(<span class="hljs-string">"[SEP]"</span>, <span class="hljs-literal">true</span>), | |
| AddedToken::<span class="hljs-title function_ invoke__">from</span>(<span class="hljs-string">"[PAD]"</span>, <span class="hljs-literal">true</span>), | |
| AddedToken::<span class="hljs-title function_ invoke__">from</span>(<span class="hljs-string">"[MASK]"</span>, <span class="hljs-literal">true</span>), | |
| ]) | |
| .<span class="hljs-title function_ invoke__">build</span>();`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function hs(a){let t,n;return t=new h({props:{$$slots:{default:[ds]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Us(a){let t,n;return t=new J({props:{code:"JTdCJTIwYnBlVHJhaW5lciUyMCU3RCUyMCUzRCUyMHJlcXVpcmUoJ3Rva2VuaXplcnMnKSUwQXRyYWluZXIlMjAlM0QlMjBicGVUcmFpbmVyKCU3QiUwQWVjaWFsVG9rZW5zJTNBJTIwJTVCJyU1QlVOSyU1RCclMkMlMjAnJTVCQ0xTJTVEJyUyQyUyMCclNUJTRVAlNUQnJTJDJTIwJyU1QlBBRCU1RCclMkMlMjAnJTVCTUFTSyU1RCclNUQlMkM=",highlighted:`{ bpeTrainer } = <span class="hljs-built_in">require</span>(<span class="hljs-string">'tokenizers'</span>) | |
| trainer = <span class="hljs-title function_">bpeTrainer</span>({ | |
| <span class="hljs-attr">ecialTokens</span>: [<span class="hljs-string">'[UNK]'</span>, <span class="hljs-string">'[CLS]'</span>, <span class="hljs-string">'[SEP]'</span>, <span class="hljs-string">'[PAD]'</span>, <span class="hljs-string">'[MASK]'</span>],`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ts(a){let t,n;return t=new h({props:{$$slots:{default:[Us]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ws(a){let t,n=`The order in which you write the special tokens list matters: here <code>"[UNK]"</code> will get the ID 0, | |
| <code>"[CLS]"</code> will get the ID 1 and so forth.`;return{c(){t=T("p"),t.innerHTML=n},l(e){t=w(e,"P",{"data-svelte-h":!0}),j(t)!=="svelte-a3tvl4"&&(t.innerHTML=n)},m(e,s){m(e,t,s)},p:d,d(e){e&&M(t)}}}function js(a){let t,n;return t=new J({props:{code:"ZnJvbSUyMHRva2VuaXplcnMucHJlX3Rva2VuaXplcnMlMjBpbXBvcnQlMjBXaGl0ZXNwYWNlJTBBdG9rZW5pemVyLnByZV90b2tlbml6ZXIlMjAlM0QlMjBXaGl0ZXNwYWNlKCk=",highlighted:`<span class="hljs-keyword">from</span> tokenizers.pre_tokenizers <span class="hljs-keyword">import</span> Whitespace | |
| tokenizer.pre_tokenizer = Whitespace()`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ks(a){let t,n;return t=new h({props:{$$slots:{default:[js]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function _s(a){let t,n;return t=new J({props:{code:"dXNlJTIwdG9rZW5pemVycyUzQSUzQXByZV90b2tlbml6ZXJzJTNBJTNBd2hpdGVzcGFjZSUzQSUzQVdoaXRlc3BhY2UlM0IlMEF0b2tlbml6ZXIud2l0aF9wcmVfdG9rZW5pemVyKFNvbWUoV2hpdGVzcGFjZSUyMCU3QiU3RCkpJTNC",highlighted:`<span class="hljs-keyword">use</span> tokenizers::pre_tokenizers::whitespace::Whitespace; | |
| tokenizer.<span class="hljs-title function_ invoke__">with_pre_tokenizer</span>(<span class="hljs-title function_ invoke__">Some</span>(Whitespace {}));`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Is(a){let t,n;return t=new h({props:{$$slots:{default:[_s]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function bs(a){let t,n;return t=new J({props:{code:"JTdCJTIwd2hpdGVzcGFjZVByZVRva2VuaXplciUyMCU3RCUyMCUzRCUyMHJlcXVpcmUoJ3Rva2VuaXplcnMnKSUwQW5pemVyLnNldFByZVRva2VuaXplcih3aGl0ZXNwYWNlUHJlVG9rZW5pemVyKCkp",highlighted:`{ whitespacePreTokenizer } = <span class="hljs-built_in">require</span>(<span class="hljs-string">'tokenizers'</span>) | |
| nizer.<span class="hljs-title function_">setPreTokenizer</span>(<span class="hljs-title function_">whitespacePreTokenizer</span>())`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Cs(a){let t,n;return t=new h({props:{$$slots:{default:[bs]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function qs(a){let t,n;return t=new J({props:{code:"ZmlsZXMlMjAlM0QlMjAlNUJmJTIyZGF0YSUyRndpa2l0ZXh0LTEwMy1yYXclMkZ3aWtpLiU3QnNwbGl0JTdELnJhdyUyMiUyMGZvciUyMHNwbGl0JTIwaW4lMjAlNUIlMjJ0ZXN0JTIyJTJDJTIwJTIydHJhaW4lMjIlMkMlMjAlMjJ2YWxpZCUyMiU1RCU1RCUwQXRva2VuaXplci50cmFpbihmaWxlcyUyQyUyMHRyYWluZXIp",highlighted:`files = [<span class="hljs-string">f"data/wikitext-103-raw/wiki.<span class="hljs-subst">{split}</span>.raw"</span> <span class="hljs-keyword">for</span> split <span class="hljs-keyword">in</span> [<span class="hljs-string">"test"</span>, <span class="hljs-string">"train"</span>, <span class="hljs-string">"valid"</span>]] | |
| tokenizer.train(files, trainer)`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Vs(a){let t,n;return t=new h({props:{$$slots:{default:[qs]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Qs(a){let t,n;return t=new J({props:{code:"bGV0JTIwZmlsZXMlMjAlM0QlMjB2ZWMhJTVCJTBBJTIwJTIwJTIwJTIwJTIyZGF0YSUyRndpa2l0ZXh0LTEwMy1yYXclMkZ3aWtpLnRyYWluLnJhdyUyMi5pbnRvKCklMkMlMEElMjAlMjAlMjAlMjAlMjJkYXRhJTJGd2lraXRleHQtMTAzLXJhdyUyRndpa2kudGVzdC5yYXclMjIuaW50bygpJTJDJTBBJTIwJTIwJTIwJTIwJTIyZGF0YSUyRndpa2l0ZXh0LTEwMy1yYXclMkZ3aWtpLnZhbGlkLnJhdyUyMi5pbnRvKCklMkMlMEElNUQlM0IlMEF0b2tlbml6ZXIudHJhaW5fZnJvbV9maWxlcyglMjZtdXQlMjB0cmFpbmVyJTJDJTIwZmlsZXMpJTNGJTNC",highlighted:`<span class="hljs-keyword">let</span> <span class="hljs-variable">files</span> = <span class="hljs-built_in">vec!</span>[ | |
| <span class="hljs-string">"data/wikitext-103-raw/wiki.train.raw"</span>.<span class="hljs-title function_ invoke__">into</span>(), | |
| <span class="hljs-string">"data/wikitext-103-raw/wiki.test.raw"</span>.<span class="hljs-title function_ invoke__">into</span>(), | |
| <span class="hljs-string">"data/wikitext-103-raw/wiki.valid.raw"</span>.<span class="hljs-title function_ invoke__">into</span>(), | |
| ]; | |
| tokenizer.<span class="hljs-title function_ invoke__">train_from_files</span>(&<span class="hljs-keyword">mut</span> trainer, files)?;`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function zs(a){let t,n;return t=new h({props:{$$slots:{default:[Qs]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function vs(a){let t,n;return t=new J({props:{code:"ZmlsZXMlMjAlM0QlMjAlNUIndGVzdCclMkMlMjAndHJhaW4nJTJDJTIwJ3ZhbGlkJyU1RC5tYXAoKHNwbGl0KSUyMCUzRCUzRSUyMCU2MGRhdGElMkZ3aWtpdGV4dC0xMDMtcmF3JTJGd2lraS4lMjQlN0JzcGxpdCU3RC5yYXclNjApJTBBbml6ZXIudHJhaW4oZmlsZXMlMkMlMjB0cmFpbmVyKQ==",highlighted:'files = [<span class="hljs-string">'test'</span>, <span class="hljs-string">'train'</span>, <span class="hljs-string">'valid'</span>].<span class="hljs-title function_">map</span>(<span class="hljs-function">(<span class="hljs-params">split</span>) =></span> <span class="hljs-string">`data/wikitext-103-raw/wiki.<span class="hljs-subst">${split}</span>.raw`</span>)\nnizer.<span class="hljs-title function_">train</span>(files, trainer)',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ss(a){let t,n;return t=new h({props:{$$slots:{default:[vs]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Es(a){let t,n;return t=new J({props:{code:"dG9rZW5pemVyLnNhdmUoJTIyZGF0YSUyRnRva2VuaXplci13aWtpLmpzb24lMjIp",highlighted:'tokenizer.save(<span 
class="hljs-string">"data/tokenizer-wiki.json"</span>)',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function xs(a){let t,n;return t=new h({props:{$$slots:{default:[Es]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Zs(a){let t,n;return t=new J({props:{code:"dG9rZW5pemVyLnNhdmUoJTIyZGF0YSUyRnRva2VuaXplci13aWtpLmpzb24lMjIlMkMlMjBmYWxzZSklM0YlM0I=",highlighted:'tokenizer.<span class="hljs-title function_ invoke__">save</span>(<span class="hljs-string">"data/tokenizer-wiki.json"</span>, <span class="hljs-literal">false</span>)?;',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function As(a){let t,n;return t=new h({props:{$$slots:{default:[Zs]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ns(a){let t,n;return t=new J({props:{code:"bml6ZXIuc2F2ZSgnZGF0YSUyRnRva2VuaXplci13aWtpLmpzb24nKQ==",highlighted:'nizer.<span class="hljs-title function_">save</span>(<span class="hljs-string">'data/tokenizer-wiki.json'</span>)',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Rs(a){let t,n;return t=new h({props:{$$slots:{default:[Ns]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const 
r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Gs(a){let t,n;return t=new J({props:{code:"dG9rZW5pemVyJTIwJTNEJTIwVG9rZW5pemVyLmZyb21fZmlsZSglMjJkYXRhJTJGdG9rZW5pemVyLXdpa2kuanNvbiUyMik=",highlighted:'tokenizer = Tokenizer.from_file(<span class="hljs-string">"data/tokenizer-wiki.json"</span>)',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Hs(a){let t,n;return t=new h({props:{$$slots:{default:[Gs]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ws(a){let t,n;return t=new J({props:{code:"bGV0JTIwbXV0JTIwdG9rZW5pemVyJTIwJTNEJTIwVG9rZW5pemVyJTNBJTNBZnJvbV9maWxlKCUyMmRhdGElMkZ0b2tlbml6ZXItd2lraS5qc29uJTIyKSUzRiUzQg==",highlighted:'<span class="hljs-keyword">let</span> <span class="hljs-keyword">mut </span><span class="hljs-variable">tokenizer</span> = Tokenizer::<span class="hljs-title function_ invoke__">from_file</span>(<span class="hljs-string">"data/tokenizer-wiki.json"</span>)?;',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Bs(a){let t,n;return t=new h({props:{$$slots:{default:[Ws]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Xs(a){let t,n;return t=new J({props:{code:"dG9rZW5pemVyJTIwJTNEJTIwVG9rZW5pemVyLmZyb21GaWxlKCdkYXRhJTJGdG9rZW5pemVyLXdpa2kuanNvbicp",highlighted:'tokenizer = <span class="hljs-title class_">Tokenizer</span>.<span 
class="hljs-title function_">fromFile</span>(<span class="hljs-string">'data/tokenizer-wiki.json'</span>)',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ds(a){let t,n;return t=new h({props:{$$slots:{default:[Xs]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ls(a){let t,n;return t=new J({props:{code:"b3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZSglMjJIZWxsbyUyQyUyMHknYWxsISUyMEhvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRiUyMik=",highlighted:'output = tokenizer.encode(<span class="hljs-string">"Hello, y'all! How are you 😁 ?"</span>)',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ys(a){let t,n;return t=new h({props:{$$slots:{default:[Ls]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Fs(a){let t,n;return t=new J({props:{code:"bGV0JTIwb3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZSglMjJIZWxsbyUyQyUyMHknYWxsISUyMEhvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRiUyMiUyQyUyMHRydWUpJTNGJTNC",highlighted:'<span class="hljs-keyword">let</span> <span class="hljs-variable">output</span> = tokenizer.<span class="hljs-title function_ invoke__">encode</span>(<span class="hljs-string">"Hello, y'all! 
How are you 😁 ?"</span>, <span class="hljs-literal">true</span>)?;',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ps(a){let t,n;return t=new h({props:{$$slots:{default:[Fs]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ks(a){let t,n;return t=new J({props:{code:"b3V0cHV0JTIwJTNEJTIwYXdhaXQlMjB0b2tlbml6ZXIuZW5jb2RlKCUyMkhlbGxvJTJDJTIweSdhbGwhJTIwSG93JTIwYXJlJTIweW91JTIwJUYwJTlGJTk4JTgxJTIwJTNGJTIyKQ==",highlighted:'output = <span class="hljs-keyword">await</span> tokenizer.<span class="hljs-title function_">encode</span>(<span class="hljs-string">"Hello, y'all! How are you 😁 ?"</span>)',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Os(a){let t,n;return t=new h({props:{$$slots:{default:[Ks]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function el(a){let t,n;return t=new J({props:{code:"cHJpbnQob3V0cHV0LnRva2VucyklMEElMjMlMjAlNUIlMjJIZWxsbyUyMiUyQyUyMCUyMiUyQyUyMiUyQyUyMCUyMnklMjIlMkMlMjAlMjInJTIyJTJDJTIwJTIyYWxsJTIyJTJDJTIwJTIyISUyMiUyQyUyMCUyMkhvdyUyMiUyQyUyMCUyMmFyZSUyMiUyQyUyMCUyMnlvdSUyMiUyQyUyMCUyMiU1QlVOSyU1RCUyMiUyQyUyMCUyMiUzRiUyMiU1RA==",highlighted:`<span class="hljs-built_in">print</span>(output.tokens) | |
| <span class="hljs-comment"># ["Hello", ",", "y", "'", "all", "!", "How", "are", "you", "[UNK]", "?"]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function tl(a){let t,n;return t=new h({props:{$$slots:{default:[el]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function nl(a){let t,n;return t=new J({props:{code:"cHJpbnRsbiEoJTIyJTdCJTNBJTNGJTdEJTIyJTJDJTIwb3V0cHV0LmdldF90b2tlbnMoKSklM0IlMEElMkYlMkYlMjAlNUIlMjJIZWxsbyUyMiUyQyUyMCUyMiUyQyUyMiUyQyUyMCUyMnklMjIlMkMlMjAlMjInJTIyJTJDJTIwJTIyYWxsJTIyJTJDJTIwJTIyISUyMiUyQyUyMCUyMkhvdyUyMiUyQyUyMCUyMmFyZSUyMiUyQyUyMCUyMnlvdSUyMiUyQyUyMCUyMiU1QlVOSyU1RCUyMiUyQyUyMCUyMiUzRiUyMiUyQyU1RA==",highlighted:`<span class="hljs-built_in">println!</span>(<span class="hljs-string">"{:?}"</span>, output.<span class="hljs-title function_ invoke__">get_tokens</span>()); | |
| <span class="hljs-comment">// ["Hello", ",", "y", "'", "all", "!", "How", "are", "you", "[UNK]", "?",]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function sl(a){let t,n;return t=new h({props:{$$slots:{default:[nl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ll(a){let t,n;return t=new J({props:{code:"b2xlLmxvZyhvdXRwdXQuZ2V0VG9rZW5zKCkpJTBBJTIySGVsbG8lMjIlMkMlMjAlMjIlMkMlMjIlMkMlMjAlMjJ5JTIyJTJDJTIwJTIyJyUyMiUyQyUyMCUyMmFsbCUyMiUyQyUyMCUyMiElMjIlMkMlMjAlMjJIb3clMjIlMkMlMjAlMjJhcmUlMjIlMkMlMjAlMjJ5b3UlMjIlMkMlMjAlMjIlNUJVTkslNUQlMjIlMkMlMjAlMjIlM0YlMjIlNUQ=",highlighted:`ole.<span class="hljs-title function_">log</span>(output.<span class="hljs-title function_">getTokens</span>()) | |
| <span class="hljs-string">"Hello"</span>, <span class="hljs-string">","</span>, <span class="hljs-string">"y"</span>, <span class="hljs-string">"'"</span>, <span class="hljs-string">"all"</span>, <span class="hljs-string">"!"</span>, <span class="hljs-string">"How"</span>, <span class="hljs-string">"are"</span>, <span class="hljs-string">"you"</span>, <span class="hljs-string">"[UNK]"</span>, <span class="hljs-string">"?"</span>]`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function al(a){let t,n;return t=new h({props:{$$slots:{default:[ll]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function rl(a){let t,n;return t=new J({props:{code:"cHJpbnQob3V0cHV0LmlkcyklMEElMjMlMjAlNUIyNzI1MyUyQyUyMDE2JTJDJTIwOTMlMkMlMjAxMSUyQyUyMDUwOTclMkMlMjA1JTJDJTIwNzk2MSUyQyUyMDUxMTIlMkMlMjA2MjE4JTJDJTIwMCUyQyUyMDM1JTVE",highlighted:`<span class="hljs-built_in">print</span>(output.ids) | |
| <span class="hljs-comment"># [27253, 16, 93, 11, 5097, 5, 7961, 5112, 6218, 0, 35]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ol(a){let t,n;return t=new h({props:{$$slots:{default:[rl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ul(a){let t,n;return t=new J({props:{code:"cHJpbnRsbiEoJTIyJTdCJTNBJTNGJTdEJTIyJTJDJTIwb3V0cHV0LmdldF9pZHMoKSklM0IlMEElMkYlMkYlMjAlNUIyNzI1MyUyQyUyMDE2JTJDJTIwOTMlMkMlMjAxMSUyQyUyMDUwOTclMkMlMjA1JTJDJTIwNzk2MSUyQyUyMDUxMTIlMkMlMjA2MjE4JTJDJTIwMCUyQyUyMDM1JTVE",highlighted:`<span class="hljs-built_in">println!</span>(<span class="hljs-string">"{:?}"</span>, output.<span class="hljs-title function_ invoke__">get_ids</span>()); | |
| <span class="hljs-comment">// [27253, 16, 93, 11, 5097, 5, 7961, 5112, 6218, 0, 35]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function pl(a){let t,n;return t=new h({props:{$$slots:{default:[ul]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function il(a){let t,n;return t=new J({props:{code:"b2xlLmxvZyhvdXRwdXQuZ2V0SWRzKCkpJTBBMjcyNTMlMkMlMjAxNiUyQyUyMDkzJTJDJTIwMTElMkMlMjA1MDk3JTJDJTIwNSUyQyUyMDc5NjElMkMlMjA1MTEyJTJDJTIwNjIxOCUyQyUyMDAlMkMlMjAzNSU1RA==",highlighted:`ole.<span class="hljs-title function_">log</span>(output.<span class="hljs-title function_">getIds</span>()) | |
| <span class="hljs-number">27253</span>, <span class="hljs-number">16</span>, <span class="hljs-number">93</span>, <span class="hljs-number">11</span>, <span class="hljs-number">5097</span>, <span class="hljs-number">5</span>, <span class="hljs-number">7961</span>, <span class="hljs-number">5112</span>, <span class="hljs-number">6218</span>, <span class="hljs-number">0</span>, <span class="hljs-number">35</span>]`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function cl(a){let t,n;return t=new h({props:{$$slots:{default:[il]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function $l(a){let t,n;return t=new J({props:{code:"cHJpbnQob3V0cHV0Lm9mZnNldHMlNUI5JTVEKSUwQSUyMyUyMCgyNiUyQyUyMDI3KQ==",highlighted:`<span class="hljs-built_in">print</span>(output.offsets[<span class="hljs-number">9</span>]) | |
| <span class="hljs-comment"># (26, 27)</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function fl(a){let t,n;return t=new h({props:{$$slots:{default:[$l]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ml(a){let t,n;return t=new J({props:{code:"cHJpbnRsbiEoJTIyJTdCJTNBJTNGJTdEJTIyJTJDJTIwb3V0cHV0LmdldF9vZmZzZXRzKCklNUI5JTVEKSUzQiUwQSUyRiUyRiUyMCgyNiUyQyUyMDMwKQ==",highlighted:`<span class="hljs-built_in">println!</span>(<span class="hljs-string">"{:?}"</span>, output.<span class="hljs-title function_ invoke__">get_offsets</span>()[<span class="hljs-number">9</span>]); | |
| <span class="hljs-comment">// (26, 30)</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ml(a){let t,n;return t=new h({props:{$$slots:{default:[Ml]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function yl(a){let t,n;return t=new J({props:{code:"b2Zmc2V0cyUyMCUzRCUyMG91dHB1dC5nZXRPZmZzZXRzKCklMEFvbGUubG9nKG9mZnNldHMlNUI5JTVEKSUwQTI2JTJDJTIwMjcp",highlighted:`offsets = output.<span class="hljs-title function_">getOffsets</span>() | |
| ole.<span class="hljs-title function_">log</span>(offsets[<span class="hljs-number">9</span>]) | |
| <span class="hljs-number">26</span>, <span class="hljs-number">27</span>)`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function gl(a){let t,n;return t=new h({props:{$$slots:{default:[yl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Jl(a){let t,n;return t=new J({props:{code:"c2VudGVuY2UlMjAlM0QlMjAlMjJIZWxsbyUyQyUyMHknYWxsISUyMEhvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRiUyMiUwQXNlbnRlbmNlJTVCMjYlM0EyNyU1RCUwQSUyMyUyMCUyMiVGMCU5RiU5OCU4MSUyMg==",highlighted:`sentence = <span class="hljs-string">"Hello, y'all! How are you 😁 ?"</span> | |
| sentence[<span class="hljs-number">26</span>:<span class="hljs-number">27</span>] | |
| <span class="hljs-comment"># "😁"</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function dl(a){let t,n;return t=new h({props:{$$slots:{default:[Jl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function hl(a){let t,n;return t=new J({props:{code:"bGV0JTIwc2VudGVuY2UlMjAlM0QlMjAlMjJIZWxsbyUyQyUyMHknYWxsISUyMEhvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRiUyMiUzQiUwQXByaW50bG4hKCUyMiU3QiU3RCUyMiUyQyUyMCUyNnNlbnRlbmNlJTVCMjYuLjMwJTVEKSUzQiUwQSUyRiUyRiUyMCUyMiVGMCU5RiU5OCU4MSUyMg==",highlighted:`<span class="hljs-keyword">let</span> <span class="hljs-variable">sentence</span> = <span class="hljs-string">"Hello, y'all! How are you 😁 ?"</span>; | |
| <span class="hljs-built_in">println!</span>(<span class="hljs-string">"{}"</span>, &sentence[<span class="hljs-number">26</span>..<span class="hljs-number">30</span>]); | |
| <span class="hljs-comment">// "😁"</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ul(a){let t,n;return t=new h({props:{$$slots:{default:[hl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Tl(a){let t,n;return t=new J({props:{code:"JTdCJTIwc2xpY2UlMjAlN0QlMjAlM0QlMjByZXF1aXJlKCd0b2tlbml6ZXJzJyklMEFzZW50ZW5jZSUyMCUzRCUyMCUyMkhlbGxvJTJDJTIweSdhbGwhJTIwSG93JTIwYXJlJTIweW91JTIwJUYwJTlGJTk4JTgxJTIwJTNGJTIyJTBBJTVCc3RhcnQlMkMlMjBlbmQlNUQlMjAlM0QlMjBvZmZzZXRzJTVCOSU1RCUwQW9sZS5sb2coc2xpY2Uoc2VudGVuY2UlMkMlMjBzdGFydCUyQyUyMGVuZCkpJTBBJUYwJTlGJTk4JTgxJTIy",highlighted:`{ slice } = <span class="hljs-built_in">require</span>(<span class="hljs-string">'tokenizers'</span>) | |
| sentence = <span class="hljs-string">"Hello, y'all! How are you 😁 ?"</span> | |
| [start, end] = offsets[<span class="hljs-number">9</span>] | |
| ole.<span class="hljs-title function_">log</span>(<span class="hljs-title function_">slice</span>(sentence, start, end)) | |
| 😁<span class="hljs-string">"</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function wl(a){let t,n;return t=new h({props:{$$slots:{default:[Tl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function jl(a){let t,n;return t=new J({props:{code:"dG9rZW5pemVyLnRva2VuX3RvX2lkKCUyMiU1QlNFUCU1RCUyMiklMEElMjMlMjAy",highlighted:`tokenizer.token_to_id(<span class="hljs-string">"[SEP]"</span>) | |
| <span class="hljs-comment"># 2</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function kl(a){let t,n;return t=new h({props:{$$slots:{default:[jl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function _l(a){let t,n;return t=new J({props:{code:"cHJpbnRsbiEoJTIyJTdCJTdEJTIyJTJDJTIwdG9rZW5pemVyLnRva2VuX3RvX2lkKCUyMiU1QlNFUCU1RCUyMikudW53cmFwKCkpJTNCJTBBJTJGJTJGJTIwMg==",highlighted:`<span class="hljs-built_in">println!</span>(<span class="hljs-string">"{}"</span>, tokenizer.<span class="hljs-title function_ invoke__">token_to_id</span>(<span class="hljs-string">"[SEP]"</span>).<span class="hljs-title function_ invoke__">unwrap</span>()); | |
| <span class="hljs-comment">// 2</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Il(a){let t,n;return t=new h({props:{$$slots:{default:[_l]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function bl(a){let t,n;return t=new J({props:{code:"b2xlLmxvZyh0b2tlbml6ZXIudG9rZW5Ub0lkKCclNUJTRVAlNUQnKSk=",highlighted:'ole.<span class="hljs-title function_">log</span>(tokenizer.<span class="hljs-title function_">tokenToId</span>(<span class="hljs-string">'[SEP]'</span>))',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Cl(a){let t,n;return t=new h({props:{$$slots:{default:[bl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ql(a){let t,n;return t=new J({props:{code:"ZnJvbSUyMHRva2VuaXplcnMucHJvY2Vzc29ycyUyMGltcG9ydCUyMFRlbXBsYXRlUHJvY2Vzc2luZyUwQXRva2VuaXplci5wb3N0X3Byb2Nlc3NvciUyMCUzRCUyMFRlbXBsYXRlUHJvY2Vzc2luZyglMEElMjAlMjAlMjAlMjBzaW5nbGUlM0QlMjIlNUJDTFMlNUQlMjAlMjRBJTIwJTVCU0VQJTVEJTIyJTJDJTBBJTIwJTIwJTIwJTIwcGFpciUzRCUyMiU1QkNMUyU1RCUyMCUyNEElMjAlNUJTRVAlNUQlMjAlMjRCJTNBMSUyMCU1QlNFUCU1RCUzQTElMjIlMkMlMEElMjAlMjAlMjAlMjBzcGVjaWFsX3Rva2VucyUzRCU1QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCglMjIlNUJDTFMlNUQlMjIlMkMlMjB0b2tlbml6ZXIudG9rZW5fdG9faWQoJTIyJTVCQ0xTJTVEJTIyKSklMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAoJTIyJTVCU0VQJTVEJTIyJTJDJTIwdG9rZW5pemVyLnRva2VuX3RvX2lkKCUyMiU1QlNFUCU1RCUyMikpJTJDJTBBJTIwJTIwJTIwJTIwJTVEJTJDJTBBKQ==",highlighted:`<span 
class="hljs-keyword">from</span> tokenizers.processors <span class="hljs-keyword">import</span> TemplateProcessing | |
| tokenizer.post_processor = TemplateProcessing( | |
| single=<span class="hljs-string">"[CLS] $A [SEP]"</span>, | |
| pair=<span class="hljs-string">"[CLS] $A [SEP] $B:1 [SEP]:1"</span>, | |
| special_tokens=[ | |
| (<span class="hljs-string">"[CLS]"</span>, tokenizer.token_to_id(<span class="hljs-string">"[CLS]"</span>)), | |
| (<span class="hljs-string">"[SEP]"</span>, tokenizer.token_to_id(<span class="hljs-string">"[SEP]"</span>)), | |
| ], | |
| )`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Vl(a){let t,n;return t=new h({props:{$$slots:{default:[ql]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ql(a){let t,n;return t=new J({props:{code:"dXNlJTIwdG9rZW5pemVycyUzQSUzQXByb2Nlc3NvcnMlM0ElM0F0ZW1wbGF0ZSUzQSUzQVRlbXBsYXRlUHJvY2Vzc2luZyUzQiUwQWxldCUyMHNwZWNpYWxfdG9rZW5zJTIwJTNEJTIwdmVjISU1QiUwQSUyMCUyMCUyMCUyMCglMjIlNUJDTFMlNUQlMjIlMkMlMjB0b2tlbml6ZXIudG9rZW5fdG9faWQoJTIyJTVCQ0xTJTVEJTIyKS51bndyYXAoKSklMkMlMEElMjAlMjAlMjAlMjAoJTIyJTVCU0VQJTVEJTIyJTJDJTIwdG9rZW5pemVyLnRva2VuX3RvX2lkKCUyMiU1QlNFUCU1RCUyMikudW53cmFwKCkpJTJDJTBBJTVEJTNCJTBBdG9rZW5pemVyLndpdGhfcG9zdF9wcm9jZXNzb3IoU29tZSglMEElMjAlMjAlMjAlMjBUZW1wbGF0ZVByb2Nlc3NpbmclM0ElM0FidWlsZGVyKCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAudHJ5X3NpbmdsZSglMjIlNUJDTFMlNUQlMjAlMjRBJTIwJTVCU0VQJTVEJTIyKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMC51bndyYXAoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMC50cnlfcGFpciglMjIlNUJDTFMlNUQlMjAlMjRBJTIwJTVCU0VQJTVEJTIwJTI0QiUzQTElMjAlNUJTRVAlNUQlM0ExJTIyKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMC51bndyYXAoKSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMC5zcGVjaWFsX3Rva2VucyhzcGVjaWFsX3Rva2VucyklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAuYnVpbGQoKSUzRiUyQyUwQSkpJTNC",highlighted:`<span class="hljs-keyword">use</span> tokenizers::processors::template::TemplateProcessing; | |
| <span class="hljs-keyword">let</span> <span class="hljs-variable">special_tokens</span> = <span class="hljs-built_in">vec!</span>[ | |
| (<span class="hljs-string">"[CLS]"</span>, tokenizer.<span class="hljs-title function_ invoke__">token_to_id</span>(<span class="hljs-string">"[CLS]"</span>).<span class="hljs-title function_ invoke__">unwrap</span>()), | |
| (<span class="hljs-string">"[SEP]"</span>, tokenizer.<span class="hljs-title function_ invoke__">token_to_id</span>(<span class="hljs-string">"[SEP]"</span>).<span class="hljs-title function_ invoke__">unwrap</span>()), | |
| ]; | |
| tokenizer.<span class="hljs-title function_ invoke__">with_post_processor</span>(<span class="hljs-title function_ invoke__">Some</span>( | |
| TemplateProcessing::<span class="hljs-title function_ invoke__">builder</span>() | |
| .<span class="hljs-title function_ invoke__">try_single</span>(<span class="hljs-string">"[CLS] $A [SEP]"</span>) | |
| .<span class="hljs-title function_ invoke__">unwrap</span>() | |
| .<span class="hljs-title function_ invoke__">try_pair</span>(<span class="hljs-string">"[CLS] $A [SEP] $B:1 [SEP]:1"</span>) | |
| .<span class="hljs-title function_ invoke__">unwrap</span>() | |
| .<span class="hljs-title function_ invoke__">special_tokens</span>(special_tokens) | |
| .<span class="hljs-title function_ invoke__">build</span>()?, | |
| ));`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function zl(a){let t,n;return t=new h({props:{$$slots:{default:[Ql]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function vl(a){let t,n;return t=new J({props:{code:"JTdCJTIwdGVtcGxhdGVQcm9jZXNzaW5nJTIwJTdEJTIwJTNEJTIwcmVxdWlyZSgndG9rZW5pemVycycpJTBBbml6ZXIuc2V0UG9zdFByb2Nlc3NvciglMEFtcGxhdGVQcm9jZXNzaW5nKCclNUJDTFMlNUQlMjAlMjRBJTIwJTVCU0VQJTVEJyUyQyUyMCclNUJDTFMlNUQlMjAlMjRBJTIwJTVCU0VQJTVEJTIwJTI0QiUzQTElMjAlNUJTRVAlNUQlM0ExJyUyQyUyMCU1QiUwQSU1QiclNUJDTFMlNUQnJTJDJTIwdG9rZW5pemVyLnRva2VuVG9JZCgnJTVCQ0xTJTVEJyklNUQlMkMlMEElNUInJTVCU0VQJTVEJyUyQyUyMHRva2VuaXplci50b2tlblRvSWQoJyU1QlNFUCU1RCcpJTVEJTJDJTBBJTJD",highlighted:`{ templateProcessing } = <span class="hljs-built_in">require</span>(<span class="hljs-string">'tokenizers'</span>) | |
| nizer.<span class="hljs-title function_">setPostProcessor</span>( | |
| <span class="hljs-title function_">mplateProcessing</span>(<span class="hljs-string">'[CLS] $A [SEP]'</span>, <span class="hljs-string">'[CLS] $A [SEP] $B:1 [SEP]:1'</span>, [ | |
| [<span class="hljs-string">'[CLS]'</span>, tokenizer.<span class="hljs-title function_">tokenToId</span>(<span class="hljs-string">'[CLS]'</span>)], | |
| [<span class="hljs-string">'[SEP]'</span>, tokenizer.<span class="hljs-title function_">tokenToId</span>(<span class="hljs-string">'[SEP]'</span>)], | |
| ,`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Sl(a){let t,n;return t=new h({props:{$$slots:{default:[vl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function El(a){let t,n;return t=new J({props:{code:"b3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZSglMjJIZWxsbyUyQyUyMHknYWxsISUyMEhvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRiUyMiklMEFwcmludChvdXRwdXQudG9rZW5zKSUwQSUyMyUyMCU1QiUyMiU1QkNMUyU1RCUyMiUyQyUyMCUyMkhlbGxvJTIyJTJDJTIwJTIyJTJDJTIyJTJDJTIwJTIyeSUyMiUyQyUyMCUyMiclMjIlMkMlMjAlMjJhbGwlMjIlMkMlMjAlMjIhJTIyJTJDJTIwJTIySG93JTIyJTJDJTIwJTIyYXJlJTIyJTJDJTIwJTIyeW91JTIyJTJDJTIwJTIyJTVCVU5LJTVEJTIyJTJDJTIwJTIyJTNGJTIyJTJDJTIwJTIyJTVCU0VQJTVEJTIyJTVE",highlighted:`output = tokenizer.encode(<span class="hljs-string">"Hello, y'all! How are you 😁 ?"</span>) | |
| <span class="hljs-built_in">print</span>(output.tokens) | |
| <span class="hljs-comment"># ["[CLS]", "Hello", ",", "y", "'", "all", "!", "How", "are", "you", "[UNK]", "?", "[SEP]"]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function xl(a){let t,n;return t=new h({props:{$$slots:{default:[El]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Zl(a){let t,n;return t=new J({props:{code:"bGV0JTIwb3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZSglMjJIZWxsbyUyQyUyMHknYWxsISUyMEhvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRiUyMiUyQyUyMHRydWUpJTNGJTNCJTBBcHJpbnRsbiEoJTIyJTdCJTNBJTNGJTdEJTIyJTJDJTIwb3V0cHV0LmdldF90b2tlbnMoKSklM0IlMEElMkYlMkYlMjAlNUIlMjIlNUJDTFMlNUQlMjIlMkMlMjAlMjJIZWxsbyUyMiUyQyUyMCUyMiUyQyUyMiUyQyUyMCUyMnklMjIlMkMlMjAlMjInJTIyJTJDJTIwJTIyYWxsJTIyJTJDJTIwJTIyISUyMiUyQyUyMCUyMkhvdyUyMiUyQyUyMCUyMmFyZSUyMiUyQyUyMCUyMnlvdSUyMiUyQyUyMCUyMiU1QlVOSyU1RCUyMiUyQyUyMCUyMiUzRiUyMiUyQyUyMCUyMiU1QlNFUCU1RCUyMiU1RA==",highlighted:`<span class="hljs-keyword">let</span> <span class="hljs-variable">output</span> = tokenizer.<span class="hljs-title function_ invoke__">encode</span>(<span class="hljs-string">"Hello, y'all! How are you 😁 ?"</span>, <span class="hljs-literal">true</span>)?; | |
| <span class="hljs-built_in">println!</span>(<span class="hljs-string">"{:?}"</span>, output.<span class="hljs-title function_ invoke__">get_tokens</span>()); | |
| <span class="hljs-comment">// ["[CLS]", "Hello", ",", "y", "'", "all", "!", "How", "are", "you", "[UNK]", "?", "[SEP]"]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Al(a){let t,n;return t=new h({props:{$$slots:{default:[Zl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Nl(a){let t,n;return t=new J({props:{code:"b3V0cHV0JTIwJTNEJTIwYXdhaXQlMjB0b2tlbml6ZXIuZW5jb2RlKCUyMkhlbGxvJTJDJTIweSdhbGwhJTIwSG93JTIwYXJlJTIweW91JTIwJUYwJTlGJTk4JTgxJTIwJTNGJTIyKSUwQW9sZS5sb2cob3V0cHV0LmdldFRva2VucygpKSUwQSUyMiU1QkNMUyU1RCUyMiUyQyUyMCUyMkhlbGxvJTIyJTJDJTIwJTIyJTJDJTIyJTJDJTIwJTIyeSUyMiUyQyUyMCUyMiclMjIlMkMlMjAlMjJhbGwlMjIlMkMlMjAlMjIhJTIyJTJDJTIwJTIySG93JTIyJTJDJTIwJTIyYXJlJTIyJTJDJTIwJTIyeW91JTIyJTJDJTIwJTIyJTVCVU5LJTVEJTIyJTJDJTIwJTIyJTNGJTIyJTJDJTIwJTIyJTVCU0VQJTVEJTIyJTVE",highlighted:`output = <span class="hljs-keyword">await</span> tokenizer.<span class="hljs-title function_">encode</span>(<span class="hljs-string">"Hello, y'all! How are you 😁 ?"</span>) | |
| ole.<span class="hljs-title function_">log</span>(output.<span class="hljs-title function_">getTokens</span>()) | |
| <span class="hljs-string">"[CLS]"</span>, <span class="hljs-string">"Hello"</span>, <span class="hljs-string">","</span>, <span class="hljs-string">"y"</span>, <span class="hljs-string">"'"</span>, <span class="hljs-string">"all"</span>, <span class="hljs-string">"!"</span>, <span class="hljs-string">"How"</span>, <span class="hljs-string">"are"</span>, <span class="hljs-string">"you"</span>, <span class="hljs-string">"[UNK]"</span>, <span class="hljs-string">"?"</span>, <span class="hljs-string">"[SEP]"</span>]`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Rl(a){let t,n;return t=new h({props:{$$slots:{default:[Nl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Gl(a){let t,n;return t=new J({props:{code:"b3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZSglMjJIZWxsbyUyQyUyMHknYWxsISUyMiUyQyUyMCUyMkhvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRiUyMiklMEFwcmludChvdXRwdXQudG9rZW5zKSUwQSUyMyUyMCU1QiUyMiU1QkNMUyU1RCUyMiUyQyUyMCUyMkhlbGxvJTIyJTJDJTIwJTIyJTJDJTIyJTJDJTIwJTIyeSUyMiUyQyUyMCUyMiclMjIlMkMlMjAlMjJhbGwlMjIlMkMlMjAlMjIhJTIyJTJDJTIwJTIyJTVCU0VQJTVEJTIyJTJDJTIwJTIySG93JTIyJTJDJTIwJTIyYXJlJTIyJTJDJTIwJTIyeW91JTIyJTJDJTIwJTIyJTVCVU5LJTVEJTIyJTJDJTIwJTIyJTNGJTIyJTJDJTIwJTIyJTVCU0VQJTVEJTIyJTVE",highlighted:`output = tokenizer.encode(<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">"How are you 😁 ?"</span>) | |
| <span class="hljs-built_in">print</span>(output.tokens) | |
| <span class="hljs-comment"># ["[CLS]", "Hello", ",", "y", "'", "all", "!", "[SEP]", "How", "are", "you", "[UNK]", "?", "[SEP]"]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Hl(a){let t,n;return t=new h({props:{$$slots:{default:[Gl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Wl(a){let t,n;return t=new J({props:{code:"bGV0JTIwb3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZSgoJTIySGVsbG8lMkMlMjB5J2FsbCElMjIlMkMlMjAlMjJIb3clMjBhcmUlMjB5b3UlMjAlRjAlOUYlOTglODElMjAlM0YlMjIpJTJDJTIwdHJ1ZSklM0YlM0IlMEFwcmludGxuISglMjIlN0IlM0ElM0YlN0QlMjIlMkMlMjBvdXRwdXQuZ2V0X3Rva2VucygpKSUzQiUwQSUyRiUyRiUyMCU1QiUyMiU1QkNMUyU1RCUyMiUyQyUyMCUyMkhlbGxvJTIyJTJDJTIwJTIyJTJDJTIyJTJDJTIwJTIyeSUyMiUyQyUyMCUyMiclMjIlMkMlMjAlMjJhbGwlMjIlMkMlMjAlMjIhJTIyJTJDJTIwJTIyJTVCU0VQJTVEJTIyJTJDJTIwJTIySG93JTIyJTJDJTIwJTIyYXJlJTIyJTJDJTIwJTIyeW91JTIyJTJDJTIwJTIyJTVCVU5LJTVEJTIyJTJDJTIwJTIyJTNGJTIyJTJDJTIwJTIyJTVCU0VQJTVEJTIyJTVE",highlighted:`<span class="hljs-keyword">let</span> <span class="hljs-variable">output</span> = tokenizer.<span class="hljs-title function_ invoke__">encode</span>((<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">"How are you 😁 ?"</span>), <span class="hljs-literal">true</span>)?; | |
| <span class="hljs-built_in">println!</span>(<span class="hljs-string">"{:?}"</span>, output.<span class="hljs-title function_ invoke__">get_tokens</span>()); | |
| <span class="hljs-comment">// ["[CLS]", "Hello", ",", "y", "'", "all", "!", "[SEP]", "How", "are", "you", "[UNK]", "?", "[SEP]"]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Bl(a){let t,n;return t=new h({props:{$$slots:{default:[Wl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Xl(a){let t,n;return t=new J({props:{code:"b3V0cHV0JTIwJTNEJTIwYXdhaXQlMjB0b2tlbml6ZXIuZW5jb2RlKCUyMkhlbGxvJTJDJTIweSdhbGwhJTIyJTJDJTIwJ0hvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRicpJTBBb2xlLmxvZyhvdXRwdXQuZ2V0VG9rZW5zKCkpJTBBJTIyJTVCQ0xTJTVEJTIyJTJDJTIwJTIySGVsbG8lMjIlMkMlMjAlMjIlMkMlMjIlMkMlMjAlMjJ5JTIyJTJDJTIwJTIyJyUyMiUyQyUyMCUyMmFsbCUyMiUyQyUyMCUyMiElMjIlMkMlMjAlMjIlNUJTRVAlNUQlMjIlMkMlMjAlMjJIb3clMjIlMkMlMjAlMjJhcmUlMjIlMkMlMjAlMjJ5b3UlMjIlMkMlMjAlMjIlNUJVTkslNUQlMjIlMkMlMjAlMjIlM0YlMjIlMkMlMjAlMjIlNUJTRVAlNUQlMjIlNUQ=",highlighted:`output = <span class="hljs-keyword">await</span> tokenizer.<span class="hljs-title function_">encode</span>(<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">'How are you 😁 ?'</span>) | |
| ole.<span class="hljs-title function_">log</span>(output.<span class="hljs-title function_">getTokens</span>()) | |
| <span class="hljs-string">"[CLS]"</span>, <span class="hljs-string">"Hello"</span>, <span class="hljs-string">","</span>, <span class="hljs-string">"y"</span>, <span class="hljs-string">"'"</span>, <span class="hljs-string">"all"</span>, <span class="hljs-string">"!"</span>, <span class="hljs-string">"[SEP]"</span>, <span class="hljs-string">"How"</span>, <span class="hljs-string">"are"</span>, <span class="hljs-string">"you"</span>, <span class="hljs-string">"[UNK]"</span>, <span class="hljs-string">"?"</span>, <span class="hljs-string">"[SEP]"</span>]`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Dl(a){let t,n;return t=new h({props:{$$slots:{default:[Xl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ll(a){let t,n;return t=new J({props:{code:"cHJpbnQob3V0cHV0LnR5cGVfaWRzKSUwQSUyMyUyMCU1QjAlMkMlMjAwJTJDJTIwMCUyQyUyMDAlMkMlMjAwJTJDJTIwMCUyQyUyMDAlMkMlMjAwJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTVE",highlighted:`<span class="hljs-built_in">print</span>(output.type_ids) | |
| <span class="hljs-comment"># [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Yl(a){let t,n;return t=new h({props:{$$slots:{default:[Ll]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Fl(a){let t,n;return t=new J({props:{code:"cHJpbnRsbiEoJTIyJTdCJTNBJTNGJTdEJTIyJTJDJTIwb3V0cHV0LmdldF90eXBlX2lkcygpKSUzQiUwQSUyRiUyRiUyMCU1QjAlMkMlMjAwJTJDJTIwMCUyQyUyMDAlMkMlMjAwJTJDJTIwMCUyQyUyMDAlMkMlMjAwJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTVE",highlighted:`<span class="hljs-built_in">println!</span>(<span class="hljs-string">"{:?}"</span>, output.<span class="hljs-title function_ invoke__">get_type_ids</span>()); | |
| <span class="hljs-comment">// [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Pl(a){let t,n;return t=new h({props:{$$slots:{default:[Fl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Kl(a){let t,n;return t=new J({props:{code:"b2xlLmxvZyhvdXRwdXQuZ2V0VHlwZUlkcygpKSUwQTAlMkMlMjAwJTJDJTIwMCUyQyUyMDAlMkMlMjAwJTJDJTIwMCUyQyUyMDAlMkMlMjAwJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTVE",highlighted:`ole.<span class="hljs-title function_">log</span>(output.<span class="hljs-title function_">getTypeIds</span>()) | |
| <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ol(a){let t,n;return t=new h({props:{$$slots:{default:[Kl]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ea(a){let t,n;return t=new J({props:{code:"b3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZV9iYXRjaCglNUIlMjJIZWxsbyUyQyUyMHknYWxsISUyMiUyQyUyMCUyMkhvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRiUyMiU1RCk=",highlighted:'output = tokenizer.encode_batch([<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">"How are you 😁 ?"</span>])',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ta(a){let t,n;return t=new h({props:{$$slots:{default:[ea]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function na(a){let t,n;return t=new 
J({props:{code:"bGV0JTIwb3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZV9iYXRjaCh2ZWMhJTVCJTIySGVsbG8lMkMlMjB5J2FsbCElMjIlMkMlMjAlMjJIb3clMjBhcmUlMjB5b3UlMjAlRjAlOUYlOTglODElMjAlM0YlMjIlNUQlMkMlMjB0cnVlKSUzRiUzQg==",highlighted:'<span class="hljs-keyword">let</span> <span class="hljs-variable">output</span> = tokenizer.<span class="hljs-title function_ invoke__">encode_batch</span>(<span class="hljs-built_in">vec!</span>[<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">"How are you 😁 ?"</span>], <span class="hljs-literal">true</span>)?;',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function sa(a){let t,n;return t=new h({props:{$$slots:{default:[na]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function la(a){let t,n;return t=new J({props:{code:"b3V0cHV0JTIwJTNEJTIwYXdhaXQlMjB0b2tlbml6ZXIuZW5jb2RlQmF0Y2goJTVCJTIySGVsbG8lMkMlMjB5J2FsbCElMjIlMkMlMjAnSG93JTIwYXJlJTIweW91JTIwJUYwJTlGJTk4JTgxJTIwJTNGJyU1RCk=",highlighted:'output = <span class="hljs-keyword">await</span> tokenizer.<span class="hljs-title function_">encodeBatch</span>([<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">'How are you 😁 ?'</span>])',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function aa(a){let t,n;return t=new h({props:{$$slots:{default:[la]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ra(a){let t,n;return t=new 
J({props:{code:"b3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZV9iYXRjaCglMEElMjAlMjAlMjAlMjAlNUIlNUIlMjJIZWxsbyUyQyUyMHknYWxsISUyMiUyQyUyMCUyMkhvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRiUyMiU1RCUyQyUyMCU1QiUyMkhlbGxvJTIwdG8lMjB5b3UlMjB0b28hJTIyJTJDJTIwJTIySSdtJTIwZmluZSUyQyUyMHRoYW5rJTIweW91ISUyMiU1RCU1RCUwQSk=",highlighted:`output = tokenizer.encode_batch( | |
| [[<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">"How are you 😁 ?"</span>], [<span class="hljs-string">"Hello to you too!"</span>, <span class="hljs-string">"I'm fine, thank you!"</span>]] | |
| )`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function oa(a){let t,n;return t=new h({props:{$$slots:{default:[ra]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ua(a){let t,n;return t=new J({props:{code:"bGV0JTIwb3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZV9iYXRjaCglMEElMjAlMjAlMjAlMjB2ZWMhJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwKCUyMkhlbGxvJTJDJTIweSdhbGwhJTIyJTJDJTIwJTIySG93JTIwYXJlJTIweW91JTIwJUYwJTlGJTk4JTgxJTIwJTNGJTIyKSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCglMjJIZWxsbyUyMHRvJTIweW91JTIwdG9vISUyMiUyQyUyMCUyMkknbSUyMGZpbmUlMkMlMjB0aGFuayUyMHlvdSElMjIpJTJDJTBBJTIwJTIwJTIwJTIwJTVEJTJDJTBBJTIwJTIwJTIwJTIwdHJ1ZSUyQyUwQSklM0YlM0I=",highlighted:`<span class="hljs-keyword">let</span> <span class="hljs-variable">output</span> = tokenizer.<span class="hljs-title function_ invoke__">encode_batch</span>( | |
| <span class="hljs-built_in">vec!</span>[ | |
| (<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">"How are you 😁 ?"</span>), | |
| (<span class="hljs-string">"Hello to you too!"</span>, <span class="hljs-string">"I'm fine, thank you!"</span>), | |
| ], | |
| <span class="hljs-literal">true</span>, | |
| )?;`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function pa(a){let t,n;return t=new h({props:{$$slots:{default:[ua]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ia(a){let t,n;return t=new J({props:{code:"YXIlMjBvdXRwdXQlMjAlM0QlMjBhd2FpdCUyMHRva2VuaXplci5lbmNvZGVCYXRjaCglMEElMjAlMjAlMjAlNUIlNUIlMjJIZWxsbyUyQyUyMHknYWxsISUyMiUyQyUyMCUyMkhvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRiUyMiU1RCUyQyUyMCU1QiUyMkhlbGxvJTIwdG8lMjB5b3UlMjB0b28hJTIyJTJDJTIwJTIySSdtJTIwZmluZSUyQyUyMHRoYW5rJTIweW91ISUyMiU1RCU1RCUwQSUzQg==",highlighted:`ar output = <span class="hljs-keyword">await</span> tokenizer.<span class="hljs-title function_">encodeBatch</span>( | |
| [[<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">"How are you 😁 ?"</span>], [<span class="hljs-string">"Hello to you too!"</span>, <span class="hljs-string">"I'm fine, thank you!"</span>]] | |
| ;`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ca(a){let t,n;return t=new h({props:{$$slots:{default:[ia]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function $a(a){let t,n;return t=new J({props:{code:"dG9rZW5pemVyLmVuYWJsZV9wYWRkaW5nKHBhZF9pZCUzRDMlMkMlMjBwYWRfdG9rZW4lM0QlMjIlNUJQQUQlNUQlMjIp",highlighted:'tokenizer.enable_padding(pad_id=<span class="hljs-number">3</span>, pad_token=<span class="hljs-string">"[PAD]"</span>)',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function fa(a){let t,n;return t=new h({props:{$$slots:{default:[$a]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ma(a){let t,n;return t=new J({props:{code:"dXNlJTIwdG9rZW5pemVycyUzQSUzQVBhZGRpbmdQYXJhbXMlM0IlMEF0b2tlbml6ZXIud2l0aF9wYWRkaW5nKFNvbWUoUGFkZGluZ1BhcmFtcyUyMCU3QiUwQSUyMCUyMCUyMCUyMHBhZF9pZCUzQSUyMDMlMkMlMEElMjAlMjAlMjAlMjBwYWRfdG9rZW4lM0ElMjAlMjIlNUJQQUQlNUQlMjIudG9fc3RyaW5nKCklMkMlMEElMjAlMjAlMjAlMjAuLlBhZGRpbmdQYXJhbXMlM0ElM0FkZWZhdWx0KCklMEElN0QpKSUzQg==",highlighted:`<span class="hljs-keyword">use</span> tokenizers::PaddingParams; | |
| tokenizer.<span class="hljs-title function_ invoke__">with_padding</span>(<span class="hljs-title function_ invoke__">Some</span>(PaddingParams { | |
| pad_id: <span class="hljs-number">3</span>, | |
| pad_token: <span class="hljs-string">"[PAD]"</span>.<span class="hljs-title function_ invoke__">to_string</span>(), | |
| ..PaddingParams::<span class="hljs-title function_ invoke__">default</span>() | |
| }));`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ma(a){let t,n;return t=new h({props:{$$slots:{default:[Ma]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ya(a){let t,n;return t=new J({props:{code:"bml6ZXIuc2V0UGFkZGluZyglN0IlMjBwYWRJZCUzQSUyMDMlMkMlMjBwYWRUb2tlbiUzQSUyMCclNUJQQUQlNUQnJTIwJTdEKQ==",highlighted:'nizer.<span class="hljs-title function_">setPadding</span>({ <span class="hljs-attr">padId</span>: <span class="hljs-number">3</span>, <span class="hljs-attr">padToken</span>: <span class="hljs-string">'[PAD]'</span> })',wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ga(a){let t,n;return t=new h({props:{$$slots:{default:[ya]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ja(a){let t,n;return t=new J({props:{code:"b3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZV9iYXRjaCglNUIlMjJIZWxsbyUyQyUyMHknYWxsISUyMiUyQyUyMCUyMkhvdyUyMGFyZSUyMHlvdSUyMCVGMCU5RiU5OCU4MSUyMCUzRiUyMiU1RCklMEFwcmludChvdXRwdXQlNUIxJTVELnRva2VucyklMEElMjMlMjAlNUIlMjIlNUJDTFMlNUQlMjIlMkMlMjAlMjJIb3clMjIlMkMlMjAlMjJhcmUlMjIlMkMlMjAlMjJ5b3UlMjIlMkMlMjAlMjIlNUJVTkslNUQlMjIlMkMlMjAlMjIlM0YlMjIlMkMlMjAlMjIlNUJTRVAlNUQlMjIlMkMlMjAlMjIlNUJQQUQlNUQlMjIlNUQ=",highlighted:`output = tokenizer.encode_batch([<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">"How are you 😁 ?"</span>]) | |
| <span class="hljs-built_in">print</span>(output[<span class="hljs-number">1</span>].tokens) | |
| <span class="hljs-comment"># ["[CLS]", "How", "are", "you", "[UNK]", "?", "[SEP]", "[PAD]"]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function da(a){let t,n;return t=new h({props:{$$slots:{default:[Ja]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ha(a){let t,n;return t=new J({props:{code:"bGV0JTIwb3V0cHV0JTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZV9iYXRjaCh2ZWMhJTVCJTIySGVsbG8lMkMlMjB5J2FsbCElMjIlMkMlMjAlMjJIb3clMjBhcmUlMjB5b3UlMjAlRjAlOUYlOTglODElMjAlM0YlMjIlNUQlMkMlMjB0cnVlKSUzRiUzQiUwQXByaW50bG4hKCUyMiU3QiUzQSUzRiU3RCUyMiUyQyUyMG91dHB1dCU1QjElNUQuZ2V0X3Rva2VucygpKSUzQiUwQSUyRiUyRiUyMCU1QiUyMiU1QkNMUyU1RCUyMiUyQyUyMCUyMkhvdyUyMiUyQyUyMCUyMmFyZSUyMiUyQyUyMCUyMnlvdSUyMiUyQyUyMCUyMiU1QlVOSyU1RCUyMiUyQyUyMCUyMiUzRiUyMiUyQyUyMCUyMiU1QlNFUCU1RCUyMiUyQyUyMCUyMiU1QlBBRCU1RCUyMiU1RA==",highlighted:`<span class="hljs-keyword">let</span> <span class="hljs-variable">output</span> = tokenizer.<span class="hljs-title function_ invoke__">encode_batch</span>(<span class="hljs-built_in">vec!</span>[<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">"How are you 😁 ?"</span>], <span class="hljs-literal">true</span>)?; | |
| <span class="hljs-built_in">println!</span>(<span class="hljs-string">"{:?}"</span>, output[<span class="hljs-number">1</span>].<span class="hljs-title function_ invoke__">get_tokens</span>()); | |
| <span class="hljs-comment">// ["[CLS]", "How", "are", "you", "[UNK]", "?", "[SEP]", "[PAD]"]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ua(a){let t,n;return t=new h({props:{$$slots:{default:[ha]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ta(a){let t,n;return t=new J({props:{code:"b3V0cHV0JTIwJTNEJTIwYXdhaXQlMjB0b2tlbml6ZXIuZW5jb2RlQmF0Y2goJTVCJTIySGVsbG8lMkMlMjB5J2FsbCElMjIlMkMlMjAnSG93JTIwYXJlJTIweW91JTIwJUYwJTlGJTk4JTgxJTIwJTNGJyU1RCklMEFvbGUubG9nKG91dHB1dCU1QjElNUQuZ2V0VG9rZW5zKCkpJTBBJTIyJTVCQ0xTJTVEJTIyJTJDJTIwJTIySG93JTIyJTJDJTIwJTIyYXJlJTIyJTJDJTIwJTIyeW91JTIyJTJDJTIwJTIyJTVCVU5LJTVEJTIyJTJDJTIwJTIyJTNGJTIyJTJDJTIwJTIyJTVCU0VQJTVEJTIyJTJDJTIwJTIyJTVCUEFEJTVEJTIyJTVE",highlighted:`output = <span class="hljs-keyword">await</span> tokenizer.<span class="hljs-title function_">encodeBatch</span>([<span class="hljs-string">"Hello, y'all!"</span>, <span class="hljs-string">'How are you 😁 ?'</span>]) | |
| ole.<span class="hljs-title function_">log</span>(output[<span class="hljs-number">1</span>].<span class="hljs-title function_">getTokens</span>()) | |
| <span class="hljs-string">"[CLS]"</span>, <span class="hljs-string">"How"</span>, <span class="hljs-string">"are"</span>, <span class="hljs-string">"you"</span>, <span class="hljs-string">"[UNK]"</span>, <span class="hljs-string">"?"</span>, <span class="hljs-string">"[SEP]"</span>, <span class="hljs-string">"[PAD]"</span>]`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function wa(a){let t,n;return t=new h({props:{$$slots:{default:[Ta]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ja(a){let t,n;return t=new J({props:{code:"cHJpbnQob3V0cHV0JTVCMSU1RC5hdHRlbnRpb25fbWFzayklMEElMjMlMjAlNUIxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMCU1RA==",highlighted:`<span class="hljs-built_in">print</span>(output[<span class="hljs-number">1</span>].attention_mask) | |
| <span class="hljs-comment"># [1, 1, 1, 1, 1, 1, 1, 0]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ka(a){let t,n;return t=new h({props:{$$slots:{default:[ja]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function _a(a){let t,n;return t=new J({props:{code:"cHJpbnRsbiEoJTIyJTdCJTNBJTNGJTdEJTIyJTJDJTIwb3V0cHV0JTVCMSU1RC5nZXRfYXR0ZW50aW9uX21hc2soKSklM0IlMEElMkYlMkYlMjAlNUIxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMCU1RA==",highlighted:`<span class="hljs-built_in">println!</span>(<span class="hljs-string">"{:?}"</span>, output[<span class="hljs-number">1</span>].<span class="hljs-title function_ invoke__">get_attention_mask</span>()); | |
| <span class="hljs-comment">// [1, 1, 1, 1, 1, 1, 1, 0]</span>`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ia(a){let t,n;return t=new h({props:{$$slots:{default:[_a]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function ba(a){let t,n;return t=new J({props:{code:"b2xlLmxvZyhvdXRwdXQlNUIxJTVELmdldEF0dGVudGlvbk1hc2soKSklMEExJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMSUyQyUyMDElMkMlMjAxJTJDJTIwMCU1RA==",highlighted:`ole.<span class="hljs-title function_">log</span>(output[<span class="hljs-number">1</span>].<span class="hljs-title function_">getAttentionMask</span>()) | |
| <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>]`,wrap:!1}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p:d,i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Ca(a){let t,n;return t=new h({props:{$$slots:{default:[ba]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function qa(a){let t,n,e,s=`You can load any tokenizer from the Hugging Face Hub as long as a | |
| <code>tokenizer.json</code> file is available in the repository.`,r,I,b,C,q,x,pe=`You can also import a pretrained tokenizer directly in, as long as you | |
| have its vocabulary file. For instance, here is how to import the | |
| classic pretrained BERT tokenizer:`,V,Q,z,Z,ie="as long as you have downloaded the file <code>bert-base-uncased-vocab.txt</code> with",v,S,E;return t=new A({props:{title:"Using a pretrained tokenizer",local:"using-a-pretrained-tokenizer",headingTag:"h3"}}),I=new J({props:{code:"ZnJvbSUyMHRva2VuaXplcnMlMjBpbXBvcnQlMjBUb2tlbml6ZXIlMEElMEF0b2tlbml6ZXIlMjAlM0QlMjBUb2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKCUyMmJlcnQtYmFzZS11bmNhc2VkJTIyKQ==",highlighted:`<span class="hljs-keyword">from</span> tokenizers <span class="hljs-keyword">import</span> Tokenizer | |
| tokenizer = Tokenizer.from_pretrained(<span class="hljs-string">"bert-base-uncased"</span>)`,wrap:!1}}),C=new A({props:{title:"Importing a pretrained tokenizer from legacy vocabulary files",local:"importing-a-pretrained-tokenizer-from-legacy-vocabulary-files",headingTag:"h3"}}),Q=new J({props:{code:"ZnJvbSUyMHRva2VuaXplcnMlMjBpbXBvcnQlMjBCZXJ0V29yZFBpZWNlVG9rZW5pemVyJTBBJTBBdG9rZW5pemVyJTIwJTNEJTIwQmVydFdvcmRQaWVjZVRva2VuaXplciglMjJiZXJ0LWJhc2UtdW5jYXNlZC12b2NhYi50eHQlMjIlMkMlMjBsb3dlcmNhc2UlM0RUcnVlKQ==",highlighted:`<span class="hljs-keyword">from</span> tokenizers <span class="hljs-keyword">import</span> BertWordPieceTokenizer | |
| tokenizer = BertWordPieceTokenizer(<span class="hljs-string">"bert-base-uncased-vocab.txt"</span>, lowercase=<span class="hljs-literal">True</span>)`,wrap:!1}}),S=new J({props:{code:"d2dldCUyMGh0dHBzJTNBJTJGJTJGczMuYW1hem9uYXdzLmNvbSUyRm1vZGVscy5odWdnaW5nZmFjZS5jbyUyRmJlcnQlMkZiZXJ0LWJhc2UtdW5jYXNlZC12b2NhYi50eHQ=",highlighted:"wget https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",wrap:!1}}),{c(){u(t.$$.fragment),n=y(),e=T("p"),e.innerHTML=s,r=y(),u(I.$$.fragment),b=y(),u(C.$$.fragment),q=y(),x=T("p"),x.textContent=pe,V=y(),u(Q.$$.fragment),z=y(),Z=T("p"),Z.innerHTML=ie,v=y(),u(S.$$.fragment)},l(U){p(t.$$.fragment,U),n=g(U),e=w(U,"P",{"data-svelte-h":!0}),j(e)!=="svelte-dzel7a"&&(e.innerHTML=s),r=g(U),p(I.$$.fragment,U),b=g(U),p(C.$$.fragment,U),q=g(U),x=w(U,"P",{"data-svelte-h":!0}),j(x)!=="svelte-8l30n4"&&(x.textContent=pe),V=g(U),p(Q.$$.fragment,U),z=g(U),Z=w(U,"P",{"data-svelte-h":!0}),j(Z)!=="svelte-v74lz7"&&(Z.innerHTML=ie),v=g(U),p(S.$$.fragment,U)},m(U,k){i(t,U,k),m(U,n,k),m(U,e,k),m(U,r,k),i(I,U,k),m(U,b,k),i(C,U,k),m(U,q,k),m(U,x,k),m(U,V,k),i(Q,U,k),m(U,z,k),m(U,Z,k),m(U,v,k),i(S,U,k),E=!0},p:d,i(U){E||(c(t.$$.fragment,U),c(I.$$.fragment,U),c(C.$$.fragment,U),c(Q.$$.fragment,U),c(S.$$.fragment,U),E=!0)},o(U){$(t.$$.fragment,U),$(I.$$.fragment,U),$(C.$$.fragment,U),$(Q.$$.fragment,U),$(S.$$.fragment,U),E=!1},d(U){U&&(M(n),M(e),M(r),M(b),M(q),M(x),M(V),M(z),M(Z),M(v)),f(t,U),f(I,U),f(C,U),f(Q,U),f(S,U)}}}function Va(a){let t,n;return t=new h({props:{$$slots:{default:[qa]},$$scope:{ctx:a}}}),{c(){u(t.$$.fragment)},l(e){p(t.$$.fragment,e)},m(e,s){i(t,e,s),n=!0},p(e,s){const r={};s&2&&(r.$$scope={dirty:s,ctx:e}),t.$set(r)},i(e){n||(c(t.$$.fragment,e),n=!0)},o(e){$(t.$$.fragment,e),n=!1},d(e){f(t,e)}}}function Qa(a){let t,n,e,s,r,I,b,C,q,x=`Let’s have a quick look at the 🤗 Tokenizers library features. The | |
| library provides an implementation of today’s most used tokenizers that | |
| is both easy to use and blazing fast.`,pe,V,Q,z,Z=`To illustrate how fast the 🤗 Tokenizers library is, let’s train a new | |
| tokenizer on <a href="https://www.salesforce.com/blog/the-wikitext-long-term-dependency-language-modeling-dataset/" rel="nofollow">wikitext-103</a> | |
| (516M of text) in just a few seconds. First things first, you will need | |
| to download this dataset and unzip it with:`,ie,v,S,E,U,k,Ot=`In this tour, we will build and train a Byte-Pair Encoding (BPE) | |
| tokenizer. For more information about the different type of tokenizers, | |
| check out this <a href="https://huggingface.co/transformers/tokenizer_summary.html" rel="nofollow">guide</a> in | |
| the 🤗 Transformers documentation. Here, training the tokenizer means it | |
| will learn merge rules by:`,De,ce,en=`<li>Start with all the characters present in the training corpus as | |
| tokens.</li> <li>Identify the most common pair of tokens and merge it into one token.</li> <li>Repeat until the vocabulary (e.g., the number of tokens) has reached | |
| the size we want.</li>`,Le,$e,tn=`The main API of the library is the <code>class</code> <code>Tokenizer</code>, here is how | |
| we instantiate one with a BPE model:`,Ye,N,Fe,fe,nn=`To train our tokenizer on the wikitext files, we will need to | |
| instantiate a [trainer]{.title-ref}, in this case a | |
| <code>BpeTrainer</code>`,Pe,R,Ke,Me,sn=`We can set the training arguments like <code>vocab_size</code> or <code>min_frequency</code> (here | |
| left at their default values of 30,000 and 0) but the most important | |
| part is to give the <code>special_tokens</code> we | |
| plan to use later on (they are not used at all during training) so that | |
| they get inserted in the vocabulary.`,Oe,G,et,me,ln=`We could train our tokenizer right now, but it wouldn’t be optimal. | |
| Without a pre-tokenizer that will split our inputs into words, we might | |
| get tokens that overlap several words: for instance we could get an | |
| <code>"it is"</code> token since those two words | |
| often appear next to each other. Using a pre-tokenizer will ensure no | |
| token is bigger than a word returned by the pre-tokenizer. Here we want | |
| to train a subword BPE tokenizer, and we will use the easiest | |
| pre-tokenizer possible by splitting on whitespace.`,tt,H,nt,ye,an="Now, we can just call the <code>Tokenizer.train</code> method with any list of files we want to use:",st,W,lt,ge,rn=`This should only take a few seconds to train our tokenizer on the full | |
| wikitext dataset! To save the tokenizer in one file that contains all | |
| its configuration and vocabulary, just use the | |
| <code>Tokenizer.save</code> method:`,at,B,rt,Je,on=`and you can reload your tokenizer from that file with the | |
| <code>Tokenizer.from_file</code> <code>classmethod</code>:`,ot,X,ut,de,pt,he,un=`Now that we have trained a tokenizer, we can use it on any text we want | |
| with the <code>Tokenizer.encode</code> method:`,it,D,ct,Ue,pn=`This applied the full pipeline of the tokenizer on the text, returning | |
| an <code>Encoding</code> object. To learn more | |
| about this pipeline, and how to apply (or customize) parts of it, check out <a href="https://github.com/huggingface/tokenizers/blob/main/docs/source-doc-builder/pipeline.mdx" rel="nofollow">this page</a>.`,$t,Te,cn=`This <code>Encoding</code> object then has all the | |
| attributes you need for your deep learning model (or other). The | |
| <code>tokens</code> attribute contains the | |
| segmentation of your text in tokens:`,ft,L,Mt,we,$n=`Similarly, the <code>ids</code> attribute will | |
| contain the index of each of those tokens in the tokenizer’s | |
| vocabulary:`,mt,Y,yt,je,fn=`An important feature of the 🤗 Tokenizers library is that it comes with | |
| full alignment tracking, meaning you can always get the part of your | |
| original sentence that corresponds to a given token. Those are stored in | |
| the <code>offsets</code> attribute of our | |
| <code>Encoding</code> object. For instance, let’s | |
| assume we would want to find back what caused the | |
| <code>"[UNK]"</code> token to appear, which is the | |
| token at index 9 in the list, we can just ask for the offset at the | |
| index:`,gt,F,Jt,ke,Mn=`and those are the indices that correspond to the emoji in the original | |
| sentence:`,dt,P,ht,_e,Ut,Ie,mn=`We might want our tokenizer to automatically add special tokens, like | |
| <code>"[CLS]"</code> or <code>"[SEP]"</code>. To do this, we use a post-processor. | |
| <code>TemplateProcessing</code> is the most | |
| commonly used, you just have to specify a template for the processing of | |
| single sentences and pairs of sentences, along with the special tokens | |
| and their IDs.`,Tt,be,yn=`When we built our tokenizer, we set <code>"[CLS]"</code> and <code>"[SEP]"</code> in positions 1 | |
| and 2 of our list of special tokens, so this should be their IDs. To | |
| double-check, we can use the <code>Tokenizer.token_to_id</code> method:`,wt,K,jt,Ce,gn=`Here is how we can set the post-processing to give us the traditional | |
| BERT inputs:`,kt,O,_t,qe,Jn=`Let’s go over this snippet of code in more details. First we specify | |
| the template for single sentences: those should have the form | |
| <code>"[CLS] $A [SEP]"</code> where | |
| <code>$A</code> represents our sentence.`,It,Ve,dn=`Then, we specify the template for sentence pairs, which should have the | |
| form <code>"[CLS] $A [SEP] $B [SEP]"</code> where | |
| <code>$A</code> represents the first sentence and | |
| <code>$B</code> the second one. The | |
| <code>:1</code> added in the template represent the <code>type IDs</code> we want for each part of our input: it defaults | |
| to 0 for everything (which is why we don’t have | |
| <code>$A:0</code>) and here we set it to 1 for the | |
| tokens of the second sentence and the last <code>"[SEP]"</code> token.`,bt,Qe,hn=`Lastly, we specify the special tokens we used and their IDs in our | |
| tokenizer’s vocabulary.`,Ct,ze,Un=`To check out this worked properly, let’s try to encode the same | |
| sentence as before:`,qt,ee,Vt,ve,Tn=`To check the results on a pair of sentences, we just pass the two | |
| sentences to <code>Tokenizer.encode</code>:`,Qt,te,zt,Se,wn="You can then check the type IDs attributed to each token is correct with",vt,ne,St,Ee,jn="If you save your tokenizer with <code>Tokenizer.save</code>, the post-processor will be saved along.",Et,xe,xt,Ze,kn=`To get the full speed of the 🤗 Tokenizers library, it’s best to | |
| process your texts by batches by using the | |
| <code>Tokenizer.encode_batch</code> method:`,Zt,se,At,Ae,_n=`The output is then a list of <code>Encoding</code> | |
| objects like the ones we saw before. You can process together as many | |
| texts as you like, as long as it fits in memory.`,Nt,Ne,In=`To process a batch of sentences pairs, pass two lists to the | |
| <code>Tokenizer.encode_batch</code> method: the | |
| list of sentences A and the list of sentences B:`,Rt,le,Gt,Re,bn=`When encoding multiple sentences, you can automatically pad the outputs | |
| to the longest sentence present by using | |
| <code>Tokenizer.enable_padding</code>, with the | |
| <code>pad_token</code> and its ID (which we can | |
| double-check the id for the padding token with | |
| <code>Tokenizer.token_to_id</code> like before):`,Ht,ae,Wt,Ge,Cn=`We can set the <code>direction</code> of the padding | |
| (defaults to the right) or a given <code>length</code> if we want to pad every sample to that specific number (here | |
| we leave it unset to pad to the size of the longest text).`,Bt,re,Xt,He,qn=`In this case, the <code>attention mask</code> generated by the | |
| tokenizer takes the padding into account:`,Dt,oe,Lt,We,Yt,ue,Ft,Be,Pt,Xe,Kt;return r=new ps({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),b=new A({props:{title:"Quicktour",local:"quicktour",headingTag:"h1"}}),V=new A({props:{title:"Build a tokenizer from scratch",local:"build-a-tokenizer-from-scratch",headingTag:"h2"}}),v=new J({props:{code:"d2dldCUyMGh0dHBzJTNBJTJGJTJGczMuYW1hem9uYXdzLmNvbSUyRnJlc2VhcmNoLm1ldGFtaW5kLmlvJTJGd2lraXRleHQlMkZ3aWtpdGV4dC0xMDMtcmF3LXYxLnppcCUwQXVuemlwJTIwd2lraXRleHQtMTAzLXJhdy12MS56aXA=",highlighted:`wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-raw-v1.zip | |
| unzip wikitext-103-raw-v1.zip`,wrap:!1}}),E=new A({props:{title:"Training the tokenizer",local:"training-the-tokenizer",headingTag:"h3"}}),N=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[ys],rust:[Ms],python:[$s]},$$scope:{ctx:a}}}),R=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Ts],rust:[hs],python:[Js]},$$scope:{ctx:a}}}),G=new us({props:{$$slots:{default:[ws]},$$scope:{ctx:a}}}),H=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Cs],rust:[Is],python:[ks]},$$scope:{ctx:a}}}),W=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Ss],rust:[zs],python:[Vs]},$$scope:{ctx:a}}}),B=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Rs],rust:[As],python:[xs]},$$scope:{ctx:a}}}),X=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Ds],rust:[Bs],python:[Hs]},$$scope:{ctx:a}}}),de=new A({props:{title:"Using the tokenizer",local:"using-the-tokenizer",headingTag:"h3"}}),D=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Os],rust:[Ps],python:[Ys]},$$scope:{ctx:a}}}),L=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[al],rust:[sl],python:[tl]},$$scope:{ctx:a}}}),Y=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[cl],rust:[pl],python:[ol]},$$scope:{ctx:a}}}),F=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[gl],rust:[ml],python:[fl]},$$scope:{ctx:a}}}),P=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[wl],rust:[Ul],python:[dl]},$$scope:{ctx:a}}}),_e=new A({props:{title:"Post-processing",local:"post-processing",headingTag:"h3"}}),K=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Cl],rust:[Il],python:[kl]},$$scope:{ctx:a}}}),O=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Sl],rust:[zl],python:[Vl]},$$scope:{ctx:a}}}),ee=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Rl],rust:[Al],python:[xl]},$$scope:{ctx:a}}}),te=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Dl],rust:[Bl],python:[Hl]},$$scope:{ctx:a}}}),ne=new 
_({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Ol],rust:[Pl],python:[Yl]},$$scope:{ctx:a}}}),xe=new A({props:{title:"Encoding multiple sentences in a batch",local:"encoding-multiple-sentences-in-a-batch",headingTag:"h3"}}),se=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[aa],rust:[sa],python:[ta]},$$scope:{ctx:a}}}),le=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[ca],rust:[pa],python:[oa]},$$scope:{ctx:a}}}),ae=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[ga],rust:[ma],python:[fa]},$$scope:{ctx:a}}}),re=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[wa],rust:[Ua],python:[da]},$$scope:{ctx:a}}}),oe=new _({props:{python:!0,rust:!0,node:!0,$$slots:{node:[Ca],rust:[Ia],python:[ka]},$$scope:{ctx:a}}}),We=new A({props:{title:"Pretrained",local:"pretrained",headingTag:"h2"}}),ue=new _({props:{python:!0,rust:!0,node:!0,$$slots:{python:[Va]},$$scope:{ctx:a}}}),Be=new is({props:{source:"https://github.com/huggingface/tokenizers/blob/main/docs/source-doc-builder/quicktour.mdx"}}),{c(){t=T("meta"),n=y(),e=T("p"),s=y(),u(r.$$.fragment),I=y(),u(b.$$.fragment),C=y(),q=T("p"),q.textContent=x,pe=y(),u(V.$$.fragment),Q=y(),z=T("p"),z.innerHTML=Z,ie=y(),u(v.$$.fragment),S=y(),u(E.$$.fragment),U=y(),k=T("p"),k.innerHTML=Ot,De=y(),ce=T("ul"),ce.innerHTML=en,Le=y(),$e=T("p"),$e.innerHTML=tn,Ye=y(),u(N.$$.fragment),Fe=y(),fe=T("p"),fe.innerHTML=nn,Pe=y(),u(R.$$.fragment),Ke=y(),Me=T("p"),Me.innerHTML=sn,Oe=y(),u(G.$$.fragment),et=y(),me=T("p"),me.innerHTML=ln,tt=y(),u(H.$$.fragment),nt=y(),ye=T("p"),ye.innerHTML=an,st=y(),u(W.$$.fragment),lt=y(),ge=T("p"),ge.innerHTML=rn,at=y(),u(B.$$.fragment),rt=y(),Je=T("p"),Je.innerHTML=on,ot=y(),u(X.$$.fragment),ut=y(),u(de.$$.fragment),pt=y(),he=T("p"),he.innerHTML=un,it=y(),u(D.$$.fragment),ct=y(),Ue=T("p"),Ue.innerHTML=pn,$t=y(),Te=T("p"),Te.innerHTML=cn,ft=y(),u(L.$$.fragment),Mt=y(),we=T("p"),we.innerHTML=$n,mt=y(),u(Y.$$.fragment),yt=y(),je=T("p"),je.innerHTML=fn,gt=y(),u(F.$$.fragment),Jt=y(),ke=T("p"),
ke.textContent=Mn,dt=y(),u(P.$$.fragment),ht=y(),u(_e.$$.fragment),Ut=y(),Ie=T("p"),Ie.innerHTML=mn,Tt=y(),be=T("p"),be.innerHTML=yn,wt=y(),u(K.$$.fragment),jt=y(),Ce=T("p"),Ce.textContent=gn,kt=y(),u(O.$$.fragment),_t=y(),qe=T("p"),qe.innerHTML=Jn,It=y(),Ve=T("p"),Ve.innerHTML=dn,bt=y(),Qe=T("p"),Qe.textContent=hn,Ct=y(),ze=T("p"),ze.textContent=Un,qt=y(),u(ee.$$.fragment),Vt=y(),ve=T("p"),ve.innerHTML=Tn,Qt=y(),u(te.$$.fragment),zt=y(),Se=T("p"),Se.textContent=wn,vt=y(),u(ne.$$.fragment),St=y(),Ee=T("p"),Ee.innerHTML=jn,Et=y(),u(xe.$$.fragment),xt=y(),Ze=T("p"),Ze.innerHTML=kn,Zt=y(),u(se.$$.fragment),At=y(),Ae=T("p"),Ae.innerHTML=_n,Nt=y(),Ne=T("p"),Ne.innerHTML=In,Rt=y(),u(le.$$.fragment),Gt=y(),Re=T("p"),Re.innerHTML=bn,Ht=y(),u(ae.$$.fragment),Wt=y(),Ge=T("p"),Ge.innerHTML=Cn,Bt=y(),u(re.$$.fragment),Xt=y(),He=T("p"),He.innerHTML=qn,Dt=y(),u(oe.$$.fragment),Lt=y(),u(We.$$.fragment),Yt=y(),u(ue.$$.fragment),Ft=y(),u(Be.$$.fragment),Pt=y(),Xe=T("p"),this.h()},l(l){const o=rs("svelte-u9bgzb",document.head);t=w(o,"META",{name:!0,content:!0}),o.forEach(M),n=g(l),e=w(l,"P",{}),es(e).forEach(M),s=g(l),p(r.$$.fragment,l),I=g(l),p(b.$$.fragment,l),C=g(l),q=w(l,"P",{"data-svelte-h":!0}),j(q)!=="svelte-vlsd60"&&(q.textContent=x),pe=g(l),p(V.$$.fragment,l),Q=g(l),z=w(l,"P",{"data-svelte-h":!0}),j(z)!=="svelte-19m2g44"&&(z.innerHTML=Z),ie=g(l),p(v.$$.fragment,l),S=g(l),p(E.$$.fragment,l),U=g(l),k=w(l,"P",{"data-svelte-h":!0}),j(k)!=="svelte-1ab5uuh"&&(k.innerHTML=Ot),De=g(l),ce=w(l,"UL",{"data-svelte-h":!0}),j(ce)!=="svelte-wkiawp"&&(ce.innerHTML=en),Le=g(l),$e=w(l,"P",{"data-svelte-h":!0}),j($e)!=="svelte-8jg8u0"&&($e.innerHTML=tn),Ye=g(l),p(N.$$.fragment,l),Fe=g(l),fe=w(l,"P",{"data-svelte-h":!0}),j(fe)!=="svelte-207jr3"&&(fe.innerHTML=nn),Pe=g(l),p(R.$$.fragment,l),Ke=g(l),Me=w(l,"P",{"data-svelte-h":!0}),j(Me)!=="svelte-1t3zo19"&&(Me.innerHTML=sn),Oe=g(l),p(G.$$.fragment,l),et=g(l),me=w(l,"P",{"data-svelte-h":!0}),j(me)!=="svelte-142dtc3"&&(me.innerHTML=ln),tt=g(l),p(H
.$$.fragment,l),nt=g(l),ye=w(l,"P",{"data-svelte-h":!0}),j(ye)!=="svelte-1kti9us"&&(ye.innerHTML=an),st=g(l),p(W.$$.fragment,l),lt=g(l),ge=w(l,"P",{"data-svelte-h":!0}),j(ge)!=="svelte-ghl0s0"&&(ge.innerHTML=rn),at=g(l),p(B.$$.fragment,l),rt=g(l),Je=w(l,"P",{"data-svelte-h":!0}),j(Je)!=="svelte-18twg3e"&&(Je.innerHTML=on),ot=g(l),p(X.$$.fragment,l),ut=g(l),p(de.$$.fragment,l),pt=g(l),he=w(l,"P",{"data-svelte-h":!0}),j(he)!=="svelte-g8al98"&&(he.innerHTML=un),it=g(l),p(D.$$.fragment,l),ct=g(l),Ue=w(l,"P",{"data-svelte-h":!0}),j(Ue)!=="svelte-1eot2u1"&&(Ue.innerHTML=pn),$t=g(l),Te=w(l,"P",{"data-svelte-h":!0}),j(Te)!=="svelte-1me9v9l"&&(Te.innerHTML=cn),ft=g(l),p(L.$$.fragment,l),Mt=g(l),we=w(l,"P",{"data-svelte-h":!0}),j(we)!=="svelte-19hwcs8"&&(we.innerHTML=$n),mt=g(l),p(Y.$$.fragment,l),yt=g(l),je=w(l,"P",{"data-svelte-h":!0}),j(je)!=="svelte-oo2v9g"&&(je.innerHTML=fn),gt=g(l),p(F.$$.fragment,l),Jt=g(l),ke=w(l,"P",{"data-svelte-h":!0}),j(ke)!=="svelte-295h1u"&&(ke.textContent=Mn),dt=g(l),p(P.$$.fragment,l),ht=g(l),p(_e.$$.fragment,l),Ut=g(l),Ie=w(l,"P",{"data-svelte-h":!0}),j(Ie)!=="svelte-1lt0plb"&&(Ie.innerHTML=mn),Tt=g(l),be=w(l,"P",{"data-svelte-h":!0}),j(be)!=="svelte-1p6rfjx"&&(be.innerHTML=yn),wt=g(l),p(K.$$.fragment,l),jt=g(l),Ce=w(l,"P",{"data-svelte-h":!0}),j(Ce)!=="svelte-10p2x7x"&&(Ce.textContent=gn),kt=g(l),p(O.$$.fragment,l),_t=g(l),qe=w(l,"P",{"data-svelte-h":!0}),j(qe)!=="svelte-1654s0j"&&(qe.innerHTML=Jn),It=g(l),Ve=w(l,"P",{"data-svelte-h":!0}),j(Ve)!=="svelte-1he2eh8"&&(Ve.innerHTML=dn),bt=g(l),Qe=w(l,"P",{"data-svelte-h":!0}),j(Qe)!=="svelte-nbhw3c"&&(Qe.textContent=hn),Ct=g(l),ze=w(l,"P",{"data-svelte-h":!0}),j(ze)!=="svelte-bh0by8"&&(ze.textContent=Un),qt=g(l),p(ee.$$.fragment,l),Vt=g(l),ve=w(l,"P",{"data-svelte-h":!0}),j(ve)!=="svelte-zo3plm"&&(ve.innerHTML=Tn),Qt=g(l),p(te.$$.fragment,l),zt=g(l),Se=w(l,"P",{"data-svelte-h":!0}),j(Se)!=="svelte-14y4jd4"&&(Se.textContent=wn),vt=g(l),p(ne.$$.fragment,l),St=g(l),Ee=w(l,"P",{"data-svelte-h":!0}),
j(Ee)!=="svelte-sq2861"&&(Ee.innerHTML=jn),Et=g(l),p(xe.$$.fragment,l),xt=g(l),Ze=w(l,"P",{"data-svelte-h":!0}),j(Ze)!=="svelte-1iibqnx"&&(Ze.innerHTML=kn),Zt=g(l),p(se.$$.fragment,l),At=g(l),Ae=w(l,"P",{"data-svelte-h":!0}),j(Ae)!=="svelte-ipm28y"&&(Ae.innerHTML=_n),Nt=g(l),Ne=w(l,"P",{"data-svelte-h":!0}),j(Ne)!=="svelte-3bcocl"&&(Ne.innerHTML=In),Rt=g(l),p(le.$$.fragment,l),Gt=g(l),Re=w(l,"P",{"data-svelte-h":!0}),j(Re)!=="svelte-136mrhv"&&(Re.innerHTML=bn),Ht=g(l),p(ae.$$.fragment,l),Wt=g(l),Ge=w(l,"P",{"data-svelte-h":!0}),j(Ge)!=="svelte-ljzkee"&&(Ge.innerHTML=Cn),Bt=g(l),p(re.$$.fragment,l),Xt=g(l),He=w(l,"P",{"data-svelte-h":!0}),j(He)!=="svelte-e366mm"&&(He.innerHTML=qn),Dt=g(l),p(oe.$$.fragment,l),Lt=g(l),p(We.$$.fragment,l),Yt=g(l),p(ue.$$.fragment,l),Ft=g(l),p(Be.$$.fragment,l),Pt=g(l),Xe=w(l,"P",{}),es(Xe).forEach(M),this.h()},h(){ts(t,"name","hf:doc:metadata"),ts(t,"content",za)},m(l,o){os(document.head,t),m(l,n,o),m(l,e,o),m(l,s,o),i(r,l,o),m(l,I,o),i(b,l,o),m(l,C,o),m(l,q,o),m(l,pe,o),i(V,l,o),m(l,Q,o),m(l,z,o),m(l,ie,o),i(v,l,o),m(l,S,o),i(E,l,o),m(l,U,o),m(l,k,o),m(l,De,o),m(l,ce,o),m(l,Le,o),m(l,$e,o),m(l,Ye,o),i(N,l,o),m(l,Fe,o),m(l,fe,o),m(l,Pe,o),i(R,l,o),m(l,Ke,o),m(l,Me,o),m(l,Oe,o),i(G,l,o),m(l,et,o),m(l,me,o),m(l,tt,o),i(H,l,o),m(l,nt,o),m(l,ye,o),m(l,st,o),i(W,l,o),m(l,lt,o),m(l,ge,o),m(l,at,o),i(B,l,o),m(l,rt,o),m(l,Je,o),m(l,ot,o),i(X,l,o),m(l,ut,o),i(de,l,o),m(l,pt,o),m(l,he,o),m(l,it,o),i(D,l,o),m(l,ct,o),m(l,Ue,o),m(l,$t,o),m(l,Te,o),m(l,ft,o),i(L,l,o),m(l,Mt,o),m(l,we,o),m(l,mt,o),i(Y,l,o),m(l,yt,o),m(l,je,o),m(l,gt,o),i(F,l,o),m(l,Jt,o),m(l,ke,o),m(l,dt,o),i(P,l,o),m(l,ht,o),i(_e,l,o),m(l,Ut,o),m(l,Ie,o),m(l,Tt,o),m(l,be,o),m(l,wt,o),i(K,l,o),m(l,jt,o),m(l,Ce,o),m(l,kt,o),i(O,l,o),m(l,_t,o),m(l,qe,o),m(l,It,o),m(l,Ve,o),m(l,bt,o),m(l,Qe,o),m(l,Ct,o),m(l,ze,o),m(l,qt,o),i(ee,l,o),m(l,Vt,o),m(l,ve,o),m(l,Qt,o),i(te,l,o),m(l,zt,o),m(l,Se,o),m(l,vt,o),i(ne,l,o),m(l,St,o),m(l,Ee,o),m(l,Et,o),i(xe,l,o),m(l,xt,o),m(l,Ze,o),m(l,Zt,o),i(se,l
,o),m(l,At,o),m(l,Ae,o),m(l,Nt,o),m(l,Ne,o),m(l,Rt,o),i(le,l,o),m(l,Gt,o),m(l,Re,o),m(l,Ht,o),i(ae,l,o),m(l,Wt,o),m(l,Ge,o),m(l,Bt,o),i(re,l,o),m(l,Xt,o),m(l,He,o),m(l,Dt,o),i(oe,l,o),m(l,Lt,o),i(We,l,o),m(l,Yt,o),i(ue,l,o),m(l,Ft,o),i(Be,l,o),m(l,Pt,o),m(l,Xe,o),Kt=!0},p(l,[o]){const Vn={};o&2&&(Vn.$$scope={dirty:o,ctx:l}),N.$set(Vn);const Qn={};o&2&&(Qn.$$scope={dirty:o,ctx:l}),R.$set(Qn);const zn={};o&2&&(zn.$$scope={dirty:o,ctx:l}),G.$set(zn);const vn={};o&2&&(vn.$$scope={dirty:o,ctx:l}),H.$set(vn);const Sn={};o&2&&(Sn.$$scope={dirty:o,ctx:l}),W.$set(Sn);const En={};o&2&&(En.$$scope={dirty:o,ctx:l}),B.$set(En);const xn={};o&2&&(xn.$$scope={dirty:o,ctx:l}),X.$set(xn);const Zn={};o&2&&(Zn.$$scope={dirty:o,ctx:l}),D.$set(Zn);const An={};o&2&&(An.$$scope={dirty:o,ctx:l}),L.$set(An);const Nn={};o&2&&(Nn.$$scope={dirty:o,ctx:l}),Y.$set(Nn);const Rn={};o&2&&(Rn.$$scope={dirty:o,ctx:l}),F.$set(Rn);const Gn={};o&2&&(Gn.$$scope={dirty:o,ctx:l}),P.$set(Gn);const Hn={};o&2&&(Hn.$$scope={dirty:o,ctx:l}),K.$set(Hn);const Wn={};o&2&&(Wn.$$scope={dirty:o,ctx:l}),O.$set(Wn);const Bn={};o&2&&(Bn.$$scope={dirty:o,ctx:l}),ee.$set(Bn);const Xn={};o&2&&(Xn.$$scope={dirty:o,ctx:l}),te.$set(Xn);const Dn={};o&2&&(Dn.$$scope={dirty:o,ctx:l}),ne.$set(Dn);const Ln={};o&2&&(Ln.$$scope={dirty:o,ctx:l}),se.$set(Ln);const Yn={};o&2&&(Yn.$$scope={dirty:o,ctx:l}),le.$set(Yn);const Fn={};o&2&&(Fn.$$scope={dirty:o,ctx:l}),ae.$set(Fn);const Pn={};o&2&&(Pn.$$scope={dirty:o,ctx:l}),re.$set(Pn);const Kn={};o&2&&(Kn.$$scope={dirty:o,ctx:l}),oe.$set(Kn);const 
On={};o&2&&(On.$$scope={dirty:o,ctx:l}),ue.$set(On)},i(l){Kt||(c(r.$$.fragment,l),c(b.$$.fragment,l),c(V.$$.fragment,l),c(v.$$.fragment,l),c(E.$$.fragment,l),c(N.$$.fragment,l),c(R.$$.fragment,l),c(G.$$.fragment,l),c(H.$$.fragment,l),c(W.$$.fragment,l),c(B.$$.fragment,l),c(X.$$.fragment,l),c(de.$$.fragment,l),c(D.$$.fragment,l),c(L.$$.fragment,l),c(Y.$$.fragment,l),c(F.$$.fragment,l),c(P.$$.fragment,l),c(_e.$$.fragment,l),c(K.$$.fragment,l),c(O.$$.fragment,l),c(ee.$$.fragment,l),c(te.$$.fragment,l),c(ne.$$.fragment,l),c(xe.$$.fragment,l),c(se.$$.fragment,l),c(le.$$.fragment,l),c(ae.$$.fragment,l),c(re.$$.fragment,l),c(oe.$$.fragment,l),c(We.$$.fragment,l),c(ue.$$.fragment,l),c(Be.$$.fragment,l),Kt=!0)},o(l){$(r.$$.fragment,l),$(b.$$.fragment,l),$(V.$$.fragment,l),$(v.$$.fragment,l),$(E.$$.fragment,l),$(N.$$.fragment,l),$(R.$$.fragment,l),$(G.$$.fragment,l),$(H.$$.fragment,l),$(W.$$.fragment,l),$(B.$$.fragment,l),$(X.$$.fragment,l),$(de.$$.fragment,l),$(D.$$.fragment,l),$(L.$$.fragment,l),$(Y.$$.fragment,l),$(F.$$.fragment,l),$(P.$$.fragment,l),$(_e.$$.fragment,l),$(K.$$.fragment,l),$(O.$$.fragment,l),$(ee.$$.fragment,l),$(te.$$.fragment,l),$(ne.$$.fragment,l),$(xe.$$.fragment,l),$(se.$$.fragment,l),$(le.$$.fragment,l),$(ae.$$.fragment,l),$(re.$$.fragment,l),$(oe.$$.fragment,l),$(We.$$.fragment,l),$(ue.$$.fragment,l),$(Be.$$.fragment,l),Kt=!1},d(l){l&&(M(n),M(e),M(s),M(I),M(C),M(q),M(pe),M(Q),M(z),M(ie),M(S),M(U),M(k),M(De),M(ce),M(Le),M($e),M(Ye),M(Fe),M(fe),M(Pe),M(Ke),M(Me),M(Oe),M(et),M(me),M(tt),M(nt),M(ye),M(st),M(lt),M(ge),M(at),M(rt),M(Je),M(ot),M(ut),M(pt),M(he),M(it),M(ct),M(Ue),M($t),M(Te),M(ft),M(Mt),M(we),M(mt),M(yt),M(je),M(gt),M(Jt),M(ke),M(dt),M(ht),M(Ut),M(Ie),M(Tt),M(be),M(wt),M(jt),M(Ce),M(kt),M(_t),M(qe),M(It),M(Ve),M(bt),M(Qe),M(Ct),M(ze),M(qt),M(Vt),M(ve),M(Qt),M(zt),M(Se),M(vt),M(St),M(Ee),M(Et),M(xt),M(Ze),M(Zt),M(At),M(Ae),M(Nt),M(Ne),M(Rt),M(Gt),M(Re),M(Ht),M(Wt),M(Ge),M(Bt),M(Xt),M(He),M(Dt),M(Lt),M(Yt),M(Ft),M(Pt),M(Xe)),M(t),f(r,l),f(b,l)
,f(V,l),f(v,l),f(E,l),f(N,l),f(R,l),f(G,l),f(H,l),f(W,l),f(B,l),f(X,l),f(de,l),f(D,l),f(L,l),f(Y,l),f(F,l),f(P,l),f(_e,l),f(K,l),f(O,l),f(ee,l),f(te,l),f(ne,l),f(xe,l),f(se,l),f(le,l),f(ae,l),f(re,l),f(oe,l),f(We,l),f(ue,l),f(Be,l)}}}
/*
 * Page metadata, stored as a JSON-encoded string: the page title ("Quicktour"),
 * its local anchor, and a nested list of sections with their heading depth.
 * The fragment code above passes this string as the "content" value of the
 * element it names "hf:doc:metadata".
 */
const za='{"title":"Quicktour","local":"quicktour","sections":[{"title":"Build a tokenizer from scratch","local":"build-a-tokenizer-from-scratch","sections":[{"title":"Training the tokenizer","local":"training-the-tokenizer","sections":[],"depth":3},{"title":"Using the tokenizer","local":"using-the-tokenizer","sections":[],"depth":3},{"title":"Post-processing","local":"post-processing","sections":[],"depth":3},{"title":"Encoding multiple sentences in a batch","local":"encoding-multiple-sentences-in-a-batch","sections":[],"depth":3}],"depth":2},{"title":"Pretrained","local":"pretrained","sections":[{"title":"Using a pretrained tokenizer","local":"using-a-pretrained-tokenizer","sections":[],"depth":3},{"title":"Importing a pretrained tokenizer from legacy vocabulary files","local":"importing-a-pretrained-tokenizer-from-legacy-vocabulary-files","sections":[],"depth":3}],"depth":2}],"depth":1}';
/*
 * Instance-setup function handed to `as(...)` by the Ra constructor below.
 * Registers a callback through `ss` (imported from the scheduler chunk;
 * presumably a mount-time lifecycle hook -- TODO confirm). The callback reads
 * the "fw" query parameter from window.location.search and discards the value.
 * The parameter `a` is unused, and the function always returns an empty array.
 */
function va(a){return ss(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/*
 * Page component class, exported from this module under the name `component`.
 * Extends `ls` (imported from the index chunk). The constructor calls
 * `as(this, t, va, Qa, ns, {})`, where `t` is the constructor argument and
 * `Qa` is defined outside the lines visible here (presumably the
 * fragment-building function that ends just above -- TODO confirm).
 */
class Ra extends ls{constructor(t){super(),as(this,t,va,Qa,ns,{})}}
export{Ra as component}; | |
Xet Storage Details
- Size:
- 104 kB
- Xet hash:
- 463157cca2ab22a879ee50ef8251b3cf0e9c365e7770516916d2252985396121
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.