Buckets:

download
raw
71.5 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;tokenizers&quot;,&quot;local&quot;:&quot;tokenizers&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Classes&quot;,&quot;local&quot;:&quot;classes&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;AutoTokenizer&quot;,&quot;local&quot;:&quot;autotokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options)&quot;,&quot;local&quot;:&quot;autotokenizerfrompretrainedpretrainedmodelnameorpath-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;PreTrainedTokenizer&quot;,&quot;local&quot;:&quot;pretrainedtokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;PreTrainedTokenizer(text, [options])&quot;,&quot;local&quot;:&quot;pretrainedtokenizertext-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.constructor(tokenizerJSON, tokenizerConfig)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerconstructortokenizerjson-tokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.convert_tokens_to_ids(tokens)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerconverttokenstoidstokens&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.tokenize(text, options)&quot;,&quot;local&quot;:&quot;pretrainedtokenizertokenizetext-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.encode(text, 
options)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerencodetext-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.batch_decode(batch, decode_args)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerbatchdecodebatch-decodeargs&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.decode(token_ids, [decode_args])&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodetokenids-decodeargs&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.decode_single(token_ids, decode_args)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodesingletokenids-decodeargs&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.get_chat_template(options)&quot;,&quot;local&quot;:&quot;pretrainedtokenizergetchattemplateoptions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.apply_chat_template(conversation, [options])&quot;,&quot;local&quot;:&quot;pretrainedtokenizerapplychattemplateconversation-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Type 
Definitions&quot;,&quot;local&quot;:&quot;type-definitions&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;PretrainedTokenizerOptions&quot;,&quot;local&quot;:&quot;pretrainedtokenizeroptions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;TextContent&quot;,&quot;local&quot;:&quot;textcontent&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;ImageContent&quot;,&quot;local&quot;:&quot;imagecontent&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;MessageContent&quot;,&quot;local&quot;:&quot;messagecontent&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Message&quot;,&quot;local&quot;:&quot;message&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;BatchEncoding&quot;,&quot;local&quot;:&quot;batchencoding&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;TokenizerCallOptions&quot;,&quot;local&quot;:&quot;tokenizercalloptions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;ApplyChatTemplateOptions&quot;,&quot;local&quot;:&quot;applychattemplateoptions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Callbacks&quot;,&quot;local&quot;:&quot;callbacks&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;PreTrainedTokenizerCallback&quot;,&quot;local&quot;:&quot;pretrainedtokenizercallback&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers.js/pr_1665/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/entry/start.cb58eb6f.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/scheduler.6efaaf90.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/singletons.08239980.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/paths.611c3944.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/entry/app.9eafcf9d.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/preload-helper.4b821645.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/index.eb3e1f0f.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/nodes/0.d361a553.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/nodes/14.e2db6d4d.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.699f2734.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/CodeBlock.b303f0b9.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;tokenizers&quot;,&quot;local&quot;:&quot;tokenizers&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Classes&quot;,&quot;local&quot;:&quot;classes&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;AutoTokenizer&quot;,&quot;local&quot;:&quot;autotokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options)&quot;,&quot;local&quot;:&quot;autotokenizerfrompretrainedpretrainedmodelnameorpath-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;PreTrainedTokenizer&quot;,&quot;local&quot;:&quot;pretrainedtokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;PreTrainedTokenizer(text, [options])&quot;,&quot;local&quot;:&quot;pretrainedtokenizertext-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.constructor(tokenizerJSON, tokenizerConfig)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerconstructortokenizerjson-tokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.convert_tokens_to_ids(tokens)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerconverttokenstoidstokens&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.tokenize(text, options)&quot;,&quot;local&quot;:&quot;pretrainedtokenizertokenizetext-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.encode(text, 
options)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerencodetext-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.batch_decode(batch, decode_args)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerbatchdecodebatch-decodeargs&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.decode(token_ids, [decode_args])&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodetokenids-decodeargs&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.decode_single(token_ids, decode_args)&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodesingletokenids-decodeargs&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.get_chat_template(options)&quot;,&quot;local&quot;:&quot;pretrainedtokenizergetchattemplateoptions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;PreTrainedTokenizer.apply_chat_template(conversation, [options])&quot;,&quot;local&quot;:&quot;pretrainedtokenizerapplychattemplateconversation-options&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Type 
Definitions&quot;,&quot;local&quot;:&quot;type-definitions&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;PretrainedTokenizerOptions&quot;,&quot;local&quot;:&quot;pretrainedtokenizeroptions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;TextContent&quot;,&quot;local&quot;:&quot;textcontent&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;ImageContent&quot;,&quot;local&quot;:&quot;imagecontent&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;MessageContent&quot;,&quot;local&quot;:&quot;messagecontent&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Message&quot;,&quot;local&quot;:&quot;message&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;BatchEncoding&quot;,&quot;local&quot;:&quot;batchencoding&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;TokenizerCallOptions&quot;,&quot;local&quot;:&quot;tokenizercalloptions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;ApplyChatTemplateOptions&quot;,&quot;local&quot;:&quot;applychattemplateoptions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Callbacks&quot;,&quot;local&quot;:&quot;callbacks&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;PreTrainedTokenizerCallback&quot;,&quot;local&quot;:&quot;pretrainedtokenizercallback&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm 
border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="tokenizers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 
67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers</span></h1> <p data-svelte-h="svelte-1i5hmha">Tokenizers turn text into the integer ids a model understands, and
decode model output back into strings. Use <code>AutoTokenizer.from_pretrained()</code>
to load the right implementation for a model ID — the class is chosen from
the tokenizer’s <code>tokenizer_config.json</code>.</p> <p data-svelte-h="svelte-ad986b">For chat-trained models, <code>tokenizer.apply_chat_template()</code> renders an
OpenAI-style message list into the model’s native prompt format.</p> <h2 class="relative group"><a id="classes" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#classes"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Classes</span></h2> <a id="module_tokenizers.AutoTokenizer"></a> <h3 class="relative group"><a id="autotokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#autotokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 
40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>AutoTokenizer</span></h3> <p data-svelte-h="svelte-1hb1bdc">Helper class which is used to instantiate pretrained tokenizers with the <code>from_pretrained</code> function.
The chosen tokenizer class is determined by the type specified in the tokenizer config.</p> <p data-svelte-h="svelte-13a106p"><strong>Example:</strong> Create an <code>AutoTokenizer</code> and use it to tokenize a sentence.
This will automatically detect the tokenizer type based on the tokenizer class defined in <code>tokenizer_config.json</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-javascript "><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">AutoTokenizer</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">&#x27;@huggingface/transformers&#x27;</span>;
<span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(<span class="hljs-string">&#x27;Xenova/bert-base-uncased&#x27;</span>);
<span class="hljs-keyword">const</span> { input_ids } = <span class="hljs-keyword">await</span> <span class="hljs-title function_">tokenizer</span>(<span class="hljs-string">&#x27;I love transformers!&#x27;</span>);
<span class="hljs-comment">// Tensor {</span>
<span class="hljs-comment">// data: BigInt64Array(6) [101n, 1045n, 2293n, 19081n, 999n, 102n],</span>
<span class="hljs-comment">// dims: [1, 6],</span>
<span class="hljs-comment">// type: &#x27;int64&#x27;,</span>
<span class="hljs-comment">// size: 6,</span>
<span class="hljs-comment">// }</span><!-- HTML_TAG_END --></pre></div> <a id="module_tokenizers.AutoTokenizer.from_pretrained"></a> <h4 class="relative group"><a id="autotokenizerfrompretrainedpretrainedmodelnameorpath-options" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#autotokenizerfrompretrainedpretrainedmodelnameorpath-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options)</span></h4> <p data-svelte-h="svelte-s7y3dm">Instantiate one of the tokenizer classes of the library from a pretrained model.</p> <p data-svelte-h="svelte-weoe0h">The tokenizer class to instantiate is selected based on the <code>tokenizer_class</code> property of the config object
(either passed as an argument or loaded from <code>pretrained_model_name_or_path</code> if possible).</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-1n0esz7"><li><code>pretrained_model_name_or_path</code> (<code>string</code>) — The name or path of the pretrained model. Can be either:
<ul><li>A string, the <em>model id</em> of a pretrained tokenizer hosted inside a model repo on huggingface.co.
Valid model ids can be located at the root-level, like <code>bert-base-uncased</code>, or namespaced under a
user or organization name, like <code>dbmdz/bert-base-german-cased</code>.</li> <li>A path to a <em>directory</em> containing tokenizer files, e.g., <code>./my_model_directory/</code>.</li></ul></li> <li><code>options</code> (<a href="./tokenizers#module_tokenizers.PretrainedTokenizerOptions"><code>PretrainedTokenizerOptions</code></a>) — Additional options for loading the tokenizer.</li></ul> <p data-svelte-h="svelte-1o8tkfk"><strong>Returns:</strong> <code>Promise</code>&lt;<a href="./tokenizers#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a>&gt; — The loaded tokenizer.</p> <a id="module_tokenizers.PreTrainedTokenizer"></a> <h3 class="relative group"><a id="pretrainedtokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer</span></h3> <p data-svelte-h="svelte-sqpc4i"><code>PreTrainedTokenizer</code> is the base class for all tokenizers in Transformers.js.</p> <a id="module_tokenizers.PreTrainedTokenizer.call"></a> <h4 class="relative group"><a id="pretrainedtokenizertext-options" class="header-link block 
pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizertext-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer(text, [options])</span></h4> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-1r5rv57"><li><code>text</code> (<code>string[]?</code>)</li> <li><code>options</code> (<a href="./tokenizers#module_tokenizers.TokenizerCallOptions"><code>TokenizerCallOptions</code></a>&lt;<code>string[]?</code>, <code>boolean = true</code>&gt;) <em>optional</em></li></ul> <p data-svelte-h="svelte-1m4kvqj"><strong>Returns:</strong> <a href="./tokenizers#module_tokenizers.BatchEncoding"><code>BatchEncoding</code></a>&lt;<code>BatchEncodingItem</code>&lt;<code>string[]?</code>, <code>boolean = true</code>&gt;&gt;</p> <a id="module_tokenizers.PreTrainedTokenizer.constructor"></a> <h4 class="relative group"><a id="pretrainedtokenizerconstructortokenizerjson-tokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#pretrainedtokenizerconstructortokenizerjson-tokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.constructor(tokenizerJSON, tokenizerConfig)</span></h4> <p data-svelte-h="svelte-1vkx5qp">Create a new PreTrainedTokenizer instance.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-ccv9g4"><li><code>tokenizerJSON</code> (<code>Object</code>) — The JSON of the tokenizer.</li> <li><code>tokenizerConfig</code> (<code>Object</code>) — The config of the tokenizer.</li></ul> <a id="module_tokenizers.PreTrainedTokenizer.from_pretrained"></a> <h4 class="relative group"><a id="pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options)</span></h4> <p data-svelte-h="svelte-17bml36">Loads a pretrained tokenizer from the given <code>pretrained_model_name_or_path</code>.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-5qp8l2"><li><code>pretrained_model_name_or_path</code> (<code>string</code>) — The path to the pretrained tokenizer.</li> <li><code>options</code> (<a href="./tokenizers#module_tokenizers.PretrainedTokenizerOptions"><code>PretrainedTokenizerOptions</code></a>) — Additional options for loading the tokenizer.</li></ul> <p data-svelte-h="svelte-1qqeet5"><strong>Returns:</strong> <code>Promise</code>&lt;<a href="./tokenizers#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a>&gt; — A new instance of the <code>PreTrainedTokenizer</code> class.</p> <p data-svelte-h="svelte-sl4r8m"><strong>Throws</strong></p> <ul data-svelte-h="svelte-1l00lu1"><li><code>Error</code> — Throws an error if the tokenizer.json or tokenizer_config.json files are not found in the <code>pretrained_model_name_or_path</code>.</li></ul> <a id="module_tokenizers.PreTrainedTokenizer.convert_tokens_to_ids"></a> <h4 class="relative group"><a id="pretrainedtokenizerconverttokenstoidstokens" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#pretrainedtokenizerconverttokenstoidstokens"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.convert_tokens_to_ids(tokens)</span></h4> <p data-svelte-h="svelte-ae0mrq">Converts a token string (or a sequence of tokens) into a single integer id (or a sequence of ids), using the vocabulary.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-1xqll1v"><li><code>tokens</code> (<code>string[]?</code>) — One or several token(s) to convert to token id(s).</li></ul> <p data-svelte-h="svelte-1adzu1v"><strong>Returns:</strong> <code>string[]?</code> — The token id or list of token ids.</p> <a id="module_tokenizers.PreTrainedTokenizer.tokenize"></a> <h4 class="relative group"><a id="pretrainedtokenizertokenizetext-options" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizertokenizetext-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 
11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.tokenize(text, options)</span></h4> <p data-svelte-h="svelte-1tkmx0p">Converts a string into a sequence of tokens.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-wunemy"><li><code>text</code> (<code>string</code>) — The sequence to be encoded.</li> <li><code>options</code> (<code>Object</code>) — An optional object containing the following properties:
<ul><li><code>pair</code> (<code>string</code> | <code>null</code>) <em>optional</em> — A second sequence to be encoded with the first.</li> <li><code>add_special_tokens</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — Whether or not to add the special tokens associated with the corresponding model.</li></ul></li></ul> <p data-svelte-h="svelte-1tobfvg"><strong>Returns:</strong> <code>string[]</code> — The list of tokens.</p> <a id="module_tokenizers.PreTrainedTokenizer.encode"></a> <h4 class="relative group"><a id="pretrainedtokenizerencodetext-options" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerencodetext-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.encode(text, options)</span></h4> <p data-svelte-h="svelte-sj7zk5">Encodes a single text or a pair of texts using the model’s tokenizer.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-3an34w"><li><code>text</code> (<code>string</code>) — The text to encode.</li> <li><code>options</code> (<code>Object</code>) — An optional object containing 
the following properties:
<ul><li><code>text_pair</code> (<code>string</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — The optional second text to encode.</li> <li><code>add_special_tokens</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>true</code> — Whether or not to add the special tokens associated with the corresponding model.</li> <li><code>return_token_type_ids</code> (<code>boolean</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Whether to return token_type_ids.</li></ul></li></ul> <p data-svelte-h="svelte-blb5y8"><strong>Returns:</strong> <code>number[]</code> — An array of token IDs representing the encoded text(s).</p> <a id="module_tokenizers.PreTrainedTokenizer.batch_decode"></a> <h4 class="relative group"><a id="pretrainedtokenizerbatchdecodebatch-decodeargs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerbatchdecodebatch-decodeargs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.batch_decode(batch, decode_args)</span></h4> <p data-svelte-h="svelte-y8f29l">Decode a batch of tokenized sequences.</p> <p 
data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-quebp8"><li><code>batch</code> (<code>number[][]</code> | <a href="./utils/tensor#module_utils/tensor.Tensor"><code>Tensor</code></a>) — List/Tensor of tokenized input sequences.</li> <li><code>decode_args</code> (<code>Object</code>) — (Optional) Object with decoding arguments.</li></ul> <p data-svelte-h="svelte-171rm6x"><strong>Returns:</strong> <code>string[]</code> — List of decoded sequences.</p> <a id="module_tokenizers.PreTrainedTokenizer.decode"></a> <h4 class="relative group"><a id="pretrainedtokenizerdecodetokenids-decodeargs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerdecodetokenids-decodeargs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.decode(token_ids, [decode_args])</span></h4> <p data-svelte-h="svelte-168jmgj">Decodes a sequence of token IDs back to a string.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-174tohb"><li><code>token_ids</code> (<code>number[]</code> | <code>bigint[]</code> | <a 
href="./utils/tensor#module_utils/tensor.Tensor"><code>Tensor</code></a>) — List/Tensor of token IDs to decode.</li> <li><code>decode_args</code> (<code>Object</code>) <em>optional</em> — defaults to <code>{}</code> <ul><li><code>skip_special_tokens</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — If true, special tokens are removed from the output string.</li> <li><code>clean_up_tokenization_spaces</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>true</code> — If true, spaces before punctuation and abbreviated forms are removed.</li></ul></li></ul> <p data-svelte-h="svelte-1tc18iy"><strong>Returns:</strong> <code>string</code> — The decoded string.</p> <p data-svelte-h="svelte-sl4r8m"><strong>Throws</strong></p> <ul data-svelte-h="svelte-1kf4bd8"><li><code>Error</code> — If <code>token_ids</code> is not a non-empty array of integers.</li></ul> <a id="module_tokenizers.PreTrainedTokenizer.decode_single"></a> <h4 class="relative group"><a id="pretrainedtokenizerdecodesingletokenids-decodeargs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerdecodesingletokenids-decodeargs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 
0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.decode_single(token_ids, decode_args)</span></h4> <p data-svelte-h="svelte-19e6tws">Decode a single list of token ids to a string.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-1vyyepu"><li><code>token_ids</code> (<code>number[]</code> | <code>bigint[]</code>) — List of token ids to decode.</li> <li><code>decode_args</code> (<code>Object</code>) — Optional arguments for decoding:
<ul><li><code>skip_special_tokens</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — Whether to skip special tokens during decoding.</li> <li><code>clean_up_tokenization_spaces</code> (<code>boolean</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Whether to clean up tokenization spaces during decoding.
If null, the value is set to <code>this.decoder.cleanup</code> if it exists, falling back to <code>this.clean_up_tokenization_spaces</code> if it exists, falling back to <code>true</code>.</li></ul></li></ul> <p data-svelte-h="svelte-o0b1jc"><strong>Returns:</strong> <code>string</code> — The decoded string</p> <a id="module_tokenizers.PreTrainedTokenizer.get_chat_template"></a> <h4 class="relative group"><a id="pretrainedtokenizergetchattemplateoptions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizergetchattemplateoptions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.get_chat_template(options)</span></h4> <p data-svelte-h="svelte-1hrpjri">Retrieve the chat template string used for tokenizing chat messages. This template is used
internally by the <code>apply_chat_template</code> method and can also be used externally to retrieve the model’s chat
template for better generation tracking.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-rom54w"><li><code>options</code> (<code>Object</code>) — An optional object containing the following properties:
<ul><li><code>chat_template</code> (<code>string</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — A Jinja template or the name of a template to use for this conversion.
It is usually not necessary to pass anything to this argument,
as the model’s template will be used by default.</li> <li><code>tools</code> (<code>Object[]</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — A list of tools (callable functions) that will be accessible to the model. If the template does not
support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
giving the name, description and argument types for the tool. See our
<a href="https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use" rel="nofollow">chat templating guide</a>
for more information.</li></ul></li></ul> <p data-svelte-h="svelte-1yeaouc"><strong>Returns:</strong> <code>string</code> — The chat template string.</p> <a id="module_tokenizers.PreTrainedTokenizer.apply_chat_template"></a> <h4 class="relative group"><a id="pretrainedtokenizerapplychattemplateconversation-options" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerapplychattemplateconversation-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.apply_chat_template(conversation, [options])</span></h4> <p data-svelte-h="svelte-qfc73q">Converts a list of message objects with <code>&quot;role&quot;</code> and <code>&quot;content&quot;</code> keys to a list of token
ids. This method is intended for use with chat models, and will read the tokenizer’s <code>chat_template</code> attribute to
determine the format and control tokens to use when converting.</p> <p data-svelte-h="svelte-f86ycd">See the <a href="https://huggingface.co/docs/transformers/chat_templating" rel="nofollow">chat templating guide</a> for more information.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-g6dc3z"><li><code>conversation</code> (<a href="./tokenizers#module_tokenizers.Message"><code>Message</code></a>[]) — A list of message objects with <code>&quot;role&quot;</code> and <code>&quot;content&quot;</code> keys,
representing the chat history so far.</li> <li><code>options</code> (<a href="./tokenizers#module_tokenizers.ApplyChatTemplateOptions"><code>ApplyChatTemplateOptions</code></a>&lt;<code>TTokenize</code>, <code>TReturnTensor</code>, <code>TReturnDict</code>&gt;) <em>optional</em> — Options controlling
template rendering and tokenization.</li></ul> <p data-svelte-h="svelte-1boylzr"><strong>Returns:</strong> <code>ApplyChatTemplateReturn</code>&lt;<code>TTokenize</code>, <code>TReturnTensor</code>, <code>TReturnDict</code>&gt; — The tokenized output.</p> <p data-svelte-h="svelte-h5rmtt"><strong>Example:</strong> Applying a chat template to a conversation.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-javascript "><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">AutoTokenizer</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">&quot;@huggingface/transformers&quot;</span>;
<span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(<span class="hljs-string">&quot;Xenova/mistral-tokenizer-v1&quot;</span>);
<span class="hljs-keyword">const</span> chat = [
{ <span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Hello, how are you?&quot;</span> },
{ <span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;I&#x27;m doing great. How can I help you today?&quot;</span> },
{ <span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;I&#x27;d like to show off how chat templating works!&quot;</span> },
]
<span class="hljs-keyword">const</span> text = tokenizer.<span class="hljs-title function_">apply_chat_template</span>(chat, { <span class="hljs-attr">tokenize</span>: <span class="hljs-literal">false</span> });
<span class="hljs-comment">// &quot;&lt;s&gt;[INST] Hello, how are you? [/INST]I&#x27;m doing great. How can I help you today?&lt;/s&gt; [INST] I&#x27;d like to show off how chat templating works! [/INST]&quot;</span>
<span class="hljs-keyword">const</span> input_ids = tokenizer.<span class="hljs-title function_">apply_chat_template</span>(chat, { <span class="hljs-attr">tokenize</span>: <span class="hljs-literal">true</span>, <span class="hljs-attr">return_tensor</span>: <span class="hljs-literal">false</span>, <span class="hljs-attr">return_dict</span>: <span class="hljs-literal">false</span> });
<span class="hljs-comment">// [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="type-definitions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#type-definitions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Type Definitions</span></h2> <a id="module_tokenizers.PretrainedTokenizerOptions"></a> <h3 class="relative group"><a id="pretrainedtokenizeroptions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizeroptions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 
0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PretrainedTokenizerOptions</span></h3> <p data-svelte-h="svelte-2hx2a2"><em>Type:</em> <a href="./utils/hub#module_utils/hub.PretrainedOptions"><code>PretrainedOptions</code></a></p> <a id="module_tokenizers.TextContent"></a> <h3 class="relative group"><a id="textcontent" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#textcontent"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TextContent</span></h3> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-1dnoeta"><li><code>type</code> (<code>&#39;text&#39;</code>) — The type of content (must be ‘text’).</li> 
<li><code>text</code> (<code>string</code>) — The text content.</li></ul> <a id="module_tokenizers.ImageContent"></a> <h3 class="relative group"><a id="imagecontent" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#imagecontent"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ImageContent</span></h3> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-pt5k9r"><li><p><code>type</code> (<code>&#39;image&#39;</code>) — The type of content (must be ‘image’).</p></li> <li><p><code>image</code> (<code>string</code> | <a href="./utils/image#module_utils/image.RawImage"><code>RawImage</code></a>) <em>optional</em> — Optional URL or instance of the image.</p> <p>Note: This works for SmolVLM. 
Qwen2VL and Idefics3 have different implementations.</p></li></ul> <a id="module_tokenizers.MessageContent"></a> <h3 class="relative group"><a id="messagecontent" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#messagecontent"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>MessageContent</span></h3> <p data-svelte-h="svelte-1wh75qd">A single content block inside a chat message. Extend the union to add
custom types (e.g. <code>AudioContent</code>) when targeting a specific model.</p> <p data-svelte-h="svelte-czyt9z"><em>Type:</em> <a href="./tokenizers#module_tokenizers.TextContent"><code>TextContent</code></a> | <a href="./tokenizers#module_tokenizers.ImageContent"><code>ImageContent</code></a> | <code>{ type: string &amp; {}, [key: string]: any }</code></p> <a id="module_tokenizers.Message"></a> <h3 class="relative group"><a id="message" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#message"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Message</span></h3> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-1ypufej"><li><code>role</code> (<code>&#39;user&#39;</code> | <code>&#39;assistant&#39;</code> | <code>&#39;system&#39;</code> | <code>(string &amp; {})</code>) — The role of the message.</li> <li><code>content</code> (<code>string</code> | <a href="./tokenizers#module_tokenizers.MessageContent"><code>MessageContent</code></a>[]) — The content of the message. 
Can be a simple string or an array of content objects.</li></ul> <a id="module_tokenizers.BatchEncoding"></a> <h3 class="relative group"><a id="batchencoding" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#batchencoding"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>BatchEncoding</span></h3> <p data-svelte-h="svelte-p6gi56">The object returned from <code>tokenizer(text)</code>. The fields are a <code>Tensor</code> by
default, or an <code>Array</code> when <code>return_tensor: false</code> is passed.</p> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-1hpqw4d"><li><code>input_ids</code> (<code>any</code>) — Token ids to be fed to the model.</li> <li><code>attention_mask</code> (<code>any</code>) — Mask indicating which tokens should be attended to (1) versus padded (0).</li> <li><code>token_type_ids</code> (<code>any</code>) <em>optional</em> — Segment ids, present only for tokenizers that distinguish sequence A vs B (e.g. BERT).</li></ul> <a id="module_tokenizers.TokenizerCallOptions"></a> <h3 class="relative group"><a id="tokenizercalloptions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizercalloptions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TokenizerCallOptions</span></h3> <p data-svelte-h="svelte-ez27i7">Options passed to <code>tokenizer(text, options)</code>.</p> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-l9568l"><li><code>text_pair</code> (<code>any</code>) <em>optional</em> — defaults to <code>null</code> — Optional second 
sequence to be encoded. Must match the shape of <code>text</code> — string when <code>text</code> is a string, array when <code>text</code> is an array.</li> <li><code>padding</code> (<code>boolean</code> | <code>&#39;max_length&#39;</code>) <em>optional</em> — defaults to <code>false</code> — Whether to pad the input sequences.</li> <li><code>add_special_tokens</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>true</code> — Whether or not to add the special tokens associated with the corresponding model.</li> <li><code>truncation</code> (<code>boolean</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Whether to truncate the input sequences.</li> <li><code>max_length</code> (<code>number</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Maximum length of the returned list and optionally padding length.</li> <li><code>return_tensor</code> (<code>any</code>) <em>optional</em> — defaults to <code>true</code> — Whether to return the results as Tensors or arrays.</li> <li><code>return_token_type_ids</code> (<code>boolean</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Whether to return the token type ids.</li></ul> <a id="module_tokenizers.ApplyChatTemplateOptions"></a> <h3 class="relative group"><a id="applychattemplateoptions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#applychattemplateoptions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 
0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ApplyChatTemplateOptions</span></h3> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-53g97i"><li><code>chat_template</code> (<code>string</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — A Jinja template to use for this conversion. If omitted, the model’s chat template is used.</li> <li><code>tools</code> (<code>Object[]</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — JSON Schema tool definitions exposed to templates that support function calling.
See the <a href="https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use" rel="nofollow">chat templating guide</a>.</li> <li><code>documents</code> (<code>Record</code>&lt;<code>string</code>, <code>string</code>&gt;[] | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Documents exposed to templates that support retrieval-augmented generation.
See the <a href="https://huggingface.co/docs/transformers/main/en/chat_templating#arguments-for-RAG" rel="nofollow">RAG section</a> of the chat templating guide.</li> <li><code>add_generation_prompt</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — Whether to end the prompt with the token(s) that indicate the start of an assistant message.
The template must support this argument for it to have any effect.</li> <li><code>tokenize</code> (<code>any</code>) <em>optional</em> — defaults to <code>true</code> — Whether to tokenize the output. If false, the output will be a string.</li> <li><code>padding</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — Whether to pad sequences to the maximum length. Has no effect if tokenize is false.</li> <li><code>truncation</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — Whether to truncate sequences to the maximum length. Has no effect if tokenize is false.</li> <li><code>max_length</code> (<code>number</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Maximum length (in tokens) to use for padding or truncation. If omitted, the tokenizer’s <code>max_length</code> is used.
Has no effect if tokenize is false.</li> <li><code>return_tensor</code> (<code>any</code>) <em>optional</em> — defaults to <code>true</code> — Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false.</li> <li><code>return_dict</code> (<code>any</code>) <em>optional</em> — defaults to <code>true</code> — Whether to return a dictionary with named outputs. Has no effect if tokenize is false.</li> <li><code>tokenizer_kwargs</code> (<code>Object</code>) <em>optional</em> — defaults to <code>{}</code> — Additional options to pass to the tokenizer.</li></ul> <h2 class="relative group"><a id="callbacks" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#callbacks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Callbacks</span></h2> <a id="module_tokenizers.PreTrainedTokenizerCallback"></a> <h3 class="relative group"><a id="pretrainedtokenizercallback" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizercallback"><span><svg class="" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizerCallback</span></h3> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-1r5rv57"><li><code>text</code> (<code>string[]?</code>)</li> <li><code>options</code> (<a href="./tokenizers#module_tokenizers.TokenizerCallOptions"><code>TokenizerCallOptions</code></a>&lt;<code>string[]?</code>, <code>boolean = true</code>&gt;) <em>optional</em></li></ul> <p data-svelte-h="svelte-1m4kvqj"><strong>Returns:</strong> <a href="./tokenizers#module_tokenizers.BatchEncoding"><code>BatchEncoding</code></a>&lt;<code>BatchEncodingItem</code>&lt;<code>string[]?</code>, <code>boolean = true</code>&gt;&gt;</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers.js/blob/main/packages/transformers/docs/source/api/tokenizers.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path 
d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
  // Client bootstrap for this page (presumably SvelteKit's generated start-up
  // snippet, judging by the global's name). It publishes path settings on a
  // global, loads the two entry modules, and hands control to `kit.start`.

  // Deliberately assigned without a declaration so it becomes a property of
  // the global object, visible outside this block scope.
  __sveltekit_1t06csc = {
    assets: "/docs/transformers.js/pr_1665/en",
    base: "/docs/transformers.js/pr_1665/en",
    env: {}
  };

  // Must be read synchronously: `document.currentScript` only refers to this
  // <script> while it is executing, not later inside the promise callback.
  const mountPoint = document.currentScript.parentElement;

  // One slot per entry in `node_ids` below; both are empty here.
  const nodeData = [null, null];

  // Start both entry-module downloads right away so they load in parallel.
  const entryModules = [
    import("/docs/transformers.js/pr_1665/en/_app/immutable/entry/start.cb58eb6f.js"),
    import("/docs/transformers.js/pr_1665/en/_app/immutable/entry/app.9eafcf9d.js")
  ];

  Promise.all(entryModules).then((loaded) => {
    const kit = loaded[0];
    const app = loaded[1];
    const startOptions = {
      node_ids: [0, 14],
      data: nodeData,
      form: null,
      error: null
    };
    kit.start(app, mountPoint, startOptions);
  });
}
</script>

Xet Storage Details

Size:
71.5 kB
·
Xet hash:
5ef53cef30bf57b866c3818ba5a98785e93a0873fb9f1bab5c390ec86bc52f3b

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.