Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content='{"title":"tokenizers","local":"tokenizers","sections":[{"title":"Classes","local":"classes","sections":[{"title":"AutoTokenizer","local":"autotokenizer","sections":[{"title":"AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options)","local":"autotokenizerfrompretrainedpretrainedmodelnameorpath-options","sections":[],"depth":4}],"depth":3},{"title":"PreTrainedTokenizer","local":"pretrainedtokenizer","sections":[{"title":"PreTrainedTokenizer(text, [options])","local":"pretrainedtokenizertext-options","sections":[],"depth":4},{"title":"PreTrainedTokenizer.constructor(tokenizerJSON, tokenizerConfig)","local":"pretrainedtokenizerconstructortokenizerjson-tokenizerconfig","sections":[],"depth":4},{"title":"PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options)","local":"pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options","sections":[],"depth":4},{"title":"PreTrainedTokenizer.convert_tokens_to_ids(tokens)","local":"pretrainedtokenizerconverttokenstoidstokens","sections":[],"depth":4},{"title":"PreTrainedTokenizer.tokenize(text, options)","local":"pretrainedtokenizertokenizetext-options","sections":[],"depth":4},{"title":"PreTrainedTokenizer.encode(text, options)","local":"pretrainedtokenizerencodetext-options","sections":[],"depth":4},{"title":"PreTrainedTokenizer.batch_decode(batch, decode_args)","local":"pretrainedtokenizerbatchdecodebatch-decodeargs","sections":[],"depth":4},{"title":"PreTrainedTokenizer.decode(token_ids, [decode_args])","local":"pretrainedtokenizerdecodetokenids-decodeargs","sections":[],"depth":4},{"title":"PreTrainedTokenizer.decode_single(token_ids, decode_args)","local":"pretrainedtokenizerdecodesingletokenids-decodeargs","sections":[],"depth":4},{"title":"PreTrainedTokenizer.get_chat_template(options)","local":"pretrainedtokenizergetchattemplateoptions","sections":[],"depth":4},{"title":"PreTrainedTokenizer.apply_chat_template(conversation, [options])","local":"pretrainedtokenizerapplychattemplateconversation-options","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"Type Definitions","local":"type-definitions","sections":[{"title":"PretrainedTokenizerOptions","local":"pretrainedtokenizeroptions","sections":[],"depth":3},{"title":"TextContent","local":"textcontent","sections":[],"depth":3},{"title":"ImageContent","local":"imagecontent","sections":[],"depth":3},{"title":"MessageContent","local":"messagecontent","sections":[],"depth":3},{"title":"Message","local":"message","sections":[],"depth":3},{"title":"BatchEncoding","local":"batchencoding","sections":[],"depth":3},{"title":"TokenizerCallOptions","local":"tokenizercalloptions","sections":[],"depth":3},{"title":"ApplyChatTemplateOptions","local":"applychattemplateoptions","sections":[],"depth":3}],"depth":2},{"title":"Callbacks","local":"callbacks","sections":[{"title":"PreTrainedTokenizerCallback","local":"pretrainedtokenizercallback","sections":[],"depth":3}],"depth":2}],"depth":1}'> | |
| <link href="/docs/transformers.js/pr_1665/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/entry/start.cb58eb6f.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/scheduler.6efaaf90.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/singletons.08239980.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/paths.611c3944.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/entry/app.9eafcf9d.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/preload-helper.4b821645.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/index.eb3e1f0f.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/nodes/0.d361a553.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/nodes/14.e2db6d4d.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.699f2734.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1665/en/_app/immutable/chunks/CodeBlock.b303f0b9.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content='{"title":"tokenizers","local":"tokenizers","sections":[{"title":"Classes","local":"classes","sections":[{"title":"AutoTokenizer","local":"autotokenizer","sections":[{"title":"AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options)","local":"autotokenizerfrompretrainedpretrainedmodelnameorpath-options","sections":[],"depth":4}],"depth":3},{"title":"PreTrainedTokenizer","local":"pretrainedtokenizer","sections":[{"title":"PreTrainedTokenizer(text, [options])","local":"pretrainedtokenizertext-options","sections":[],"depth":4},{"title":"PreTrainedTokenizer.constructor(tokenizerJSON, tokenizerConfig)","local":"pretrainedtokenizerconstructortokenizerjson-tokenizerconfig","sections":[],"depth":4},{"title":"PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options)","local":"pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options","sections":[],"depth":4},{"title":"PreTrainedTokenizer.convert_tokens_to_ids(tokens)","local":"pretrainedtokenizerconverttokenstoidstokens","sections":[],"depth":4},{"title":"PreTrainedTokenizer.tokenize(text, options)","local":"pretrainedtokenizertokenizetext-options","sections":[],"depth":4},{"title":"PreTrainedTokenizer.encode(text, options)","local":"pretrainedtokenizerencodetext-options","sections":[],"depth":4},{"title":"PreTrainedTokenizer.batch_decode(batch, decode_args)","local":"pretrainedtokenizerbatchdecodebatch-decodeargs","sections":[],"depth":4},{"title":"PreTrainedTokenizer.decode(token_ids, [decode_args])","local":"pretrainedtokenizerdecodetokenids-decodeargs","sections":[],"depth":4},{"title":"PreTrainedTokenizer.decode_single(token_ids, decode_args)","local":"pretrainedtokenizerdecodesingletokenids-decodeargs","sections":[],"depth":4},{"title":"PreTrainedTokenizer.get_chat_template(options)","local":"pretrainedtokenizergetchattemplateoptions","sections":[],"depth":4},{"title":"PreTrainedTokenizer.apply_chat_template(conversation, [options])","local":"pretrainedtokenizerapplychattemplateconversation-options","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"Type Definitions","local":"type-definitions","sections":[{"title":"PretrainedTokenizerOptions","local":"pretrainedtokenizeroptions","sections":[],"depth":3},{"title":"TextContent","local":"textcontent","sections":[],"depth":3},{"title":"ImageContent","local":"imagecontent","sections":[],"depth":3},{"title":"MessageContent","local":"messagecontent","sections":[],"depth":3},{"title":"Message","local":"message","sections":[],"depth":3},{"title":"BatchEncoding","local":"batchencoding","sections":[],"depth":3},{"title":"TokenizerCallOptions","local":"tokenizercalloptions","sections":[],"depth":3},{"title":"ApplyChatTemplateOptions","local":"applychattemplateoptions","sections":[],"depth":3}],"depth":2},{"title":"Callbacks","local":"callbacks","sections":[{"title":"PreTrainedTokenizerCallback","local":"pretrainedtokenizercallback","sections":[],"depth":3}],"depth":2}],"depth":1}'><!-- HEAD_svelte-u9bgzb_END -->
<p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 
hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="tokenizers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 
1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers</span></h1> <p data-svelte-h="svelte-1i5hmha">Tokenizers turn text into the integer ids a model understands, and | |
| decode model output back into strings. Use <code>AutoTokenizer.from_pretrained()</code> | |
| to load the right implementation for a model ID — the class is chosen from | |
| the tokenizer’s <code>tokenizer_config.json</code>.</p> <p data-svelte-h="svelte-ad986b">For chat-trained models, <code>tokenizer.apply_chat_template()</code> renders an | |
| OpenAI-style message list into the model’s native prompt format.</p> <h2 class="relative group"><a id="classes" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#classes"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Classes</span></h2> <a id="module_tokenizers.AutoTokenizer"></a> <h3 class="relative group"><a id="autotokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#autotokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 
40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>AutoTokenizer</span></h3> <p data-svelte-h="svelte-1hb1bdc">Helper class which is used to instantiate pretrained tokenizers with the <code>from_pretrained</code> function. | |
| The chosen tokenizer class is determined by the type specified in the tokenizer config.</p> <p data-svelte-h="svelte-13a106p"><strong>Example:</strong> Create an <code>AutoTokenizer</code> and use it to tokenize a sentence. | |
| This will automatically detect the tokenizer type based on the tokenizer class defined in <code>tokenizer_config.json</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-javascript "><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">AutoTokenizer</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">'@huggingface/transformers'</span>; | |
| <span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(<span class="hljs-string">'Xenova/bert-base-uncased'</span>); | |
| <span class="hljs-keyword">const</span> { input_ids } = <span class="hljs-keyword">await</span> <span class="hljs-title function_">tokenizer</span>(<span class="hljs-string">'I love transformers!'</span>); | |
| <span class="hljs-comment">// Tensor {</span> | |
| <span class="hljs-comment">// data: BigInt64Array(6) [101n, 1045n, 2293n, 19081n, 999n, 102n],</span> | |
| <span class="hljs-comment">// dims: [1, 6],</span> | |
| <span class="hljs-comment">// type: 'int64',</span> | |
| <span class="hljs-comment">// size: 6,</span> | |
| <span class="hljs-comment">// }</span><!-- HTML_TAG_END --></pre></div> <a id="module_tokenizers.AutoTokenizer.from_pretrained"></a> <h4 class="relative group"><a id="autotokenizerfrompretrainedpretrainedmodelnameorpath-options" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#autotokenizerfrompretrainedpretrainedmodelnameorpath-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options)</span></h4> <p data-svelte-h="svelte-s7y3dm">Instantiate one of the tokenizer classes of the library from a pretrained model.</p> <p data-svelte-h="svelte-weoe0h">The tokenizer class to instantiate is selected based on the <code>tokenizer_class</code> property of the config object | |
| (either passed as an argument or loaded from <code>pretrained_model_name_or_path</code> if possible).</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-1n0esz7"><li><code>pretrained_model_name_or_path</code> (<code>string</code>) — The name or path of the pretrained model. Can be either: | |
| <ul><li>A string, the <em>model id</em> of a pretrained tokenizer hosted inside a model repo on huggingface.co. | |
| Valid model ids can be located at the root-level, like <code>bert-base-uncased</code>, or namespaced under a | |
| user or organization name, like <code>dbmdz/bert-base-german-cased</code>.</li> <li>A path to a <em>directory</em> containing tokenizer files, e.g., <code>./my_model_directory/</code>.</li></ul></li> <li><code>options</code> (<a href="./tokenizers#module_tokenizers.PretrainedTokenizerOptions"><code>PretrainedTokenizerOptions</code></a>) — Additional options for loading the tokenizer.</li></ul> <p data-svelte-h="svelte-1o8tkfk"><strong>Returns:</strong> <code>Promise</code>&lt;<a href="./tokenizers#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a>&gt; — The loaded tokenizer.</p> <a id="module_tokenizers.PreTrainedTokenizer"></a> <h3 class="relative group"><a id="pretrainedtokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer</span></h3> <p data-svelte-h="svelte-sqpc4i"><code>PreTrainedTokenizer</code> is the base class for all tokenizers in Transformers.js.</p> <a id="module_tokenizers.PreTrainedTokenizer.call"></a> <h4 class="relative group"><a id="pretrainedtokenizertext-options" class="header-link block 
pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizertext-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer(text, [options])</span></h4> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-1r5rv57"><li><code>text</code> (<code>string[]?</code>)</li> <li><code>options</code> (<a href="./tokenizers#module_tokenizers.TokenizerCallOptions"><code>TokenizerCallOptions</code></a>&lt;<code>string[]?</code>, <code>boolean = true</code>&gt;) <em>optional</em></li></ul> <p data-svelte-h="svelte-1m4kvqj"><strong>Returns:</strong> <a href="./tokenizers#module_tokenizers.BatchEncoding"><code>BatchEncoding</code></a>&lt;<code>BatchEncodingItem</code>&lt;<code>string[]?</code>, <code>boolean = true</code>&gt;&gt;</p> <a id="module_tokenizers.PreTrainedTokenizer.constructor"></a> <h4 class="relative group"><a id="pretrainedtokenizerconstructortokenizerjson-tokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#pretrainedtokenizerconstructortokenizerjson-tokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.constructor(tokenizerJSON, tokenizerConfig)</span></h4> <p data-svelte-h="svelte-1vkx5qp">Create a new PreTrainedTokenizer instance.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-ccv9g4"><li><code>tokenizerJSON</code> (<code>Object</code>) — The JSON of the tokenizer.</li> <li><code>tokenizerConfig</code> (<code>Object</code>) — The config of the tokenizer.</li></ul> <a id="module_tokenizers.PreTrainedTokenizer.from_pretrained"></a> <h4 class="relative group"><a id="pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options)</span></h4> <p data-svelte-h="svelte-17bml36">Loads a pretrained tokenizer from the given <code>pretrained_model_name_or_path</code>.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-5qp8l2"><li><code>pretrained_model_name_or_path</code> (<code>string</code>) — The path to the pretrained tokenizer.</li> <li><code>options</code> (<a href="./tokenizers#module_tokenizers.PretrainedTokenizerOptions"><code>PretrainedTokenizerOptions</code></a>) — Additional options for loading the tokenizer.</li></ul> <p data-svelte-h="svelte-1qqeet5"><strong>Returns:</strong> <code>Promise</code>&lt;<a href="./tokenizers#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a>&gt; — A new instance of the <code>PreTrainedTokenizer</code> class.</p> <p data-svelte-h="svelte-sl4r8m"><strong>Throws</strong></p> <ul data-svelte-h="svelte-1l00lu1"><li><code>Error</code> — Throws an error if the tokenizer.json or tokenizer_config.json files are not found in the <code>pretrained_model_name_or_path</code>.</li></ul> <a id="module_tokenizers.PreTrainedTokenizer.convert_tokens_to_ids"></a> <h4 class="relative group"><a id="pretrainedtokenizerconverttokenstoidstokens" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#pretrainedtokenizerconverttokenstoidstokens"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.convert_tokens_to_ids(tokens)</span></h4> <p data-svelte-h="svelte-ae0mrq">Converts a token string (or a sequence of tokens) into a single integer id (or a sequence of ids), using the vocabulary.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-1xqll1v"><li><code>tokens</code> (<code>string</code> | <code>string[]</code>) — One or several token(s) to convert to token id(s).</li></ul> <p data-svelte-h="svelte-1adzu1v"><strong>Returns:</strong> <code>number</code> | <code>number[]</code> — The token id or list of token ids.</p> <a id="module_tokenizers.PreTrainedTokenizer.tokenize"></a> <h4 class="relative group"><a id="pretrainedtokenizertokenizetext-options" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizertokenizetext-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 
11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.tokenize(text, options)</span></h4> <p data-svelte-h="svelte-1tkmx0p">Converts a string into a sequence of tokens.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-wunemy"><li><code>text</code> (<code>string</code>) — The sequence to be encoded.</li> <li><code>options</code> (<code>Object</code>) — An optional object containing the following properties: | |
| <ul><li><code>pair</code> (<code>string</code> | <code>null</code>) <em>optional</em> — A second sequence to be encoded with the first.</li> <li><code>add_special_tokens</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — Whether or not to add the special tokens associated with the corresponding model.</li></ul></li></ul> <p data-svelte-h="svelte-1tobfvg"><strong>Returns:</strong> <code>string[]</code> — The list of tokens.</p> <a id="module_tokenizers.PreTrainedTokenizer.encode"></a> <h4 class="relative group"><a id="pretrainedtokenizerencodetext-options" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerencodetext-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.encode(text, options)</span></h4> <p data-svelte-h="svelte-sj7zk5">Encodes a single text or a pair of texts using the model’s tokenizer.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-3an34w"><li><code>text</code> (<code>string</code>) — The text to encode.</li> <li><code>options</code> (<code>Object</code>) — An optional object containing 
the following properties: | |
| <ul><li><code>text_pair</code> (<code>string</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — The optional second text to encode.</li> <li><code>add_special_tokens</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>true</code> — Whether or not to add the special tokens associated with the corresponding model.</li> <li><code>return_token_type_ids</code> (<code>boolean</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Whether to return token_type_ids.</li></ul></li></ul> <p data-svelte-h="svelte-blb5y8"><strong>Returns:</strong> <code>number[]</code> — An array of token IDs representing the encoded text(s).</p> <a id="module_tokenizers.PreTrainedTokenizer.batch_decode"></a> <h4 class="relative group"><a id="pretrainedtokenizerbatchdecodebatch-decodeargs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerbatchdecodebatch-decodeargs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.batch_decode(batch, decode_args)</span></h4> <p data-svelte-h="svelte-y8f29l">Decode a batch of tokenized sequences.</p> <p 
data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-quebp8"><li><code>batch</code> (<code>number[][]</code> | <a href="./utils/tensor#module_utils/tensor.Tensor"><code>Tensor</code></a>) — List/Tensor of tokenized input sequences.</li> <li><code>decode_args</code> (<code>Object</code>) — (Optional) Object with decoding arguments.</li></ul> <p data-svelte-h="svelte-171rm6x"><strong>Returns:</strong> <code>string[]</code> — List of decoded sequences.</p> <a id="module_tokenizers.PreTrainedTokenizer.decode"></a> <h4 class="relative group"><a id="pretrainedtokenizerdecodetokenids-decodeargs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerdecodetokenids-decodeargs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.decode(token_ids, [decode_args])</span></h4> <p data-svelte-h="svelte-168jmgj">Decodes a sequence of token IDs back to a string.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-174tohb"><li><code>token_ids</code> (<code>number[]</code> | <code>bigint[]</code> | <a 
href="./utils/tensor#module_utils/tensor.Tensor"><code>Tensor</code></a>) — List/Tensor of token IDs to decode.</li> <li><code>decode_args</code> (<code>Object</code>) <em>optional</em> — defaults to <code>{}</code> <ul><li><code>skip_special_tokens</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — If true, special tokens are removed from the output string.</li> <li><code>clean_up_tokenization_spaces</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>true</code> — If true, spaces before punctuation and abbreviated forms are removed.</li></ul></li></ul> <p data-svelte-h="svelte-1tc18iy"><strong>Returns:</strong> <code>string</code> — The decoded string.</p> <p data-svelte-h="svelte-sl4r8m"><strong>Throws</strong></p> <ul data-svelte-h="svelte-1kf4bd8"><li><code>Error</code> — If <code>token_ids</code> is not a non-empty array of integers.</li></ul> <a id="module_tokenizers.PreTrainedTokenizer.decode_single"></a> <h4 class="relative group"><a id="pretrainedtokenizerdecodesingletokenids-decodeargs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerdecodesingletokenids-decodeargs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 
0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.decode_single(token_ids, decode_args)</span></h4> <p data-svelte-h="svelte-19e6tws">Decode a single list of token ids to a string.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-1vyyepu"><li><code>token_ids</code> (<code>number[]</code> | <code>bigint[]</code>) — List of token ids to decode</li> <li><code>decode_args</code> (<code>Object</code>) — Optional arguments for decoding | |
| <ul><li><code>skip_special_tokens</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — Whether to skip special tokens during decoding</li> <li><code>clean_up_tokenization_spaces</code> (<code>boolean</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Whether to clean up tokenization spaces during decoding. | |
| If null, the value is set to <code>this.decoder.cleanup</code> if it exists, falling back to <code>this.clean_up_tokenization_spaces</code> if it exists, falling back to <code>true</code>.</li></ul></li></ul> <p data-svelte-h="svelte-o0b1jc"><strong>Returns:</strong> <code>string</code> — The decoded string</p> <a id="module_tokenizers.PreTrainedTokenizer.get_chat_template"></a> <h4 class="relative group"><a id="pretrainedtokenizergetchattemplateoptions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizergetchattemplateoptions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.get_chat_template(options)</span></h4> <p data-svelte-h="svelte-1hrpjri">Retrieve the chat template string used for tokenizing chat messages. This template is used | |
| internally by the <code>apply_chat_template</code> method and can also be used externally to retrieve the model’s chat | |
| template for better generation tracking.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-rom54w"><li><code>options</code> (<code>Object</code>) — An optional object containing the following properties: | |
| <ul><li><code>chat_template</code> (<code>string</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — A Jinja template or the name of a template to use for this conversion. | |
| It is usually not necessary to pass anything to this argument, | |
| as the model’s template will be used by default.</li> <li><code>tools</code> (<code>Object[]</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — A list of tools (callable functions) that will be accessible to the model. If the template does not | |
| support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema, | |
| giving the name, description and argument types for the tool. See our | |
| <a href="https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use" rel="nofollow">chat templating guide</a> | |
| for more information.</li></ul></li></ul> <p data-svelte-h="svelte-1yeaouc"><strong>Returns:</strong> <code>string</code> — The chat template string.</p> <a id="module_tokenizers.PreTrainedTokenizer.apply_chat_template"></a> <h4 class="relative group"><a id="pretrainedtokenizerapplychattemplateconversation-options" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerapplychattemplateconversation-options"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.apply_chat_template(conversation, [options])</span></h4> <p data-svelte-h="svelte-qfc73q">Converts a list of message objects with <code>"role"</code> and <code>"content"</code> keys to a list of token | |
| ids. This method is intended for use with chat models, and will read the tokenizer’s chat_template attribute to | |
| determine the format and control tokens to use when converting.</p> <p data-svelte-h="svelte-f86ycd">See the <a href="https://huggingface.co/docs/transformers/chat_templating" rel="nofollow">chat templating guide</a> for more information.</p> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-g6dc3z"><li><code>conversation</code> (<a href="./tokenizers#module_tokenizers.Message"><code>Message</code></a>[]) — A list of message objects with <code>"role"</code> and <code>"content"</code> keys, | |
| representing the chat history so far.</li> <li><code>options</code> (<a href="./tokenizers#module_tokenizers.ApplyChatTemplateOptions"><code>ApplyChatTemplateOptions</code></a>&lt;<code>TTokenize</code>, <code>TReturnTensor</code>, <code>TReturnDict</code>&gt;) <em>optional</em> — Options controlling | |
| template rendering and tokenization.</li></ul> <p data-svelte-h="svelte-1boylzr"><strong>Returns:</strong> <code>ApplyChatTemplateReturn</code><<code>TTokenize</code>, <code>TReturnTensor</code>, <code>TReturnDict</code>> — The tokenized output.</p> <p data-svelte-h="svelte-h5rmtt"><strong>Example:</strong> Applying a chat template to a conversation.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-javascript "><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">AutoTokenizer</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">"@huggingface/transformers"</span>; | |
| <span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(<span class="hljs-string">"Xenova/mistral-tokenizer-v1"</span>); | |
| <span class="hljs-keyword">const</span> chat = [ | |
| { <span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Hello, how are you?"</span> }, | |
| { <span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'm doing great. How can I help you today?"</span> }, | |
| { <span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'd like to show off how chat templating works!"</span> }, | |
| ] | |
| <span class="hljs-keyword">const</span> text = tokenizer.<span class="hljs-title function_">apply_chat_template</span>(chat, { <span class="hljs-attr">tokenize</span>: <span class="hljs-literal">false</span> }); | |
| <span class="hljs-comment">// "&lt;s&gt;[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?&lt;/s&gt; [INST] I'd like to show off how chat templating works! [/INST]"</span> | |
| <span class="hljs-keyword">const</span> input_ids = tokenizer.<span class="hljs-title function_">apply_chat_template</span>(chat, { <span class="hljs-attr">tokenize</span>: <span class="hljs-literal">true</span>, <span class="hljs-attr">return_tensor</span>: <span class="hljs-literal">false</span> }); | |
| <span class="hljs-comment">// [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="type-definitions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#type-definitions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Type Definitions</span></h2> <a id="module_tokenizers.PretrainedTokenizerOptions"></a> <h3 class="relative group"><a id="pretrainedtokenizeroptions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizeroptions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 
1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PretrainedTokenizerOptions</span></h3> <p data-svelte-h="svelte-2hx2a2"><em>Type:</em> <a href="./utils/hub#module_utils/hub.PretrainedOptions"><code>PretrainedOptions</code></a></p> <a id="module_tokenizers.TextContent"></a> <h3 class="relative group"><a id="textcontent" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#textcontent"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TextContent</span></h3> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-1dnoeta"><li><code>type</code> (<code>'text'</code>) — The type of content (must be ‘text’).</li> <li><code>text</code> 
(<code>string</code>) — The text content.</li></ul> <a id="module_tokenizers.ImageContent"></a> <h3 class="relative group"><a id="imagecontent" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#imagecontent"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ImageContent</span></h3> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-pt5k9r"><li><p><code>type</code> (<code>'image'</code>) — The type of content (must be ‘image’).</p></li> <li><p><code>image</code> (<code>string</code> | <a href="./utils/image#module_utils/image.RawImage"><code>RawImage</code></a>) <em>optional</em> — Optional URL or instance of the image.</p> <p>Note: This works for SmolVLM. 
Qwen2VL and Idefics3 have different implementations.</p></li></ul> <a id="module_tokenizers.MessageContent"></a> <h3 class="relative group"><a id="messagecontent" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#messagecontent"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>MessageContent</span></h3> <p data-svelte-h="svelte-1wh75qd">A single content block inside a chat message. Extend the union to add | |
| custom types (e.g. <code>AudioContent</code>) when targeting a specific model.</p> <p data-svelte-h="svelte-czyt9z"><em>Type:</em> <a href="./tokenizers#module_tokenizers.TextContent"><code>TextContent</code></a> | <a href="./tokenizers#module_tokenizers.ImageContent"><code>ImageContent</code></a> | <code>{ type: string & {}, [key: string]: any }</code></p> <a id="module_tokenizers.Message"></a> <h3 class="relative group"><a id="message" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#message"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Message</span></h3> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-1ypufej"><li><code>role</code> (<code>'user'</code> | <code>'assistant'</code> | <code>'system'</code> | <code>(string & {})</code>) — The role of the message.</li> <li><code>content</code> (<code>string</code> | <a href="./tokenizers#module_tokenizers.MessageContent"><code>MessageContent</code></a>[]) — The content of the message. 
Can be a simple string or an array of content objects.</li></ul> <a id="module_tokenizers.BatchEncoding"></a> <h3 class="relative group"><a id="batchencoding" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#batchencoding"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>BatchEncoding</span></h3> <p data-svelte-h="svelte-p6gi56">The object returned from <code>tokenizer(text)</code>. The fields are a <code>Tensor</code> by | |
| default, or an <code>Array</code> when <code>return_tensor: false</code> is passed.</p> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-1hpqw4d"><li><code>input_ids</code> (<code>any</code>) — Token ids to be fed to the model.</li> <li><code>attention_mask</code> (<code>any</code>) — Mask indicating which tokens should be attended to (1) versus padded (0).</li> <li><code>token_type_ids</code> (<code>any</code>) <em>optional</em> — Segment ids, present only for tokenizers that distinguish sequence A vs B (e.g. BERT).</li></ul> <a id="module_tokenizers.TokenizerCallOptions"></a> <h3 class="relative group"><a id="tokenizercalloptions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizercalloptions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TokenizerCallOptions</span></h3> <p data-svelte-h="svelte-ez27i7">Options passed to <code>tokenizer(text, options)</code>.</p> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-l9568l"><li><code>text_pair</code> (<code>any</code>) <em>optional</em> — defaults to <code>null</code> — Optional second 
sequence to be encoded. Must match the shape of <code>text</code> — string when <code>text</code> is a string, array when <code>text</code> is an array.</li> <li><code>padding</code> (<code>boolean</code> | <code>'max_length'</code>) <em>optional</em> — defaults to <code>false</code> — Whether to pad the input sequences.</li> <li><code>add_special_tokens</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>true</code> — Whether or not to add the special tokens associated with the corresponding model.</li> <li><code>truncation</code> (<code>boolean</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Whether to truncate the input sequences.</li> <li><code>max_length</code> (<code>number</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Maximum length of the returned list and optionally padding length.</li> <li><code>return_tensor</code> (<code>any</code>) <em>optional</em> — defaults to <code>true</code> — Whether to return the results as Tensors or arrays.</li> <li><code>return_token_type_ids</code> (<code>boolean</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Whether to return the token type ids.</li></ul> <a id="module_tokenizers.ApplyChatTemplateOptions"></a> <h3 class="relative group"><a id="applychattemplateoptions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#applychattemplateoptions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 
28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ApplyChatTemplateOptions</span></h3> <p data-svelte-h="svelte-drbxoe"><strong>Properties</strong></p> <ul data-svelte-h="svelte-53g97i"><li><code>chat_template</code> (<code>string</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — A Jinja template to use for this conversion. If omitted, the model’s chat template is used.</li> <li><code>tools</code> (<code>Object[]</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — JSON Schema tool definitions exposed to templates that support function calling. | |
| See the <a href="https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use" rel="nofollow">chat templating guide</a>.</li> <li><code>documents</code> (<code>Record</code><<code>string</code>, <code>string</code>>[] | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Documents exposed to templates that support retrieval-augmented generation. | |
| See the <a href="https://huggingface.co/docs/transformers/main/en/chat_templating#arguments-for-RAG" rel="nofollow">RAG section</a> of the chat templating guide.</li> <li><code>add_generation_prompt</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — Whether to end the prompt with the token(s) that indicate the start of an assistant message. | |
| The template must support this argument for it to have any effect.</li> <li><code>tokenize</code> (<code>any</code>) <em>optional</em> — defaults to <code>true</code> — Whether to tokenize the output. If false, the output will be a string.</li> <li><code>padding</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — Whether to pad sequences to the maximum length. Has no effect if tokenize is false.</li> <li><code>truncation</code> (<code>boolean</code>) <em>optional</em> — defaults to <code>false</code> — Whether to truncate sequences to the maximum length. Has no effect if tokenize is false.</li> <li><code>max_length</code> (<code>number</code> | <code>null</code>) <em>optional</em> — defaults to <code>null</code> — Maximum length (in tokens) to use for padding or truncation. If omitted, the tokenizer’s <code>max_length</code> is used. | |
| Has no effect if tokenize is false.</li> <li><code>return_tensor</code> (<code>any</code>) <em>optional</em> — defaults to <code>true</code> — Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false.</li> <li><code>return_dict</code> (<code>any</code>) <em>optional</em> — defaults to <code>true</code> — Whether to return a dictionary with named outputs. Has no effect if tokenize is false.</li> <li><code>tokenizer_kwargs</code> (<code>Object</code>) <em>optional</em> — defaults to <code>{}</code> — Additional options to pass to the tokenizer.</li></ul> <h2 class="relative group"><a id="callbacks" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#callbacks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Callbacks</span></h2> <a id="module_tokenizers.PreTrainedTokenizerCallback"></a> <h3 class="relative group"><a id="pretrainedtokenizercallback" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizercallback"><span><svg class="" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizerCallback</span></h3> <p data-svelte-h="svelte-11fqvcp"><strong>Parameters</strong></p> <ul data-svelte-h="svelte-1r5rv57"><li><code>text</code> (<code>string[]?</code>)</li> <li><code>options</code> (<a href="./tokenizers#module_tokenizers.TokenizerCallOptions"><code>TokenizerCallOptions</code></a>&lt;<code>string[]?</code>, <code>boolean = true</code>&gt;) <em>optional</em></li></ul> <p data-svelte-h="svelte-1m4kvqj"><strong>Returns:</strong> <a href="./tokenizers#module_tokenizers.BatchEncoding"><code>BatchEncoding</code></a>&lt;<code>BatchEncodingItem</code>&lt;<code>string[]?</code>, <code>boolean = true</code>&gt;&gt;</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers.js/blob/main/packages/transformers/docs/source/api/tokenizers.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path 
d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1t06csc = { | |
| assets: "/docs/transformers.js/pr_1665/en", | |
| base: "/docs/transformers.js/pr_1665/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/transformers.js/pr_1665/en/_app/immutable/entry/start.cb58eb6f.js"), | |
| import("/docs/transformers.js/pr_1665/en/_app/immutable/entry/app.9eafcf9d.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 14], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 71.5 kB
- Xet hash:
- 5ef53cef30bf57b866c3818ba5a98785e93a0873fb9f1bab5c390ec86bc52f3b
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.