Buckets:

download
raw
88.6 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;tokenizers&quot;,&quot;local&quot;:&quot;tokenizers&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;tokenizers.PreTrainedTokenizer&quot;,&quot;local&quot;:&quot;tokenizerspretrainedtokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)&quot;,&quot;local&quot;:&quot;new-pretrainedtokenizertokenizerjson-tokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.convert_tokens_to_ids(tokens) ⇒ any&quot;,&quot;local&quot;:&quot;pretrainedtokenizerconverttokenstoidstokens--any&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer._call(text, [options]) ⇒ BatchEncoding. < BatchEncodingItem. < TText, TReturnTensor > >&quot;,&quot;local&quot;:&quot;pretrainedtokenizercalltext-options--batchencoding--batchencodingitem--ttext-treturntensor--&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer._encode_text(text) ⇒ Array | null&quot;,&quot;local&quot;:&quot;pretrainedtokenizerencodetexttext--array--null&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.tokenize(text, options) ⇒ Array&quot;,&quot;local&quot;:&quot;pretrainedtokenizertokenizetext-options--array&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.encode(text, options) ⇒ Array&quot;,&quot;local&quot;:&quot;pretrainedtokenizerencodetext-options--array&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.batch_decode(batch, decode_args) ⇒ Array&quot;,&quot;local&quot;:&quot;pretrainedtokenizerbatchdecodebatch-decodeargs--array&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.decode(token_ids, [decode_args]) ⇒ 
string&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodetokenids-decodeargs--string&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.decode_single(token_ids, decode_args) ⇒ string&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodesingletokenids-decodeargs--string&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.get_chat_template(options) ⇒ string&quot;,&quot;local&quot;:&quot;pretrainedtokenizergetchattemplateoptions--string&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.apply_chat_template(conversation, [options]) ⇒ ApplyChatTemplateReturn. < TTokenize, TReturnTensor, TReturnDict >&quot;,&quot;local&quot;:&quot;pretrainedtokenizerapplychattemplateconversation-options--applychattemplatereturn--ttokenize-treturntensor-treturndict-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ Promise. < PreTrainedTokenizer >&quot;,&quot;local&quot;:&quot;pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--promise--pretrainedtokenizer-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.loadTokenizer(pretrained_model_name_or_path, options) ⇒ Promise. 
< Array >&quot;,&quot;local&quot;:&quot;tokenizersloadtokenizerpretrainedmodelnameorpath-options--promise--array-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.prepareTensorForDecode(tensor) ⇒ Array&quot;,&quot;local&quot;:&quot;tokenizerspreparetensorfordecodetensor--array&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers._build_translation_inputs(self, raw_inputs, tokenizer_options, generate_kwargs) ⇒ Object&quot;,&quot;local&quot;:&quot;tokenizersbuildtranslationinputsself-rawinputs-tokenizeroptions-generatekwargs--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PretrainedTokenizerOptions : PretrainedOptions&quot;,&quot;local&quot;:&quot;tokenizerspretrainedtokenizeroptions--pretrainedoptions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~TextContent : Object&quot;,&quot;local&quot;:&quot;tokenizerstextcontent--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ImageContent : Object&quot;,&quot;local&quot;:&quot;tokenizersimagecontent--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~MessageContent : TextContent | ImageContent | Object&quot;,&quot;local&quot;:&quot;tokenizersmessagecontent--textcontent--imagecontent--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Message : Object&quot;,&quot;local&quot;:&quot;tokenizersmessage--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BatchEncodingArrayItem : any&quot;,&quot;local&quot;:&quot;tokenizersbatchencodingarrayitem--any&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BatchEncodingItem : 
any&quot;,&quot;local&quot;:&quot;tokenizersbatchencodingitem--any&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BatchEncoding : Object&quot;,&quot;local&quot;:&quot;tokenizersbatchencoding--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~TokenizerCallOptions : Object&quot;,&quot;local&quot;:&quot;tokenizerstokenizercalloptions--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PreTrainedTokenizerCallback : function&quot;,&quot;local&quot;:&quot;tokenizerspretrainedtokenizercallback--function&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ApplyChatTemplateOptions : Object&quot;,&quot;local&quot;:&quot;tokenizersapplychattemplateoptions--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ApplyChatTemplateReturn : any&quot;,&quot;local&quot;:&quot;tokenizersapplychattemplatereturn--any&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link rel="stylesheet" href="/docs/transformers.js/pr_1649/en/_app/immutable/assets/0.e3b0c442.css">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/entry/start.5f753a22.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/chunks/scheduler.6efaaf90.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/chunks/singletons.c0807d75.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/chunks/paths.c5938264.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/entry/app.db3dfcb7.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/chunks/preload-helper.1d8e1e5d.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/chunks/index.eb3e1f0f.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/nodes/0.1b727385.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/nodes/14.14ba932f.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.16f13047.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1649/en/_app/immutable/chunks/CodeBlock.72c8dd07.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;tokenizers&quot;,&quot;local&quot;:&quot;tokenizers&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;tokenizers.PreTrainedTokenizer&quot;,&quot;local&quot;:&quot;tokenizerspretrainedtokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)&quot;,&quot;local&quot;:&quot;new-pretrainedtokenizertokenizerjson-tokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.convert_tokens_to_ids(tokens) ⇒ any&quot;,&quot;local&quot;:&quot;pretrainedtokenizerconverttokenstoidstokens--any&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer._call(text, [options]) ⇒ BatchEncoding. < BatchEncodingItem. < TText, TReturnTensor > >&quot;,&quot;local&quot;:&quot;pretrainedtokenizercalltext-options--batchencoding--batchencodingitem--ttext-treturntensor--&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer._encode_text(text) ⇒ Array | null&quot;,&quot;local&quot;:&quot;pretrainedtokenizerencodetexttext--array--null&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.tokenize(text, options) ⇒ Array&quot;,&quot;local&quot;:&quot;pretrainedtokenizertokenizetext-options--array&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.encode(text, options) ⇒ Array&quot;,&quot;local&quot;:&quot;pretrainedtokenizerencodetext-options--array&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.batch_decode(batch, decode_args) ⇒ 
Array&quot;,&quot;local&quot;:&quot;pretrainedtokenizerbatchdecodebatch-decodeargs--array&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.decode(token_ids, [decode_args]) ⇒ string&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodetokenids-decodeargs--string&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.decode_single(token_ids, decode_args) ⇒ string&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodesingletokenids-decodeargs--string&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.get_chat_template(options) ⇒ string&quot;,&quot;local&quot;:&quot;pretrainedtokenizergetchattemplateoptions--string&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.apply_chat_template(conversation, [options]) ⇒ ApplyChatTemplateReturn. < TTokenize, TReturnTensor, TReturnDict >&quot;,&quot;local&quot;:&quot;pretrainedtokenizerapplychattemplateconversation-options--applychattemplatereturn--ttokenize-treturntensor-treturndict-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ Promise. < PreTrainedTokenizer >&quot;,&quot;local&quot;:&quot;pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--promise--pretrainedtokenizer-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.loadTokenizer(pretrained_model_name_or_path, options) ⇒ Promise. 
< Array >&quot;,&quot;local&quot;:&quot;tokenizersloadtokenizerpretrainedmodelnameorpath-options--promise--array-&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.prepareTensorForDecode(tensor) ⇒ Array&quot;,&quot;local&quot;:&quot;tokenizerspreparetensorfordecodetensor--array&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers._build_translation_inputs(self, raw_inputs, tokenizer_options, generate_kwargs) ⇒ Object&quot;,&quot;local&quot;:&quot;tokenizersbuildtranslationinputsself-rawinputs-tokenizeroptions-generatekwargs--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PretrainedTokenizerOptions : PretrainedOptions&quot;,&quot;local&quot;:&quot;tokenizerspretrainedtokenizeroptions--pretrainedoptions&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~TextContent : Object&quot;,&quot;local&quot;:&quot;tokenizerstextcontent--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ImageContent : Object&quot;,&quot;local&quot;:&quot;tokenizersimagecontent--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~MessageContent : TextContent | ImageContent | Object&quot;,&quot;local&quot;:&quot;tokenizersmessagecontent--textcontent--imagecontent--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Message : Object&quot;,&quot;local&quot;:&quot;tokenizersmessage--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BatchEncodingArrayItem : any&quot;,&quot;local&quot;:&quot;tokenizersbatchencodingarrayitem--any&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BatchEncodingItem : 
any&quot;,&quot;local&quot;:&quot;tokenizersbatchencodingitem--any&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BatchEncoding : Object&quot;,&quot;local&quot;:&quot;tokenizersbatchencoding--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~TokenizerCallOptions : Object&quot;,&quot;local&quot;:&quot;tokenizerstokenizercalloptions--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PreTrainedTokenizerCallback : function&quot;,&quot;local&quot;:&quot;tokenizerspretrainedtokenizercallback--function&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ApplyChatTemplateOptions : Object&quot;,&quot;local&quot;:&quot;tokenizersapplychattemplateoptions--object&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ApplyChatTemplateReturn : any&quot;,&quot;local&quot;:&quot;tokenizersapplychattemplatereturn--any&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <a id="module_tokenizers" class="group"></a> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" 
height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="tokenizers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers</span></h1> <p data-svelte-h="svelte-vqyuk9">Tokenization utilities</p> <ul data-svelte-h="svelte-1y8hapg"><li><a href="#module_tokenizers">tokenizers</a> <ul><li><em>static</em> <ul><li><a href="#module_tokenizers.PreTrainedTokenizer">.PreTrainedTokenizer</a> <ul><li><a href="#new_module_tokenizers.PreTrainedTokenizer_new"><code>new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)</code></a></li> <li><em>instance</em> <ul><li><a href="#module_tokenizers.PreTrainedTokenizer+convert_tokens_to_ids"><code>.convert_tokens_to_ids(tokens)</code></a><code>any</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_call"><code>._call(text, [options])</code></a><code>BatchEncoding.&lt;BatchEncodingItem.&lt;TText, TReturnTensor&gt;&gt;</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_encode_text"><code>._encode_text(text)</code></a><code>Array</code> | <code>null</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+tokenize"><code>.tokenize(text, options)</code></a><code>Array</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+encode"><code>.encode(text, options)</code></a><code>Array</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+batch_decode"><code>.batch_decode(batch, decode_args)</code></a><code>Array</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode"><code>.decode(token_ids, [decode_args])</code></a><code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode_single"><code>.decode_single(token_ids, decode_args)</code></a><code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+get_chat_template"><code>.get_chat_template(options)</code></a><code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+apply_chat_template"><code>.apply_chat_template(conversation, 
[options])</code></a><code>ApplyChatTemplateReturn.&lt;TTokenize, TReturnTensor, TReturnDict&gt;</code></li></ul></li> <li><em>static</em> <ul><li><a href="#module_tokenizers.PreTrainedTokenizer.from_pretrained"><code>.from_pretrained(pretrained_model_name_or_path, options)</code></a><code>Promise.&lt;PreTrainedTokenizer&gt;</code></li></ul></li></ul></li> <li><a href="#module_tokenizers.loadTokenizer"><code>.loadTokenizer(pretrained_model_name_or_path, options)</code></a><code>Promise.&lt;Array&gt;</code></li> <li><a href="#module_tokenizers.prepareTensorForDecode"><code>.prepareTensorForDecode(tensor)</code></a><code>Array</code></li> <li><a href="#module_tokenizers._build_translation_inputs"><code>._build_translation_inputs(self, raw_inputs, tokenizer_options, generate_kwargs)</code></a><code>Object</code></li></ul></li> <li><em>inner</em> <ul><li><a href="#module_tokenizers..PretrainedTokenizerOptions"><code>~PretrainedTokenizerOptions</code></a> : <a href="#PretrainedOptions"><code>PretrainedOptions</code></a></li> <li><a href="#module_tokenizers..TextContent"><code>~TextContent</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..ImageContent"><code>~ImageContent</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..MessageContent"><code>~MessageContent</code></a> : <code>TextContent</code> | <code>ImageContent</code> | <code>Object</code></li> <li><a href="#module_tokenizers..Message"><code>~Message</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..BatchEncodingArrayItem"><code>~BatchEncodingArrayItem</code></a> : <code>any</code></li> <li><a href="#module_tokenizers..BatchEncodingItem"><code>~BatchEncodingItem</code></a> : <code>any</code></li> <li><a href="#module_tokenizers..BatchEncoding"><code>~BatchEncoding</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..TokenizerCallOptions"><code>~TokenizerCallOptions</code></a> : <code>Object</code></li> <li><a 
href="#module_tokenizers..PreTrainedTokenizerCallback"><code>~PreTrainedTokenizerCallback</code></a> : <code>function</code></li> <li><a href="#module_tokenizers..ApplyChatTemplateOptions"><code>~ApplyChatTemplateOptions</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..ApplyChatTemplateReturn"><code>~ApplyChatTemplateReturn</code></a> : <code>any</code></li></ul></li></ul></li></ul> <hr> <a id="module_tokenizers.PreTrainedTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerspretrainedtokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspretrainedtokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.PreTrainedTokenizer</span></h2> <p data-svelte-h="svelte-wbz8zs"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-hhk6jf"><li><a href="#module_tokenizers.PreTrainedTokenizer">.PreTrainedTokenizer</a> <ul><li><a href="#new_module_tokenizers.PreTrainedTokenizer_new"><code>new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)</code></a></li> <li><em>instance</em> <ul><li><a 
href="#module_tokenizers.PreTrainedTokenizer+convert_tokens_to_ids"><code>.convert_tokens_to_ids(tokens)</code></a><code>any</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_call"><code>._call(text, [options])</code></a><code>BatchEncoding.&lt;BatchEncodingItem.&lt;TText, TReturnTensor&gt;&gt;</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_encode_text"><code>._encode_text(text)</code></a><code>Array</code> | <code>null</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+tokenize"><code>.tokenize(text, options)</code></a><code>Array</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+encode"><code>.encode(text, options)</code></a><code>Array</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+batch_decode"><code>.batch_decode(batch, decode_args)</code></a><code>Array</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode"><code>.decode(token_ids, [decode_args])</code></a><code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode_single"><code>.decode_single(token_ids, decode_args)</code></a><code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+get_chat_template"><code>.get_chat_template(options)</code></a><code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+apply_chat_template"><code>.apply_chat_template(conversation, [options])</code></a><code>ApplyChatTemplateReturn.&lt;TTokenize, TReturnTensor, TReturnDict&gt;</code></li></ul></li> <li><em>static</em> <ul><li><a href="#module_tokenizers.PreTrainedTokenizer.from_pretrained"><code>.from_pretrained(pretrained_model_name_or_path, options)</code></a><code>Promise.&lt;PreTrainedTokenizer&gt;</code></li></ul></li></ul></li></ul> <hr> <a id="new_module_tokenizers.PreTrainedTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-pretrainedtokenizertokenizerjson-tokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden 
with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-pretrainedtokenizertokenizerjson-tokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)</span></h3> <p data-svelte-h="svelte-1vkx5qp">Create a new PreTrainedTokenizer instance.</p> <table data-svelte-h="svelte-19pzyzr"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokenizerJSON</td><td><code>Object</code></td><td><p>The JSON of the tokenizer.</p></td> </tr><tr><td>tokenizerConfig</td><td><code>Object</code></td><td><p>The config of the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+convert_tokens_to_ids" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerconverttokenstoidstokens--any" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerconverttokenstoidstokens--any"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.convert_tokens_to_ids(tokens) ⇒ any</span></h3> <p data-svelte-h="svelte-ae0mrq">Converts a token string (or a sequence of tokens) into a single integer id (or a sequence of ids), using the vocabulary.</p> <p data-svelte-h="svelte-1h9ba18"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>any</code> - The token id or list of token ids.</p> <table data-svelte-h="svelte-12w5a1i"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>T</code></td><td><p>One or several token(s) to convert to token id(s).</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+_call" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizercalltext-options--batchencoding--batchencodingitem--ttext-treturntensor--" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizercalltext-options--batchencoding--batchencodingitem--ttext-treturntensor--"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer._call(text, [options]) ⇒ BatchEncoding. &lt; BatchEncodingItem. &lt; TText, TReturnTensor &gt; &gt;</span></h3> <p data-svelte-h="svelte-1il2i3r">Encode/tokenize the given text(s).</p> <p data-svelte-h="svelte-15ngd0t"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>BatchEncoding.&lt;BatchEncodingItem.&lt;TText, TReturnTensor&gt;&gt;</code> - Object to be passed to the model.</p> <table data-svelte-h="svelte-lfis8h"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>TText</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>[options]</td><td><code>TokenizerCallOptions.&lt;TText, TReturnTensor&gt;</code></td><td><p>Additional tokenization options.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+_encode_text" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerencodetexttext--array--null" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerencodetexttext--array--null"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" 
aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer._encode_text(text) ⇒ Array | null</span></h3> <p data-svelte-h="svelte-hojn9c">Encodes a single text using the preprocessor pipeline of the tokenizer.</p> <p data-svelte-h="svelte-31ewsq"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array</code> | <code>null</code> - The encoded tokens.</p> <table data-svelte-h="svelte-x8hb9q"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code> | <code>null</code></td><td><p>The text to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+tokenize" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizertokenizetext-options--array" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizertokenizetext-options--array"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 
11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.tokenize(text, options) ⇒ Array</span></h3> <p data-svelte-h="svelte-1tkmx0p">Converts a string into a sequence of tokens.</p> <p data-svelte-h="svelte-135oaqt"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array</code> - The list of tokens.</p> <table data-svelte-h="svelte-1d3xkw7"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td></td><td><p>The sequence to be encoded.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.pair]</td><td><code>string</code> | <code>null</code></td><td></td><td><p>A second sequence to be encoded with the first.</p></td> </tr><tr><td>[options.add_special_tokens]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether or not to add the special tokens associated with the corresponding model.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+encode" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerencodetext-options--array" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#pretrainedtokenizerencodetext-options--array"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.encode(text, options) ⇒ Array</span></h3> <p data-svelte-h="svelte-sj7zk5">Encodes a single text or a pair of texts using the model’s tokenizer.</p> <p data-svelte-h="svelte-18twv8x"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array</code> - An array of token IDs representing the encoded text(s).</p> <table data-svelte-h="svelte-goy4m0"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td></td><td><p>The text to encode.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.text_pair]</td><td><code>string</code> | <code>null</code></td><td><code>null</code></td><td><p>The optional second text to encode.</p></td> </tr><tr><td>[options.add_special_tokens]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether or not to add the special tokens associated with the corresponding model.</p></td> 
</tr><tr><td>[options.return_token_type_ids]</td><td><code>boolean</code> | <code>null</code></td><td><code></code></td><td><p>Whether to return token_type_ids.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+batch_decode" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerbatchdecodebatch-decodeargs--array" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerbatchdecodebatch-decodeargs--array"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.batch_decode(batch, decode_args) ⇒ Array</span></h3> <p data-svelte-h="svelte-y8f29l">Decode a batch of tokenized sequences.</p> <p data-svelte-h="svelte-1knyj56"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array</code> - List of decoded sequences.</p> <table data-svelte-h="svelte-1t0t4nv"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>batch</td><td><code>Array</code> | <code><a href="#Tensor">Tensor</a></code></td><td><p>List/Tensor of 
tokenized input sequences.</p></td> </tr><tr><td>decode_args</td><td><code>Object</code></td><td><p>(Optional) Object with decoding arguments.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+decode" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerdecodetokenids-decodeargs--string" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerdecodetokenids-decodeargs--string"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.decode(token_ids, [decode_args]) ⇒ string</span></h3> <p data-svelte-h="svelte-168jmgj">Decodes a sequence of token IDs back to a string.</p> <p data-svelte-h="svelte-1ferp3f"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1mplxfj"><li><code>Error</code> If <code>token_ids</code> is not a non-empty array of integers.</li></ul> <table 
data-svelte-h="svelte-1ivdgmm"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>token_ids</td><td><code>Array</code> | <code>Array</code> | <code><a href="#Tensor">Tensor</a></code></td><td></td><td><p>List/Tensor of token IDs to decode.</p></td> </tr><tr><td>[decode_args]</td><td><code>Object</code></td><td><code>{}</code></td><td></td> </tr><tr><td>[decode_args.skip_special_tokens]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>If true, special tokens are removed from the output string.</p></td> </tr><tr><td>[decode_args.clean_up_tokenization_spaces]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>If true, spaces before punctuations and abbreviated forms are removed.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+decode_single" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerdecodesingletokenids-decodeargs--string" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerdecodesingletokenids-decodeargs--string"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>preTrainedTokenizer.decode_single(token_ids, decode_args) ⇒ string</span></h3> <p data-svelte-h="svelte-19e6tws">Decode a single list of token ids to a string.</p> <p data-svelte-h="svelte-7zxcdh"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string</p> <table data-svelte-h="svelte-1fh0s23"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>token_ids</td><td><code>Array</code> | <code>Array</code></td><td></td><td><p>List of token ids to decode</p></td> </tr><tr><td>decode_args</td><td><code>Object</code></td><td></td><td><p>Optional arguments for decoding</p></td> </tr><tr><td>[decode_args.skip_special_tokens]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to skip special tokens during decoding</p></td> </tr><tr><td>[decode_args.clean_up_tokenization_spaces]</td><td><code>boolean</code> | <code>null</code></td><td><code></code></td><td><p>Whether to clean up tokenization spaces during decoding.
If null, the value is set to <code>this.decoder.cleanup</code> if it exists, falling back to <code>this.clean_up_tokenization_spaces</code> if it exists, falling back to <code>true</code>.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+get_chat_template" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizergetchattemplateoptions--string" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizergetchattemplateoptions--string"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.get_chat_template(options) ⇒ string</span></h3> <p data-svelte-h="svelte-1hrpjri">Retrieve the chat template string used for tokenizing chat messages. This template is used
internally by the <code>apply_chat_template</code> method and can also be used externally to retrieve the model’s chat
template for better generation tracking.</p> <p data-svelte-h="svelte-1xojn6p"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The chat template string.</p> <table data-svelte-h="svelte-1n3kol4"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.chat_template]</td><td><code>string</code> | <code>null</code></td><td><code>null</code></td><td><p>A Jinja template or the name of a template to use for this conversion.
It is usually not necessary to pass anything to this argument,
as the model&#39;s template will be used by default.</p></td> </tr><tr><td>[options.tools]</td><td><code>Array</code></td><td><code></code></td><td><p>A list of tools (callable functions) that will be accessible to the model. If the template does not
support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
giving the name, description and argument types for the tool. See our
<a href="https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use">chat templating guide</a>
for more information.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+apply_chat_template" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerapplychattemplateconversation-options--applychattemplatereturn--ttokenize-treturntensor-treturndict-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerapplychattemplateconversation-options--applychattemplatereturn--ttokenize-treturntensor-treturndict-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.apply_chat_template(conversation, [options]) ⇒ ApplyChatTemplateReturn. &lt; TTokenize, TReturnTensor, TReturnDict ></span></h3> <p data-svelte-h="svelte-qfc73q">Converts a list of message objects with <code>&quot;role&quot;</code> and <code>&quot;content&quot;</code> keys to a list of token
ids. This method is intended for use with chat models, and will read the tokenizer’s <code>chat_template</code> attribute to
determine the format and control tokens to use when converting.</p> <p data-svelte-h="svelte-aagj55">See the <a href="https://huggingface.co/docs/transformers/chat_templating" rel="nofollow">chat templating guide</a> for more information.</p> <p data-svelte-h="svelte-h5rmtt"><strong>Example:</strong> Applying a chat template to a conversation.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">AutoTokenizer</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">&quot;@huggingface/transformers&quot;</span>;
<span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(<span class="hljs-string">&quot;Xenova/mistral-tokenizer-v1&quot;</span>);
<span class="hljs-keyword">const</span> chat = [
{ <span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Hello, how are you?&quot;</span> },
{ <span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;I&#x27;m doing great. How can I help you today?&quot;</span> },
{ <span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;I&#x27;d like to show off how chat templating works!&quot;</span> },
]
<span class="hljs-keyword">const</span> text = tokenizer.<span class="hljs-title function_">apply_chat_template</span>(chat, { <span class="hljs-attr">tokenize</span>: <span class="hljs-literal">false</span> });
<span class="hljs-comment">// &quot;&lt;s&gt;[INST] Hello, how are you? [/INST]I&#x27;m doing great. How can I help you today?&lt;/s&gt; [INST] I&#x27;d like to show off how chat templating works! [/INST]&quot;</span>
<span class="hljs-keyword">const</span> input_ids = tokenizer.<span class="hljs-title function_">apply_chat_template</span>(chat, { <span class="hljs-attr">tokenize</span>: <span class="hljs-literal">true</span>, <span class="hljs-attr">return_tensor</span>: <span class="hljs-literal">false</span> });
<span class="hljs-comment">// [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1j4fqaf"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>ApplyChatTemplateReturn.&lt;TTokenize, TReturnTensor, TReturnDict&gt;</code> - The tokenized output.</p> <table data-svelte-h="svelte-f5b404"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>conversation</td><td><code>Array</code></td><td></td><td><p>A list of message objects with <code>&quot;role&quot;</code> and <code>&quot;content&quot;</code> keys,
representing the chat history so far.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.chat_template]</td><td><code>string</code> | <code>null</code></td><td><code>null</code></td><td><p>A Jinja template to use for this conversion. If
this is not passed, the model&#39;s chat template will be used instead.</p></td> </tr><tr><td>[options.tools]</td><td><code>Array</code></td><td><code></code></td><td><p>A list of tools (callable functions) that will be accessible to the model. If the template does not
support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
giving the name, description and argument types for the tool. See our
<a href="https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use">chat templating guide</a>
for more information.</p></td> </tr><tr><td>[options.documents]</td><td><code>Array.&lt;Record&gt;</code></td><td><code></code></td><td><p>A list of objects representing documents that will be accessible to the model if it is performing RAG
(retrieval-augmented generation). If the template does not support RAG, this argument will have no
effect. We recommend that each document be an object containing &quot;title&quot; and &quot;text&quot; keys. Please
see the RAG section of the <a href="https://huggingface.co/docs/transformers/main/en/chat_templating#arguments-for-RAG">chat templating guide</a>
for examples of passing documents with chat templates.</p></td> </tr><tr><td>[options.add_generation_prompt]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to end the prompt with the token(s) that indicate
the start of an assistant message. This is useful when you want to generate a response from the model.
Note that this argument will be passed to the chat template, and so it must be supported in the
template for this argument to have any effect.</p></td> </tr><tr><td>[options.tokenize]</td><td><code>TTokenize</code></td><td><code>true</code></td><td><p>Whether to tokenize the output. If false, the output will be a string.</p></td> </tr><tr><td>[options.padding]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to pad sequences to the maximum length. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.truncation]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to truncate sequences to the maximum length. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.max_length]</td><td><code>number</code> | <code>null</code></td><td><code></code></td><td><p>Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false.
If not specified, the tokenizer&#39;s <code>max_length</code> attribute will be used as a default.</p></td> </tr><tr><td>[options.return_tensor]</td><td><code>TReturnTensor</code></td><td><code>true</code></td><td><p>Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.return_dict]</td><td><code>TReturnDict</code></td><td><code>true</code></td><td><p>Whether to return a dictionary with named outputs. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.tokenizer_kwargs]</td><td><code>Object</code></td><td><code>{}</code></td><td><p>Additional options to pass to the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer.from_pretrained" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--promise--pretrainedtokenizer-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--promise--pretrainedtokenizer-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ Promise. &lt; PreTrainedTokenizer ></span></h3> <p data-svelte-h="svelte-1fjlsit">Loads a pre-trained tokenizer from the given <code>pretrained_model_name_or_path</code>.</p> <p data-svelte-h="svelte-3dl2f7"><strong>Kind</strong>: static method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Promise.&lt;PreTrainedTokenizer&gt;</code> - A new instance of the <code>PreTrainedTokenizer</code> class.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1tvsnc6"><li><code>Error</code> Throws an error if the tokenizer.json or tokenizer_config.json files are not found in the <code>pretrained_model_name_or_path</code>.</li></ul> <table data-svelte-h="svelte-i35q8n"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>pretrained_model_name_or_path</td><td><code>string</code></td><td><p>The path to the pre-trained tokenizer.</p></td> </tr><tr><td>options</td><td><code>PretrainedTokenizerOptions</code></td><td><p>Additional options for loading the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.loadTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersloadtokenizerpretrainedmodelnameorpath-options--promise--array-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersloadtokenizerpretrainedmodelnameorpath-options--promise--array-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 
1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.loadTokenizer(pretrained_model_name_or_path, options) ⇒ Promise. &lt; Array ></span></h2> <p data-svelte-h="svelte-reckhh">Loads a tokenizer from the specified path.</p> <p data-svelte-h="svelte-sh5wov"><strong>Kind</strong>: static method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Promise.&lt;Array&gt;</code> - A promise that resolves with information about the loaded tokenizer.</p> <table data-svelte-h="svelte-1p1jwnz"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>pretrained_model_name_or_path</td><td><code>string</code></td><td><p>The path to the tokenizer directory.</p></td> </tr><tr><td>options</td><td><code>PretrainedTokenizerOptions</code></td><td><p>Additional options for loading the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.prepareTensorForDecode" class="group"></a> <h2 class="relative group"><a id="tokenizerspreparetensorfordecodetensor--array" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspreparetensorfordecodetensor--array"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 
0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.prepareTensorForDecode(tensor) ⇒ Array</span></h2> <p data-svelte-h="svelte-1sig5im">Helper function to convert a tensor to a list before decoding.</p> <p data-svelte-h="svelte-1lah7ff"><strong>Kind</strong>: static method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Array</code> - The tensor as a list.</p> <table data-svelte-h="svelte-1kahhga"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tensor</td><td><code><a href="#Tensor">Tensor</a></code></td><td><p>The tensor to convert.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers._build_translation_inputs" class="group"></a> <h2 class="relative group"><a id="tokenizersbuildtranslationinputsself-rawinputs-tokenizeroptions-generatekwargs--object" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbuildtranslationinputsself-rawinputs-tokenizeroptions-generatekwargs--object"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 
43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers._build_translation_inputs(self, raw_inputs, tokenizer_options, generate_kwargs) ⇒ Object</span></h2> <p data-svelte-h="svelte-it4ekd">Helper function to build translation inputs for an <code>NllbTokenizer</code> or <code>M2M100Tokenizer</code>.</p> <p data-svelte-h="svelte-1s414fo"><strong>Kind</strong>: static method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Object</code> - Object to be passed to the model.</p> <table data-svelte-h="svelte-juufv8"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>self</td><td><code>PreTrainedTokenizer</code></td><td><p>The tokenizer instance.</p></td> </tr><tr><td>raw_inputs</td><td><code>string</code> | <code>Array</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>tokenizer_options</td><td><code>Object</code></td><td><p>Options to be sent to the tokenizer</p></td> </tr><tr><td>generate_kwargs</td><td><code>Object</code></td><td><p>Generation options.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PretrainedTokenizerOptions" class="group"></a> <h2 class="relative group"><a id="tokenizerspretrainedtokenizeroptions--pretrainedoptions" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspretrainedtokenizeroptions--pretrainedoptions"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PretrainedTokenizerOptions : PretrainedOptions</span></h2> <p data-svelte-h="svelte-ec8jqd"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="module_tokenizers..TextContent" class="group"></a> <h2 class="relative group"><a id="tokenizerstextcontent--object" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerstextcontent--object"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~TextContent : Object</span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> 
<strong>Properties</strong></p> <table data-svelte-h="svelte-vfz6hc"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>type</td><td><code>&#39;text&#39;</code></td><td><p>The type of content (must be &#39;text&#39;).</p></td> </tr><tr><td>text</td><td><code>string</code></td><td><p>The text content.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ImageContent" class="group"></a> <h2 class="relative group"><a id="tokenizersimagecontent--object" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersimagecontent--object"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~ImageContent : Object</span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-1q9bdqo"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>type</td><td><code>&#39;image&#39;</code></td><td><p>The type of content (must be &#39;image&#39;).</p></td> </tr><tr><td>[image]</td><td><code>string</code> | <code><a 
href="#RawImage">RawImage</a></code></td><td><p>Optional URL or instance of the image.</p> <p>Note: This works for SmolVLM. Qwen2VL and Idefics3 have different implementations.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..MessageContent" class="group"></a> <h2 class="relative group"><a id="tokenizersmessagecontent--textcontent--imagecontent--object" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersmessagecontent--textcontent--imagecontent--object"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~MessageContent : TextContent | ImageContent | Object</span></h2> <p data-svelte-h="svelte-19aftr9">Base type for message content. This is a discriminated union that can be extended with additional content types.
Example: <code>@typedef {TextContent | ImageContent | AudioContent} MessageContent</code></p> <p data-svelte-h="svelte-ec8jqd"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="module_tokenizers..Message" class="group"></a> <h2 class="relative group"><a id="tokenizersmessage--object" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersmessage--object"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Message : Object</span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-1lb5nmi"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>role</td><td><code>&#39;user&#39;</code> | <code>&#39;assistant&#39;</code> | <code>&#39;system&#39;</code> | <code>string</code></td><td><p>The role of the message.</p></td> </tr><tr><td>content</td><td><code>string</code> | <code>Array</code></td><td><p>The content of the message. 
Can be a simple string or an array of content objects.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BatchEncodingArrayItem" class="group"></a> <h2 class="relative group"><a id="tokenizersbatchencodingarrayitem--any" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbatchencodingarrayitem--any"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BatchEncodingArrayItem : any</span></h2> <p data-svelte-h="svelte-ec8jqd"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="module_tokenizers..BatchEncodingItem" class="group"></a> <h2 class="relative group"><a id="tokenizersbatchencodingitem--any" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbatchencodingitem--any"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 
88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BatchEncodingItem : any</span></h2> <p data-svelte-h="svelte-ec8jqd"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="module_tokenizers..BatchEncoding" class="group"></a> <h2 class="relative group"><a id="tokenizersbatchencoding--object" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbatchencoding--object"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BatchEncoding : Object</span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a 
href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-1heme48"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>input_ids</td><td><code>TItem</code></td><td><p>List of token ids to be fed to a model.</p></td> </tr><tr><td>attention_mask</td><td><code>TItem</code></td><td><p>List of indices specifying which tokens should be attended to by the model.</p></td> </tr><tr><td>[token_type_ids]</td><td><code>TItem</code></td><td><p>List of token type ids to be fed to a model.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..TokenizerCallOptions" class="group"></a> <h2 class="relative group"><a id="tokenizerstokenizercalloptions--object" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerstokenizercalloptions--object"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~TokenizerCallOptions : Object</span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table 
data-svelte-h="svelte-12injka"><thead><tr><th>Name</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>[text_pair]</td><td><code>any</code></td><td><code></code></td><td><p>Optional second sequence to be encoded. If set, must be the same type as text.</p></td> </tr><tr><td>[padding]</td><td><code>boolean</code> | <code>&#39;max_length&#39;</code></td><td><code>false</code></td><td><p>Whether to pad the input sequences.</p></td> </tr><tr><td>[add_special_tokens]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether or not to add the special tokens associated with the corresponding model.</p></td> </tr><tr><td>[truncation]</td><td><code>boolean</code> | <code>null</code></td><td><code></code></td><td><p>Whether to truncate the input sequences.</p></td> </tr><tr><td>[max_length]</td><td><code>number</code> | <code>null</code></td><td><code></code></td><td><p>Maximum length of the returned list and optionally padding length.</p></td> </tr><tr><td>[return_tensor]</td><td><code>TReturnTensor</code></td><td><code>true</code></td><td><p>Whether to return the results as Tensors or arrays.</p></td> </tr><tr><td>[return_token_type_ids]</td><td><code>boolean</code> | <code>null</code></td><td><code></code></td><td><p>Whether to return the token type ids.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTrainedTokenizerCallback" class="group"></a> <h2 class="relative group"><a id="tokenizerspretrainedtokenizercallback--function" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspretrainedtokenizercallback--function"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 
11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PreTrainedTokenizerCallback : function</span></h2> <p data-svelte-h="svelte-ec8jqd"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="module_tokenizers..ApplyChatTemplateOptions" class="group"></a> <h2 class="relative group"><a id="tokenizersapplychattemplateoptions--object" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersapplychattemplateoptions--object"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~ApplyChatTemplateOptions : Object</span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a 
href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-1q26l82"><thead><tr><th>Name</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>[chat_template]</td><td><code>string</code> | <code>null</code></td><td><code>null</code></td><td><p>A Jinja template to use for this conversion.</p></td> </tr><tr><td>[tools]</td><td><code>Array</code> | <code>null</code></td><td><code></code></td><td><p>A list of tools (callable functions) that will be accessible to the model.</p></td> </tr><tr><td>[documents]</td><td><code>Array.&lt;Record&gt;</code> | <code>null</code></td><td><code></code></td><td><p>Documents that will be accessible to the model.</p></td> </tr><tr><td>[add_generation_prompt]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to end the prompt with the token(s) that indicate the start of an assistant message.</p></td> </tr><tr><td>[tokenize]</td><td><code>TTokenize</code></td><td><code>true</code></td><td><p>Whether to tokenize the output. If false, the output will be a string.</p></td> </tr><tr><td>[padding]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to pad sequences to the maximum length. Has no effect if tokenize is false.</p></td> </tr><tr><td>[truncation]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to truncate sequences to the maximum length. Has no effect if tokenize is false.</p></td> </tr><tr><td>[max_length]</td><td><code>number</code> | <code>null</code></td><td><code></code></td><td><p>Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false.</p></td> </tr><tr><td>[return_tensor]</td><td><code>TReturnTensor</code></td><td><code>true</code></td><td><p>Whether to return the output as a Tensor or an Array. 
Has no effect if tokenize is false.</p></td> </tr><tr><td>[return_dict]</td><td><code>TReturnDict</code></td><td><code>true</code></td><td><p>Whether to return a dictionary with named outputs. Has no effect if tokenize is false.</p></td> </tr><tr><td>[tokenizer_kwargs]</td><td><code>Object</code></td><td><code>{}</code></td><td><p>Additional options to pass to the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ApplyChatTemplateReturn" class="group"></a> <h2 class="relative group"><a id="tokenizersapplychattemplatereturn--any" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersapplychattemplatereturn--any"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~ApplyChatTemplateReturn : any</span></h2> <p data-svelte-h="svelte-ec8jqd"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers.js/blob/main/packages/transformers/docs/source/api/tokenizers.md" target="_blank"><svg class="mr-1" 
xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
  // Client bootstrap for this page (presumably emitted by SvelteKit — the
  // global name and `kit.start` call suggest so; confirm against the build).

  // Publish the path configuration on a global. `assets` and `base` carry the
  // same value, so one constant feeds both. The assignment is intentionally
  // undeclared so that it lands on the global object.
  const docsRoot = "/docs/transformers.js/pr_1649/en";
  __sveltekit_rvrl7f = { assets: docsRoot, base: docsRoot, env: {} };

  // `document.currentScript` is only set while this script is executing
  // synchronously, so the mount point is captured before any awaiting happens.
  const mountPoint = document.currentScript.parentElement;
  const hydrationData = [null, null];

  // Fetch both entry modules in parallel, then start the app on the mount point.
  (async () => {
    const [kit, app] = await Promise.all([
      import("/docs/transformers.js/pr_1649/en/_app/immutable/entry/start.5f753a22.js"),
      import("/docs/transformers.js/pr_1649/en/_app/immutable/entry/app.db3dfcb7.js")
    ]);

    const startOptions = {
      node_ids: [0, 14],
      data: hydrationData,
      form: null,
      error: null
    };
    kit.start(app, mountPoint, startOptions);
  })();
}
</script>

Xet Storage Details

Size:
88.6 kB
·
Xet hash:
eedc89493655516ebccfff7eade077b4846c83c4fbc028a0f2bd3478f5dce544

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.