Buckets:

rtrm's picture
download
raw
487 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;tokenizers&quot;,&quot;local&quot;:&quot;tokenizers&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;tokenizers.TokenizerModel ⇐ <code> Callable </code>&quot;,&quot;local&quot;:&quot;tokenizerstokenizermodel--code-callable-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new TokenizerModel(config)&quot;,&quot;local&quot;:&quot;new-tokenizermodelconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.vocab : <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;tokenizermodelvocab--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.tokens_to_ids : <code> Map. < string, number > </code>&quot;,&quot;local&quot;:&quot;tokenizermodeltokenstoids--code-map--string-number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.fuse_unk : <code> boolean </code>&quot;,&quot;local&quot;:&quot;tokenizermodelfuseunk--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel._call(tokens) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;tokenizermodelcalltokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.encode(tokens) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;tokenizermodelencodetokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.convert_tokens_to_ids(tokens) ⇒ <code> Array. < number > </code>&quot;,&quot;local&quot;:&quot;tokenizermodelconverttokenstoidstokens--code-array--number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.convert_ids_to_tokens(ids) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;tokenizermodelconvertidstotokensids--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;TokenizerModel.fromConfig(config, ...args) ⇒ <code> TokenizerModel </code>&quot;,&quot;local&quot;:&quot;tokenizermodelfromconfigconfig-args--code-tokenizermodel-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.PreTrainedTokenizer&quot;,&quot;local&quot;:&quot;tokenizerspretrainedtokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)&quot;,&quot;local&quot;:&quot;new-pretrainedtokenizertokenizerjson-tokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.added_tokens : <code> Array. < AddedToken > </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizeraddedtokens--code-array--addedtoken--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.remove_space : <code> boolean </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerremovespace--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer._call(text, options) ⇒ <code> BatchEncoding </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizercalltext-options--code-batchencoding-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer._encode_text(text) ⇒ <code> Array < string > </code> | <code> null </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerencodetexttext--code-array--string--code--code-null-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer._tokenize_helper(text, options) ⇒ <code> * 
</code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizertokenizehelpertext-options--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.tokenize(text, options) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizertokenizetext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.encode(text, options) ⇒ <code> Array. < number > </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerencodetext-options--code-array--number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.batch_decode(batch, decode_args) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerbatchdecodebatch-decodeargs--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.decode(token_ids, [decode_args]) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodetokenids-decodeargs--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.decode_single(token_ids, decode_args) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodesingletokenids-decodeargs--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.get_chat_template(options) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizergetchattemplateoptions--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.apply_chat_template(conversation, options) ⇒ <code> string </code> | <code> Tensor </code> | <code> Array < number > </code> | <code> Array < Array < number > > </code> | <code> BatchEncoding 
</code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerapplychattemplateconversation-options--code-string-code--code-tensor-code--code-array--number--code--code-array--array--number---code--code-batchencoding-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ <code> Promise. < PreTrainedTokenizer > </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.BertTokenizer ⇐ <code> PreTrainedTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersberttokenizer--code-pretrainedtokenizer-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.AlbertTokenizer ⇐ <code> PreTrainedTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersalberttokenizer--code-pretrainedtokenizer-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.NllbTokenizer&quot;,&quot;local&quot;:&quot;tokenizersnllbtokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;nllbTokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ <code> Object </code>&quot;,&quot;local&quot;:&quot;nllbtokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.M2M100Tokenizer&quot;,&quot;local&quot;:&quot;tokenizersm2m100tokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;m2M100Tokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ <code> Object 
</code>&quot;,&quot;local&quot;:&quot;m2m100tokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.WhisperTokenizer ⇐ <code> PreTrainedTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizerswhispertokenizer--code-pretrainedtokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;whisperTokenizer._decode_asr(sequences, options) ⇒ <code> * </code>&quot;,&quot;local&quot;:&quot;whispertokenizerdecodeasrsequences-options--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;whisperTokenizer.decode() : <code> * </code>&quot;,&quot;local&quot;:&quot;whispertokenizerdecode--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.MarianTokenizer&quot;,&quot;local&quot;:&quot;tokenizersmariantokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new MarianTokenizer(tokenizerJSON, tokenizerConfig)&quot;,&quot;local&quot;:&quot;new-mariantokenizertokenizerjson-tokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;marianTokenizer._encode_text(text) ⇒ <code> Array </code>&quot;,&quot;local&quot;:&quot;mariantokenizerencodetexttext--code-array-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.AutoTokenizer&quot;,&quot;local&quot;:&quot;tokenizersautotokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new AutoTokenizer()&quot;,&quot;local&quot;:&quot;new-autotokenizer&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ <code> Promise. 
< PreTrainedTokenizer > </code>&quot;,&quot;local&quot;:&quot;autotokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.is_chinese_char(cp) ⇒ <code> boolean </code>&quot;,&quot;local&quot;:&quot;tokenizersischinesecharcp--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~AddedToken&quot;,&quot;local&quot;:&quot;tokenizersaddedtoken&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new AddedToken(config)&quot;,&quot;local&quot;:&quot;new-addedtokenconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~WordPieceTokenizer ⇐ <code> TokenizerModel </code>&quot;,&quot;local&quot;:&quot;tokenizerswordpiecetokenizer--code-tokenizermodel-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new WordPieceTokenizer(config)&quot;,&quot;local&quot;:&quot;new-wordpiecetokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.tokens_to_ids : <code> Map. 
< string, number > </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizertokenstoids--code-map--string-number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.unk_token_id : <code> number </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizerunktokenid--code-number-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.unk_token : <code> string </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizerunktoken--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.max_input_chars_per_word : <code> number </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizermaxinputcharsperword--code-number-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.vocab : <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizervocab--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.encode(tokens) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizerencodetokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Unigram ⇐ <code> TokenizerModel </code>&quot;,&quot;local&quot;:&quot;tokenizersunigram--code-tokenizermodel-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new Unigram(config, moreConfig)&quot;,&quot;local&quot;:&quot;new-unigramconfig-moreconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;unigram.populateNodes(lattice)&quot;,&quot;local&quot;:&quot;unigrampopulatenodeslattice&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;unigram.tokenize(normalized) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;unigramtokenizenormalized--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;unigram.encode(tokens) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;unigramencodetokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BPE ⇐ <code> TokenizerModel </code>&quot;,&quot;local&quot;:&quot;tokenizersbpe--code-tokenizermodel-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new BPE(config)&quot;,&quot;local&quot;:&quot;new-bpeconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bpE.tokens_to_ids : <code> Map. < string, number > </code>&quot;,&quot;local&quot;:&quot;bpetokenstoids--code-map--string-number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bpE.merges : <code> * </code>&quot;,&quot;local&quot;:&quot;bpemerges--code--code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;merges.config.merges : <code> * </code>&quot;,&quot;local&quot;:&quot;mergesconfigmerges--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bpE.cache : <code> Map. < string, Array < string > > </code>&quot;,&quot;local&quot;:&quot;bpecache--code-map--string-array--string---code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bpE.bpe(token) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;bpebpetoken--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bpE.encode(tokens) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;bpeencodetokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~LegacyTokenizerModel&quot;,&quot;local&quot;:&quot;tokenizerslegacytokenizermodel&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new LegacyTokenizerModel(config, moreConfig)&quot;,&quot;local&quot;:&quot;new-legacytokenizermodelconfig-moreconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;legacyTokenizerModel.tokens_to_ids : <code> Map. < string, number > </code>&quot;,&quot;local&quot;:&quot;legacytokenizermodeltokenstoids--code-map--string-number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Normalizer&quot;,&quot;local&quot;:&quot;tokenizersnormalizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new Normalizer(config)&quot;,&quot;local&quot;:&quot;new-normalizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;normalizer.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;normalizernormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;normalizer._call(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;normalizercalltext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Normalizer.fromConfig(config) ⇒ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;normalizerfromconfigconfig--code-normalizer-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Replace ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersreplace--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;replace.normalize(text) ⇒ <code> string 
</code>&quot;,&quot;local&quot;:&quot;replacenormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~NFC ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersnfc--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;nfC.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;nfcnormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~NFKC ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersnfkc--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;nfkC.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;nfkcnormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~NFKD ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersnfkd--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;nfkD.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;nfkdnormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~StripNormalizer&quot;,&quot;local&quot;:&quot;tokenizersstripnormalizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;stripNormalizer.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;stripnormalizernormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~StripAccents ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersstripaccents--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;stripAccents.normalize(text) ⇒ <code> string 
</code>&quot;,&quot;local&quot;:&quot;stripaccentsnormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Lowercase ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizerslowercase--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;lowercase.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;lowercasenormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Prepend ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersprepend--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;prepend.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;prependnormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~NormalizerSequence ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersnormalizersequence--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new NormalizerSequence(config)&quot;,&quot;local&quot;:&quot;new-normalizersequenceconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;normalizerSequence.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;normalizersequencenormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BertNormalizer ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersbertnormalizer--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;bertNormalizer._tokenize_chinese_chars(text) ⇒ <code> string 
</code>&quot;,&quot;local&quot;:&quot;bertnormalizertokenizechinesecharstext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bertNormalizer.stripAccents(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;bertnormalizerstripaccentstext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bertNormalizer.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;bertnormalizernormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PreTokenizer ⇐ <code> Callable </code>&quot;,&quot;local&quot;:&quot;tokenizerspretokenizer--code-callable-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;preTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;pretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTokenizer.pre_tokenize(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;pretokenizerpretokenizetext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTokenizer._call(text, [options]) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;pretokenizercalltext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;PreTokenizer.fromConfig(config) ⇒ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;pretokenizerfromconfigconfig--code-pretokenizer-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BertPreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersbertpretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new BertPreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-bertpretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bertPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;bertpretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ByteLevelPreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersbytelevelpretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new ByteLevelPreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-bytelevelpretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelPreTokenizer.add_prefix_space : <code> boolean </code>&quot;,&quot;local&quot;:&quot;bytelevelpretokenizeraddprefixspace--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelPreTokenizer.trim_offsets : <code> boolean </code>&quot;,&quot;local&quot;:&quot;bytelevelpretokenizertrimoffsets--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelPreTokenizer.use_regex : <code> boolean 
</code>&quot;,&quot;local&quot;:&quot;bytelevelpretokenizeruseregex--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;bytelevelpretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~SplitPreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizerssplitpretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new SplitPreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-splitpretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;splitPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;splitpretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PunctuationPreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizerspunctuationpretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PunctuationPreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-punctuationpretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;punctuationPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;punctuationpretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~DigitsPreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersdigitspretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new DigitsPreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-digitspretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;digitsPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;digitspretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PostProcessor ⇐ <code> Callable </code>&quot;,&quot;local&quot;:&quot;tokenizerspostprocessor--code-callable-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PostProcessor(config)&quot;,&quot;local&quot;:&quot;new-postprocessorconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;postProcessor.post_process(tokens, ...args) ⇒ <code> PostProcessedOutput </code>&quot;,&quot;local&quot;:&quot;postprocessorpostprocesstokens-args--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;postProcessor._call(tokens, ...args) ⇒ <code> PostProcessedOutput </code>&quot;,&quot;local&quot;:&quot;postprocessorcalltokens-args--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;PostProcessor.fromConfig(config) ⇒ <code> PostProcessor 
</code>&quot;,&quot;local&quot;:&quot;postprocessorfromconfigconfig--code-postprocessor-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BertProcessing&quot;,&quot;local&quot;:&quot;tokenizersbertprocessing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new BertProcessing(config)&quot;,&quot;local&quot;:&quot;new-bertprocessingconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bertProcessing.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>&quot;,&quot;local&quot;:&quot;bertprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~TemplateProcessing ⇐ <code> PostProcessor </code>&quot;,&quot;local&quot;:&quot;tokenizerstemplateprocessing--code-postprocessor-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new TemplateProcessing(config)&quot;,&quot;local&quot;:&quot;new-templateprocessingconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;templateProcessing.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>&quot;,&quot;local&quot;:&quot;templateprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ByteLevelPostProcessor ⇐ <code> PostProcessor </code>&quot;,&quot;local&quot;:&quot;tokenizersbytelevelpostprocessor--code-postprocessor-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;byteLevelPostProcessor.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput 
</code>&quot;,&quot;local&quot;:&quot;bytelevelpostprocessorpostprocesstokens-tokenspair--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PostProcessorSequence&quot;,&quot;local&quot;:&quot;tokenizerspostprocessorsequence&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PostProcessorSequence(config)&quot;,&quot;local&quot;:&quot;new-postprocessorsequenceconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;postProcessorSequence.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>&quot;,&quot;local&quot;:&quot;postprocessorsequencepostprocesstokens-tokenspair--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Decoder ⇐ <code> Callable </code>&quot;,&quot;local&quot;:&quot;tokenizersdecoder--code-callable-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new Decoder(config)&quot;,&quot;local&quot;:&quot;new-decoderconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;decoder.added_tokens : <code> Array. < AddedToken > </code>&quot;,&quot;local&quot;:&quot;decoderaddedtokens--code-array--addedtoken--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;decoder._call(tokens) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;decodercalltokens--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;decoder.decode(tokens) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;decoderdecodetokens--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;decoder.decode_chain(tokens) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;decoderdecodechaintokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Decoder.fromConfig(config) ⇒ <code> Decoder </code>&quot;,&quot;local&quot;:&quot;decoderfromconfigconfig--code-decoder-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~FuseDecoder&quot;,&quot;local&quot;:&quot;tokenizersfusedecoder&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;fuseDecoder.decode_chain() : <code> * </code>&quot;,&quot;local&quot;:&quot;fusedecoderdecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~WordPieceDecoder ⇐ <code> Decoder </code>&quot;,&quot;local&quot;:&quot;tokenizerswordpiecedecoder--code-decoder-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new WordPieceDecoder(config)&quot;,&quot;local&quot;:&quot;new-wordpiecedecoderconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceDecoder.decode_chain() : <code> * </code>&quot;,&quot;local&quot;:&quot;wordpiecedecoderdecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ByteLevelDecoder ⇐ <code> Decoder </code>&quot;,&quot;local&quot;:&quot;tokenizersbyteleveldecoder--code-decoder-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new ByteLevelDecoder(config)&quot;,&quot;local&quot;:&quot;new-byteleveldecoderconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelDecoder.convert_tokens_to_string(tokens) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;byteleveldecoderconverttokenstostringtokens--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelDecoder.decode_chain() : <code> * 
</code>&quot;,&quot;local&quot;:&quot;byteleveldecoderdecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~CTCDecoder&quot;,&quot;local&quot;:&quot;tokenizersctcdecoder&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;ctcDecoder.convert_tokens_to_string(tokens) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;ctcdecoderconverttokenstostringtokens--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;ctcDecoder.decode_chain() : <code> * </code>&quot;,&quot;local&quot;:&quot;ctcdecoderdecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~DecoderSequence ⇐ <code> Decoder </code>&quot;,&quot;local&quot;:&quot;tokenizersdecodersequence--code-decoder-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new DecoderSequence(config)&quot;,&quot;local&quot;:&quot;new-decodersequenceconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;decoderSequence.decode_chain() : <code> * </code>&quot;,&quot;local&quot;:&quot;decodersequencedecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~MetaspacePreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersmetaspacepretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new MetaspacePreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-metaspacepretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;metaspacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;metaspacepretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~MetaspaceDecoder ⇐ <code> Decoder </code>&quot;,&quot;local&quot;:&quot;tokenizersmetaspacedecoder--code-decoder-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new MetaspaceDecoder(config)&quot;,&quot;local&quot;:&quot;new-metaspacedecoderconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;metaspaceDecoder.decode_chain() : <code> * </code>&quot;,&quot;local&quot;:&quot;metaspacedecoderdecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Precompiled ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersprecompiled--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new Precompiled(config)&quot;,&quot;local&quot;:&quot;new-precompiledconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;precompiled.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;precompilednormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PreTokenizerSequence ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizerspretokenizersequence--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PreTokenizerSequence(config)&quot;,&quot;local&quot;:&quot;new-pretokenizersequenceconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTokenizerSequence.pre_tokenize_text(text, [options]) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;pretokenizersequencepretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~WhitespacePreTokenizer&quot;,&quot;local&quot;:&quot;tokenizerswhitespacepretokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new WhitespacePreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-whitespacepretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;whitespacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;whitespacepretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~WhitespaceSplit ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizerswhitespacesplit--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new WhitespaceSplit(config)&quot;,&quot;local&quot;:&quot;new-whitespacesplitconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;whitespaceSplit.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;whitespacesplitpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ReplacePreTokenizer&quot;,&quot;local&quot;:&quot;tokenizersreplacepretokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new ReplacePreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-replacepretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;replacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;replacepretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BYTES_TO_UNICODE ⇒ <code> Object </code>&quot;,&quot;local&quot;:&quot;tokenizersbytestounicode--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~loadTokenizer(pretrained_model_name_or_path, options) ⇒ <code> Promise. < Array < any > > </code>&quot;,&quot;local&quot;:&quot;tokenizersloadtokenizerpretrainedmodelnameorpath-options--code-promise--array--any---code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~regexSplit(text, regex) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;tokenizersregexsplittext-regex--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~createPattern(pattern, invert) ⇒ <code> RegExp </code> | <code> null </code>&quot;,&quot;local&quot;:&quot;tokenizerscreatepatternpattern-invert--code-regexp-code--code-null-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~objectToMap(obj) ⇒ <code> Map. < string, any > </code>&quot;,&quot;local&quot;:&quot;tokenizersobjecttomapobj--code-map--string-any--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~prepareTensorForDecode(tensor) ⇒ <code> Array. 
< number > </code>&quot;,&quot;local&quot;:&quot;tokenizerspreparetensorfordecodetensor--code-array--number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~clean_up_tokenization(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;tokenizerscleanuptokenizationtext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~remove_accents(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;tokenizersremoveaccentstext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~lowercase_and_remove_accent(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;tokenizerslowercaseandremoveaccenttext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~whitespace_split(text) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;tokenizerswhitespacesplittext--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PretrainedTokenizerOptions : <code> Object </code>&quot;,&quot;local&quot;:&quot;tokenizerspretrainedtokenizeroptions--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BPENode : <code> Object </code>&quot;,&quot;local&quot;:&quot;tokenizersbpenode--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~SplitDelimiterBehavior : <code> ’ removed ’ </code> | <code> ’ isolated ’ </code> | <code> ’ mergedWithPrevious ’ </code> | <code> ’ mergedWithNext ’ </code> | <code> ’ contiguous ’ </code>&quot;,&quot;local&quot;:&quot;tokenizerssplitdelimiterbehavior--code--removed--code--code--isolated--code--code--mergedwithprevious--code--code--mergedwithnext--code--code--contiguous--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PostProcessedOutput : <code> 
Object </code>&quot;,&quot;local&quot;:&quot;tokenizerspostprocessedoutput--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~EncodingSingle : <code> Object </code>&quot;,&quot;local&quot;:&quot;tokenizersencodingsingle--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Message : <code> Object </code>&quot;,&quot;local&quot;:&quot;tokenizersmessage--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BatchEncoding : <code> Array < number > </code> | <code> Array < Array < number > > </code> | <code> Tensor </code>&quot;,&quot;local&quot;:&quot;tokenizersbatchencoding--code-array--number--code--code-array--array--number---code--code-tensor-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<!-- App stylesheet: a CSS asset must be linked with rel="stylesheet"; rel="modulepreload" is only for JavaScript modules and would leave the styles unapplied. -->
<link href="/docs/transformers.js/pr_1113/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<!-- Preload hints for the JavaScript modules listed below. -->
<link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/entry/start.88a6e140.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/scheduler.0219f8bd.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/singletons.c59c6d8d.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/paths.8e090985.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/entry/app.0003020d.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/index.f61edf3b.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/nodes/0.25c65ab2.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/nodes/14.95d1e23c.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/CodeBlock.38e566ae.js">
<link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/EditOnGithub.48fa589f.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;tokenizers&quot;,&quot;local&quot;:&quot;tokenizers&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;tokenizers.TokenizerModel ⇐ <code> Callable </code>&quot;,&quot;local&quot;:&quot;tokenizerstokenizermodel--code-callable-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new TokenizerModel(config)&quot;,&quot;local&quot;:&quot;new-tokenizermodelconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.vocab : <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;tokenizermodelvocab--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.tokens_to_ids : <code> Map. < string, number > </code>&quot;,&quot;local&quot;:&quot;tokenizermodeltokenstoids--code-map--string-number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.fuse_unk : <code> boolean </code>&quot;,&quot;local&quot;:&quot;tokenizermodelfuseunk--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel._call(tokens) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;tokenizermodelcalltokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.encode(tokens) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;tokenizermodelencodetokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.convert_tokens_to_ids(tokens) ⇒ <code> Array. 
< number > </code>&quot;,&quot;local&quot;:&quot;tokenizermodelconverttokenstoidstokens--code-array--number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;tokenizerModel.convert_ids_to_tokens(ids) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;tokenizermodelconvertidstotokensids--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;TokenizerModel.fromConfig(config, ...args) ⇒ <code> TokenizerModel </code>&quot;,&quot;local&quot;:&quot;tokenizermodelfromconfigconfig-args--code-tokenizermodel-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.PreTrainedTokenizer&quot;,&quot;local&quot;:&quot;tokenizerspretrainedtokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)&quot;,&quot;local&quot;:&quot;new-pretrainedtokenizertokenizerjson-tokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.added_tokens : <code> Array. 
< AddedToken > </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizeraddedtokens--code-array--addedtoken--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.remove_space : <code> boolean </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerremovespace--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer._call(text, options) ⇒ <code> BatchEncoding </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizercalltext-options--code-batchencoding-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer._encode_text(text) ⇒ <code> Array < string > </code> | <code> null </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerencodetexttext--code-array--string--code--code-null-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer._tokenize_helper(text, options) ⇒ <code> * </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizertokenizehelpertext-options--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.tokenize(text, options) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizertokenizetext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.encode(text, options) ⇒ <code> Array. < number > </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerencodetext-options--code-array--number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.batch_decode(batch, decode_args) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerbatchdecodebatch-decodeargs--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.decode(token_ids, [decode_args]) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodetokenids-decodeargs--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.decode_single(token_ids, decode_args) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerdecodesingletokenids-decodeargs--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.get_chat_template(options) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizergetchattemplateoptions--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTrainedTokenizer.apply_chat_template(conversation, options) ⇒ <code> string </code> | <code> Tensor </code> | <code> Array < number > </code> | <code> Array < Array < number > > </code> | <code> BatchEncoding </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerapplychattemplateconversation-options--code-string-code--code-tensor-code--code-array--number--code--code-array--array--number---code--code-batchencoding-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ <code> Promise. 
< PreTrainedTokenizer > </code>&quot;,&quot;local&quot;:&quot;pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.BertTokenizer ⇐ <code> PreTrainedTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersberttokenizer--code-pretrainedtokenizer-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.AlbertTokenizer ⇐ <code> PreTrainedTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersalberttokenizer--code-pretrainedtokenizer-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.NllbTokenizer&quot;,&quot;local&quot;:&quot;tokenizersnllbtokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;nllbTokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ <code> Object </code>&quot;,&quot;local&quot;:&quot;nllbtokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.M2M100Tokenizer&quot;,&quot;local&quot;:&quot;tokenizersm2m100tokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;m2M100Tokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ <code> Object </code>&quot;,&quot;local&quot;:&quot;m2m100tokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.WhisperTokenizer ⇐ <code> PreTrainedTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizerswhispertokenizer--code-pretrainedtokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;whisperTokenizer._decode_asr(sequences, options) ⇒ <code> * 
</code>&quot;,&quot;local&quot;:&quot;whispertokenizerdecodeasrsequences-options--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;whisperTokenizer.decode() : <code> * </code>&quot;,&quot;local&quot;:&quot;whispertokenizerdecode--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.MarianTokenizer&quot;,&quot;local&quot;:&quot;tokenizersmariantokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new MarianTokenizer(tokenizerJSON, tokenizerConfig)&quot;,&quot;local&quot;:&quot;new-mariantokenizertokenizerjson-tokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;marianTokenizer._encode_text(text) ⇒ <code> Array </code>&quot;,&quot;local&quot;:&quot;mariantokenizerencodetexttext--code-array-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.AutoTokenizer&quot;,&quot;local&quot;:&quot;tokenizersautotokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new AutoTokenizer()&quot;,&quot;local&quot;:&quot;new-autotokenizer&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ <code> Promise. 
< PreTrainedTokenizer > </code>&quot;,&quot;local&quot;:&quot;autotokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers.is_chinese_char(cp) ⇒ <code> boolean </code>&quot;,&quot;local&quot;:&quot;tokenizersischinesecharcp--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~AddedToken&quot;,&quot;local&quot;:&quot;tokenizersaddedtoken&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new AddedToken(config)&quot;,&quot;local&quot;:&quot;new-addedtokenconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~WordPieceTokenizer ⇐ <code> TokenizerModel </code>&quot;,&quot;local&quot;:&quot;tokenizerswordpiecetokenizer--code-tokenizermodel-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new WordPieceTokenizer(config)&quot;,&quot;local&quot;:&quot;new-wordpiecetokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.tokens_to_ids : <code> Map. 
< string, number > </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizertokenstoids--code-map--string-number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.unk_token_id : <code> number </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizerunktokenid--code-number-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.unk_token : <code> string </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizerunktoken--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.max_input_chars_per_word : <code> number </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizermaxinputcharsperword--code-number-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.vocab : <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizervocab--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceTokenizer.encode(tokens) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;wordpiecetokenizerencodetokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Unigram ⇐ <code> TokenizerModel </code>&quot;,&quot;local&quot;:&quot;tokenizersunigram--code-tokenizermodel-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new Unigram(config, moreConfig)&quot;,&quot;local&quot;:&quot;new-unigramconfig-moreconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;unigram.populateNodes(lattice)&quot;,&quot;local&quot;:&quot;unigrampopulatenodeslattice&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;unigram.tokenize(normalized) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;unigramtokenizenormalized--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;unigram.encode(tokens) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;unigramencodetokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BPE ⇐ <code> TokenizerModel </code>&quot;,&quot;local&quot;:&quot;tokenizersbpe--code-tokenizermodel-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new BPE(config)&quot;,&quot;local&quot;:&quot;new-bpeconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bpE.tokens_to_ids : <code> Map. < string, number > </code>&quot;,&quot;local&quot;:&quot;bpetokenstoids--code-map--string-number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bpE.merges : <code> * </code>&quot;,&quot;local&quot;:&quot;bpemerges--code--code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;merges.config.merges : <code> * </code>&quot;,&quot;local&quot;:&quot;mergesconfigmerges--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bpE.cache : <code> Map. < string, Array < string > > </code>&quot;,&quot;local&quot;:&quot;bpecache--code-map--string-array--string---code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bpE.bpe(token) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;bpebpetoken--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bpE.encode(tokens) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;bpeencodetokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~LegacyTokenizerModel&quot;,&quot;local&quot;:&quot;tokenizerslegacytokenizermodel&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new LegacyTokenizerModel(config, moreConfig)&quot;,&quot;local&quot;:&quot;new-legacytokenizermodelconfig-moreconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;legacyTokenizerModel.tokens_to_ids : <code> Map. < string, number > </code>&quot;,&quot;local&quot;:&quot;legacytokenizermodeltokenstoids--code-map--string-number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Normalizer&quot;,&quot;local&quot;:&quot;tokenizersnormalizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new Normalizer(config)&quot;,&quot;local&quot;:&quot;new-normalizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;normalizer.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;normalizernormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;normalizer._call(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;normalizercalltext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Normalizer.fromConfig(config) ⇒ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;normalizerfromconfigconfig--code-normalizer-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Replace ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersreplace--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;replace.normalize(text) ⇒ <code> string 
</code>&quot;,&quot;local&quot;:&quot;replacenormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~NFC ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersnfc--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;nfC.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;nfcnormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~NFKC ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersnfkc--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;nfkC.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;nfkcnormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~NFKD ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersnfkd--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;nfkD.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;nfkdnormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~StripNormalizer&quot;,&quot;local&quot;:&quot;tokenizersstripnormalizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;stripNormalizer.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;stripnormalizernormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~StripAccents ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersstripaccents--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;stripAccents.normalize(text) ⇒ <code> string 
</code>&quot;,&quot;local&quot;:&quot;stripaccentsnormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Lowercase ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizerslowercase--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;lowercase.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;lowercasenormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Prepend ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersprepend--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;prepend.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;prependnormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~NormalizerSequence ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersnormalizersequence--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new NormalizerSequence(config)&quot;,&quot;local&quot;:&quot;new-normalizersequenceconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;normalizerSequence.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;normalizersequencenormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BertNormalizer ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersbertnormalizer--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;bertNormalizer._tokenize_chinese_chars(text) ⇒ <code> string 
</code>&quot;,&quot;local&quot;:&quot;bertnormalizertokenizechinesecharstext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bertNormalizer.stripAccents(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;bertnormalizerstripaccentstext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bertNormalizer.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;bertnormalizernormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PreTokenizer ⇐ <code> Callable </code>&quot;,&quot;local&quot;:&quot;tokenizerspretokenizer--code-callable-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;preTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;pretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTokenizer.pre_tokenize(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;pretokenizerpretokenizetext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTokenizer._call(text, [options]) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;pretokenizercalltext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;PreTokenizer.fromConfig(config) ⇒ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;pretokenizerfromconfigconfig--code-pretokenizer-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BertPreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersbertpretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new BertPreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-bertpretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bertPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;bertpretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ByteLevelPreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersbytelevelpretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new ByteLevelPreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-bytelevelpretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelPreTokenizer.add_prefix_space : <code> boolean </code>&quot;,&quot;local&quot;:&quot;bytelevelpretokenizeraddprefixspace--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelPreTokenizer.trim_offsets : <code> boolean </code>&quot;,&quot;local&quot;:&quot;bytelevelpretokenizertrimoffsets--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelPreTokenizer.use_regex : <code> boolean 
</code>&quot;,&quot;local&quot;:&quot;bytelevelpretokenizeruseregex--code-boolean-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;bytelevelpretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~SplitPreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizerssplitpretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new SplitPreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-splitpretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;splitPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;splitpretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PunctuationPreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizerspunctuationpretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PunctuationPreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-punctuationpretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;punctuationPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;punctuationpretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~DigitsPreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersdigitspretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new DigitsPreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-digitspretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;digitsPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;digitspretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PostProcessor ⇐ <code> Callable </code>&quot;,&quot;local&quot;:&quot;tokenizerspostprocessor--code-callable-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PostProcessor(config)&quot;,&quot;local&quot;:&quot;new-postprocessorconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;postProcessor.post_process(tokens, ...args) ⇒ <code> PostProcessedOutput </code>&quot;,&quot;local&quot;:&quot;postprocessorpostprocesstokens-args--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;postProcessor._call(tokens, ...args) ⇒ <code> PostProcessedOutput </code>&quot;,&quot;local&quot;:&quot;postprocessorcalltokens-args--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;PostProcessor.fromConfig(config) ⇒ <code> PostProcessor 
</code>&quot;,&quot;local&quot;:&quot;postprocessorfromconfigconfig--code-postprocessor-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BertProcessing&quot;,&quot;local&quot;:&quot;tokenizersbertprocessing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new BertProcessing(config)&quot;,&quot;local&quot;:&quot;new-bertprocessingconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;bertProcessing.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>&quot;,&quot;local&quot;:&quot;bertprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~TemplateProcessing ⇐ <code> PostProcessor </code>&quot;,&quot;local&quot;:&quot;tokenizerstemplateprocessing--code-postprocessor-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new TemplateProcessing(config)&quot;,&quot;local&quot;:&quot;new-templateprocessingconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;templateProcessing.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>&quot;,&quot;local&quot;:&quot;templateprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ByteLevelPostProcessor ⇐ <code> PostProcessor </code>&quot;,&quot;local&quot;:&quot;tokenizersbytelevelpostprocessor--code-postprocessor-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;byteLevelPostProcessor.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput 
</code>&quot;,&quot;local&quot;:&quot;bytelevelpostprocessorpostprocesstokens-tokenspair--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PostProcessorSequence&quot;,&quot;local&quot;:&quot;tokenizerspostprocessorsequence&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PostProcessorSequence(config)&quot;,&quot;local&quot;:&quot;new-postprocessorsequenceconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;postProcessorSequence.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>&quot;,&quot;local&quot;:&quot;postprocessorsequencepostprocesstokens-tokenspair--code-postprocessedoutput-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Decoder ⇐ <code> Callable </code>&quot;,&quot;local&quot;:&quot;tokenizersdecoder--code-callable-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new Decoder(config)&quot;,&quot;local&quot;:&quot;new-decoderconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;decoder.added_tokens : <code> Array. < AddedToken > </code>&quot;,&quot;local&quot;:&quot;decoderaddedtokens--code-array--addedtoken--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;decoder._call(tokens) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;decodercalltokens--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;decoder.decode(tokens) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;decoderdecodetokens--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;decoder.decode_chain(tokens) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;decoderdecodechaintokens--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Decoder.fromConfig(config) ⇒ <code> Decoder </code>&quot;,&quot;local&quot;:&quot;decoderfromconfigconfig--code-decoder-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~FuseDecoder&quot;,&quot;local&quot;:&quot;tokenizersfusedecoder&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;fuseDecoder.decode_chain() : <code> * </code>&quot;,&quot;local&quot;:&quot;fusedecoderdecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~WordPieceDecoder ⇐ <code> Decoder </code>&quot;,&quot;local&quot;:&quot;tokenizerswordpiecedecoder--code-decoder-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new WordPieceDecoder(config)&quot;,&quot;local&quot;:&quot;new-wordpiecedecoderconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;wordPieceDecoder.decode_chain() : <code> * </code>&quot;,&quot;local&quot;:&quot;wordpiecedecoderdecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ByteLevelDecoder ⇐ <code> Decoder </code>&quot;,&quot;local&quot;:&quot;tokenizersbyteleveldecoder--code-decoder-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new ByteLevelDecoder(config)&quot;,&quot;local&quot;:&quot;new-byteleveldecoderconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelDecoder.convert_tokens_to_string(tokens) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;byteleveldecoderconverttokenstostringtokens--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;byteLevelDecoder.decode_chain() : <code> * 
</code>&quot;,&quot;local&quot;:&quot;byteleveldecoderdecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~CTCDecoder&quot;,&quot;local&quot;:&quot;tokenizersctcdecoder&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;ctcDecoder.convert_tokens_to_string(tokens) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;ctcdecoderconverttokenstostringtokens--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;ctcDecoder.decode_chain() : <code> * </code>&quot;,&quot;local&quot;:&quot;ctcdecoderdecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~DecoderSequence ⇐ <code> Decoder </code>&quot;,&quot;local&quot;:&quot;tokenizersdecodersequence--code-decoder-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new DecoderSequence(config)&quot;,&quot;local&quot;:&quot;new-decodersequenceconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;decoderSequence.decode_chain() : <code> * </code>&quot;,&quot;local&quot;:&quot;decodersequencedecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~MetaspacePreTokenizer ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizersmetaspacepretokenizer--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new MetaspacePreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-metaspacepretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;metaspacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;metaspacepretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~MetaspaceDecoder ⇐ <code> Decoder </code>&quot;,&quot;local&quot;:&quot;tokenizersmetaspacedecoder--code-decoder-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new MetaspaceDecoder(config)&quot;,&quot;local&quot;:&quot;new-metaspacedecoderconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;metaspaceDecoder.decode_chain() : <code> * </code>&quot;,&quot;local&quot;:&quot;metaspacedecoderdecodechain--code--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Precompiled ⇐ <code> Normalizer </code>&quot;,&quot;local&quot;:&quot;tokenizersprecompiled--code-normalizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new Precompiled(config)&quot;,&quot;local&quot;:&quot;new-precompiledconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;precompiled.normalize(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;precompilednormalizetext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PreTokenizerSequence ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizerspretokenizersequence--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new PreTokenizerSequence(config)&quot;,&quot;local&quot;:&quot;new-pretokenizersequenceconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;preTokenizerSequence.pre_tokenize_text(text, [options]) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;pretokenizersequencepretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~WhitespacePreTokenizer&quot;,&quot;local&quot;:&quot;tokenizerswhitespacepretokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new WhitespacePreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-whitespacepretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;whitespacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;whitespacepretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~WhitespaceSplit ⇐ <code> PreTokenizer </code>&quot;,&quot;local&quot;:&quot;tokenizerswhitespacesplit--code-pretokenizer-code&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new WhitespaceSplit(config)&quot;,&quot;local&quot;:&quot;new-whitespacesplitconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;whitespaceSplit.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;whitespacesplitpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~ReplacePreTokenizer&quot;,&quot;local&quot;:&quot;tokenizersreplacepretokenizer&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;new ReplacePreTokenizer(config)&quot;,&quot;local&quot;:&quot;new-replacepretokenizerconfig&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;replacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. 
< string > </code>&quot;,&quot;local&quot;:&quot;replacepretokenizerpretokenizetexttext-options--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BYTES_TO_UNICODE ⇒ <code> Object </code>&quot;,&quot;local&quot;:&quot;tokenizersbytestounicode--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~loadTokenizer(pretrained_model_name_or_path, options) ⇒ <code> Promise. < Array < any > > </code>&quot;,&quot;local&quot;:&quot;tokenizersloadtokenizerpretrainedmodelnameorpath-options--code-promise--array--any---code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~regexSplit(text, regex) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;tokenizersregexsplittext-regex--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~createPattern(pattern, invert) ⇒ <code> RegExp </code> | <code> null </code>&quot;,&quot;local&quot;:&quot;tokenizerscreatepatternpattern-invert--code-regexp-code--code-null-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~objectToMap(obj) ⇒ <code> Map. < string, any > </code>&quot;,&quot;local&quot;:&quot;tokenizersobjecttomapobj--code-map--string-any--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~prepareTensorForDecode(tensor) ⇒ <code> Array. 
< number > </code>&quot;,&quot;local&quot;:&quot;tokenizerspreparetensorfordecodetensor--code-array--number--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~clean_up_tokenization(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;tokenizerscleanuptokenizationtext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~remove_accents(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;tokenizersremoveaccentstext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~lowercase_and_remove_accent(text) ⇒ <code> string </code>&quot;,&quot;local&quot;:&quot;tokenizerslowercaseandremoveaccenttext--code-string-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~whitespace_split(text) ⇒ <code> Array. < string > </code>&quot;,&quot;local&quot;:&quot;tokenizerswhitespacesplittext--code-array--string--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PretrainedTokenizerOptions : <code> Object </code>&quot;,&quot;local&quot;:&quot;tokenizerspretrainedtokenizeroptions--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BPENode : <code> Object </code>&quot;,&quot;local&quot;:&quot;tokenizersbpenode--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~SplitDelimiterBehavior : <code> ’ removed ’ </code> | <code> ’ isolated ’ </code> | <code> ’ mergedWithPrevious ’ </code> | <code> ’ mergedWithNext ’ </code> | <code> ’ contiguous ’ </code>&quot;,&quot;local&quot;:&quot;tokenizerssplitdelimiterbehavior--code--removed--code--code--isolated--code--code--mergedwithprevious--code--code--mergedwithnext--code--code--contiguous--code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~PostProcessedOutput : <code> 
Object </code>&quot;,&quot;local&quot;:&quot;tokenizerspostprocessedoutput--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~EncodingSingle : <code> Object </code>&quot;,&quot;local&quot;:&quot;tokenizersencodingsingle--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~Message : <code> Object </code>&quot;,&quot;local&quot;:&quot;tokenizersmessage--code-object-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;tokenizers~BatchEncoding : <code> Array < number > </code> | <code> Array < Array < number > > </code> | <code> Tensor </code>&quot;,&quot;local&quot;:&quot;tokenizersbatchencoding--code-array--number--code--code-array--array--number---code--code-tensor-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <a id="module_tokenizers" class="group"></a> <h1 class="relative group"><a id="tokenizers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>tokenizers</span></h1> <p data-svelte-h="svelte-ost35k">Tokenizers are used to prepare textual inputs for a model.</p> <p data-svelte-h="svelte-gbl9g8"><strong>Example:</strong> Create an <code>AutoTokenizer</code> and use it to tokenize a sentence.
This will automatically detect the tokenizer type based on the tokenizer class defined in <code>tokenizer.json</code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">AutoTokenizer</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">&#x27;@huggingface/transformers&#x27;</span>;
<span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(<span class="hljs-string">&#x27;Xenova/bert-base-uncased&#x27;</span>);
<span class="hljs-keyword">const</span> { input_ids } = <span class="hljs-keyword">await</span> <span class="hljs-title function_">tokenizer</span>(<span class="hljs-string">&#x27;I love transformers!&#x27;</span>);
<span class="hljs-comment">// Tensor {</span>
<span class="hljs-comment">//   data: BigInt64Array(6) [101n, 1045n, 2293n, 19081n, 999n, 102n],</span>
<span class="hljs-comment">//   dims: [1, 6],</span>
<span class="hljs-comment">//   type: &#x27;int64&#x27;,</span>
<span class="hljs-comment">//   size: 6,</span>
<span class="hljs-comment">// }</span><!-- HTML_TAG_END --></pre></div> <ul data-svelte-h="svelte-nkk3er"><li><a href="#module_tokenizers">tokenizers</a><ul><li><em>static</em><ul><li><a href="#module_tokenizers.TokenizerModel">.TokenizerModel</a> ⇐ <a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers.TokenizerModel_new"><code>new TokenizerModel(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers.TokenizerModel+vocab"><code>.vocab</code></a> : <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers.TokenizerModel+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.&lt;string, number&gt;</code></li> <li><a href="#module_tokenizers.TokenizerModel+fuse_unk"><code>.fuse_unk</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers.TokenizerModel+_call"><code>._call(tokens)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers.TokenizerModel+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers.TokenizerModel+convert_tokens_to_ids"><code>.convert_tokens_to_ids(tokens)</code></a> ⇒ <code>Array.&lt;number&gt;</code></li> <li><a href="#module_tokenizers.TokenizerModel+convert_ids_to_tokens"><code>.convert_ids_to_tokens(ids)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers.TokenizerModel.fromConfig"><code>.fromConfig(config, ...args)</code></a> ⇒ <code>TokenizerModel</code></li></ul></li></ul></li> <li><a href="#module_tokenizers.PreTrainedTokenizer">.PreTrainedTokenizer</a><ul><li><a href="#new_module_tokenizers.PreTrainedTokenizer_new"><code>new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers.PreTrainedTokenizer+added_tokens"><code>.added_tokens</code></a> : <code>Array.&lt;AddedToken&gt;</code></li> <li><a 
href="#module_tokenizers.PreTrainedTokenizer+remove_space"><code>.remove_space</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_call"><code>._call(text, options)</code></a> ⇒ <code>BatchEncoding</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_encode_text"><code>._encode_text(text)</code></a> ⇒ <code>Array&lt;string&gt;</code> | <code>null</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_tokenize_helper"><code>._tokenize_helper(text, options)</code></a> ⇒ <code>*</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+tokenize"><code>.tokenize(text, options)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+encode"><code>.encode(text, options)</code></a> ⇒ <code>Array.&lt;number&gt;</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+batch_decode"><code>.batch_decode(batch, decode_args)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode"><code>.decode(token_ids, [decode_args])</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode_single"><code>.decode_single(token_ids, decode_args)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+get_chat_template"><code>.get_chat_template(options)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+apply_chat_template"><code>.apply_chat_template(conversation, options)</code></a> ⇒ <code>string</code> | <a href="#Tensor"><code>Tensor</code></a> | <code>Array&lt;number&gt;</code> | <code>Array&lt;Array&lt;number&gt;&gt;</code> | <code>BatchEncoding</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers.PreTrainedTokenizer.from_pretrained"><code>.from_pretrained(pretrained_model_name_or_path, options)</code></a> ⇒ <code>Promise.&lt;PreTrainedTokenizer&gt;</code></li></ul></li></ul></li> <li><a 
href="#module_tokenizers.BertTokenizer">.BertTokenizer</a> ⇐ <code>PreTrainedTokenizer</code></li> <li><a href="#module_tokenizers.AlbertTokenizer">.AlbertTokenizer</a> ⇐ <code>PreTrainedTokenizer</code></li> <li><a href="#module_tokenizers.NllbTokenizer">.NllbTokenizer</a><ul><li><a href="#module_tokenizers.NllbTokenizer+_build_translation_inputs"><code>._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs)</code></a> ⇒ <code>Object</code></li></ul></li> <li><a href="#module_tokenizers.M2M100Tokenizer">.M2M100Tokenizer</a><ul><li><a href="#module_tokenizers.M2M100Tokenizer+_build_translation_inputs"><code>._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs)</code></a> ⇒ <code>Object</code></li></ul></li> <li><a href="#module_tokenizers.WhisperTokenizer">.WhisperTokenizer</a> ⇐ <code>PreTrainedTokenizer</code><ul><li><a href="#module_tokenizers.WhisperTokenizer+_decode_asr"><code>._decode_asr(sequences, options)</code></a> ⇒ <code>*</code></li> <li><a href="#module_tokenizers.WhisperTokenizer+decode"><code>.decode()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers.MarianTokenizer">.MarianTokenizer</a><ul><li><a href="#new_module_tokenizers.MarianTokenizer_new"><code>new MarianTokenizer(tokenizerJSON, tokenizerConfig)</code></a></li> <li><a href="#module_tokenizers.MarianTokenizer+_encode_text"><code>._encode_text(text)</code></a> ⇒ <code>Array</code></li></ul></li> <li><a href="#module_tokenizers.AutoTokenizer">.AutoTokenizer</a><ul><li><a href="#new_module_tokenizers.AutoTokenizer_new"><code>new AutoTokenizer()</code></a></li> <li><a href="#module_tokenizers.AutoTokenizer.from_pretrained"><code>.from_pretrained(pretrained_model_name_or_path, options)</code></a> ⇒ <code>Promise.&lt;PreTrainedTokenizer&gt;</code></li></ul></li> <li><a href="#module_tokenizers.is_chinese_char"><code>.is_chinese_char(cp)</code></a> ⇒ <code>boolean</code></li></ul></li> <li><em>inner</em><ul><li><a 
href="#module_tokenizers..AddedToken">~AddedToken</a><ul><li><a href="#new_module_tokenizers..AddedToken_new"><code>new AddedToken(config)</code></a></li></ul></li> <li><a href="#module_tokenizers..WordPieceTokenizer">~WordPieceTokenizer</a> ⇐ <code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..WordPieceTokenizer_new"><code>new WordPieceTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..WordPieceTokenizer+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.&lt;string, number&gt;</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+unk_token_id"><code>.unk_token_id</code></a> : <code>number</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+unk_token"><code>.unk_token</code></a> : <code>string</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+max_input_chars_per_word"><code>.max_input_chars_per_word</code></a> : <code>number</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+vocab"><code>.vocab</code></a> : <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..Unigram">~Unigram</a> ⇐ <code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..Unigram_new"><code>new Unigram(config, moreConfig)</code></a></li> <li><a href="#module_tokenizers..Unigram+populateNodes"><code>.populateNodes(lattice)</code></a></li> <li><a href="#module_tokenizers..Unigram+tokenize"><code>.tokenize(normalized)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers..Unigram+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..BPE">~BPE</a> ⇐ <code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..BPE_new"><code>new BPE(config)</code></a></li> <li><a href="#module_tokenizers..BPE+tokens_to_ids"><code>.tokens_to_ids</code></a> : 
<code>Map.&lt;string, number&gt;</code></li> <li><a href="#module_tokenizers..BPE+merges"><code>.merges</code></a> : <code>*</code><ul><li><a href="#module_tokenizers..BPE+merges.config.merges"><code>.config.merges</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..BPE+cache"><code>.cache</code></a> : <code>Map.&lt;string, Array&lt;string&gt;&gt;</code></li> <li><a href="#module_tokenizers..BPE+bpe"><code>.bpe(token)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers..BPE+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..LegacyTokenizerModel">~LegacyTokenizerModel</a><ul><li><a href="#new_module_tokenizers..LegacyTokenizerModel_new"><code>new LegacyTokenizerModel(config, moreConfig)</code></a></li> <li><a href="#module_tokenizers..LegacyTokenizerModel+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.&lt;string, number&gt;</code></li></ul></li> <li><em><a href="#module_tokenizers..Normalizer">~Normalizer</a></em><ul><li><em><a href="#new_module_tokenizers..Normalizer_new"><code>new Normalizer(config)</code></a></em></li> <li><em>instance</em><ul><li><strong><a href="#module_tokenizers..Normalizer+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></strong></li> <li><em><a href="#module_tokenizers..Normalizer+_call"><code>._call(text)</code></a> ⇒ <code>string</code></em></li></ul></li> <li><em>static</em><ul><li><em><a href="#module_tokenizers..Normalizer.fromConfig"><code>.fromConfig(config)</code></a> ⇒ <code>Normalizer</code></em></li></ul></li></ul></li> <li><a href="#module_tokenizers..Replace">~Replace</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..Replace+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..NFC">~NFC</a> ⇐ <code>Normalizer</code><ul><li><a 
href="#module_tokenizers..NFC+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..NFKC">~NFKC</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..NFKC+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..NFKD">~NFKD</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..NFKD+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..StripNormalizer">~StripNormalizer</a><ul><li><a href="#module_tokenizers..StripNormalizer+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..StripAccents">~StripAccents</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..StripAccents+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..Lowercase">~Lowercase</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..Lowercase+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..Prepend">~Prepend</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..Prepend+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..NormalizerSequence">~NormalizerSequence</a> ⇐ <code>Normalizer</code><ul><li><a href="#new_module_tokenizers..NormalizerSequence_new"><code>new NormalizerSequence(config)</code></a></li> <li><a href="#module_tokenizers..NormalizerSequence+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..BertNormalizer">~BertNormalizer</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..BertNormalizer+_tokenize_chinese_chars"><code>._tokenize_chinese_chars(text)</code></a> ⇒ <code>string</code></li> <li><a 
href="#module_tokenizers..BertNormalizer+stripAccents"><code>.stripAccents(text)</code></a><code>string</code></li> <li><a href="#module_tokenizers..BertNormalizer+normalize"><code>.normalize(text)</code></a><code>string</code></li></ul></li> <li><a href="#module_tokenizers..PreTokenizer">~PreTokenizer</a><a href="#Callable"><code>Callable</code></a><ul><li><em>instance</em><ul><li><em><a href="#module_tokenizers..PreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></em></li> <li><a href="#module_tokenizers..PreTokenizer+pre_tokenize"><code>.pre_tokenize(text, [options])</code></a><code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers..PreTokenizer+_call"><code>._call(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers..PreTokenizer.fromConfig"><code>.fromConfig(config)</code></a><code>PreTokenizer</code></li></ul></li></ul></li> <li><a href="#module_tokenizers..BertPreTokenizer">~BertPreTokenizer</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..BertPreTokenizer_new"><code>new BertPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..BertPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer">~ByteLevelPreTokenizer</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..ByteLevelPreTokenizer_new"><code>new ByteLevelPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+add_prefix_space"><code>.add_prefix_space</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+trim_offsets"><code>.trim_offsets</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+use_regex"><code>.use_regex</code></a> : <code>boolean</code></li> 
<li><a href="#module_tokenizers..ByteLevelPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..SplitPreTokenizer">~SplitPreTokenizer</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..SplitPreTokenizer_new"><code>new SplitPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..SplitPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..PunctuationPreTokenizer">~PunctuationPreTokenizer</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..PunctuationPreTokenizer_new"><code>new PunctuationPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..PunctuationPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..DigitsPreTokenizer">~DigitsPreTokenizer</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..DigitsPreTokenizer_new"><code>new DigitsPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..DigitsPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..PostProcessor">~PostProcessor</a><a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers..PostProcessor_new"><code>new PostProcessor(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers..PostProcessor+post_process"><code>.post_process(tokens, ...args)</code></a><code>PostProcessedOutput</code></li> <li><a href="#module_tokenizers..PostProcessor+_call"><code>._call(tokens, ...args)</code></a><code>PostProcessedOutput</code></li></ul></li> <li><em>static</em><ul><li><a 
href="#module_tokenizers..PostProcessor.fromConfig"><code>.fromConfig(config)</code></a><code>PostProcessor</code></li></ul></li></ul></li> <li><a href="#module_tokenizers..BertProcessing">~BertProcessing</a><ul><li><a href="#new_module_tokenizers..BertProcessing_new"><code>new BertProcessing(config)</code></a></li> <li><a href="#module_tokenizers..BertProcessing+post_process"><code>.post_process(tokens, [tokens_pair])</code></a><code>PostProcessedOutput</code></li></ul></li> <li><a href="#module_tokenizers..TemplateProcessing">~TemplateProcessing</a><code>PostProcessor</code><ul><li><a href="#new_module_tokenizers..TemplateProcessing_new"><code>new TemplateProcessing(config)</code></a></li> <li><a href="#module_tokenizers..TemplateProcessing+post_process"><code>.post_process(tokens, [tokens_pair])</code></a><code>PostProcessedOutput</code></li></ul></li> <li><a href="#module_tokenizers..ByteLevelPostProcessor">~ByteLevelPostProcessor</a><code>PostProcessor</code><ul><li><a href="#module_tokenizers..ByteLevelPostProcessor+post_process"><code>.post_process(tokens, [tokens_pair])</code></a><code>PostProcessedOutput</code></li></ul></li> <li><a href="#module_tokenizers..PostProcessorSequence">~PostProcessorSequence</a><ul><li><a href="#new_module_tokenizers..PostProcessorSequence_new"><code>new PostProcessorSequence(config)</code></a></li> <li><a href="#module_tokenizers..PostProcessorSequence+post_process"><code>.post_process(tokens, [tokens_pair])</code></a><code>PostProcessedOutput</code></li></ul></li> <li><a href="#module_tokenizers..Decoder">~Decoder</a><a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers..Decoder_new"><code>new Decoder(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers..Decoder+added_tokens"><code>.added_tokens</code></a> : <code>Array.&lt;AddedToken&gt;</code></li> <li><a href="#module_tokenizers..Decoder+_call"><code>._call(tokens)</code></a><code>string</code></li> <li><a 
href="#module_tokenizers..Decoder+decode"><code>.decode(tokens)</code></a><code>string</code></li> <li><a href="#module_tokenizers..Decoder+decode_chain"><code>.decode_chain(tokens)</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers..Decoder.fromConfig"><code>.fromConfig(config)</code></a><code>Decoder</code></li></ul></li></ul></li> <li><a href="#module_tokenizers..FuseDecoder">~FuseDecoder</a><ul><li><a href="#module_tokenizers..FuseDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..WordPieceDecoder">~WordPieceDecoder</a><code>Decoder</code><ul><li><a href="#new_module_tokenizers..WordPieceDecoder_new"><code>new WordPieceDecoder(config)</code></a></li> <li><a href="#module_tokenizers..WordPieceDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..ByteLevelDecoder">~ByteLevelDecoder</a><code>Decoder</code><ul><li><a href="#new_module_tokenizers..ByteLevelDecoder_new"><code>new ByteLevelDecoder(config)</code></a></li> <li><a href="#module_tokenizers..ByteLevelDecoder+convert_tokens_to_string"><code>.convert_tokens_to_string(tokens)</code></a><code>string</code></li> <li><a href="#module_tokenizers..ByteLevelDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..CTCDecoder">~CTCDecoder</a><ul><li><a href="#module_tokenizers..CTCDecoder+convert_tokens_to_string"><code>.convert_tokens_to_string(tokens)</code></a><code>string</code></li> <li><a href="#module_tokenizers..CTCDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..DecoderSequence">~DecoderSequence</a><code>Decoder</code><ul><li><a href="#new_module_tokenizers..DecoderSequence_new"><code>new DecoderSequence(config)</code></a></li> <li><a 
href="#module_tokenizers..DecoderSequence+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..MetaspacePreTokenizer">~MetaspacePreTokenizer</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..MetaspacePreTokenizer_new"><code>new MetaspacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..MetaspacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..MetaspaceDecoder">~MetaspaceDecoder</a><code>Decoder</code><ul><li><a href="#new_module_tokenizers..MetaspaceDecoder_new"><code>new MetaspaceDecoder(config)</code></a></li> <li><a href="#module_tokenizers..MetaspaceDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..Precompiled">~Precompiled</a><code>Normalizer</code><ul><li><a href="#new_module_tokenizers..Precompiled_new"><code>new Precompiled(config)</code></a></li> <li><a href="#module_tokenizers..Precompiled+normalize"><code>.normalize(text)</code></a><code>string</code></li></ul></li> <li><a href="#module_tokenizers..PreTokenizerSequence">~PreTokenizerSequence</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..PreTokenizerSequence_new"><code>new PreTokenizerSequence(config)</code></a></li> <li><a href="#module_tokenizers..PreTokenizerSequence+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..WhitespacePreTokenizer">~WhitespacePreTokenizer</a><ul><li><a href="#new_module_tokenizers..WhitespacePreTokenizer_new"><code>new WhitespacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..WhitespacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><a 
href="#module_tokenizers..WhitespaceSplit">~WhitespaceSplit</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..WhitespaceSplit_new"><code>new WhitespaceSplit(config)</code></a></li> <li><a href="#module_tokenizers..WhitespaceSplit+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..ReplacePreTokenizer">~ReplacePreTokenizer</a><ul><li><a href="#new_module_tokenizers..ReplacePreTokenizer_new"><code>new ReplacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..ReplacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><a href="#module_tokenizers..BYTES_TO_UNICODE"><code>~BYTES_TO_UNICODE</code></a><code>Object</code></li> <li><a href="#module_tokenizers..loadTokenizer"><code>~loadTokenizer(pretrained_model_name_or_path, options)</code></a><code>Promise.&lt;Array&lt;any&gt;&gt;</code></li> <li><a href="#module_tokenizers..regexSplit"><code>~regexSplit(text, regex)</code></a><code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers..createPattern"><code>~createPattern(pattern, invert)</code></a><code>RegExp</code> | <code>null</code></li> <li><a href="#module_tokenizers..objectToMap"><code>~objectToMap(obj)</code></a><code>Map.&lt;string, any&gt;</code></li> <li><a href="#module_tokenizers..prepareTensorForDecode"><code>~prepareTensorForDecode(tensor)</code></a><code>Array.&lt;number&gt;</code></li> <li><a href="#module_tokenizers..clean_up_tokenization"><code>~clean_up_tokenization(text)</code></a><code>string</code></li> <li><a href="#module_tokenizers..remove_accents"><code>~remove_accents(text)</code></a><code>string</code></li> <li><a href="#module_tokenizers..lowercase_and_remove_accent"><code>~lowercase_and_remove_accent(text)</code></a><code>string</code></li> <li><a 
href="#module_tokenizers..whitespace_split"><code>~whitespace_split(text)</code></a><code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers..PretrainedTokenizerOptions"><code>~PretrainedTokenizerOptions</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..BPENode"><code>~BPENode</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..SplitDelimiterBehavior"><code>~SplitDelimiterBehavior</code></a> : <code>'removed'</code> | <code>'isolated'</code> | <code>'mergedWithPrevious'</code> | <code>'mergedWithNext'</code> | <code>'contiguous'</code></li> <li><a href="#module_tokenizers..PostProcessedOutput"><code>~PostProcessedOutput</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..EncodingSingle"><code>~EncodingSingle</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..Message"><code>~Message</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..BatchEncoding"><code>~BatchEncoding</code></a> : <code>Array&lt;number&gt;</code> | <code>Array&lt;Array&lt;number&gt;&gt;</code> | <a href="#Tensor"><code>Tensor</code></a></li></ul></li></ul></li></ul> <hr> <a id="module_tokenizers.TokenizerModel" class="group"></a> <h2 class="relative group"><a id="tokenizerstokenizermodel--code-callable-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerstokenizermodel--code-callable-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 
79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.TokenizerModel ⇐ <code>Callable</code></span></h2> <p data-svelte-h="svelte-6wtrbd">Abstract base class for tokenizer models.</p> <p data-svelte-h="svelte-1ut0prd"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <a href="#Callable"><code>Callable</code></a></p> <ul data-svelte-h="svelte-1b39pjq"><li><a href="#module_tokenizers.TokenizerModel">.TokenizerModel</a><a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers.TokenizerModel_new"><code>new TokenizerModel(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers.TokenizerModel+vocab"><code>.vocab</code></a> : <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers.TokenizerModel+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.&lt;string, number&gt;</code></li> <li><a href="#module_tokenizers.TokenizerModel+fuse_unk"><code>.fuse_unk</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers.TokenizerModel+_call"><code>._call(tokens)</code></a><code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers.TokenizerModel+encode"><code>.encode(tokens)</code></a><code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers.TokenizerModel+convert_tokens_to_ids"><code>.convert_tokens_to_ids(tokens)</code></a><code>Array.&lt;number&gt;</code></li> <li><a href="#module_tokenizers.TokenizerModel+convert_ids_to_tokens"><code>.convert_ids_to_tokens(ids)</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers.TokenizerModel.fromConfig"><code>.fromConfig(config, 
...args)</code></a><code>TokenizerModel</code></li></ul></li></ul></li></ul> <hr> <a id="new_module_tokenizers.TokenizerModel_new" class="group"></a> <h3 class="relative group"><a id="new-tokenizermodelconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-tokenizermodelconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new TokenizerModel(config)</span></h3> <p data-svelte-h="svelte-1c6qq54">Creates a new instance of TokenizerModel.</p> <table data-svelte-h="svelte-14oz1tm"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the TokenizerModel.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.TokenizerModel+vocab" class="group"></a> <h3 class="relative group"><a id="tokenizermodelvocab--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelvocab--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.vocab : <code>Array.&lt;string&gt;</code></span></h3> <p data-svelte-h="svelte-t0dblq"><strong>Kind</strong>: instance property of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a></p> <hr> <a id="module_tokenizers.TokenizerModel+tokens_to_ids" class="group"></a> <h3 class="relative group"><a id="tokenizermodeltokenstoids--code-map--string-number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodeltokenstoids--code-map--string-number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 
56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.tokens_to_ids : <code>Map.&lt;string, number&gt;</code></span></h3> <p data-svelte-h="svelte-186upcr">A mapping of tokens to ids.</p> <p data-svelte-h="svelte-t0dblq"><strong>Kind</strong>: instance property of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a></p> <hr> <a id="module_tokenizers.TokenizerModel+fuse_unk" class="group"></a> <h3 class="relative group"><a id="tokenizermodelfuseunk--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelfuseunk--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.fuse_unk : <code>boolean</code></span></h3> <p data-svelte-h="svelte-14z39rg">Whether to fuse unknown tokens when encoding. 
Defaults to false.</p> <p data-svelte-h="svelte-t0dblq"><strong>Kind</strong>: instance property of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a></p> <hr> <a id="module_tokenizers.TokenizerModel+_call" class="group"></a> <h3 class="relative group"><a id="tokenizermodelcalltokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelcalltokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel._call(tokens) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-72rkk4">Internal function to call the TokenizerModel instance.</p> <p data-svelte-h="svelte-ivoqgv"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a><br> <strong>Overrides</strong>: <a href="#Callable+_call"><code>_call</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - The encoded tokens.</p> <table data-svelte-h="svelte-aw190w"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>The tokens to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.TokenizerModel+encode" class="group"></a> <h3 class="relative group"><a id="tokenizermodelencodetokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelencodetokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.encode(tokens) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-n03492">Encodes a list of tokens into a list of encoded tokens. The result is still a list of strings; use <code>convert_tokens_to_ids</code> to map tokens to token IDs.</p> <p data-svelte-h="svelte-c8nyr7"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - The encoded tokens.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-73au6u"><li>Will throw an error if not implemented in a subclass.</li></ul> <table data-svelte-h="svelte-aw190w"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>The tokens to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.TokenizerModel+convert_tokens_to_ids" class="group"></a> <h3 class="relative group"><a id="tokenizermodelconverttokenstoidstokens--code-array--number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelconverttokenstoidstokens--code-array--number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.convert_tokens_to_ids(tokens) ⇒ &lt;code> 
Array. &lt; number > &lt;/code></span></h3> <p data-svelte-h="svelte-kt6n9f">Converts a list of tokens into a list of token IDs.</p> <p data-svelte-h="svelte-2fhfjm"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a><br> <strong>Returns</strong>: <code>Array.&lt;number&gt;</code> - The converted token IDs.</p> <table data-svelte-h="svelte-4i3luh"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>The tokens to convert.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.TokenizerModel+convert_ids_to_tokens" class="group"></a> <h3 class="relative group"><a id="tokenizermodelconvertidstotokensids--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelconvertidstotokensids--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.convert_ids_to_tokens(ids) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-1dytmg7">Converts a list of token IDs into a list of tokens.</p> <p data-svelte-h="svelte-kiokn"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - The converted tokens.</p> <table data-svelte-h="svelte-1fwo8eg"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>ids</td><td><code>Array&lt;number&gt;</code> | <code>Array&lt;bigint&gt;</code></td><td><p>The token IDs to convert.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.TokenizerModel.fromConfig" class="group"></a> <h3 class="relative group"><a id="tokenizermodelfromconfigconfig-args--code-tokenizermodel-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelfromconfigconfig-args--code-tokenizermodel-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TokenizerModel.fromConfig(config, ...args) ⇒ <code>TokenizerModel</code></span></h3> <p data-svelte-h="svelte-owdb7i">Instantiates a new TokenizerModel instance 
based on the configuration object provided.</p> <p data-svelte-h="svelte-ni7m8l"><strong>Kind</strong>: static method of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a><br> <strong>Returns</strong>: <code>TokenizerModel</code> - A new instance of a TokenizerModel.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1jzd58b"><li>Will throw an error if the TokenizerModel type in the config is not recognized.</li></ul> <table data-svelte-h="svelte-10vnea5"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the TokenizerModel.</p></td> </tr><tr><td>...args</td><td><code>*</code></td><td><p>Optional arguments to pass to the specific TokenizerModel constructor.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerspretrainedtokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspretrainedtokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>tokenizers.PreTrainedTokenizer</span></h2> <p data-svelte-h="svelte-wbz8zs"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-lojjgj"><li><a href="#module_tokenizers.PreTrainedTokenizer">.PreTrainedTokenizer</a><ul><li><a href="#new_module_tokenizers.PreTrainedTokenizer_new"><code>new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers.PreTrainedTokenizer+added_tokens"><code>.added_tokens</code></a> : <code>Array.&lt;AddedToken&gt;</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+remove_space"><code>.remove_space</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_call"><code>._call(text, options)</code></a> ⇒ <code>BatchEncoding</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_encode_text"><code>._encode_text(text)</code></a> ⇒ <code>Array&lt;string&gt;</code> | <code>null</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_tokenize_helper"><code>._tokenize_helper(text, options)</code></a> ⇒ <code>*</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+tokenize"><code>.tokenize(text, options)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+encode"><code>.encode(text, options)</code></a> ⇒ <code>Array.&lt;number&gt;</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+batch_decode"><code>.batch_decode(batch, decode_args)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode"><code>.decode(token_ids, [decode_args])</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode_single"><code>.decode_single(token_ids, decode_args)</code></a> ⇒ <code>string</code></li> <li><a 
href="#module_tokenizers.PreTrainedTokenizer+get_chat_template"><code>.get_chat_template(options)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+apply_chat_template"><code>.apply_chat_template(conversation, options)</code></a> ⇒ <code>string</code> | <a href="#Tensor"><code>Tensor</code></a> | <code>Array&lt;number&gt;</code> | <code>Array&lt;Array&lt;number&gt;&gt;</code> | <code>BatchEncoding</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers.PreTrainedTokenizer.from_pretrained"><code>.from_pretrained(pretrained_model_name_or_path, options)</code></a> ⇒ <code>Promise.&lt;PreTrainedTokenizer&gt;</code></li></ul></li></ul></li></ul> <hr> <a id="new_module_tokenizers.PreTrainedTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-pretrainedtokenizertokenizerjson-tokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-pretrainedtokenizertokenizerjson-tokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)</span></h3> <p data-svelte-h="svelte-1vkx5qp">Create a new 
PreTrainedTokenizer instance.</p> <table data-svelte-h="svelte-19pzyzr"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokenizerJSON</td><td><code>Object</code></td><td><p>The JSON of the tokenizer.</p></td> </tr><tr><td>tokenizerConfig</td><td><code>Object</code></td><td><p>The config of the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+added_tokens" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizeraddedtokens--code-array--addedtoken--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizeraddedtokens--code-array--addedtoken--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.added_tokens : &lt;code> Array. 
&lt; AddedToken > &lt;/code></span></h3> <p data-svelte-h="svelte-c201sq"><strong>Kind</strong>: instance property of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a></p> <hr> <a id="module_tokenizers.PreTrainedTokenizer+remove_space" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerremovespace--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerremovespace--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.remove_space : &lt;code> boolean &lt;/code></span></h3> <p data-svelte-h="svelte-1p46594">Whether or not to strip the text when tokenizing (removing excess spaces before and after the string).</p> <p data-svelte-h="svelte-c201sq"><strong>Kind</strong>: instance property of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a></p> <hr> <a id="module_tokenizers.PreTrainedTokenizer+_call" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizercalltext-options--code-batchencoding-code" class="header-link block pr-1.5 text-lg no-hover:hidden 
with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizercalltext-options--code-batchencoding-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer._call(text, options) ⇒ &lt;code> BatchEncoding &lt;/code></span></h3> <p data-svelte-h="svelte-1il2i3r">Encode/tokenize the given text(s).</p> <p data-svelte-h="svelte-17ot913"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>BatchEncoding</code> - Object to be passed to the model.</p> <table data-svelte-h="svelte-1tazz35"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code> | <code>Array&lt;string&gt;</code></td><td></td><td><p>The text to tokenize.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.text_pair]</td><td><code>string</code> | <code>Array&lt;string&gt;</code></td><td><code>null</code></td><td><p>Optional second sequence to be encoded. 
If set, must be the same type as text.</p></td> </tr><tr><td>[options.padding]</td><td><code>boolean</code> | <code>&#39;max_length&#39;</code></td><td><code>false</code></td><td><p>Whether to pad the input sequences.</p></td> </tr><tr><td>[options.add_special_tokens]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether or not to add the special tokens associated with the corresponding model.</p></td> </tr><tr><td>[options.truncation]</td><td><code>boolean</code></td><td><code></code></td><td><p>Whether to truncate the input sequences.</p></td> </tr><tr><td>[options.max_length]</td><td><code>number</code></td><td><code></code></td><td><p>Maximum length of the returned list and optionally padding length.</p></td> </tr><tr><td>[options.return_tensor]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether to return the results as Tensors or arrays.</p></td> </tr><tr><td>[options.return_token_type_ids]</td><td><code>boolean</code></td><td><code></code></td><td><p>Whether to return the token type ids.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+_encode_text" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerencodetexttext--code-array--string--code--code-null-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerencodetexttext--code-array--string--code--code-null-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 
79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer._encode_text(text) ⇒ &lt;code> Array &lt; string > &lt;/code> | &lt;code> null &lt;/code></span></h3> <p data-svelte-h="svelte-hojn9c">Encodes a single text using the preprocessor pipeline of the tokenizer.</p> <p data-svelte-h="svelte-18rc32p"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array&lt;string&gt;</code> | <code>null</code> - The encoded tokens.</p> <table data-svelte-h="svelte-x8hb9q"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code> | <code>null</code></td><td><p>The text to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+_tokenize_helper" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizertokenizehelpertext-options--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizertokenizehelpertext-options--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 
43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer._tokenize_helper(text, options) ⇒ &lt;code> * &lt;/code></span></h3> <p data-svelte-h="svelte-1rkj6gn">Internal helper function to tokenize a text, and optionally a pair of texts.</p> <p data-svelte-h="svelte-g6epud"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>*</code> - An object containing the tokens and optionally the token type IDs.</p> <table data-svelte-h="svelte-c4289e"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td></td><td><p>The text to tokenize.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.pair]</td><td><code>string</code></td><td><code>null</code></td><td><p>The optional second text to tokenize.</p></td> </tr><tr><td>[options.add_special_tokens]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether or not to add the special tokens associated with the corresponding model.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+tokenize" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizertokenizetext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizertokenizetext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" 
role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.tokenize(text, options) ⇒ &lt;code> Array. &lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-1tkmx0p">Converts a string into a sequence of tokens.</p> <p data-svelte-h="svelte-ukfz5o"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - The list of tokens.</p> <table data-svelte-h="svelte-azbat1"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td></td><td><p>The sequence to be encoded.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.pair]</td><td><code>string</code></td><td></td><td><p>A second sequence to be encoded with the first.</p></td> </tr><tr><td>[options.add_special_tokens]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether or not to add the special tokens associated with the corresponding model.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+encode" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerencodetext-options--code-array--number--code" 
class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerencodetext-options--code-array--number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.encode(text, options) ⇒ &lt;code> Array. 
&lt; number > &lt;/code></span></h3> <p data-svelte-h="svelte-sj7zk5">Encodes a single text or a pair of texts using the model’s tokenizer.</p> <p data-svelte-h="svelte-qcuvtk"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;number&gt;</code> - An array of token IDs representing the encoded text(s).</p> <table data-svelte-h="svelte-1gq42bo"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td></td><td><p>The text to encode.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.text_pair]</td><td><code>string</code></td><td><code>null</code></td><td><p>The optional second text to encode.</p></td> </tr><tr><td>[options.add_special_tokens]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether or not to add the special tokens associated with the corresponding model.</p></td> </tr><tr><td>[options.return_token_type_ids]</td><td><code>boolean</code></td><td><code></code></td><td><p>Whether to return token_type_ids.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+batch_decode" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerbatchdecodebatch-decodeargs--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerbatchdecodebatch-decodeargs--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 
0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.batch_decode(batch, decode_args) ⇒ &lt;code> Array. &lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-y8f29l">Decode a batch of tokenized sequences.</p> <p data-svelte-h="svelte-m72phd"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - List of decoded sequences.</p> <table data-svelte-h="svelte-mde7o5"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>batch</td><td><code>Array&lt;Array&lt;number&gt;&gt;</code> | <code><a href="#Tensor">Tensor</a></code></td><td><p>List/Tensor of tokenized input sequences.</p></td> </tr><tr><td>decode_args</td><td><code>Object</code></td><td><p>(Optional) Object with decoding arguments.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+decode" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerdecodetokenids-decodeargs--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerdecodetokenids-decodeargs--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" 
viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.decode(token_ids, [decode_args]) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-168jmgj">Decodes a sequence of token IDs back to a string.</p> <p data-svelte-h="svelte-1ferp3f"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1r89r1j"><li><code>Error</code> If <code>token_ids</code> is not a non-empty array of integers.</li></ul> <table data-svelte-h="svelte-nsz5oo"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>token_ids</td><td><code>Array&lt;number&gt;</code> | <code>Array&lt;bigint&gt;</code> | <code><a href="#Tensor">Tensor</a></code></td><td></td><td><p>List/Tensor of token IDs to decode.</p></td> </tr><tr><td>[decode_args]</td><td><code>Object</code></td><td><code>{}</code></td><td></td> </tr><tr><td>[decode_args.skip_special_tokens]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>If true, special tokens are removed from the output string.</p></td> </tr><tr><td>[decode_args.clean_up_tokenization_spaces]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>If true, spaces before punctuations and abbreviated forms are removed.</p></td></tr></tbody></table> <hr> <a 
id="module_tokenizers.PreTrainedTokenizer+decode_single" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerdecodesingletokenids-decodeargs--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerdecodesingletokenids-decodeargs--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.decode_single(token_ids, decode_args) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-19e6tws">Decode a single list of token ids to a string.</p> <p data-svelte-h="svelte-7zxcdh"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string</p> <table data-svelte-h="svelte-18jnxrz"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>token_ids</td><td><code>Array&lt;number&gt;</code> | <code>Array&lt;bigint&gt;</code></td><td></td><td><p>List of token ids to decode</p></td> </tr><tr><td>decode_args</td><td><code>Object</code></td><td></td><td><p>Optional arguments 
for decoding.</p></td> </tr><tr><td>[decode_args.skip_special_tokens]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to skip special tokens during decoding.</p></td> </tr><tr><td>[decode_args.clean_up_tokenization_spaces]</td><td><code>boolean</code></td><td><code></code></td><td><p>Whether to clean up tokenization spaces during decoding.
If null, the value is set to <code>this.decoder.cleanup</code> if it exists, falling back to <code>this.clean_up_tokenization_spaces</code> if it exists, falling back to <code>true</code>.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+get_chat_template" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizergetchattemplateoptions--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizergetchattemplateoptions--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.get_chat_template(options) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1hrpjri">Retrieve the chat template string used for tokenizing chat messages. This template is used
internally by the <code>apply_chat_template</code> method and can also be used externally to retrieve the model’s chat
template for better generation tracking.</p> <p data-svelte-h="svelte-1xojn6p"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The chat template string.</p> <table data-svelte-h="svelte-1tql3fk"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.chat_template]</td><td><code>string</code></td><td><code>null</code></td><td><p>A Jinja template or the name of a template to use for this conversion.
It is usually not necessary to pass anything to this argument,
as the model&#39;s template will be used by default.</p></td> </tr><tr><td>[options.tools]</td><td><code>Array.&lt;Object&gt;</code></td><td><code></code></td><td><p>A list of tools (callable functions) that will be accessible to the model. If the template does not
support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
giving the name, description and argument types for the tool. See our
<a href="https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use">chat templating guide</a>
for more information.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+apply_chat_template" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerapplychattemplateconversation-options--code-string-code--code-tensor-code--code-array--number--code--code-array--array--number---code--code-batchencoding-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerapplychattemplateconversation-options--code-string-code--code-tensor-code--code-array--number--code--code-array--array--number---code--code-batchencoding-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.apply_chat_template(conversation, options) ⇒ &lt;code> string &lt;/code> | &lt;code> Tensor &lt;/code> | &lt;code> Array &lt; number > &lt;/code> | &lt;code> Array &lt; Array &lt; number > > &lt;/code> | &lt;code> BatchEncoding &lt;/code></span></h3> <p data-svelte-h="svelte-qfc73q">Converts a list of message objects with <code>&quot;role&quot;</code> and <code>&quot;content&quot;</code> keys to a list of token
ids. This method is intended for use with chat models, and will read the tokenizer’s <code>chat_template</code> attribute to
determine the format and control tokens to use when converting.</p> <p data-svelte-h="svelte-aagj55">See <a href="https://huggingface.co/docs/transformers/chat_templating" rel="nofollow">here</a> for more information.</p> <p data-svelte-h="svelte-h5rmtt"><strong>Example:</strong> Applying a chat template to a conversation.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">AutoTokenizer</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">&quot;@huggingface/transformers&quot;</span>;
<span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(<span class="hljs-string">&quot;Xenova/mistral-tokenizer-v1&quot;</span>);
<span class="hljs-keyword">const</span> chat = [
{ <span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;Hello, how are you?&quot;</span> },
{ <span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;assistant&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;I&#x27;m doing great. How can I help you today?&quot;</span> },
{ <span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: <span class="hljs-string">&quot;I&#x27;d like to show off how chat templating works!&quot;</span> },
]
<span class="hljs-keyword">const</span> text = tokenizer.<span class="hljs-title function_">apply_chat_template</span>(chat, { <span class="hljs-attr">tokenize</span>: <span class="hljs-literal">false</span> });
<span class="hljs-comment">// &quot;&lt;s&gt;[INST] Hello, how are you? [/INST]I&#x27;m doing great. How can I help you today?&lt;/s&gt; [INST] I&#x27;d like to show off how chat templating works! [/INST]&quot;</span>
<span class="hljs-keyword">const</span> input_ids = tokenizer.<span class="hljs-title function_">apply_chat_template</span>(chat, { <span class="hljs-attr">tokenize</span>: <span class="hljs-literal">true</span>, <span class="hljs-attr">return_tensor</span>: <span class="hljs-literal">false</span>, <span class="hljs-attr">return_dict</span>: <span class="hljs-literal">false</span> });
<span class="hljs-comment">// [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-p0ysxs"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>string</code> | <a href="#Tensor"><code>Tensor</code></a> | <code>Array&lt;number&gt;</code> | <code>Array&lt;Array&lt;number&gt;&gt;</code> | <code>BatchEncoding</code> - The tokenized output.</p> <table data-svelte-h="svelte-1fytlyx"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>conversation</td><td><code>Array.&lt;Message&gt;</code></td><td></td><td><p>A list of message objects with <code>&quot;role&quot;</code> and <code>&quot;content&quot;</code> keys,
representing the chat history so far.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.chat_template]</td><td><code>string</code></td><td><code>null</code></td><td><p>A Jinja template to use for this conversion. If
this is not passed, the model&#39;s chat template will be used instead.</p></td> </tr><tr><td>[options.tools]</td><td><code>Array.&lt;Object&gt;</code></td><td><code></code></td><td><p>A list of tools (callable functions) that will be accessible to the model. If the template does not
support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
giving the name, description and argument types for the tool. See our
<a href="https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use">chat templating guide</a>
for more information.</p></td> </tr><tr><td>[options.documents]</td><td><code>*</code></td><td><code></code></td><td><p>A list of objects representing documents that will be accessible to the model if it is performing RAG
(retrieval-augmented generation). If the template does not support RAG, this argument will have no
effect. We recommend that each document should be an object containing &quot;title&quot; and &quot;text&quot; keys. Please
see the RAG section of the <a href="https://huggingface.co/docs/transformers/main/en/chat_templating#arguments-for-RAG">chat templating guide</a>
for examples of passing documents with chat templates.</p></td> </tr><tr><td>[options.add_generation_prompt]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to end the prompt with the token(s) that indicate
the start of an assistant message. This is useful when you want to generate a response from the model.
Note that this argument will be passed to the chat template, and so it must be supported in the
template for this argument to have any effect.</p></td> </tr><tr><td>[options.tokenize]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether to tokenize the output. If false, the output will be a string.</p></td> </tr><tr><td>[options.padding]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to pad sequences to the maximum length. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.truncation]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to truncate sequences to the maximum length. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.max_length]</td><td><code>number</code></td><td><code></code></td><td><p>Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false.
If not specified, the tokenizer&#39;s <code>max_length</code> attribute will be used as a default.</p></td> </tr><tr><td>[options.return_tensor]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.return_dict]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether to return a dictionary with named outputs. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.tokenizer_kwargs]</td><td><code>Object</code></td><td><code>{}</code></td><td><p>Additional options to pass to the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer.from_pretrained" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ &lt;code> Promise. &lt; PreTrainedTokenizer > &lt;/code></span></h3> <p data-svelte-h="svelte-1fjlsit">Loads a pre-trained tokenizer from the given <code>pretrained_model_name_or_path</code>.</p> <p data-svelte-h="svelte-3dl2f7"><strong>Kind</strong>: static method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Promise.&lt;PreTrainedTokenizer&gt;</code> - A new instance of the <code>PreTrainedTokenizer</code> class.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1ehlqi8"><li><code>Error</code> Throws an error if the tokenizer.json or tokenizer_config.json files are not found in the <code>pretrained_model_name_or_path</code>.</li></ul> <table data-svelte-h="svelte-i35q8n"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>pretrained_model_name_or_path</td><td><code>string</code></td><td><p>The path to the pre-trained tokenizer.</p></td> </tr><tr><td>options</td><td><code>PretrainedTokenizerOptions</code></td><td><p>Additional options for loading the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.BertTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersberttokenizer--code-pretrainedtokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersberttokenizer--code-pretrainedtokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 
1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.BertTokenizer ⇐ &lt;code> PreTrainedTokenizer &lt;/code></span></h2> <p data-svelte-h="svelte-y7f9tl">BertTokenizer is a class used to tokenize text for BERT models.</p> <p data-svelte-h="svelte-1g9fk6q"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTrainedTokenizer</code></p> <hr> <a id="module_tokenizers.AlbertTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersalberttokenizer--code-pretrainedtokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersalberttokenizer--code-pretrainedtokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.AlbertTokenizer ⇐ &lt;code> PreTrainedTokenizer 
&lt;/code></span></h2> <p data-svelte-h="svelte-1i5oqqh">Albert tokenizer</p> <p data-svelte-h="svelte-1g9fk6q"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTrainedTokenizer</code></p> <hr> <a id="module_tokenizers.NllbTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersnllbtokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnllbtokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.NllbTokenizer</span></h2> <p data-svelte-h="svelte-169llpf">The NllbTokenizer class is used to tokenize text for NLLB (“No Language Left Behind”) models.</p> <p data-svelte-h="svelte-5qkw4u">No Language Left Behind (NLLB) is a first-of-its-kind, AI breakthrough project
that open-sources models capable of delivering high-quality translations directly
between any pair of 200+ languages — including low-resource languages like Asturian,
Luganda, Urdu and more. It aims to help people communicate with anyone, anywhere,
regardless of their language preferences. For more information, check out their
<a href="https://arxiv.org/abs/2207.04672" rel="nofollow">paper</a>.</p> <p data-svelte-h="svelte-1bnq2ss">For a list of supported languages (along with their language codes), see the FLORES-200 language list linked below.</p> <p data-svelte-h="svelte-ixckw7"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>See</strong>: <a href="https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200" rel="nofollow">https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200</a></p> <hr> <a id="module_tokenizers.NllbTokenizer+_build_translation_inputs" class="group"></a> <h3 class="relative group"><a id="nllbtokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nllbtokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>nllbTokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ &lt;code> Object &lt;/code></span></h3> <p 
data-svelte-h="svelte-17djevv">Helper function to build translation inputs for an <code>NllbTokenizer</code>.</p> <p data-svelte-h="svelte-151azqd"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.NllbTokenizer"><code>NllbTokenizer</code></a><br> <strong>Returns</strong>: <code>Object</code> - Object to be passed to the model.</p> <table data-svelte-h="svelte-nmwt0y"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>raw_inputs</td><td><code>string</code> | <code>Array&lt;string&gt;</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>tokenizer_options</td><td><code>Object</code></td><td><p>Options to be sent to the tokenizer</p></td> </tr><tr><td>generate_kwargs</td><td><code>Object</code></td><td><p>Generation options.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.M2M100Tokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersm2m100tokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersm2m100tokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.M2M100Tokenizer</span></h2> <p 
data-svelte-h="svelte-ggx2qy">The M2M100Tokenizer class is used to tokenize text for M2M100 (“Many-to-Many”) models.</p> <p data-svelte-h="svelte-matuab">M2M100 is a multilingual encoder-decoder (seq-to-seq) model trained for Many-to-Many
multilingual translation. It was introduced in this <a href="https://arxiv.org/abs/2010.11125" rel="nofollow">paper</a>
and first released in <a href="https://github.com/pytorch/fairseq/tree/master/examples/m2m_100" rel="nofollow">this</a> repository.</p> <p data-svelte-h="svelte-1bnq2ss">For a list of supported languages (along with their language codes), see the model card linked below.</p> <p data-svelte-h="svelte-17rdxyz"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>See</strong>: <a href="https://huggingface.co/facebook/m2m100_418M#languages-covered" rel="nofollow">https://huggingface.co/facebook/m2m100_418M#languages-covered</a></p> <hr> <a id="module_tokenizers.M2M100Tokenizer+_build_translation_inputs" class="group"></a> <h3 class="relative group"><a id="m2m100tokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#m2m100tokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>m2M100Tokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ &lt;code> Object &lt;/code></span></h3> <p 
data-svelte-h="svelte-rixacq">Helper function to build translation inputs for an <code>M2M100Tokenizer</code>.</p> <p data-svelte-h="svelte-nwf7ij"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.M2M100Tokenizer"><code>M2M100Tokenizer</code></a><br> <strong>Returns</strong>: <code>Object</code> - Object to be passed to the model.</p> <table data-svelte-h="svelte-nmwt0y"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>raw_inputs</td><td><code>string</code> | <code>Array&lt;string&gt;</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>tokenizer_options</td><td><code>Object</code></td><td><p>Options to be sent to the tokenizer</p></td> </tr><tr><td>generate_kwargs</td><td><code>Object</code></td><td><p>Generation options.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.WhisperTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerswhispertokenizer--code-pretrainedtokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswhispertokenizer--code-pretrainedtokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>tokenizers.WhisperTokenizer ⇐ &lt;code> PreTrainedTokenizer &lt;/code></span></h2> <p data-svelte-h="svelte-yc5dgi">WhisperTokenizer tokenizer</p> <p data-svelte-h="svelte-1g9fk6q"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTrainedTokenizer</code></p> <ul data-svelte-h="svelte-6hj9gd"><li><a href="#module_tokenizers.WhisperTokenizer">.WhisperTokenizer</a><code>PreTrainedTokenizer</code><ul><li><a href="#module_tokenizers.WhisperTokenizer+_decode_asr"><code>._decode_asr(sequences, options)</code></a><code>*</code></li> <li><a href="#module_tokenizers.WhisperTokenizer+decode"><code>.decode()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="module_tokenizers.WhisperTokenizer+_decode_asr" class="group"></a> <h3 class="relative group"><a id="whispertokenizerdecodeasrsequences-options--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#whispertokenizerdecodeasrsequences-options--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>whisperTokenizer._decode_asr(sequences, options) ⇒ &lt;code> * 
&lt;/code></span></h3> <p data-svelte-h="svelte-1ldfc4">Decodes automatic speech recognition (ASR) sequences.</p> <p data-svelte-h="svelte-1dbusbf"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.WhisperTokenizer"><code>WhisperTokenizer</code></a><br> <strong>Returns</strong>: <code>*</code> - The decoded sequences.</p> <table data-svelte-h="svelte-16is2zf"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>sequences</td><td><code>*</code></td><td><p>The sequences to decode.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td><p>The options to use for decoding.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.WhisperTokenizer+decode" class="group"></a> <h3 class="relative group"><a id="whispertokenizerdecode--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#whispertokenizerdecode--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>whisperTokenizer.decode() : &lt;code> * &lt;/code></span></h3> <p data-svelte-h="svelte-1d8g5ae"><strong>Kind</strong>: instance method of <a 
href="#module_tokenizers.WhisperTokenizer"><code>WhisperTokenizer</code></a></p> <hr> <a id="module_tokenizers.MarianTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersmariantokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersmariantokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.MarianTokenizer</span></h2> <p data-svelte-h="svelte-18k70d9"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Todo</strong></p> <ul data-svelte-h="svelte-1y8rly"><li>This model is not yet supported by Hugging Face’s “fast” tokenizers library (<a href="https://github.com/huggingface/tokenizers" rel="nofollow">https://github.com/huggingface/tokenizers</a>).
Therefore, this implementation (which is based on fast tokenizers) may produce slightly inaccurate results.</li></ul> <ul data-svelte-h="svelte-1ynv7b0"><li><a href="#module_tokenizers.MarianTokenizer">.MarianTokenizer</a><ul><li><a href="#new_module_tokenizers.MarianTokenizer_new"><code>new MarianTokenizer(tokenizerJSON, tokenizerConfig)</code></a></li> <li><a href="#module_tokenizers.MarianTokenizer+_encode_text"><code>._encode_text(text)</code></a><code>Array</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers.MarianTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-mariantokenizertokenizerjson-tokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-mariantokenizertokenizerjson-tokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new MarianTokenizer(tokenizerJSON, tokenizerConfig)</span></h3> <p data-svelte-h="svelte-12gr81">Create a new MarianTokenizer instance.</p> <table data-svelte-h="svelte-19pzyzr"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokenizerJSON</td><td><code>Object</code></td><td><p>The JSON of the 
tokenizer.</p></td> </tr><tr><td>tokenizerConfig</td><td><code>Object</code></td><td><p>The config of the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.MarianTokenizer+_encode_text" class="group"></a> <h3 class="relative group"><a id="mariantokenizerencodetexttext--code-array-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#mariantokenizerencodetexttext--code-array-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>marianTokenizer._encode_text(text) ⇒ &lt;code> Array &lt;/code></span></h3> <p data-svelte-h="svelte-12lkjoz">Encodes a single text. Overriding this method is necessary since the language codes
must be removed before encoding with sentencepiece model.</p> <p data-svelte-h="svelte-zlwn3v"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.MarianTokenizer"><code>MarianTokenizer</code></a><br> <strong>Returns</strong>: <code>Array</code> - The encoded tokens.<br> <strong>See</strong>: <a href="https://github.com/huggingface/transformers/blob/12d51db243a00726a548a43cc333390ebae731e3/src/transformers/models/marian/tokenization_marian.py#L204-L213" rel="nofollow">https://github.com/huggingface/transformers/blob/12d51db243a00726a548a43cc333390ebae731e3/src/transformers/models/marian/tokenization_marian.py#L204-L213</a></p> <table data-svelte-h="svelte-x8hb9q"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code> | <code>null</code></td><td><p>The text to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.AutoTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersautotokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersautotokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>tokenizers.AutoTokenizer</span></h2> <p data-svelte-h="svelte-1hb1bdc">Helper class which is used to instantiate pretrained tokenizers with the <code>from_pretrained</code> function.
The chosen tokenizer class is determined by the type specified in the tokenizer config.</p> <p data-svelte-h="svelte-wbz8zs"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-m4wzai"><li><a href="#module_tokenizers.AutoTokenizer">.AutoTokenizer</a><ul><li><a href="#new_module_tokenizers.AutoTokenizer_new"><code>new AutoTokenizer()</code></a></li> <li><a href="#module_tokenizers.AutoTokenizer.from_pretrained"><code>.from_pretrained(pretrained_model_name_or_path, options)</code></a><code>Promise.&lt;PreTrainedTokenizer&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers.AutoTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-autotokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-autotokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new AutoTokenizer()</span></h3> <p data-svelte-h="svelte-1aswmub"><strong>Example</strong></p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer 
focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(<span class="hljs-string">&#x27;Xenova/bert-base-uncased&#x27;</span>);<!-- HTML_TAG_END --></pre></div> <hr> <a id="module_tokenizers.AutoTokenizer.from_pretrained" class="group"></a> <h3 class="relative group"><a id="autotokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#autotokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" 
height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ <code>Promise.&lt;PreTrainedTokenizer&gt;</code></span></h3> <p data-svelte-h="svelte-s7y3dm">Instantiate one of the tokenizer classes of the library from a pretrained model.</p> <p data-svelte-h="svelte-weoe0h">The tokenizer class to instantiate is selected based on the <code>tokenizer_class</code> property of the config object
(either passed as an argument or loaded from <code>pretrained_model_name_or_path</code> if possible).</p> <p data-svelte-h="svelte-13ehx3q"><strong>Kind</strong>: static method of <a href="#module_tokenizers.AutoTokenizer"><code>AutoTokenizer</code></a><br> <strong>Returns</strong>: <code>Promise.&lt;PreTrainedTokenizer&gt;</code> - A new instance of the PreTrainedTokenizer class.</p> <table data-svelte-h="svelte-1eafp23"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>pretrained_model_name_or_path</td><td><code>string</code></td><td><p>The name or path of the pretrained model. Can be either:</p> <ul><li>A string, the <em>model id</em> of a pretrained tokenizer hosted inside a model repo on huggingface.co.
Valid model ids can be located at the root-level, like <code>bert-base-uncased</code>, or namespaced under a
user or organization name, like <code>dbmdz/bert-base-german-cased</code>.</li> <li>A path to a <em>directory</em> containing tokenizer files, e.g., <code>./my_model_directory/</code>.</li></ul></td> </tr><tr><td>options</td><td><code>PretrainedTokenizerOptions</code></td><td><p>Additional options for loading the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.is_chinese_char" class="group"></a> <h2 class="relative group"><a id="tokenizersischinesecharcp--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersischinesecharcp--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.is_chinese_char(cp) ⇒ <code>boolean</code></span></h2> <p data-svelte-h="svelte-arkn8x">Checks whether the given Unicode codepoint represents a CJK (Chinese, Japanese, or Korean) character.</p> <p data-svelte-h="svelte-1crm30w">A “Chinese character” is defined as anything in the CJK Unicode block:
<a href="https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)" rel="nofollow">https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)</a></p> <p data-svelte-h="svelte-1vcx9k0">Note that the CJK Unicode block is NOT all Japanese and Korean characters, despite its name.
The modern Korean Hangul alphabet is a different block, as are Japanese Hiragana and Katakana.
Those alphabets are used to write space-separated words, so they are not treated specially
and are handled like all other languages.</p> <p data-svelte-h="svelte-10qiwse"><strong>Kind</strong>: static method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>boolean</code> - True if the codepoint represents a CJK character, false otherwise.</p> <table data-svelte-h="svelte-oxo98v"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>cp</td><td><code>number</code> | <code>bigint</code></td><td><p>The Unicode codepoint to check.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..AddedToken" class="group"></a> <h2 class="relative group"><a id="tokenizersaddedtoken" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersaddedtoken"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~AddedToken</span></h2> <p data-svelte-h="svelte-bxhodd">Represent a token added by the user on top of the existing Model vocabulary.
AddedToken can be configured to specify the behavior they should have in various situations like:</p> <ul data-svelte-h="svelte-uthgcg"><li>Whether they should only match single words</li> <li>Whether to include any whitespace on its left or right</li></ul> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="new_module_tokenizers..AddedToken_new" class="group"></a> <h3 class="relative group"><a id="new-addedtokenconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-addedtokenconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new AddedToken(config)</span></h3> <p data-svelte-h="svelte-1jn9qcj">Creates a new instance of AddedToken.</p> <table data-svelte-h="svelte-lcj2ea"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td></td><td><p>Added token configuration object.</p></td> </tr><tr><td>config.content</td><td><code>string</code></td><td></td><td><p>The content of the added token.</p></td> 
</tr><tr><td>config.id</td><td><code>number</code></td><td></td><td><p>The id of the added token.</p></td> </tr><tr><td>[config.single_word]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether this token must be a single word or can break words.</p></td> </tr><tr><td>[config.lstrip]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether this token should strip whitespaces on its left.</p></td> </tr><tr><td>[config.rstrip]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether this token should strip whitespaces on its right.</p></td> </tr><tr><td>[config.normalized]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether this token should be normalized.</p></td> </tr><tr><td>[config.special]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether this token is special.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WordPieceTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerswordpiecetokenizer--code-tokenizermodel-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswordpiecetokenizer--code-tokenizermodel-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~WordPieceTokenizer ⇐ <code>TokenizerModel</code></span></h2> <p data-svelte-h="svelte-os971x">A subclass of TokenizerModel that uses WordPiece encoding to encode tokens.</p> <p data-svelte-h="svelte-66xmtz"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>TokenizerModel</code></p> <ul data-svelte-h="svelte-j7wu8g"><li><a href="#module_tokenizers..WordPieceTokenizer">~WordPieceTokenizer</a> ⇐ <code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..WordPieceTokenizer_new"><code>new WordPieceTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..WordPieceTokenizer+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.&lt;string, number&gt;</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+unk_token_id"><code>.unk_token_id</code></a> : <code>number</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+unk_token"><code>.unk_token</code></a> : <code>string</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+max_input_chars_per_word"><code>.max_input_chars_per_word</code></a> : <code>number</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+vocab"><code>.vocab</code></a> : <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..WordPieceTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-wordpiecetokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-wordpiecetokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" 
aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new WordPieceTokenizer(config)</span></h3> <table data-svelte-h="svelte-mywrud"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td></td><td><p>The configuration object.</p></td> </tr><tr><td>config.vocab</td><td><code>Object</code></td><td></td><td><p>A mapping of tokens to ids.</p></td> </tr><tr><td>config.unk_token</td><td><code>string</code></td><td></td><td><p>The unknown token string.</p></td> </tr><tr><td>config.continuing_subword_prefix</td><td><code>string</code></td><td></td><td><p>The prefix to use for continuing subwords.</p></td> </tr><tr><td>[config.max_input_chars_per_word]</td><td><code>number</code></td><td><code>100</code></td><td><p>The maximum number of characters per word.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WordPieceTokenizer+tokens_to_ids" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizertokenstoids--code-map--string-number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizertokenstoids--code-map--string-number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.tokens_to_ids : <code>Map.&lt;string, number&gt;</code></span></h3> <p data-svelte-h="svelte-186upcr">A mapping of tokens to ids.</p> <p data-svelte-h="svelte-hi5a3m"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a></p> <hr> <a id="module_tokenizers..WordPieceTokenizer+unk_token_id" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizerunktokenid--code-number-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizerunktokenid--code-number-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 
0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.unk_token_id : <code>number</code></span></h3> <p data-svelte-h="svelte-16n6zoj">The id of the unknown token.</p> <p data-svelte-h="svelte-hi5a3m"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a></p> <hr> <a id="module_tokenizers..WordPieceTokenizer+unk_token" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizerunktoken--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizerunktoken--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.unk_token : <code>string</code></span></h3> <p data-svelte-h="svelte-1wc0kv1">The unknown token string.</p> <p data-svelte-h="svelte-hi5a3m"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a></p> <hr> <a 
id="module_tokenizers..WordPieceTokenizer+max_input_chars_per_word" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizermaxinputcharsperword--code-number-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizermaxinputcharsperword--code-number-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.max_input_chars_per_word : <code>number</code></span></h3> <p data-svelte-h="svelte-1tq6mkg">The maximum number of characters allowed per word.</p> <p data-svelte-h="svelte-hi5a3m"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a></p> <hr> <a id="module_tokenizers..WordPieceTokenizer+vocab" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizervocab--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizervocab--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.vocab : <code>Array.&lt;string&gt;</code></span></h3> <p data-svelte-h="svelte-ps7hhj">An array of tokens.</p> <p data-svelte-h="svelte-hi5a3m"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a></p> <hr> <a id="module_tokenizers..WordPieceTokenizer+encode" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizerencodetokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizerencodetokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 
28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.encode(tokens) ⇒ <code>Array.&lt;string&gt;</code></span></h3> <p data-svelte-h="svelte-hvbi1z">Encodes an array of tokens using WordPiece encoding.</p> <p data-svelte-h="svelte-1exk1td"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of encoded tokens.</p> <table data-svelte-h="svelte-aw190w"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>The tokens to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Unigram" class="group"></a> <h2 class="relative group"><a id="tokenizersunigram--code-tokenizermodel-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersunigram--code-tokenizermodel-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>tokenizers~Unigram ⇐ <code>TokenizerModel</code></span></h2> <p data-svelte-h="svelte-168cdr8">Class representing a Unigram tokenizer model.</p> <p data-svelte-h="svelte-66xmtz"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>TokenizerModel</code></p> <ul data-svelte-h="svelte-cjspr2"><li><a href="#module_tokenizers..Unigram">~Unigram</a> ⇐ <code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..Unigram_new"><code>new Unigram(config, moreConfig)</code></a></li> <li><a href="#module_tokenizers..Unigram+populateNodes"><code>.populateNodes(lattice)</code></a></li> <li><a href="#module_tokenizers..Unigram+tokenize"><code>.tokenize(normalized)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers..Unigram+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..Unigram_new" class="group"></a> <h3 class="relative group"><a id="new-unigramconfig-moreconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-unigramconfig-moreconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new Unigram(config, moreConfig)</span></h3> <p data-svelte-h="svelte-k7r2g2">Create a new Unigram tokenizer model.</p> <table data-svelte-h="svelte-1t06ejl"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the Unigram model.</p></td> </tr><tr><td>config.unk_id</td><td><code>number</code></td><td><p>The ID of the unknown token</p></td> </tr><tr><td>config.vocab</td><td><code>Array.&lt;Array&lt;any&gt;&gt;</code></td><td><p>A 2D array representing a mapping of tokens to scores.</p></td> </tr><tr><td>moreConfig</td><td><code>Object</code></td><td><p>Additional configuration object for the Unigram model.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Unigram+populateNodes" class="group"></a> <h3 class="relative group"><a id="unigrampopulatenodeslattice" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#unigrampopulatenodeslattice"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>unigram.populateNodes(lattice)</span></h3> <p data-svelte-h="svelte-13ypkqy">Populates lattice nodes.</p> <p data-svelte-h="svelte-1wapgb2"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Unigram"><code>Unigram</code></a></p> <table data-svelte-h="svelte-1djdg5g"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>lattice</td><td><code>TokenLattice</code></td><td><p>The token lattice to populate with nodes.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Unigram+tokenize" class="group"></a> <h3 class="relative group"><a id="unigramtokenizenormalized--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#unigramtokenizenormalized--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>unigram.tokenize(normalized) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-s5rlnu">Tokenizes a normalized string into an array of subtokens using the unigram model.</p> <p data-svelte-h="svelte-1txbxlz"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Unigram"><code>Unigram</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of subtokens obtained by tokenizing the normalized string using the unigram model.</p> <table data-svelte-h="svelte-14yooq"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>normalized</td><td><code>string</code></td><td><p>The normalized string.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Unigram+encode" class="group"></a> <h3 class="relative group"><a id="unigramencodetokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#unigramencodetokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>unigram.encode(tokens) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-1igffgy">Encodes an array of tokens using Unigram encoding.</p> <p data-svelte-h="svelte-onyfvx"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Unigram"><code>Unigram</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of encoded tokens.</p> <table data-svelte-h="svelte-aw190w"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>The tokens to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BPE" class="group"></a> <h2 class="relative group"><a id="tokenizersbpe--code-tokenizermodel-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbpe--code-tokenizermodel-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BPE ⇐ &lt;code> TokenizerModel &lt;/code></span></h2> <p data-svelte-h="svelte-1ki6zy5">BPE class for encoding text into Byte-Pair-Encoding (BPE) tokens.</p> <p data-svelte-h="svelte-66xmtz"><strong>Kind</strong>: inner class of <a 
href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>TokenizerModel</code></p> <ul data-svelte-h="svelte-tgr5re"><li><a href="#module_tokenizers..BPE">~BPE</a><code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..BPE_new"><code>new BPE(config)</code></a></li> <li><a href="#module_tokenizers..BPE+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.&lt;string, number&gt;</code></li> <li><a href="#module_tokenizers..BPE+merges"><code>.merges</code></a> : <code>*</code><ul><li><a href="#module_tokenizers..BPE+merges.config.merges"><code>.config.merges</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..BPE+cache"><code>.cache</code></a> : <code>Map.&lt;string, Array&lt;string&gt;&gt;</code></li> <li><a href="#module_tokenizers..BPE+bpe"><code>.bpe(token)</code></a><code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers..BPE+encode"><code>.encode(tokens)</code></a><code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..BPE_new" class="group"></a> <h3 class="relative group"><a id="new-bpeconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-bpeconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new BPE(config)</span></h3> <p data-svelte-h="svelte-1hluawr">Create a BPE instance.</p> <table data-svelte-h="svelte-1pjqs45"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td></td><td><p>The configuration object for BPE.</p></td> </tr><tr><td>config.vocab</td><td><code>Object</code></td><td></td><td><p>A mapping of tokens to ids.</p></td> </tr><tr><td>config.merges</td><td><code>*</code></td><td></td><td><p>An array of BPE merges as strings.</p></td> </tr><tr><td>config.unk_token</td><td><code>string</code></td><td></td><td><p>The unknown token used for out of vocabulary words.</p></td> </tr><tr><td>config.end_of_word_suffix</td><td><code>string</code></td><td></td><td><p>The suffix to place at the end of each word.</p></td> </tr><tr><td>[config.continuing_subword_suffix]</td><td><code>string</code></td><td></td><td><p>The suffix to insert between words.</p></td> </tr><tr><td>[config.byte_fallback]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to use spm byte-fallback trick (defaults to False)</p></td> </tr><tr><td>[config.ignore_merges]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether or not to match tokens with the vocab before using merges.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BPE+tokens_to_ids" class="group"></a> <h3 class="relative group"><a id="bpetokenstoids--code-map--string-number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bpetokenstoids--code-map--string-number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bpE.tokens_to_ids : &lt;code> Map. &lt; string, number > &lt;/code></span></h3> <p data-svelte-h="svelte-vbft5q"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..BPE"><code>BPE</code></a></p> <hr> <a id="module_tokenizers..BPE+merges" class="group"></a> <h3 class="relative group"><a id="bpemerges--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bpemerges--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bpE.merges : &lt;code> * &lt;/code></span></h3> <p 
data-svelte-h="svelte-vbft5q"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..BPE"><code>BPE</code></a></p> <hr> <a id="module_tokenizers..BPE+merges.config.merges" class="group"></a> <h4 class="relative group"><a id="mergesconfigmerges--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#mergesconfigmerges--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>merges.config.merges : &lt;code> * &lt;/code></span></h4> <p data-svelte-h="svelte-1nha7op"><strong>Kind</strong>: static property of <a href="#module_tokenizers..BPE+merges"><code>merges</code></a></p> <hr> <a id="module_tokenizers..BPE+cache" class="group"></a> <h3 class="relative group"><a id="bpecache--code-map--string-array--string---code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bpecache--code-map--string-array--string---code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bpE.cache : &lt;code> Map. &lt; string, Array &lt; string > > &lt;/code></span></h3> <p data-svelte-h="svelte-vbft5q"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..BPE"><code>BPE</code></a></p> <hr> <a id="module_tokenizers..BPE+bpe" class="group"></a> <h3 class="relative group"><a id="bpebpetoken--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bpebpetoken--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bpE.bpe(token) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-a8vvsp">Apply Byte-Pair-Encoding (BPE) to a given token. Efficient heap-based priority
queue implementation adapted from <a href="https://github.com/belladoreai/llama-tokenizer-js" rel="nofollow">https://github.com/belladoreai/llama-tokenizer-js</a>.</p> <p data-svelte-h="svelte-vghqp2"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BPE"><code>BPE</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - The BPE encoded tokens.</p> <table data-svelte-h="svelte-ef47i8"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>token</td><td><code>string</code></td><td><p>The token to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BPE+encode" class="group"></a> <h3 class="relative group"><a id="bpeencodetokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bpeencodetokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bpE.encode(tokens) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-1gsp3x1">Encodes the input sequence of tokens using the BPE algorithm and returns the resulting subword tokens.</p> <p data-svelte-h="svelte-1xk2nb1"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BPE"><code>BPE</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - The resulting subword tokens after applying the BPE algorithm to the input sequence of tokens.</p> <table data-svelte-h="svelte-170f8q"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>The input sequence of tokens to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..LegacyTokenizerModel" class="group"></a> <h2 class="relative group"><a id="tokenizerslegacytokenizermodel" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerslegacytokenizermodel"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~LegacyTokenizerModel</span></h2> <p data-svelte-h="svelte-aoayky">Legacy tokenizer class for tokenizers with only a vocabulary.</p> <p 
data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-1wfo8qw"><li><a href="#module_tokenizers..LegacyTokenizerModel">~LegacyTokenizerModel</a><ul><li><a href="#new_module_tokenizers..LegacyTokenizerModel_new"><code>new LegacyTokenizerModel(config, moreConfig)</code></a></li> <li><a href="#module_tokenizers..LegacyTokenizerModel+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.&lt;string, number&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..LegacyTokenizerModel_new" class="group"></a> <h3 class="relative group"><a id="new-legacytokenizermodelconfig-moreconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-legacytokenizermodelconfig-moreconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new LegacyTokenizerModel(config, moreConfig)</span></h3> <p data-svelte-h="svelte-190m3yr">Create a LegacyTokenizerModel instance.</p> <table data-svelte-h="svelte-6s4tsa"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> 
<tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for LegacyTokenizerModel.</p></td> </tr><tr><td>config.vocab</td><td><code>Object</code></td><td><p>A (possibly nested) mapping of tokens to ids.</p></td> </tr><tr><td>moreConfig</td><td><code>Object</code></td><td><p>Additional configuration object for the LegacyTokenizerModel model.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..LegacyTokenizerModel+tokens_to_ids" class="group"></a> <h3 class="relative group"><a id="legacytokenizermodeltokenstoids--code-map--string-number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#legacytokenizermodeltokenstoids--code-map--string-number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>legacyTokenizerModel.tokens_to_ids : &lt;code> Map. 
&lt; string, number > &lt;/code></span></h3> <p data-svelte-h="svelte-w0hzr2"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..LegacyTokenizerModel"><code>LegacyTokenizerModel</code></a></p> <hr> <a id="module_tokenizers..Normalizer" class="group"></a> <h2 class="relative group"><a id="tokenizersnormalizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnormalizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Normalizer</span></h2> <p data-svelte-h="svelte-10jww09">A base class for text normalization.</p> <p data-svelte-h="svelte-jwwxx4"><strong>Kind</strong>: inner abstract class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-130c6f2"><li><em><a href="#module_tokenizers..Normalizer">~Normalizer</a></em><ul><li><em><a href="#new_module_tokenizers..Normalizer_new"><code>new Normalizer(config)</code></a></em></li> <li><em>instance</em><ul><li><strong><a href="#module_tokenizers..Normalizer+normalize"><code>.normalize(text)</code></a><code>string</code></strong></li> <li><em><a 
href="#module_tokenizers..Normalizer+_call"><code>._call(text)</code></a><code>string</code></em></li></ul></li> <li><em>static</em><ul><li><em><a href="#module_tokenizers..Normalizer.fromConfig"><code>.fromConfig(config)</code></a><code>Normalizer</code></em></li></ul></li></ul></li></ul> <hr> <a id="new_module_tokenizers..Normalizer_new" class="group"></a> <h3 class="relative group"><a id="new-normalizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-normalizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new Normalizer(config)</span></h3> <table data-svelte-h="svelte-tpsyfn"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the normalizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Normalizer+normalize" class="group"></a> <h3 class="relative group"><a id="normalizernormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#normalizernormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>normalizer.normalize(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-ax8bnv">Normalize the input text.</p> <p data-svelte-h="svelte-lf79s"><strong>Kind</strong>: instance abstract method of <a href="#module_tokenizers..Normalizer"><code>Normalizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1ceb94n"><li><code>Error</code> If this method is not implemented in a subclass.</li></ul> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Normalizer+_call" class="group"></a> <h3 class="relative group"><a id="normalizercalltext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#normalizercalltext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>normalizer._call(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-13yzawo">Alias for <a href="#module_tokenizers..Normalizer+normalize">Normalizer#normalize</a>.</p> <p data-svelte-h="svelte-hak1rq"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Normalizer"><code>Normalizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Normalizer.fromConfig" class="group"></a> <h3 class="relative group"><a id="normalizerfromconfigconfig--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#normalizerfromconfigconfig--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 
0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Normalizer.fromConfig(config) ⇒ &lt;code> Normalizer &lt;/code></span></h3> <p data-svelte-h="svelte-1drayrb">Factory method for creating normalizers from config objects.</p> <p data-svelte-h="svelte-1v9m58b"><strong>Kind</strong>: static method of <a href="#module_tokenizers..Normalizer"><code>Normalizer</code></a><br> <strong>Returns</strong>: <code>Normalizer</code> - A Normalizer object.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-h8agyt"><li><code>Error</code> If an unknown Normalizer type is specified in the config.</li></ul> <table data-svelte-h="svelte-tpsyfn"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the normalizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Replace" class="group"></a> <h2 class="relative group"><a id="tokenizersreplace--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersreplace--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 
84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Replace ⇐ &lt;code> Normalizer &lt;/code></span></h2> <p data-svelte-h="svelte-mx5gat">Replace normalizer that replaces occurrences of a pattern (given as a string or regular expression) with the configured content.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..Replace+normalize" class="group"></a> <h3 class="relative group"><a id="replacenormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#replacenormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>replace.normalize(text) ⇒ &lt;code> string 
&lt;/code></span></h3> <p data-svelte-h="svelte-z30qdq">Normalize the input text by replacing the pattern with the content.</p> <p data-svelte-h="svelte-6bh9xi"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Replace"><code>Replace</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text after replacing the pattern with the content.</p> <table data-svelte-h="svelte-ci8if4"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The input text to be normalized.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..NFC" class="group"></a> <h2 class="relative group"><a id="tokenizersnfc--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnfc--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~NFC ⇐ &lt;code> Normalizer &lt;/code></span></h2> <p data-svelte-h="svelte-v0gnhd">A normalizer that applies Unicode normalization form C (NFC) to the input text.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a 
href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..NFC+normalize" class="group"></a> <h3 class="relative group"><a id="nfcnormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nfcnormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>nfC.normalize(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1wx1017">Normalize the input text by applying Unicode normalization form C (NFC).</p> <p data-svelte-h="svelte-1ysyaya"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..NFC"><code>NFC</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-ci8if4"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The input text to be normalized.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..NFKC" class="group"></a> <h2 class="relative group"><a id="tokenizersnfkc--code-normalizer-code" class="header-link 
block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnfkc--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~NFKC ⇐ &lt;code> Normalizer &lt;/code></span></h2> <p data-svelte-h="svelte-rjq8j">NFKC Normalizer.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..NFKC+normalize" class="group"></a> <h3 class="relative group"><a id="nfkcnormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nfkcnormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 
28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>nfkC.normalize(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1e9f3f3">Normalize text using NFKC normalization.</p> <p data-svelte-h="svelte-1xa1tv2"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..NFKC"><code>NFKC</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1n56lec"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to be normalized.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..NFKD" class="group"></a> <h2 class="relative group"><a id="tokenizersnfkd--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnfkd--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 
11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~NFKD ⇐ &lt;code> Normalizer &lt;/code></span></h2> <p data-svelte-h="svelte-1k1jv4k">NFKD Normalizer.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..NFKD+normalize" class="group"></a> <h3 class="relative group"><a id="nfkdnormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nfkdnormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>nfkD.normalize(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-ezhhkq">Normalize text using NFKD normalization.</p> <p data-svelte-h="svelte-11z17sm"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..NFKD"><code>NFKD</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1n56lec"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> 
<tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to be normalized.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..StripNormalizer" class="group"></a> <h2 class="relative group"><a id="tokenizersstripnormalizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersstripnormalizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~StripNormalizer</span></h2> <p data-svelte-h="svelte-4eye56">A normalizer that strips leading and/or trailing whitespace from the input text.</p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="module_tokenizers..StripNormalizer+normalize" class="group"></a> <h3 class="relative group"><a id="stripnormalizernormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stripnormalizernormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" 
aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>stripNormalizer.normalize(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-vhri9a">Strip leading and/or trailing whitespace from the input text.</p> <p data-svelte-h="svelte-zw21ea"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..StripNormalizer"><code>StripNormalizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-u57eej"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The input text.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..StripAccents" class="group"></a> <h2 class="relative group"><a id="tokenizersstripaccents--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersstripaccents--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 
11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~StripAccents ⇐ &lt;code> Normalizer &lt;/code></span></h2> <p data-svelte-h="svelte-1laj15h">StripAccents normalizer removes all accents from the text.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..StripAccents+normalize" class="group"></a> <h3 class="relative group"><a id="stripaccentsnormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stripaccentsnormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>stripAccents.normalize(text) ⇒ &lt;code> string 
&lt;/code></span></h3> <p data-svelte-h="svelte-1j27yzo">Remove all accents from the text.</p> <p data-svelte-h="svelte-13bo68j"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..StripAccents"><code>StripAccents</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text without accents.</p> <table data-svelte-h="svelte-u57eej"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The input text.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Lowercase" class="group"></a> <h2 class="relative group"><a id="tokenizerslowercase--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerslowercase--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Lowercase ⇐ &lt;code> Normalizer &lt;/code></span></h2> <p data-svelte-h="svelte-1njfgof">A Normalizer that lowercases the input string.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: 
<code>Normalizer</code></p> <hr> <a id="module_tokenizers..Lowercase+normalize" class="group"></a> <h3 class="relative group"><a id="lowercasenormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lowercasenormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>lowercase.normalize(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1d19cn0">Lowercases the input string.</p> <p data-svelte-h="svelte-1h5axm"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Lowercase"><code>Lowercase</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Prepend" class="group"></a> <h2 class="relative group"><a id="tokenizersprepend--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 
with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersprepend--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Prepend ⇐ &lt;code> Normalizer &lt;/code></span></h2> <p data-svelte-h="svelte-sliawd">A Normalizer that prepends a string to the input string.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..Prepend+normalize" class="group"></a> <h3 class="relative group"><a id="prependnormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prependnormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 
1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>prepend.normalize(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1elztq5">Prepends the configured string to the input text.</p> <p data-svelte-h="svelte-131i334"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Prepend"><code>Prepend</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..NormalizerSequence" class="group"></a> <h2 class="relative group"><a id="tokenizersnormalizersequence--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnormalizersequence--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 
28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~NormalizerSequence ⇐ &lt;code> Normalizer &lt;/code></span></h2> <p data-svelte-h="svelte-1752gus">A Normalizer that applies a sequence of Normalizers.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <ul data-svelte-h="svelte-17ix58l"><li><a href="#module_tokenizers..NormalizerSequence">~NormalizerSequence</a><code>Normalizer</code><ul><li><a href="#new_module_tokenizers..NormalizerSequence_new"><code>new NormalizerSequence(config)</code></a></li> <li><a href="#module_tokenizers..NormalizerSequence+normalize"><code>.normalize(text)</code></a><code>string</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..NormalizerSequence_new" class="group"></a> <h3 class="relative group"><a id="new-normalizersequenceconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-normalizersequenceconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new 
NormalizerSequence(config)</span></h3> <p data-svelte-h="svelte-11kq2wb">Create a new instance of NormalizerSequence.</p> <table data-svelte-h="svelte-5ym4bl"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td> </tr><tr><td>config.normalizers</td><td><code>Array.&lt;Object&gt;</code></td><td><p>An array of Normalizer configuration objects.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..NormalizerSequence+normalize" class="group"></a> <h3 class="relative group"><a id="normalizersequencenormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#normalizersequencenormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>normalizerSequence.normalize(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1edabpq">Apply a sequence of Normalizers to the input text.</p> <p data-svelte-h="svelte-pymlm6"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..NormalizerSequence"><code>NormalizerSequence</code></a><br> 
<strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertNormalizer" class="group"></a> <h2 class="relative group"><a id="tokenizersbertnormalizer--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbertnormalizer--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BertNormalizer ⇐ &lt;code> Normalizer &lt;/code></span></h2> <p data-svelte-h="svelte-1l2tjxd">A class representing a normalizer used in BERT tokenization.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <ul data-svelte-h="svelte-ld6a7d"><li><a href="#module_tokenizers..BertNormalizer">~BertNormalizer</a><code>Normalizer</code><ul><li><a 
href="#module_tokenizers..BertNormalizer+_tokenize_chinese_chars"><code>._tokenize_chinese_chars(text)</code></a><code>string</code></li> <li><a href="#module_tokenizers..BertNormalizer+stripAccents"><code>.stripAccents(text)</code></a><code>string</code></li> <li><a href="#module_tokenizers..BertNormalizer+normalize"><code>.normalize(text)</code></a><code>string</code></li></ul></li></ul> <hr> <a id="module_tokenizers..BertNormalizer+_tokenize_chinese_chars" class="group"></a> <h3 class="relative group"><a id="bertnormalizertokenizechinesecharstext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bertnormalizertokenizechinesecharstext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bertNormalizer._tokenize_chinese_chars(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1bijj0e">Adds whitespace around any CJK (Chinese, Japanese, or Korean) character in the input text.</p> <p data-svelte-h="svelte-185sdhq"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BertNormalizer"><code>BertNormalizer</code></a><br> <strong>Returns</strong>: 
<code>string</code> - The tokenized text with whitespace added around CJK characters.</p> <table data-svelte-h="svelte-cxfvn5"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The input text to tokenize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertNormalizer+stripAccents" class="group"></a> <h3 class="relative group"><a id="bertnormalizerstripaccentstext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bertnormalizerstripaccentstext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bertNormalizer.stripAccents(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1bhx3l9">Strips accents from the given text.</p> <p data-svelte-h="svelte-1dzzyok"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BertNormalizer"><code>BertNormalizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The text with accents removed.</p> <table data-svelte-h="svelte-o2vd1j"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> 
<tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to strip accents from.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertNormalizer+normalize" class="group"></a> <h3 class="relative group"><a id="bertnormalizernormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bertnormalizernormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bertNormalizer.normalize(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-8ayr2g">Normalizes the given text based on the configuration.</p> <p data-svelte-h="svelte-1j7ytyy"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BertNormalizer"><code>BertNormalizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizer" class="group"></a> <h2 class="relative group"><a 
id="tokenizerspretokenizer--code-callable-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspretokenizer--code-callable-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PreTokenizer ⇐ &lt;code> Callable &lt;/code></span></h2> <p data-svelte-h="svelte-1jqub8o">A callable class representing a pre-tokenizer used in tokenization. Subclasses
should implement the <code>pre_tokenize_text</code> method to define the specific pre-tokenization logic.</p> <p data-svelte-h="svelte-1aafbib"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <a href="#Callable"><code>Callable</code></a></p> <ul data-svelte-h="svelte-1bn4cl6"><li><a href="#module_tokenizers..PreTokenizer">~PreTokenizer</a><a href="#Callable"><code>Callable</code></a><ul><li><em>instance</em><ul><li><em><a href="#module_tokenizers..PreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></em></li> <li><a href="#module_tokenizers..PreTokenizer+pre_tokenize"><code>.pre_tokenize(text, [options])</code></a><code>Array.&lt;string&gt;</code></li> <li><a href="#module_tokenizers..PreTokenizer+_call"><code>._call(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers..PreTokenizer.fromConfig"><code>.fromConfig(config)</code></a><code>PreTokenizer</code></li></ul></li></ul></li></ul> <hr> <a id="module_tokenizers..PreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="pretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 
28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTokenizer.pre_tokenize_text(text, [options]) ⇒ &lt;code> Array. &lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-19addtz">Method that should be implemented by subclasses to define the specific pre-tokenization logic.</p> <p data-svelte-h="svelte-1spca8v"><strong>Kind</strong>: instance abstract method of <a href="#module_tokenizers..PreTokenizer"><code>PreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - The pre-tokenized text.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1q3adi"><li><code>Error</code> If the method is not implemented in the subclass.</li></ul> <table data-svelte-h="svelte-zcvat0"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to pre-tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizer+pre_tokenize" class="group"></a> <h3 class="relative group"><a id="pretokenizerpretokenizetext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretokenizerpretokenizetext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 
67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTokenizer.pre_tokenize(text, [options]) ⇒ &lt;code> Array. &lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-1oc7xq7">Tokenizes the given text into pre-tokens.</p> <p data-svelte-h="svelte-mvzmzf"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PreTokenizer"><code>PreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of pre-tokens.</p> <table data-svelte-h="svelte-1q2ym19"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code> | <code>Array&lt;string&gt;</code></td><td><p>The text or array of texts to pre-tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizer+_call" class="group"></a> <h3 class="relative group"><a id="pretokenizercalltext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretokenizercalltext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 
8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTokenizer._call(text, [options]) ⇒ &lt;code> Array. &lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-baebiw">Alias for <a href="#module_tokenizers..PreTokenizer+pre_tokenize">PreTokenizer#pre_tokenize</a>.</p> <p data-svelte-h="svelte-m5jkl3"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PreTokenizer"><code>PreTokenizer</code></a><br> <strong>Overrides</strong>: <a href="#Callable+_call"><code>_call</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of pre-tokens.</p> <table data-svelte-h="svelte-1q2ym19"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code> | <code>Array&lt;string&gt;</code></td><td><p>The text or array of texts to pre-tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizer.fromConfig" class="group"></a> <h3 class="relative group"><a id="pretokenizerfromconfigconfig--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretokenizerfromconfigconfig--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTokenizer.fromConfig(config) ⇒ &lt;code> PreTokenizer &lt;/code></span></h3> <p data-svelte-h="svelte-redbex">Factory method that returns an instance of a subclass of <code>PreTokenizer</code> based on the provided configuration.</p> <p data-svelte-h="svelte-1tdb68h"><strong>Kind</strong>: static method of <a href="#module_tokenizers..PreTokenizer"><code>PreTokenizer</code></a><br> <strong>Returns</strong>: <code>PreTokenizer</code> - An instance of a subclass of <code>PreTokenizer</code>.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-rns9ge"><li><code>Error</code> If the provided configuration object does not correspond to any known pre-tokenizer.</li></ul> <table data-svelte-h="svelte-1ty8cz"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>A configuration object for the pre-tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertPreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersbertpretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbertpretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BertPreTokenizer ⇐ &lt;code> PreTokenizer &lt;/code></span></h2> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-1a68u8"><li><a href="#module_tokenizers..BertPreTokenizer">~BertPreTokenizer</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..BertPreTokenizer_new"><code>new BertPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..BertPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..BertPreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-bertpretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-bertpretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 
1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new BertPreTokenizer(config)</span></h3> <p data-svelte-h="svelte-gtgeht">A PreTokenizer that splits text into wordpieces using a basic tokenization scheme
similar to that used in the original implementation of BERT.</p> <table data-svelte-h="svelte-tworoc"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertPreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="bertpretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bertpretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bertPreTokenizer.pre_tokenize_text(text, [options]) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-tgmicg">Tokenizes a single text using the BERT pre-tokenization scheme.</p> <p data-svelte-h="svelte-16xyhnz"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BertPreTokenizer"><code>BertPreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of tokens.</p> <table data-svelte-h="svelte-61uqmw"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ByteLevelPreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersbytelevelpretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbytelevelpretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~ByteLevelPreTokenizer ⇐ &lt;code> PreTokenizer &lt;/code></span></h2> <p 
data-svelte-h="svelte-34r8p">A pre-tokenizer that splits text into Byte-Pair-Encoding (BPE) subwords.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-1owpbdc"><li><a href="#module_tokenizers..ByteLevelPreTokenizer">~ByteLevelPreTokenizer</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..ByteLevelPreTokenizer_new"><code>new ByteLevelPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+add_prefix_space"><code>.add_prefix_space</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+trim_offsets"><code>.trim_offsets</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+use_regex"><code>.use_regex</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..ByteLevelPreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-bytelevelpretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-bytelevelpretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 
0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new ByteLevelPreTokenizer(config)</span></h3> <p data-svelte-h="svelte-7elsye">Creates a new instance of the <code>ByteLevelPreTokenizer</code> class.</p> <table data-svelte-h="svelte-tworoc"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ByteLevelPreTokenizer+add_prefix_space" class="group"></a> <h3 class="relative group"><a id="bytelevelpretokenizeraddprefixspace--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bytelevelpretokenizeraddprefixspace--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelPreTokenizer.add_prefix_space : &lt;code> boolean &lt;/code></span></h3> <p data-svelte-h="svelte-141kpye">Whether to add a leading space to the 
first word. This allows the leading word to be treated just like any other word.</p> <p data-svelte-h="svelte-c8mfrk"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..ByteLevelPreTokenizer"><code>ByteLevelPreTokenizer</code></a></p> <hr> <a id="module_tokenizers..ByteLevelPreTokenizer+trim_offsets" class="group"></a> <h3 class="relative group"><a id="bytelevelpretokenizertrimoffsets--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bytelevelpretokenizertrimoffsets--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelPreTokenizer.trim_offsets : &lt;code> boolean &lt;/code></span></h3> <p data-svelte-h="svelte-1pv6ugb">Whether the post-processing step should trim offsets to avoid including whitespace.</p> <p data-svelte-h="svelte-1jwkwcb"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..ByteLevelPreTokenizer"><code>ByteLevelPreTokenizer</code></a><br> <strong>Todo</strong></p> <ul data-svelte-h="svelte-1tkofaw"><li>Use this in the pretokenization step.</li></ul> <hr> <a id="module_tokenizers..ByteLevelPreTokenizer+use_regex" class="group"></a> 
<h3 class="relative group"><a id="bytelevelpretokenizeruseregex--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bytelevelpretokenizeruseregex--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelPreTokenizer.use_regex : &lt;code> boolean &lt;/code></span></h3> <p data-svelte-h="svelte-1o4txfk">Whether to use the standard GPT2 regex for whitespace splitting. Set it to false if you want to use your own splitting. 
Defaults to true.</p> <p data-svelte-h="svelte-c8mfrk"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..ByteLevelPreTokenizer"><code>ByteLevelPreTokenizer</code></a></p> <hr> <a id="module_tokenizers..ByteLevelPreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="bytelevelpretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bytelevelpretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelPreTokenizer.pre_tokenize_text(text, [options]) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-dyllm1">Tokenizes a single piece of text using byte-level tokenization.</p> <p data-svelte-h="svelte-9ja9mh"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..ByteLevelPreTokenizer"><code>ByteLevelPreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of tokens.</p> <table data-svelte-h="svelte-61uqmw"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..SplitPreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerssplitpretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerssplitpretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~SplitPreTokenizer ⇐ &lt;code> PreTokenizer &lt;/code></span></h2> <p 
data-svelte-h="svelte-6fldli">Splits text using a given pattern.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-19mec1z"><li><a href="#module_tokenizers..SplitPreTokenizer">~SplitPreTokenizer</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..SplitPreTokenizer_new"><code>new SplitPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..SplitPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..SplitPreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-splitpretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-splitpretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new SplitPreTokenizer(config)</span></h3> <table data-svelte-h="svelte-18pcmyh"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> 
<tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration options for the pre-tokenizer.</p></td> </tr><tr><td>config.pattern</td><td><code>Object</code></td><td><p>The pattern used to split the text. Can be a string or a regex object.</p></td> </tr><tr><td>config.pattern.String</td><td><code>string</code> | <code>undefined</code></td><td><p>The string to use for splitting. Only defined if the pattern is a string.</p></td> </tr><tr><td>config.pattern.Regex</td><td><code>string</code> | <code>undefined</code></td><td><p>The regex to use for splitting. Only defined if the pattern is a regex.</p></td> </tr><tr><td>config.behavior</td><td><code>SplitDelimiterBehavior</code></td><td><p>The behavior to use when splitting.</p></td> </tr><tr><td>config.invert</td><td><code>boolean</code></td><td><p>Whether to split (invert=false) or match (invert=true) the pattern.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..SplitPreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="splitpretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#splitpretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 
56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>splitPreTokenizer.pre_tokenize_text(text, [options]) ⇒ &lt;code> Array. &lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-1e9v9pn">Tokenizes text by splitting it using the given pattern.</p> <p data-svelte-h="svelte-1nb2x3d"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..SplitPreTokenizer"><code>SplitPreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of tokens.</p> <table data-svelte-h="svelte-61uqmw"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PunctuationPreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerspunctuationpretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspunctuationpretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 
0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PunctuationPreTokenizer ⇐ &lt;code> PreTokenizer &lt;/code></span></h2> <p data-svelte-h="svelte-o97k8y">Splits text based on punctuation.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-4kya8j"><li><a href="#module_tokenizers..PunctuationPreTokenizer">~PunctuationPreTokenizer</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..PunctuationPreTokenizer_new"><code>new PunctuationPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..PunctuationPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..PunctuationPreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-punctuationpretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-punctuationpretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 
0-79.196z" fill="currentColor"></path></svg></span></a> <span>new PunctuationPreTokenizer(config)</span></h3> <table data-svelte-h="svelte-1t0eat8"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration options for the pre-tokenizer.</p></td> </tr><tr><td>config.behavior</td><td><code>SplitDelimiterBehavior</code></td><td><p>The behavior to use when splitting.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PunctuationPreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="punctuationpretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#punctuationpretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>punctuationPreTokenizer.pre_tokenize_text(text, [options]) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-1e9v9pn">Tokenizes text by splitting it on punctuation.</p> <p data-svelte-h="svelte-pgfgex"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PunctuationPreTokenizer"><code>PunctuationPreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of tokens.</p> <table data-svelte-h="svelte-61uqmw"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..DigitsPreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersdigitspretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersdigitspretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~DigitsPreTokenizer ⇐ &lt;code> PreTokenizer &lt;/code></span></h2> <p
data-svelte-h="svelte-1i900bk">Splits text based on digits.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-fuzbfp"><li><a href="#module_tokenizers..DigitsPreTokenizer">~DigitsPreTokenizer</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..DigitsPreTokenizer_new"><code>new DigitsPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..DigitsPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..DigitsPreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-digitspretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-digitspretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new DigitsPreTokenizer(config)</span></h3> <table data-svelte-h="svelte-1rz32no"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> 
<tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration options for the pre-tokenizer.</p></td> </tr><tr><td>config.individual_digits</td><td><code>boolean</code></td><td><p>Whether to split on individual digits.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..DigitsPreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="digitspretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#digitspretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>digitsPreTokenizer.pre_tokenize_text(text, [options]) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-1e9v9pn">Tokenizes text by splitting it on digits.</p> <p data-svelte-h="svelte-1k2jvw7"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..DigitsPreTokenizer"><code>DigitsPreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of tokens.</p> <table data-svelte-h="svelte-61uqmw"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessor" class="group"></a> <h2 class="relative group"><a id="tokenizerspostprocessor--code-callable-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspostprocessor--code-callable-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PostProcessor ⇐ &lt;code> Callable &lt;/code></span></h2> <p data-svelte-h="svelte-1aafbib"><strong>Kind</strong>: inner
class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <a href="#Callable"><code>Callable</code></a></p> <ul data-svelte-h="svelte-er11m4"><li><a href="#module_tokenizers..PostProcessor">~PostProcessor</a><a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers..PostProcessor_new"><code>new PostProcessor(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers..PostProcessor+post_process"><code>.post_process(tokens, ...args)</code></a><code>PostProcessedOutput</code></li> <li><a href="#module_tokenizers..PostProcessor+_call"><code>._call(tokens, ...args)</code></a><code>PostProcessedOutput</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers..PostProcessor.fromConfig"><code>.fromConfig(config)</code></a><code>PostProcessor</code></li></ul></li></ul></li></ul> <hr> <a id="new_module_tokenizers..PostProcessor_new" class="group"></a> <h3 class="relative group"><a id="new-postprocessorconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-postprocessorconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>new PostProcessor(config)</span></h3> <table data-svelte-h="svelte-m3g71k"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration for the post-processor.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessor+post_process" class="group"></a> <h3 class="relative group"><a id="postprocessorpostprocesstokens-args--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#postprocessorpostprocesstokens-args--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>postProcessor.post_process(tokens, ...args) ⇒ &lt;code> PostProcessedOutput &lt;/code></span></h3> <p data-svelte-h="svelte-14whgj2">Method to be implemented in subclass to apply post-processing on the given tokens.</p> <p data-svelte-h="svelte-ufqaef"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PostProcessor"><code>PostProcessor</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - The post-processed tokens.<br> <strong>Throws</strong>:</p> <ul 
data-svelte-h="svelte-6hfrhb"><li><code>Error</code> If the method is not implemented in subclass.</li></ul> <table data-svelte-h="svelte-x1f9dp"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array</code></td><td><p>The input tokens to be post-processed.</p></td> </tr><tr><td>...args</td><td><code>*</code></td><td><p>Additional arguments required by the post-processing logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessor+_call" class="group"></a> <h3 class="relative group"><a id="postprocessorcalltokens-args--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#postprocessorcalltokens-args--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>postProcessor._call(tokens, ...args) ⇒ &lt;code> PostProcessedOutput &lt;/code></span></h3> <p data-svelte-h="svelte-1h8wzy0">Alias for <a href="#module_tokenizers..PostProcessor+post_process">PostProcessor#post_process</a>.</p> <p data-svelte-h="svelte-pyh1lv"><strong>Kind</strong>: instance method of <a
href="#module_tokenizers..PostProcessor"><code>PostProcessor</code></a><br> <strong>Overrides</strong>: <a href="#Callable+_call"><code>_call</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - The post-processed tokens.</p> <table data-svelte-h="svelte-bnut61"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array</code></td><td><p>The input tokens to be post-processed.</p></td> </tr><tr><td>...args</td><td><code>*</code></td><td><p>Additional arguments required by the post-processing logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessor.fromConfig" class="group"></a> <h3 class="relative group"><a id="postprocessorfromconfigconfig--code-postprocessor-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#postprocessorfromconfigconfig--code-postprocessor-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PostProcessor.fromConfig(config) ⇒ &lt;code> PostProcessor &lt;/code></span></h3> <p data-svelte-h="svelte-44djt6">Factory method to create a PostProcessor object from a configuration object.</p> <p
data-svelte-h="svelte-1b7ak99"><strong>Kind</strong>: static method of <a href="#module_tokenizers..PostProcessor"><code>PostProcessor</code></a><br> <strong>Returns</strong>: <code>PostProcessor</code> - A PostProcessor object created from the given configuration.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-13th0qu"><li><code>Error</code> If an unknown PostProcessor type is encountered.</li></ul> <table data-svelte-h="svelte-v4jm0i"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>Configuration object representing a PostProcessor.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertProcessing" class="group"></a> <h2 class="relative group"><a id="tokenizersbertprocessing" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbertprocessing"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BertProcessing</span></h2> <p data-svelte-h="svelte-jv2j77">A post-processor that adds special tokens to the beginning and end of the input.</p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of 
<a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-1fzs36r"><li><a href="#module_tokenizers..BertProcessing">~BertProcessing</a><ul><li><a href="#new_module_tokenizers..BertProcessing_new"><code>new BertProcessing(config)</code></a></li> <li><a href="#module_tokenizers..BertProcessing+post_process"><code>.post_process(tokens, [tokens_pair])</code></a><code>PostProcessedOutput</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..BertProcessing_new" class="group"></a> <h3 class="relative group"><a id="new-bertprocessingconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-bertprocessingconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new BertProcessing(config)</span></h3> <table data-svelte-h="svelte-1bxe8xu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration for the post-processor.</p></td> </tr><tr><td>config.cls</td><td><code>Array.&lt;string&gt;</code></td><td><p>The special tokens to add to the beginning of the input.</p></td> 
</tr><tr><td>config.sep</td><td><code>Array.&lt;string&gt;</code></td><td><p>The special tokens to add to the end of the input.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertProcessing+post_process" class="group"></a> <h3 class="relative group"><a id="bertprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bertprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bertProcessing.post_process(tokens, [tokens_pair]) ⇒ &lt;code> PostProcessedOutput &lt;/code></span></h3> <p data-svelte-h="svelte-jf8fq9">Adds the special tokens to the beginning and end of the input.</p> <p data-svelte-h="svelte-wj6rsa"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BertProcessing"><code>BertProcessing</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - The post-processed tokens with the special tokens added to the beginning and end.</p> <table 
data-svelte-h="svelte-1t1br86"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td></td><td><p>The input tokens.</p></td> </tr><tr><td>[tokens_pair]</td><td><code>Array.&lt;string&gt;</code></td><td><code>null</code></td><td><p>An optional second set of input tokens.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..TemplateProcessing" class="group"></a> <h2 class="relative group"><a id="tokenizerstemplateprocessing--code-postprocessor-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerstemplateprocessing--code-postprocessor-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~TemplateProcessing ⇐ &lt;code> PostProcessor &lt;/code></span></h2> <p data-svelte-h="svelte-1byklnf">Post processor that replaces special tokens in a template with actual tokens.</p> <p data-svelte-h="svelte-109ectr"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PostProcessor</code></p> <ul data-svelte-h="svelte-18at4lt"><li><a
href="#module_tokenizers..TemplateProcessing">~TemplateProcessing</a><code>PostProcessor</code><ul><li><a href="#new_module_tokenizers..TemplateProcessing_new"><code>new TemplateProcessing(config)</code></a></li> <li><a href="#module_tokenizers..TemplateProcessing+post_process"><code>.post_process(tokens, [tokens_pair])</code></a><code>PostProcessedOutput</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..TemplateProcessing_new" class="group"></a> <h3 class="relative group"><a id="new-templateprocessingconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-templateprocessingconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new TemplateProcessing(config)</span></h3> <p data-svelte-h="svelte-ggislo">Creates a new instance of <code>TemplateProcessing</code>.</p> <table data-svelte-h="svelte-1jenfln"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration options for the post processor.</p></td> </tr><tr><td>config.single</td><td><code>Array</code></td><td><p>The template for a single sequence of tokens.</p></td> 
</tr><tr><td>config.pair</td><td><code>Array</code></td><td><p>The template for a pair of sequences of tokens.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..TemplateProcessing+post_process" class="group"></a> <h3 class="relative group"><a id="templateprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#templateprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>templateProcessing.post_process(tokens, [tokens_pair]) ⇒ &lt;code> PostProcessedOutput &lt;/code></span></h3> <p data-svelte-h="svelte-14th5ew">Replaces special tokens in the template with actual tokens.</p> <p data-svelte-h="svelte-ayon61"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..TemplateProcessing"><code>TemplateProcessing</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - An object containing the list of tokens with the special tokens replaced with actual tokens.</p> <table 
data-svelte-h="svelte-bwvwli"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td></td><td><p>The list of tokens for the first sequence.</p></td> </tr><tr><td>[tokens_pair]</td><td><code>Array.&lt;string&gt;</code></td><td><code></code></td><td><p>The list of tokens for the second sequence (optional).</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ByteLevelPostProcessor" class="group"></a> <h2 class="relative group"><a id="tokenizersbytelevelpostprocessor--code-postprocessor-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbytelevelpostprocessor--code-postprocessor-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~ByteLevelPostProcessor ⇐ &lt;code> PostProcessor &lt;/code></span></h2> <p data-svelte-h="svelte-1vpbvt9">A PostProcessor that returns the given tokens as is.</p> <p data-svelte-h="svelte-109ectr"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PostProcessor</code></p> <hr> <a 
id="module_tokenizers..ByteLevelPostProcessor+post_process" class="group"></a> <h3 class="relative group"><a id="bytelevelpostprocessorpostprocesstokens-tokenspair--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bytelevelpostprocessorpostprocesstokens-tokenspair--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelPostProcessor.post_process(tokens, [tokens_pair]) ⇒ &lt;code> PostProcessedOutput &lt;/code></span></h3> <p data-svelte-h="svelte-1f82mhb">Post process the given tokens.</p> <p data-svelte-h="svelte-1vf7qcj"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..ByteLevelPostProcessor"><code>ByteLevelPostProcessor</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - An object containing the post-processed tokens.</p> <table data-svelte-h="svelte-bwvwli"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td></td><td><p>The list of tokens for the first sequence.</p></td> 
</tr><tr><td>[tokens_pair]</td><td><code>Array.&lt;string&gt;</code></td><td><code></code></td><td><p>The list of tokens for the second sequence (optional).</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessorSequence" class="group"></a> <h2 class="relative group"><a id="tokenizerspostprocessorsequence" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspostprocessorsequence"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PostProcessorSequence</span></h2> <p data-svelte-h="svelte-jlzqc1">A post-processor that applies multiple post-processors in sequence.</p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-160svsg"><li><a href="#module_tokenizers..PostProcessorSequence">~PostProcessorSequence</a><ul><li><a href="#new_module_tokenizers..PostProcessorSequence_new"><code>new PostProcessorSequence(config)</code></a></li> <li><a href="#module_tokenizers..PostProcessorSequence+post_process"><code>.post_process(tokens, 
[tokens_pair])</code></a><code>PostProcessedOutput</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..PostProcessorSequence_new" class="group"></a> <h3 class="relative group"><a id="new-postprocessorsequenceconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-postprocessorsequenceconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new PostProcessorSequence(config)</span></h3> <p data-svelte-h="svelte-1i7r42h">Creates a new instance of PostProcessorSequence.</p> <table data-svelte-h="svelte-9gd1xf"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td> </tr><tr><td>config.processors</td><td><code>Array.&lt;Object&gt;</code></td><td><p>The list of post-processors to apply.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessorSequence+post_process" class="group"></a> <h3 class="relative group"><a id="postprocessorsequencepostprocesstokens-tokenspair--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 
with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#postprocessorsequencepostprocesstokens-tokenspair--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>postProcessorSequence.post_process(tokens, [tokens_pair]) ⇒ &lt;code> PostProcessedOutput &lt;/code></span></h3> <p data-svelte-h="svelte-1f82mhb">Post process the given tokens.</p> <p data-svelte-h="svelte-iyek03"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PostProcessorSequence"><code>PostProcessorSequence</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - An object containing the post-processed tokens.</p> <table data-svelte-h="svelte-bwvwli"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td></td><td><p>The list of tokens for the first sequence.</p></td> </tr><tr><td>[tokens_pair]</td><td><code>Array.&lt;string&gt;</code></td><td><code></code></td><td><p>The list of tokens for the second sequence (optional).</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Decoder" class="group"></a> <h2 class="relative group"><a id="tokenizersdecoder--code-callable-code" 
class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersdecoder--code-callable-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Decoder ⇐ &lt;code> Callable &lt;/code></span></h2> <p data-svelte-h="svelte-155b6hh">The base class for token decoders.</p> <p data-svelte-h="svelte-1aafbib"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <a href="#Callable"><code>Callable</code></a></p> <ul data-svelte-h="svelte-13kifg7"><li><a href="#module_tokenizers..Decoder">~Decoder</a><a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers..Decoder_new"><code>new Decoder(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers..Decoder+added_tokens"><code>.added_tokens</code></a> : <code>Array.&lt;AddedToken&gt;</code></li> <li><a href="#module_tokenizers..Decoder+_call"><code>._call(tokens)</code></a><code>string</code></li> <li><a href="#module_tokenizers..Decoder+decode"><code>.decode(tokens)</code></a><code>string</code></li> <li><a 
href="#module_tokenizers..Decoder+decode_chain"><code>.decode_chain(tokens)</code></a><code>Array.&lt;string&gt;</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers..Decoder.fromConfig"><code>.fromConfig(config)</code></a><code>Decoder</code></li></ul></li></ul></li></ul> <hr> <a id="new_module_tokenizers..Decoder_new" class="group"></a> <h3 class="relative group"><a id="new-decoderconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-decoderconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new Decoder(config)</span></h3> <p data-svelte-h="svelte-1bygwbp">Creates an instance of <code>Decoder</code>.</p> <table data-svelte-h="svelte-tworoc"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Decoder+added_tokens" class="group"></a> <h3 class="relative group"><a id="decoderaddedtokens--code-array--addedtoken--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 
with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decoderaddedtokens--code-array--addedtoken--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>decoder.added_tokens : &lt;code> Array. &lt; AddedToken > &lt;/code></span></h3> <p data-svelte-h="svelte-1wueo48"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..Decoder"><code>Decoder</code></a></p> <hr> <a id="module_tokenizers..Decoder+_call" class="group"></a> <h3 class="relative group"><a id="decodercalltokens--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decodercalltokens--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 
0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>decoder._call(tokens) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1qx0w3k">Calls the <code>decode</code> method.</p> <p data-svelte-h="svelte-j6vdfv"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Decoder"><code>Decoder</code></a><br> <strong>Overrides</strong>: <a href="#Callable+_call"><code>_call</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string.</p> <table data-svelte-h="svelte-1kujoeu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>The list of tokens.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Decoder+decode" class="group"></a> <h3 class="relative group"><a id="decoderdecodetokens--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decoderdecodetokens--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 
56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>decoder.decode(tokens) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1hqb24l">Decodes a list of tokens.</p> <p data-svelte-h="svelte-10rmqcn"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Decoder"><code>Decoder</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string.</p> <table data-svelte-h="svelte-1kujoeu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>The list of tokens.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Decoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="decoderdecodechaintokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decoderdecodechaintokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>decoder.decode_chain(tokens) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-wbd0b6">Apply the decoder to a list of tokens.</p> <p data-svelte-h="svelte-jth48o"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Decoder"><code>Decoder</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - The decoded list of tokens.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-5buzwk"><li><code>Error</code> If the <code>decode_chain</code> method is not implemented in the subclass.</li></ul> <table data-svelte-h="svelte-1kujoeu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>The list of tokens.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Decoder.fromConfig" class="group"></a> <h3 class="relative group"><a id="decoderfromconfigconfig--code-decoder-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decoderfromconfigconfig--code-decoder-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Decoder.fromConfig(config) ⇒ &lt;code> Decoder &lt;/code></span></h3> <p 
data-svelte-h="svelte-zfgnbx">Creates a decoder instance based on the provided configuration.</p> <p data-svelte-h="svelte-1tr8nt"><strong>Kind</strong>: static method of <a href="#module_tokenizers..Decoder"><code>Decoder</code></a><br> <strong>Returns</strong>: <code>Decoder</code> - A decoder instance.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-lb27nh"><li><code>Error</code> If an unknown decoder type is provided.</li></ul> <table data-svelte-h="svelte-tworoc"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..FuseDecoder" class="group"></a> <h2 class="relative group"><a id="tokenizersfusedecoder" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersfusedecoder"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~FuseDecoder</span></h2> <p data-svelte-h="svelte-f5h399">Fuse simply fuses all tokens into one big string.
It’s usually the last decoding step anyway, but this decoder
exists in case some decoders need to run after that step.</p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="module_tokenizers..FuseDecoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="fusedecoderdecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#fusedecoderdecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>fuseDecoder.decode_chain() : &lt;code> * &lt;/code></span></h3> <p data-svelte-h="svelte-1hzjpri"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..FuseDecoder"><code>FuseDecoder</code></a></p> <hr> <a id="module_tokenizers..WordPieceDecoder" class="group"></a> <h2 class="relative group"><a id="tokenizerswordpiecedecoder--code-decoder-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswordpiecedecoder--code-decoder-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~WordPieceDecoder ⇐ &lt;code> Decoder &lt;/code></span></h2> <p data-svelte-h="svelte-1m2xybh">A decoder that decodes a list of WordPiece tokens into a single string.</p> <p data-svelte-h="svelte-nbdqst"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Decoder</code></p> <ul data-svelte-h="svelte-1p8wow"><li><a href="#module_tokenizers..WordPieceDecoder">~WordPieceDecoder</a><code>Decoder</code><ul><li><a href="#new_module_tokenizers..WordPieceDecoder_new"><code>new WordPieceDecoder(config)</code></a></li> <li><a href="#module_tokenizers..WordPieceDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..WordPieceDecoder_new" class="group"></a> <h3 class="relative group"><a id="new-wordpiecedecoderconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-wordpiecedecoderconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" 
viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new WordPieceDecoder(config)</span></h3> <p data-svelte-h="svelte-emne3c">Creates a new instance of WordPieceDecoder.</p> <table data-svelte-h="svelte-6ky2kn"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td> </tr><tr><td>config.prefix</td><td><code>string</code></td><td><p>The prefix used for WordPiece encoding.</p></td> </tr><tr><td>config.cleanup</td><td><code>boolean</code></td><td><p>Whether to clean up the decoded string.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WordPieceDecoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="wordpiecedecoderdecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecedecoderdecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 
0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceDecoder.decode_chain() : &lt;code> * &lt;/code></span></h3> <p data-svelte-h="svelte-100h0ya"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..WordPieceDecoder"><code>WordPieceDecoder</code></a></p> <hr> <a id="module_tokenizers..ByteLevelDecoder" class="group"></a> <h2 class="relative group"><a id="tokenizersbyteleveldecoder--code-decoder-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbyteleveldecoder--code-decoder-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~ByteLevelDecoder ⇐ &lt;code> Decoder &lt;/code></span></h2> <p data-svelte-h="svelte-1312arw">Byte-level decoder for tokenization output. 
Inherits from the <code>Decoder</code> class.</p> <p data-svelte-h="svelte-nbdqst"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Decoder</code></p> <ul data-svelte-h="svelte-pxh1vl"><li><a href="#module_tokenizers..ByteLevelDecoder">~ByteLevelDecoder</a> ⇐ <code>Decoder</code><ul><li><a href="#new_module_tokenizers..ByteLevelDecoder_new"><code>new ByteLevelDecoder(config)</code></a></li> <li><a href="#module_tokenizers..ByteLevelDecoder+convert_tokens_to_string"><code>.convert_tokens_to_string(tokens)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..ByteLevelDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..ByteLevelDecoder_new" class="group"></a> <h3 class="relative group"><a id="new-byteleveldecoderconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-byteleveldecoderconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new ByteLevelDecoder(config)</span></h3> <p data-svelte-h="svelte-1sje6rv">Create a <code>ByteLevelDecoder</code> 
object.</p> <table data-svelte-h="svelte-kkg20v"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>Configuration object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ByteLevelDecoder+convert_tokens_to_string" class="group"></a> <h3 class="relative group"><a id="byteleveldecoderconverttokenstostringtokens--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#byteleveldecoderconverttokenstostringtokens--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelDecoder.convert_tokens_to_string(tokens) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1b6r5pz">Convert an array of tokens to string by decoding each byte.</p> <p data-svelte-h="svelte-1vs4rgl"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..ByteLevelDecoder"><code>ByteLevelDecoder</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string.</p> <table data-svelte-h="svelte-unswmu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> 
<tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>Array of tokens to be decoded.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ByteLevelDecoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="byteleveldecoderdecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#byteleveldecoderdecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelDecoder.decode_chain() : &lt;code> * &lt;/code></span></h3> <p data-svelte-h="svelte-1dwsqre"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..ByteLevelDecoder"><code>ByteLevelDecoder</code></a></p> <hr> <a id="module_tokenizers..CTCDecoder" class="group"></a> <h2 class="relative group"><a id="tokenizersctcdecoder" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersctcdecoder"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~CTCDecoder</span></h2> <p data-svelte-h="svelte-zf06vq">The CTC (Connectionist Temporal Classification) decoder.
See <a href="https://github.com/huggingface/tokenizers/blob/bb38f390a61883fc2f29d659af696f428d1cda6b/tokenizers/src/decoders/ctc.rs" rel="nofollow">https://github.com/huggingface/tokenizers/blob/bb38f390a61883fc2f29d659af696f428d1cda6b/tokenizers/src/decoders/ctc.rs</a></p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-1lodx12"><li><a href="#module_tokenizers..CTCDecoder">~CTCDecoder</a><ul><li><a href="#module_tokenizers..CTCDecoder+convert_tokens_to_string"><code>.convert_tokens_to_string(tokens)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..CTCDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="module_tokenizers..CTCDecoder+convert_tokens_to_string" class="group"></a> <h3 class="relative group"><a id="ctcdecoderconverttokenstostringtokens--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ctcdecoderconverttokenstostringtokens--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>ctcDecoder.convert_tokens_to_string(tokens) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1gchkl">Converts connectionist-temporal-classification (CTC) output tokens into a single string.</p> <p data-svelte-h="svelte-pchset"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..CTCDecoder"><code>CTCDecoder</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string.</p> <table data-svelte-h="svelte-unswmu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>Array of tokens to be decoded.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..CTCDecoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="ctcdecoderdecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ctcdecoderdecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ctcDecoder.decode_chain() : &lt;code> * &lt;/code></span></h3> <p data-svelte-h="svelte-jnewq"><strong>Kind</strong>: instance method of <a 
href="#module_tokenizers..CTCDecoder"><code>CTCDecoder</code></a></p> <hr> <a id="module_tokenizers..DecoderSequence" class="group"></a> <h2 class="relative group"><a id="tokenizersdecodersequence--code-decoder-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersdecodersequence--code-decoder-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~DecoderSequence ⇐ &lt;code> Decoder &lt;/code></span></h2> <p data-svelte-h="svelte-16p2zks">Apply a sequence of decoders.</p> <p data-svelte-h="svelte-nbdqst"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Decoder</code></p> <ul data-svelte-h="svelte-19pe06l"><li><a href="#module_tokenizers..DecoderSequence">~DecoderSequence</a> ⇐ <code>Decoder</code><ul><li><a href="#new_module_tokenizers..DecoderSequence_new"><code>new DecoderSequence(config)</code></a></li> <li><a href="#module_tokenizers..DecoderSequence+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..DecoderSequence_new" class="group"></a> <h3 
class="relative group"><a id="new-decodersequenceconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-decodersequenceconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new DecoderSequence(config)</span></h3> <p data-svelte-h="svelte-1gk4xdv">Creates a new instance of DecoderSequence.</p> <table data-svelte-h="svelte-1ip0x80"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td> </tr><tr><td>config.decoders</td><td><code>Array.&lt;Object&gt;</code></td><td><p>The list of decoders to apply.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..DecoderSequence+decode_chain" class="group"></a> <h3 class="relative group"><a id="decodersequencedecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decodersequencedecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" 
role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>decoderSequence.decode_chain() : &lt;code> * &lt;/code></span></h3> <p data-svelte-h="svelte-1jmxaf6"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..DecoderSequence"><code>DecoderSequence</code></a></p> <hr> <a id="module_tokenizers..MetaspacePreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersmetaspacepretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersmetaspacepretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 
0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~MetaspacePreTokenizer ⇐ &lt;code> PreTokenizer &lt;/code></span></h2> <p data-svelte-h="svelte-o07wl1">This PreTokenizer replaces spaces with the given replacement character, adds a prefix space if requested,
and returns a list of tokens.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-xjpnfg"><li><a href="#module_tokenizers..MetaspacePreTokenizer">~MetaspacePreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..MetaspacePreTokenizer_new"><code>new MetaspacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..MetaspacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..MetaspacePreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-metaspacepretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-metaspacepretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new MetaspacePreTokenizer(config)</span></h3> <table data-svelte-h="svelte-1sg6gza"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> 
<tbody><tr><td>config</td><td><code>Object</code></td><td></td><td><p>The configuration object for the MetaspacePreTokenizer.</p></td> </tr><tr><td>config.add_prefix_space</td><td><code>boolean</code></td><td></td><td><p>Whether to add a prefix space to the first token.</p></td> </tr><tr><td>config.replacement</td><td><code>string</code></td><td></td><td><p>The character to replace spaces with.</p></td> </tr><tr><td>[config.str_rep]</td><td><code>string</code></td><td><code>config.replacement</code></td><td><p>An optional string representation of the replacement character.</p></td> </tr><tr><td>[config.prepend_scheme]</td><td><code>&#39;first&#39;</code> | <code>&#39;never&#39;</code> | <code>&#39;always&#39;</code></td><td><code>&#39;always&#39;</code></td><td><p>The metaspace prepending scheme.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..MetaspacePreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="metaspacepretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#metaspacepretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>metaspacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ &lt;code> Array. &lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-i68fsj">This method takes a string, replaces spaces with the replacement character,
adds a prefix space if requested, and returns a new list of tokens.</p> <p data-svelte-h="svelte-8kprtf"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..MetaspacePreTokenizer"><code>MetaspacePreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - A new list of pre-tokenized tokens.</p> <table data-svelte-h="svelte-mb6l9"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to pre-tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>The options for the pre-tokenization.</p></td> </tr><tr><td>[options.section_index]</td><td><code>number</code></td><td><p>The index of the section to pre-tokenize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..MetaspaceDecoder" class="group"></a> <h2 class="relative group"><a id="tokenizersmetaspacedecoder--code-decoder-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersmetaspacedecoder--code-decoder-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~MetaspaceDecoder ⇐ &lt;code> Decoder 
&lt;/code></span></h2> <p data-svelte-h="svelte-kf7suv">MetaspaceDecoder class extends the Decoder class and decodes Metaspace tokenization.</p> <p data-svelte-h="svelte-nbdqst"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Decoder</code></p> <ul data-svelte-h="svelte-5xe7kn"><li><a href="#module_tokenizers..MetaspaceDecoder">~MetaspaceDecoder</a> ⇐ <code>Decoder</code><ul><li><a href="#new_module_tokenizers..MetaspaceDecoder_new"><code>new MetaspaceDecoder(config)</code></a></li> <li><a href="#module_tokenizers..MetaspaceDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..MetaspaceDecoder_new" class="group"></a> <h3 class="relative group"><a id="new-metaspacedecoderconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-metaspacedecoderconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new MetaspaceDecoder(config)</span></h3> <p data-svelte-h="svelte-44mrh1">Constructs a new MetaspaceDecoder object.</p> <table 
data-svelte-h="svelte-669i62"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the MetaspaceDecoder.</p></td> </tr><tr><td>config.add_prefix_space</td><td><code>boolean</code></td><td><p>Whether to add a prefix space to the decoded string.</p></td> </tr><tr><td>config.replacement</td><td><code>string</code></td><td><p>The string to replace spaces with.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..MetaspaceDecoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="metaspacedecoderdecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#metaspacedecoderdecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>metaspaceDecoder.decode_chain() : &lt;code> * &lt;/code></span></h3> <p data-svelte-h="svelte-hmubey"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..MetaspaceDecoder"><code>MetaspaceDecoder</code></a></p> <hr> <a id="module_tokenizers..Precompiled" class="group"></a> <h2 class="relative group"><a 
id="tokenizersprecompiled--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersprecompiled--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Precompiled ⇐ &lt;code> Normalizer &lt;/code></span></h2> <p data-svelte-h="svelte-obvisk">A normalizer that applies a precompiled charsmap.
This is useful for applying complex normalizations in C++ and exposing them to JavaScript.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <ul data-svelte-h="svelte-1go7hlr"><li><a href="#module_tokenizers..Precompiled">~Precompiled</a> ⇐ <code>Normalizer</code><ul><li><a href="#new_module_tokenizers..Precompiled_new"><code>new Precompiled(config)</code></a></li> <li><a href="#module_tokenizers..Precompiled+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..Precompiled_new" class="group"></a> <h3 class="relative group"><a id="new-precompiledconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-precompiledconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new Precompiled(config)</span></h3> <p data-svelte-h="svelte-vphs3k">Create a new instance of Precompiled normalizer.</p> <table data-svelte-h="svelte-1fprcm1"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> 
<tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the Precompiled normalizer.</p></td> </tr><tr><td>config.precompiled_charsmap</td><td><code>Object</code></td><td><p>The precompiled charsmap object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Precompiled+normalize" class="group"></a> <h3 class="relative group"><a id="precompilednormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#precompilednormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>precompiled.normalize(text) ⇒ &lt;code> string &lt;/code></span></h3> <p data-svelte-h="svelte-1kg0a1i">Normalizes the given text by applying the precompiled charsmap.</p> <p data-svelte-h="svelte-1cck924"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Precompiled"><code>Precompiled</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to 
normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizerSequence" class="group"></a> <h2 class="relative group"><a id="tokenizerspretokenizersequence--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspretokenizersequence--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PreTokenizerSequence ⇐ &lt;code> PreTokenizer &lt;/code></span></h2> <p data-svelte-h="svelte-1hneoxf">A pre-tokenizer that applies a sequence of pre-tokenizers to the input text.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-1wq9tve"><li><a href="#module_tokenizers..PreTokenizerSequence">~PreTokenizerSequence</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..PreTokenizerSequence_new"><code>new PreTokenizerSequence(config)</code></a></li> <li><a href="#module_tokenizers..PreTokenizerSequence+pre_tokenize_text"><code>.pre_tokenize_text(text, 
[options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..PreTokenizerSequence_new" class="group"></a> <h3 class="relative group"><a id="new-pretokenizersequenceconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-pretokenizersequenceconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new PreTokenizerSequence(config)</span></h3> <p data-svelte-h="svelte-f6z5j5">Creates an instance of PreTokenizerSequence.</p> <table data-svelte-h="svelte-3gpd27"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the pre-tokenizer sequence.</p></td> </tr><tr><td>config.pretokenizers</td><td><code>Array.&lt;Object&gt;</code></td><td><p>An array of pre-tokenizer configurations.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizerSequence+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="pretokenizersequencepretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden 
with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretokenizersequencepretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTokenizerSequence.pre_tokenize_text(text, [options]) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-cexh8w">Applies each pre-tokenizer in the sequence to the input text in turn.</p> <p data-svelte-h="svelte-v7wifj"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PreTokenizerSequence"><code>PreTokenizerSequence</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - The pre-tokenized text.</p> <table data-svelte-h="svelte-zcvat0"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to pre-tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WhitespacePreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerswhitespacepretokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswhitespacepretokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~WhitespacePreTokenizer</span></h2> <p data-svelte-h="svelte-1xsklq1">Splits on word boundaries (using the 
following regular expression: <code>\w+|[^\w\s]+</code>).</p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-1pxidrf"><li><a href="#module_tokenizers..WhitespacePreTokenizer">~WhitespacePreTokenizer</a><ul><li><a href="#new_module_tokenizers..WhitespacePreTokenizer_new"><code>new WhitespacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..WhitespacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..WhitespacePreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-whitespacepretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-whitespacepretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new WhitespacePreTokenizer(config)</span></h3> <p data-svelte-h="svelte-1eny2m5">Creates an instance of WhitespacePreTokenizer.</p> <table data-svelte-h="svelte-q9tfiz"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> 
<tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the pre-tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WhitespacePreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="whitespacepretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#whitespacepretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>whitespacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-j2p7wj">Pre-tokenizes the input text by splitting it on word boundaries.</p> <p data-svelte-h="svelte-rjqdv6"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..WhitespacePreTokenizer"><code>WhitespacePreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of tokens produced by splitting the input text on word boundaries.</p> <table data-svelte-h="svelte-12f9sp1"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to be pre-tokenized.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WhitespaceSplit" class="group"></a> <h2 class="relative group"><a id="tokenizerswhitespacesplit--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswhitespacesplit--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~WhitespaceSplit ⇐ &lt;code> 
PreTokenizer &lt;/code></span></h2> <p data-svelte-h="svelte-6e66pa">Splits a string of text by whitespace characters into individual tokens.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-i2abig"><li><a href="#module_tokenizers..WhitespaceSplit">~WhitespaceSplit</a><code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..WhitespaceSplit_new"><code>new WhitespaceSplit(config)</code></a></li> <li><a href="#module_tokenizers..WhitespaceSplit+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..WhitespaceSplit_new" class="group"></a> <h3 class="relative group"><a id="new-whitespacesplitconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-whitespacesplitconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new WhitespaceSplit(config)</span></h3> <p data-svelte-h="svelte-19yr1r7">Creates an instance of WhitespaceSplit.</p> <table 
data-svelte-h="svelte-q9tfiz"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the pre-tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WhitespaceSplit+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="whitespacesplitpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#whitespacesplitpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>whitespaceSplit.pre_tokenize_text(text, [options]) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-s2po1q">Pre-tokenizes the input text by splitting it on whitespace characters.</p> <p data-svelte-h="svelte-gmi99a"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..WhitespaceSplit"><code>WhitespaceSplit</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of tokens produced by splitting the input text on whitespace.</p> <table data-svelte-h="svelte-12f9sp1"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to be pre-tokenized.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ReplacePreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersreplacepretokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersreplacepretokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~ReplacePreTokenizer</span></h2> <p 
data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-n2rage"><li><a href="#module_tokenizers..ReplacePreTokenizer">~ReplacePreTokenizer</a><ul><li><a href="#new_module_tokenizers..ReplacePreTokenizer_new"><code>new ReplacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..ReplacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a><code>Array.&lt;string&gt;</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..ReplacePreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-replacepretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-replacepretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new ReplacePreTokenizer(config)</span></h3> <table data-svelte-h="svelte-8tvu8r"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration options for the pre-tokenizer.</p></td> 
</tr><tr><td>config.pattern</td><td><code>Object</code></td><td><p>The pattern to match and replace in the text. Can be a string or a regex object.</p></td> </tr><tr><td>config.content</td><td><code>string</code></td><td><p>What to replace the pattern with.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ReplacePreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="replacepretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#replacepretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>replacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h3> <p data-svelte-h="svelte-167jpma">Pre-tokenizes the input text by replacing certain characters.</p> <p data-svelte-h="svelte-1605wdl"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..ReplacePreTokenizer"><code>ReplacePreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - An array of tokens produced by replacing certain characters.</p> <table data-svelte-h="svelte-12f9sp1"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to be pre-tokenized.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BYTES_TO_UNICODE" class="group"></a> <h2 class="relative group"><a id="tokenizersbytestounicode--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbytestounicode--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BYTES_TO_UNICODE ⇒ &lt;code> Object &lt;/code></span></h2> <p 
data-svelte-h="svelte-9bnea6">Returns a mapping from UTF-8 bytes to unicode strings.
Specifically avoids mapping to whitespace/control characters the BPE code barfs on.</p> <p data-svelte-h="svelte-6ckyyb"><strong>Kind</strong>: inner constant of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Object</code> - Object with utf-8 byte keys and unicode string values.</p> <hr> <a id="module_tokenizers..loadTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersloadtokenizerpretrainedmodelnameorpath-options--code-promise--array--any---code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersloadtokenizerpretrainedmodelnameorpath-options--code-promise--array--any---code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~loadTokenizer(pretrained_model_name_or_path, options) ⇒ &lt;code> Promise. 
&lt; Array &lt; any > > &lt;/code></span></h2> <p data-svelte-h="svelte-reckhh">Loads a tokenizer from the specified path.</p> <p data-svelte-h="svelte-13ziayt"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Promise.&lt;Array&lt;any&gt;&gt;</code> - A promise that resolves with information about the loaded tokenizer.</p> <table data-svelte-h="svelte-1p1jwnz"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>pretrained_model_name_or_path</td><td><code>string</code></td><td><p>The path to the tokenizer directory.</p></td> </tr><tr><td>options</td><td><code>PretrainedTokenizerOptions</code></td><td><p>Additional options for loading the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..regexSplit" class="group"></a> <h2 class="relative group"><a id="tokenizersregexsplittext-regex--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersregexsplittext-regex--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~regexSplit(text, 
regex) ⇒ <code>Array.&lt;string&gt;</code></span></h2> <p data-svelte-h="svelte-tsn1ig">Helper function to split a string on a regex, but keep the delimiters.
This is required because the JavaScript <code>.split()</code> method does not keep the delimiters,
and wrapping in a capturing group causes issues with existing capturing groups (due to nesting).</p> <p data-svelte-h="svelte-j4end5"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - The split string.</p> <table data-svelte-h="svelte-guhl6k"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to split.</p></td> </tr><tr><td>regex</td><td><code>RegExp</code></td><td><p>The regex to split on.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..createPattern" class="group"></a> <h2 class="relative group"><a id="tokenizerscreatepatternpattern-invert--code-regexp-code--code-null-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerscreatepatternpattern-invert--code-regexp-code--code-null-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~createPattern(pattern, invert) ⇒ &lt;code> RegExp &lt;/code> | &lt;code> null &lt;/code></span></h2> <p data-svelte-h="svelte-9yqxaa">Helper method to construct a 
pattern from a config object.</p> <p data-svelte-h="svelte-1tcd95m"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>RegExp</code> | <code>null</code> - The compiled pattern.</p> <table data-svelte-h="svelte-2irxm"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>pattern</td><td><code>Object</code></td><td></td><td><p>The pattern object.</p></td> </tr><tr><td>invert</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether to invert the pattern.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..objectToMap" class="group"></a> <h2 class="relative group"><a id="tokenizersobjecttomapobj--code-map--string-any--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersobjecttomapobj--code-map--string-any--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~objectToMap(obj) ⇒ &lt;code> Map. 
&lt; string, any > &lt;/code></span></h2> <p data-svelte-h="svelte-y4nvw8">Helper function to convert an Object to a Map</p> <p data-svelte-h="svelte-1oim1d9"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Map.&lt;string, any&gt;</code> - The map.</p> <table data-svelte-h="svelte-1ha9dpj"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>obj</td><td><code>Object</code></td><td><p>The object to convert.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..prepareTensorForDecode" class="group"></a> <h2 class="relative group"><a id="tokenizerspreparetensorfordecodetensor--code-array--number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspreparetensorfordecodetensor--code-array--number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~prepareTensorForDecode(tensor) ⇒ &lt;code> Array. 
&lt; number > &lt;/code></span></h2> <p data-svelte-h="svelte-1sig5im">Helper function to convert a tensor to a list before decoding.</p> <p data-svelte-h="svelte-1qz3zie"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Array.&lt;number&gt;</code> - The tensor as a list.</p> <table data-svelte-h="svelte-1kahhga"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tensor</td><td><code><a href="#Tensor">Tensor</a></code></td><td><p>The tensor to convert.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..clean_up_tokenization" class="group"></a> <h2 class="relative group"><a id="tokenizerscleanuptokenizationtext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerscleanuptokenizationtext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~clean_up_tokenization(text) ⇒ &lt;code> string &lt;/code></span></h2> <p data-svelte-h="svelte-1n3aqy7">Clean up a list of simple English tokenization artifacts like spaces before punctuations and abbreviated 
forms</p> <p data-svelte-h="svelte-157j3gz"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>string</code> - The cleaned up text.</p> <table data-svelte-h="svelte-my2gd4"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to clean up.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..remove_accents" class="group"></a> <h2 class="relative group"><a id="tokenizersremoveaccentstext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersremoveaccentstext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~remove_accents(text) ⇒ &lt;code> string &lt;/code></span></h2> <p data-svelte-h="svelte-b1xq8m">Helper function to remove accents from a string.</p> <p data-svelte-h="svelte-10igiq5"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>string</code> - The text with accents removed.</p> <table 
data-svelte-h="svelte-96bvb"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to remove accents from.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..lowercase_and_remove_accent" class="group"></a> <h2 class="relative group"><a id="tokenizerslowercaseandremoveaccenttext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerslowercaseandremoveaccenttext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~lowercase_and_remove_accent(text) ⇒ &lt;code> string &lt;/code></span></h2> <p data-svelte-h="svelte-1kajtfy">Helper function to lowercase a string and remove accents.</p> <p data-svelte-h="svelte-15y2bq4"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>string</code> - The lowercased text with accents removed.</p> <table data-svelte-h="svelte-usftcj"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to 
lowercase and remove accents from.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..whitespace_split" class="group"></a> <h2 class="relative group"><a id="tokenizerswhitespacesplittext--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswhitespacesplittext--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~whitespace_split(text) ⇒ &lt;code> Array. 
&lt; string > &lt;/code></span></h2> <p data-svelte-h="svelte-1x0opp4">Split a string on whitespace.</p> <p data-svelte-h="svelte-j4end5"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Array.&lt;string&gt;</code> - The split string.</p> <table data-svelte-h="svelte-h36eua"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to split.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PretrainedTokenizerOptions" class="group"></a> <h2 class="relative group"><a id="tokenizerspretrainedtokenizeroptions--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspretrainedtokenizeroptions--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PretrainedTokenizerOptions : &lt;code> Object &lt;/code></span></h2> <p data-svelte-h="svelte-3nuv1e">Additional tokenizer-specific properties.</p> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a 
href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-1me0ii7"><thead><tr><th>Name</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>[legacy]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether or not the <code>legacy</code> behavior of the tokenizer should be used.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BPENode" class="group"></a> <h2 class="relative group"><a id="tokenizersbpenode--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbpenode--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BPENode : &lt;code> Object &lt;/code></span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-1ou5uv8"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>token</td><td><code>string</code></td><td><p>The token associated with the node</p></td> 
</tr><tr><td>bias</td><td><code>number</code></td><td><p>A positional bias for the node.</p></td> </tr><tr><td>[score]</td><td><code>number</code></td><td><p>The score of the node.</p></td> </tr><tr><td>[prev]</td><td><code>BPENode</code></td><td><p>The previous node in the linked list.</p></td> </tr><tr><td>[next]</td><td><code>BPENode</code></td><td><p>The next node in the linked list.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..SplitDelimiterBehavior" class="group"></a> <h2 class="relative group"><a id="tokenizerssplitdelimiterbehavior--code--removed--code--code--isolated--code--code--mergedwithprevious--code--code--mergedwithnext--code--code--contiguous--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerssplitdelimiterbehavior--code--removed--code--code--isolated--code--code--mergedwithprevious--code--code--mergedwithnext--code--code--contiguous--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~SplitDelimiterBehavior : &lt;code> ’ removed ’ &lt;/code> | &lt;code> ’ isolated ’ &lt;/code> | &lt;code> ’ mergedWithPrevious ’ &lt;/code> | &lt;code> ’ mergedWithNext ’ &lt;/code> | 
&lt;code> ’ contiguous ’ &lt;/code></span></h2> <p data-svelte-h="svelte-ec8jqd"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="module_tokenizers..PostProcessedOutput" class="group"></a> <h2 class="relative group"><a id="tokenizerspostprocessedoutput--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspostprocessedoutput--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PostProcessedOutput : &lt;code> Object &lt;/code></span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-eksz4k"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.&lt;string&gt;</code></td><td><p>List of tokens produced by the post-processor.</p></td> </tr><tr><td>[token_type_ids]</td><td><code>Array.&lt;number&gt;</code></td><td><p>List of token type ids produced by the post-processor.</p></td></tr></tbody></table> <hr> <a 
id="module_tokenizers..EncodingSingle" class="group"></a> <h2 class="relative group"><a id="tokenizersencodingsingle--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersencodingsingle--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~EncodingSingle : &lt;code> Object &lt;/code></span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-dv15ku"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>input_ids</td><td><code>Array.&lt;number&gt;</code></td><td><p>List of token ids to be fed to a model.</p></td> </tr><tr><td>attention_mask</td><td><code>Array.&lt;number&gt;</code></td><td><p>List of indices specifying which tokens should be attended to by the model.</p></td> </tr><tr><td>[token_type_ids]</td><td><code>Array.&lt;number&gt;</code></td><td><p>List of token type ids to be fed to a model.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Message" class="group"></a> <h2 
class="relative group"><a id="tokenizersmessage--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersmessage--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Message : &lt;code> Object &lt;/code></span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-sjyk18"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>role</td><td><code>string</code></td><td><p>The role of the message (e.g., &quot;user&quot; or &quot;assistant&quot; or &quot;system&quot;).</p></td> </tr><tr><td>content</td><td><code>string</code></td><td><p>The content of the message.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BatchEncoding" class="group"></a> <h2 class="relative group"><a id="tokenizersbatchencoding--code-array--number--code--code-array--array--number---code--code-tensor-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 
with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbatchencoding--code-array--number--code--code-array--array--number---code--code-tensor-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BatchEncoding : &lt;code> Array &lt; number > &lt;/code> | &lt;code> Array &lt; Array &lt; number > > &lt;/code> | &lt;code> Tensor &lt;/code></span></h2> <p data-svelte-h="svelte-1d7gg97">Holds the output of the tokenizer’s call function.</p> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-6ozwz5"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>input_ids</td><td><code>BatchEncodingItem</code></td><td><p>List of token ids to be fed to a model.</p></td> </tr><tr><td>attention_mask</td><td><code>BatchEncodingItem</code></td><td><p>List of indices specifying which tokens should be attended to by the model.</p></td> </tr><tr><td>[token_type_ids]</td><td><code>BatchEncodingItem</code></td><td><p>List of token type ids to be fed to a model.</p></td></tr></tbody></table> <hr> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" 
href="https://github.com/huggingface/transformers.js/blob/main/docs/source/api/tokenizers.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
	// Assigned without a declaration keyword, so in this non-module script it
	// becomes a global property.
	// NOTE(review): presumably read by the client entry modules imported below —
	// confirm against the SvelteKit build output.
	__sveltekit_kuyevp = {
		assets: "/docs/transformers.js/pr_1113/en",
		base: "/docs/transformers.js/pr_1113/en",
		env: {}
	};

	// Resolve the mount point now, while this <script> is still the one being
	// executed; the promise callback below runs later.
	const mountPoint = document.currentScript.parentElement;
	const pageData = [null, null];

	// Request both entry modules up front, then start once both have loaded.
	const pendingKit = import("/docs/transformers.js/pr_1113/en/_app/immutable/entry/start.88a6e140.js");
	const pendingApp = import("/docs/transformers.js/pr_1113/en/_app/immutable/entry/app.0003020d.js");

	Promise.all([pendingKit, pendingApp]).then((loaded) => {
		const kit = loaded[0];
		const app = loaded[1];
		const startOptions = {
			node_ids: [0, 14],
			data: pageData,
			form: null,
			error: null
		};
		kit.start(app, mountPoint, startOptions);
	});
}
</script>

Xet Storage Details

Size:
487 kB
·
Xet hash:
883755823a4afc7d21241afd5101240bcab3358cb987ae3cfcbfb0dfc3448815

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.