Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"tokenizers","local":"tokenizers","sections":[{"title":"tokenizers.TokenizerModel ⇐ <code> Callable </code>","local":"tokenizerstokenizermodel--code-callable-code","sections":[{"title":"new TokenizerModel(config)","local":"new-tokenizermodelconfig","sections":[],"depth":3},{"title":"tokenizerModel.vocab : <code> Array. < string > </code>","local":"tokenizermodelvocab--code-array--string--code","sections":[],"depth":3},{"title":"tokenizerModel.tokens_to_ids : <code> Map. < string, number > </code>","local":"tokenizermodeltokenstoids--code-map--string-number--code","sections":[],"depth":3},{"title":"tokenizerModel.fuse_unk : <code> boolean </code>","local":"tokenizermodelfuseunk--code-boolean-code","sections":[],"depth":3},{"title":"tokenizerModel._call(tokens) ⇒ <code> Array. < string > </code>","local":"tokenizermodelcalltokens--code-array--string--code","sections":[],"depth":3},{"title":"tokenizerModel.encode(tokens) ⇒ <code> Array. < string > </code>","local":"tokenizermodelencodetokens--code-array--string--code","sections":[],"depth":3},{"title":"tokenizerModel.convert_tokens_to_ids(tokens) ⇒ <code> Array. < number > </code>","local":"tokenizermodelconverttokenstoidstokens--code-array--number--code","sections":[],"depth":3},{"title":"tokenizerModel.convert_ids_to_tokens(ids) ⇒ <code> Array. < string > </code>","local":"tokenizermodelconvertidstotokensids--code-array--string--code","sections":[],"depth":3},{"title":"TokenizerModel.fromConfig(config, ...args) ⇒ <code> TokenizerModel </code>","local":"tokenizermodelfromconfigconfig-args--code-tokenizermodel-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.PreTrainedTokenizer","local":"tokenizerspretrainedtokenizer","sections":[{"title":"new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)","local":"new-pretrainedtokenizertokenizerjson-tokenizerconfig","sections":[],"depth":3},{"title":"preTrainedTokenizer.added_tokens : <code> Array. < AddedToken > </code>","local":"pretrainedtokenizeraddedtokens--code-array--addedtoken--code","sections":[],"depth":3},{"title":"preTrainedTokenizer.remove_space : <code> boolean </code>","local":"pretrainedtokenizerremovespace--code-boolean-code","sections":[],"depth":3},{"title":"preTrainedTokenizer._call(text, options) ⇒ <code> BatchEncoding </code>","local":"pretrainedtokenizercalltext-options--code-batchencoding-code","sections":[],"depth":3},{"title":"preTrainedTokenizer._encode_text(text) ⇒ <code> Array < string > </code> | <code> null </code>","local":"pretrainedtokenizerencodetexttext--code-array--string--code--code-null-code","sections":[],"depth":3},{"title":"preTrainedTokenizer._tokenize_helper(text, options) ⇒ <code> * </code>","local":"pretrainedtokenizertokenizehelpertext-options--code--code","sections":[],"depth":3},{"title":"preTrainedTokenizer.tokenize(text, options) ⇒ <code> Array. < string > </code>","local":"pretrainedtokenizertokenizetext-options--code-array--string--code","sections":[],"depth":3},{"title":"preTrainedTokenizer.encode(text, options) ⇒ <code> Array. < number > </code>","local":"pretrainedtokenizerencodetext-options--code-array--number--code","sections":[],"depth":3},{"title":"preTrainedTokenizer.batch_decode(batch, decode_args) ⇒ <code> Array. < string > </code>","local":"pretrainedtokenizerbatchdecodebatch-decodeargs--code-array--string--code","sections":[],"depth":3},{"title":"preTrainedTokenizer.decode(token_ids, [decode_args]) ⇒ <code> string </code>","local":"pretrainedtokenizerdecodetokenids-decodeargs--code-string-code","sections":[],"depth":3},{"title":"preTrainedTokenizer.decode_single(token_ids, decode_args) ⇒ <code> string </code>","local":"pretrainedtokenizerdecodesingletokenids-decodeargs--code-string-code","sections":[],"depth":3},{"title":"preTrainedTokenizer.get_chat_template(options) ⇒ <code> string </code>","local":"pretrainedtokenizergetchattemplateoptions--code-string-code","sections":[],"depth":3},{"title":"preTrainedTokenizer.apply_chat_template(conversation, options) ⇒ <code> string </code> | <code> Tensor </code> | <code> Array < number > </code> | <code> Array < Array < number > > </code> | <code> BatchEncoding </code>","local":"pretrainedtokenizerapplychattemplateconversation-options--code-string-code--code-tensor-code--code-array--number--code--code-array--array--number---code--code-batchencoding-code","sections":[],"depth":3},{"title":"PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ <code> Promise. < PreTrainedTokenizer > </code>","local":"pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.BertTokenizer ⇐ <code> PreTrainedTokenizer </code>","local":"tokenizersberttokenizer--code-pretrainedtokenizer-code","sections":[],"depth":2},{"title":"tokenizers.AlbertTokenizer ⇐ <code> PreTrainedTokenizer </code>","local":"tokenizersalberttokenizer--code-pretrainedtokenizer-code","sections":[],"depth":2},{"title":"tokenizers.NllbTokenizer","local":"tokenizersnllbtokenizer","sections":[{"title":"nllbTokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ <code> Object </code>","local":"nllbtokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.M2M100Tokenizer","local":"tokenizersm2m100tokenizer","sections":[{"title":"m2M100Tokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ <code> Object </code>","local":"m2m100tokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.WhisperTokenizer ⇐ <code> PreTrainedTokenizer </code>","local":"tokenizerswhispertokenizer--code-pretrainedtokenizer-code","sections":[{"title":"whisperTokenizer._decode_asr(sequences, options) ⇒ <code> * </code>","local":"whispertokenizerdecodeasrsequences-options--code--code","sections":[],"depth":3},{"title":"whisperTokenizer.decode() : <code> * </code>","local":"whispertokenizerdecode--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.MarianTokenizer","local":"tokenizersmariantokenizer","sections":[{"title":"new MarianTokenizer(tokenizerJSON, tokenizerConfig)","local":"new-mariantokenizertokenizerjson-tokenizerconfig","sections":[],"depth":3},{"title":"marianTokenizer._encode_text(text) ⇒ <code> Array </code>","local":"mariantokenizerencodetexttext--code-array-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.AutoTokenizer","local":"tokenizersautotokenizer","sections":[{"title":"new AutoTokenizer()","local":"new-autotokenizer","sections":[],"depth":3},{"title":"AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ <code> Promise. < PreTrainedTokenizer > </code>","local":"autotokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.is_chinese_char(cp) ⇒ <code> boolean </code>","local":"tokenizersischinesecharcp--code-boolean-code","sections":[],"depth":2},{"title":"tokenizers~AddedToken","local":"tokenizersaddedtoken","sections":[{"title":"new AddedToken(config)","local":"new-addedtokenconfig","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~WordPieceTokenizer ⇐ <code> TokenizerModel </code>","local":"tokenizerswordpiecetokenizer--code-tokenizermodel-code","sections":[{"title":"new WordPieceTokenizer(config)","local":"new-wordpiecetokenizerconfig","sections":[],"depth":3},{"title":"wordPieceTokenizer.tokens_to_ids : <code> Map. < string, number > </code>","local":"wordpiecetokenizertokenstoids--code-map--string-number--code","sections":[],"depth":3},{"title":"wordPieceTokenizer.unk_token_id : <code> number </code>","local":"wordpiecetokenizerunktokenid--code-number-code","sections":[],"depth":3},{"title":"wordPieceTokenizer.unk_token : <code> string </code>","local":"wordpiecetokenizerunktoken--code-string-code","sections":[],"depth":3},{"title":"wordPieceTokenizer.max_input_chars_per_word : <code> number </code>","local":"wordpiecetokenizermaxinputcharsperword--code-number-code","sections":[],"depth":3},{"title":"wordPieceTokenizer.vocab : <code> Array. < string > </code>","local":"wordpiecetokenizervocab--code-array--string--code","sections":[],"depth":3},{"title":"wordPieceTokenizer.encode(tokens) ⇒ <code> Array. < string > </code>","local":"wordpiecetokenizerencodetokens--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Unigram ⇐ <code> TokenizerModel </code>","local":"tokenizersunigram--code-tokenizermodel-code","sections":[{"title":"new Unigram(config, moreConfig)","local":"new-unigramconfig-moreconfig","sections":[],"depth":3},{"title":"unigram.populateNodes(lattice)","local":"unigrampopulatenodeslattice","sections":[],"depth":3},{"title":"unigram.tokenize(normalized) ⇒ <code> Array. < string > </code>","local":"unigramtokenizenormalized--code-array--string--code","sections":[],"depth":3},{"title":"unigram.encode(tokens) ⇒ <code> Array. < string > </code>","local":"unigramencodetokens--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~BPE ⇐ <code> TokenizerModel </code>","local":"tokenizersbpe--code-tokenizermodel-code","sections":[{"title":"new BPE(config)","local":"new-bpeconfig","sections":[],"depth":3},{"title":"bpE.tokens_to_ids : <code> Map. < string, number > </code>","local":"bpetokenstoids--code-map--string-number--code","sections":[],"depth":3},{"title":"bpE.merges : <code> * </code>","local":"bpemerges--code--code","sections":[{"title":"merges.config.merges : <code> * </code>","local":"mergesconfigmerges--code--code","sections":[],"depth":4}],"depth":3},{"title":"bpE.cache : <code> Map. < string, Array < string > > </code>","local":"bpecache--code-map--string-array--string---code","sections":[],"depth":3},{"title":"bpE.bpe(token) ⇒ <code> Array. < string > </code>","local":"bpebpetoken--code-array--string--code","sections":[],"depth":3},{"title":"bpE.encode(tokens) ⇒ <code> Array. < string > </code>","local":"bpeencodetokens--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~LegacyTokenizerModel","local":"tokenizerslegacytokenizermodel","sections":[{"title":"new LegacyTokenizerModel(config, moreConfig)","local":"new-legacytokenizermodelconfig-moreconfig","sections":[],"depth":3},{"title":"legacyTokenizerModel.tokens_to_ids : <code> Map. < string, number > </code>","local":"legacytokenizermodeltokenstoids--code-map--string-number--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Normalizer","local":"tokenizersnormalizer","sections":[{"title":"new Normalizer(config)","local":"new-normalizerconfig","sections":[],"depth":3},{"title":"normalizer.normalize(text) ⇒ <code> string </code>","local":"normalizernormalizetext--code-string-code","sections":[],"depth":3},{"title":"normalizer._call(text) ⇒ <code> string </code>","local":"normalizercalltext--code-string-code","sections":[],"depth":3},{"title":"Normalizer.fromConfig(config) ⇒ <code> Normalizer </code>","local":"normalizerfromconfigconfig--code-normalizer-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Replace ⇐ <code> Normalizer </code>","local":"tokenizersreplace--code-normalizer-code","sections":[{"title":"replace.normalize(text) ⇒ <code> string </code>","local":"replacenormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~NFC ⇐ <code> Normalizer </code>","local":"tokenizersnfc--code-normalizer-code","sections":[{"title":"nfC.normalize(text) ⇒ <code> string </code>","local":"nfcnormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~NFKC ⇐ <code> Normalizer </code>","local":"tokenizersnfkc--code-normalizer-code","sections":[{"title":"nfkC.normalize(text) ⇒ <code> string </code>","local":"nfkcnormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~NFKD ⇐ <code> Normalizer </code>","local":"tokenizersnfkd--code-normalizer-code","sections":[{"title":"nfkD.normalize(text) ⇒ <code> string </code>","local":"nfkdnormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~StripNormalizer","local":"tokenizersstripnormalizer","sections":[{"title":"stripNormalizer.normalize(text) ⇒ <code> string </code>","local":"stripnormalizernormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~StripAccents ⇐ <code> Normalizer </code>","local":"tokenizersstripaccents--code-normalizer-code","sections":[{"title":"stripAccents.normalize(text) ⇒ <code> string </code>","local":"stripaccentsnormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Lowercase ⇐ <code> Normalizer </code>","local":"tokenizerslowercase--code-normalizer-code","sections":[{"title":"lowercase.normalize(text) ⇒ <code> string </code>","local":"lowercasenormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Prepend ⇐ <code> Normalizer </code>","local":"tokenizersprepend--code-normalizer-code","sections":[{"title":"prepend.normalize(text) ⇒ <code> string </code>","local":"prependnormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~NormalizerSequence ⇐ <code> Normalizer </code>","local":"tokenizersnormalizersequence--code-normalizer-code","sections":[{"title":"new NormalizerSequence(config)","local":"new-normalizersequenceconfig","sections":[],"depth":3},{"title":"normalizerSequence.normalize(text) ⇒ <code> string </code>","local":"normalizersequencenormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~BertNormalizer ⇐ <code> Normalizer </code>","local":"tokenizersbertnormalizer--code-normalizer-code","sections":[{"title":"bertNormalizer._tokenize_chinese_chars(text) ⇒ <code> string </code>","local":"bertnormalizertokenizechinesecharstext--code-string-code","sections":[],"depth":3},{"title":"bertNormalizer.stripAccents(text) ⇒ <code> string </code>","local":"bertnormalizerstripaccentstext--code-string-code","sections":[],"depth":3},{"title":"bertNormalizer.normalize(text) ⇒ <code> string </code>","local":"bertnormalizernormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~PreTokenizer ⇐ <code> Callable </code>","local":"tokenizerspretokenizer--code-callable-code","sections":[{"title":"preTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"pretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3},{"title":"preTokenizer.pre_tokenize(text, [options]) ⇒ <code> Array. < string > </code>","local":"pretokenizerpretokenizetext-options--code-array--string--code","sections":[],"depth":3},{"title":"preTokenizer._call(text, [options]) ⇒ <code> Array. < string > </code>","local":"pretokenizercalltext-options--code-array--string--code","sections":[],"depth":3},{"title":"PreTokenizer.fromConfig(config) ⇒ <code> PreTokenizer </code>","local":"pretokenizerfromconfigconfig--code-pretokenizer-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~BertPreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizersbertpretokenizer--code-pretokenizer-code","sections":[{"title":"new BertPreTokenizer(config)","local":"new-bertpretokenizerconfig","sections":[],"depth":3},{"title":"bertPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"bertpretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~ByteLevelPreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizersbytelevelpretokenizer--code-pretokenizer-code","sections":[{"title":"new ByteLevelPreTokenizer(config)","local":"new-bytelevelpretokenizerconfig","sections":[],"depth":3},{"title":"byteLevelPreTokenizer.add_prefix_space : <code> boolean </code>","local":"bytelevelpretokenizeraddprefixspace--code-boolean-code","sections":[],"depth":3},{"title":"byteLevelPreTokenizer.trim_offsets : <code> boolean </code>","local":"bytelevelpretokenizertrimoffsets--code-boolean-code","sections":[],"depth":3},{"title":"byteLevelPreTokenizer.use_regex : <code> boolean </code>","local":"bytelevelpretokenizeruseregex--code-boolean-code","sections":[],"depth":3},{"title":"byteLevelPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"bytelevelpretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~SplitPreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizerssplitpretokenizer--code-pretokenizer-code","sections":[{"title":"new SplitPreTokenizer(config)","local":"new-splitpretokenizerconfig","sections":[],"depth":3},{"title":"splitPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"splitpretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~PunctuationPreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizerspunctuationpretokenizer--code-pretokenizer-code","sections":[{"title":"new PunctuationPreTokenizer(config)","local":"new-punctuationpretokenizerconfig","sections":[],"depth":3},{"title":"punctuationPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"punctuationpretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~DigitsPreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizersdigitspretokenizer--code-pretokenizer-code","sections":[{"title":"new DigitsPreTokenizer(config)","local":"new-digitspretokenizerconfig","sections":[],"depth":3},{"title":"digitsPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"digitspretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~PostProcessor ⇐ <code> Callable </code>","local":"tokenizerspostprocessor--code-callable-code","sections":[{"title":"new PostProcessor(config)","local":"new-postprocessorconfig","sections":[],"depth":3},{"title":"postProcessor.post_process(tokens, ...args) ⇒ <code> PostProcessedOutput </code>","local":"postprocessorpostprocesstokens-args--code-postprocessedoutput-code","sections":[],"depth":3},{"title":"postProcessor._call(tokens, ...args) ⇒ <code> PostProcessedOutput </code>","local":"postprocessorcalltokens-args--code-postprocessedoutput-code","sections":[],"depth":3},{"title":"PostProcessor.fromConfig(config) ⇒ <code> PostProcessor </code>","local":"postprocessorfromconfigconfig--code-postprocessor-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~BertProcessing","local":"tokenizersbertprocessing","sections":[{"title":"new BertProcessing(config)","local":"new-bertprocessingconfig","sections":[],"depth":3},{"title":"bertProcessing.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>","local":"bertprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~TemplateProcessing ⇐ <code> PostProcessor </code>","local":"tokenizerstemplateprocessing--code-postprocessor-code","sections":[{"title":"new TemplateProcessing(config)","local":"new-templateprocessingconfig","sections":[],"depth":3},{"title":"templateProcessing.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>","local":"templateprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~ByteLevelPostProcessor ⇐ <code> PostProcessor </code>","local":"tokenizersbytelevelpostprocessor--code-postprocessor-code","sections":[{"title":"byteLevelPostProcessor.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>","local":"bytelevelpostprocessorpostprocesstokens-tokenspair--code-postprocessedoutput-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~PostProcessorSequence","local":"tokenizerspostprocessorsequence","sections":[{"title":"new PostProcessorSequence(config)","local":"new-postprocessorsequenceconfig","sections":[],"depth":3},{"title":"postProcessorSequence.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>","local":"postprocessorsequencepostprocesstokens-tokenspair--code-postprocessedoutput-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Decoder ⇐ <code> Callable </code>","local":"tokenizersdecoder--code-callable-code","sections":[{"title":"new Decoder(config)","local":"new-decoderconfig","sections":[],"depth":3},{"title":"decoder.added_tokens : <code> Array. < AddedToken > </code>","local":"decoderaddedtokens--code-array--addedtoken--code","sections":[],"depth":3},{"title":"decoder._call(tokens) ⇒ <code> string </code>","local":"decodercalltokens--code-string-code","sections":[],"depth":3},{"title":"decoder.decode(tokens) ⇒ <code> string </code>","local":"decoderdecodetokens--code-string-code","sections":[],"depth":3},{"title":"decoder.decode_chain(tokens) ⇒ <code> Array. < string > </code>","local":"decoderdecodechaintokens--code-array--string--code","sections":[],"depth":3},{"title":"Decoder.fromConfig(config) ⇒ <code> Decoder </code>","local":"decoderfromconfigconfig--code-decoder-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~FuseDecoder","local":"tokenizersfusedecoder","sections":[{"title":"fuseDecoder.decode_chain() : <code> * </code>","local":"fusedecoderdecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~WordPieceDecoder ⇐ <code> Decoder </code>","local":"tokenizerswordpiecedecoder--code-decoder-code","sections":[{"title":"new WordPieceDecoder(config)","local":"new-wordpiecedecoderconfig","sections":[],"depth":3},{"title":"wordPieceDecoder.decode_chain() : <code> * </code>","local":"wordpiecedecoderdecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~ByteLevelDecoder ⇐ <code> Decoder </code>","local":"tokenizersbyteleveldecoder--code-decoder-code","sections":[{"title":"new ByteLevelDecoder(config)","local":"new-byteleveldecoderconfig","sections":[],"depth":3},{"title":"byteLevelDecoder.convert_tokens_to_string(tokens) ⇒ <code> string </code>","local":"byteleveldecoderconverttokenstostringtokens--code-string-code","sections":[],"depth":3},{"title":"byteLevelDecoder.decode_chain() : <code> * </code>","local":"byteleveldecoderdecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~CTCDecoder","local":"tokenizersctcdecoder","sections":[{"title":"ctcDecoder.convert_tokens_to_string(tokens) ⇒ <code> string </code>","local":"ctcdecoderconverttokenstostringtokens--code-string-code","sections":[],"depth":3},{"title":"ctcDecoder.decode_chain() : <code> * </code>","local":"ctcdecoderdecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~DecoderSequence ⇐ <code> Decoder </code>","local":"tokenizersdecodersequence--code-decoder-code","sections":[{"title":"new DecoderSequence(config)","local":"new-decodersequenceconfig","sections":[],"depth":3},{"title":"decoderSequence.decode_chain() : <code> * </code>","local":"decodersequencedecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~MetaspacePreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizersmetaspacepretokenizer--code-pretokenizer-code","sections":[{"title":"new MetaspacePreTokenizer(config)","local":"new-metaspacepretokenizerconfig","sections":[],"depth":3},{"title":"metaspacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"metaspacepretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~MetaspaceDecoder ⇐ <code> Decoder </code>","local":"tokenizersmetaspacedecoder--code-decoder-code","sections":[{"title":"new MetaspaceDecoder(config)","local":"new-metaspacedecoderconfig","sections":[],"depth":3},{"title":"metaspaceDecoder.decode_chain() : <code> * </code>","local":"metaspacedecoderdecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Precompiled ⇐ <code> Normalizer </code>","local":"tokenizersprecompiled--code-normalizer-code","sections":[{"title":"new Precompiled(config)","local":"new-precompiledconfig","sections":[],"depth":3},{"title":"precompiled.normalize(text) ⇒ <code> string </code>","local":"precompilednormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~PreTokenizerSequence ⇐ <code> PreTokenizer </code>","local":"tokenizerspretokenizersequence--code-pretokenizer-code","sections":[{"title":"new PreTokenizerSequence(config)","local":"new-pretokenizersequenceconfig","sections":[],"depth":3},{"title":"preTokenizerSequence.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"pretokenizersequencepretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~WhitespacePreTokenizer","local":"tokenizerswhitespacepretokenizer","sections":[{"title":"new WhitespacePreTokenizer(config)","local":"new-whitespacepretokenizerconfig","sections":[],"depth":3},{"title":"whitespacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"whitespacepretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~WhitespaceSplit ⇐ <code> PreTokenizer </code>","local":"tokenizerswhitespacesplit--code-pretokenizer-code","sections":[{"title":"new WhitespaceSplit(config)","local":"new-whitespacesplitconfig","sections":[],"depth":3},{"title":"whitespaceSplit.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"whitespacesplitpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~ReplacePreTokenizer","local":"tokenizersreplacepretokenizer","sections":[{"title":"new ReplacePreTokenizer(config)","local":"new-replacepretokenizerconfig","sections":[],"depth":3},{"title":"replacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"replacepretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~BYTES_TO_UNICODE ⇒ <code> Object </code>","local":"tokenizersbytestounicode--code-object-code","sections":[],"depth":2},{"title":"tokenizers~loadTokenizer(pretrained_model_name_or_path, options) ⇒ <code> Promise. < Array < any > > </code>","local":"tokenizersloadtokenizerpretrainedmodelnameorpath-options--code-promise--array--any---code","sections":[],"depth":2},{"title":"tokenizers~regexSplit(text, regex) ⇒ <code> Array. < string > </code>","local":"tokenizersregexsplittext-regex--code-array--string--code","sections":[],"depth":2},{"title":"tokenizers~createPattern(pattern, invert) ⇒ <code> RegExp </code> | <code> null </code>","local":"tokenizerscreatepatternpattern-invert--code-regexp-code--code-null-code","sections":[],"depth":2},{"title":"tokenizers~objectToMap(obj) ⇒ <code> Map. < string, any > </code>","local":"tokenizersobjecttomapobj--code-map--string-any--code","sections":[],"depth":2},{"title":"tokenizers~prepareTensorForDecode(tensor) ⇒ <code> Array. < number > </code>","local":"tokenizerspreparetensorfordecodetensor--code-array--number--code","sections":[],"depth":2},{"title":"tokenizers~clean_up_tokenization(text) ⇒ <code> string </code>","local":"tokenizerscleanuptokenizationtext--code-string-code","sections":[],"depth":2},{"title":"tokenizers~remove_accents(text) ⇒ <code> string </code>","local":"tokenizersremoveaccentstext--code-string-code","sections":[],"depth":2},{"title":"tokenizers~lowercase_and_remove_accent(text) ⇒ <code> string </code>","local":"tokenizerslowercaseandremoveaccenttext--code-string-code","sections":[],"depth":2},{"title":"tokenizers~whitespace_split(text) ⇒ <code> Array. < string > </code>","local":"tokenizerswhitespacesplittext--code-array--string--code","sections":[],"depth":2},{"title":"tokenizers~PretrainedTokenizerOptions : <code> Object </code>","local":"tokenizerspretrainedtokenizeroptions--code-object-code","sections":[],"depth":2},{"title":"tokenizers~BPENode : <code> Object </code>","local":"tokenizersbpenode--code-object-code","sections":[],"depth":2},{"title":"tokenizers~SplitDelimiterBehavior : <code> ’ removed ’ </code> | <code> ’ isolated ’ </code> | <code> ’ mergedWithPrevious ’ </code> | <code> ’ mergedWithNext ’ </code> | <code> ’ contiguous ’ </code>","local":"tokenizerssplitdelimiterbehavior--code--removed--code--code--isolated--code--code--mergedwithprevious--code--code--mergedwithnext--code--code--contiguous--code","sections":[],"depth":2},{"title":"tokenizers~PostProcessedOutput : <code> Object </code>","local":"tokenizerspostprocessedoutput--code-object-code","sections":[],"depth":2},{"title":"tokenizers~EncodingSingle : <code> Object </code>","local":"tokenizersencodingsingle--code-object-code","sections":[],"depth":2},{"title":"tokenizers~Message : <code> Object </code>","local":"tokenizersmessage--code-object-code","sections":[],"depth":2},{"title":"tokenizers~BatchEncoding : <code> Array < number > </code> | <code> Array < Array < number > > </code> | <code> Tensor </code>","local":"tokenizersbatchencoding--code-array--number--code--code-array--array--number---code--code-tensor-code","sections":[],"depth":2}],"depth":1}"> | |
| <link href="/docs/transformers.js/pr_1113/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/entry/start.88a6e140.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/scheduler.0219f8bd.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/singletons.c59c6d8d.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/paths.8e090985.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/entry/app.0003020d.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/index.f61edf3b.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/nodes/0.25c65ab2.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/nodes/14.95d1e23c.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/CodeBlock.38e566ae.js"> | |
| <link rel="modulepreload" href="/docs/transformers.js/pr_1113/en/_app/immutable/chunks/EditOnGithub.48fa589f.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"tokenizers","local":"tokenizers","sections":[{"title":"tokenizers.TokenizerModel ⇐ <code> Callable </code>","local":"tokenizerstokenizermodel--code-callable-code","sections":[{"title":"new TokenizerModel(config)","local":"new-tokenizermodelconfig","sections":[],"depth":3},{"title":"tokenizerModel.vocab : <code> Array. < string > </code>","local":"tokenizermodelvocab--code-array--string--code","sections":[],"depth":3},{"title":"tokenizerModel.tokens_to_ids : <code> Map. < string, number > </code>","local":"tokenizermodeltokenstoids--code-map--string-number--code","sections":[],"depth":3},{"title":"tokenizerModel.fuse_unk : <code> boolean </code>","local":"tokenizermodelfuseunk--code-boolean-code","sections":[],"depth":3},{"title":"tokenizerModel._call(tokens) ⇒ <code> Array. < string > </code>","local":"tokenizermodelcalltokens--code-array--string--code","sections":[],"depth":3},{"title":"tokenizerModel.encode(tokens) ⇒ <code> Array. < string > </code>","local":"tokenizermodelencodetokens--code-array--string--code","sections":[],"depth":3},{"title":"tokenizerModel.convert_tokens_to_ids(tokens) ⇒ <code> Array. < number > </code>","local":"tokenizermodelconverttokenstoidstokens--code-array--number--code","sections":[],"depth":3},{"title":"tokenizerModel.convert_ids_to_tokens(ids) ⇒ <code> Array. < string > </code>","local":"tokenizermodelconvertidstotokensids--code-array--string--code","sections":[],"depth":3},{"title":"TokenizerModel.fromConfig(config, ...args) ⇒ <code> TokenizerModel </code>","local":"tokenizermodelfromconfigconfig-args--code-tokenizermodel-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.PreTrainedTokenizer","local":"tokenizerspretrainedtokenizer","sections":[{"title":"new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)","local":"new-pretrainedtokenizertokenizerjson-tokenizerconfig","sections":[],"depth":3},{"title":"preTrainedTokenizer.added_tokens : <code> Array. < AddedToken > </code>","local":"pretrainedtokenizeraddedtokens--code-array--addedtoken--code","sections":[],"depth":3},{"title":"preTrainedTokenizer.remove_space : <code> boolean </code>","local":"pretrainedtokenizerremovespace--code-boolean-code","sections":[],"depth":3},{"title":"preTrainedTokenizer._call(text, options) ⇒ <code> BatchEncoding </code>","local":"pretrainedtokenizercalltext-options--code-batchencoding-code","sections":[],"depth":3},{"title":"preTrainedTokenizer._encode_text(text) ⇒ <code> Array < string > </code> | <code> null </code>","local":"pretrainedtokenizerencodetexttext--code-array--string--code--code-null-code","sections":[],"depth":3},{"title":"preTrainedTokenizer._tokenize_helper(text, options) ⇒ <code> * </code>","local":"pretrainedtokenizertokenizehelpertext-options--code--code","sections":[],"depth":3},{"title":"preTrainedTokenizer.tokenize(text, options) ⇒ <code> Array. < string > </code>","local":"pretrainedtokenizertokenizetext-options--code-array--string--code","sections":[],"depth":3},{"title":"preTrainedTokenizer.encode(text, options) ⇒ <code> Array. < number > </code>","local":"pretrainedtokenizerencodetext-options--code-array--number--code","sections":[],"depth":3},{"title":"preTrainedTokenizer.batch_decode(batch, decode_args) ⇒ <code> Array. < string > </code>","local":"pretrainedtokenizerbatchdecodebatch-decodeargs--code-array--string--code","sections":[],"depth":3},{"title":"preTrainedTokenizer.decode(token_ids, [decode_args]) ⇒ <code> string </code>","local":"pretrainedtokenizerdecodetokenids-decodeargs--code-string-code","sections":[],"depth":3},{"title":"preTrainedTokenizer.decode_single(token_ids, decode_args) ⇒ <code> string </code>","local":"pretrainedtokenizerdecodesingletokenids-decodeargs--code-string-code","sections":[],"depth":3},{"title":"preTrainedTokenizer.get_chat_template(options) ⇒ <code> string </code>","local":"pretrainedtokenizergetchattemplateoptions--code-string-code","sections":[],"depth":3},{"title":"preTrainedTokenizer.apply_chat_template(conversation, options) ⇒ <code> string </code> | <code> Tensor </code> | <code> Array < number > </code> | <code> Array < Array < number > > </code> | <code> BatchEncoding </code>","local":"pretrainedtokenizerapplychattemplateconversation-options--code-string-code--code-tensor-code--code-array--number--code--code-array--array--number---code--code-batchencoding-code","sections":[],"depth":3},{"title":"PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ <code> Promise. < PreTrainedTokenizer > </code>","local":"pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.BertTokenizer ⇐ <code> PreTrainedTokenizer </code>","local":"tokenizersberttokenizer--code-pretrainedtokenizer-code","sections":[],"depth":2},{"title":"tokenizers.AlbertTokenizer ⇐ <code> PreTrainedTokenizer </code>","local":"tokenizersalberttokenizer--code-pretrainedtokenizer-code","sections":[],"depth":2},{"title":"tokenizers.NllbTokenizer","local":"tokenizersnllbtokenizer","sections":[{"title":"nllbTokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ <code> Object </code>","local":"nllbtokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.M2M100Tokenizer","local":"tokenizersm2m100tokenizer","sections":[{"title":"m2M100Tokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ <code> Object </code>","local":"m2m100tokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.WhisperTokenizer ⇐ <code> PreTrainedTokenizer </code>","local":"tokenizerswhispertokenizer--code-pretrainedtokenizer-code","sections":[{"title":"whisperTokenizer._decode_asr(sequences, options) ⇒ <code> * </code>","local":"whispertokenizerdecodeasrsequences-options--code--code","sections":[],"depth":3},{"title":"whisperTokenizer.decode() : <code> * </code>","local":"whispertokenizerdecode--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.MarianTokenizer","local":"tokenizersmariantokenizer","sections":[{"title":"new MarianTokenizer(tokenizerJSON, tokenizerConfig)","local":"new-mariantokenizertokenizerjson-tokenizerconfig","sections":[],"depth":3},{"title":"marianTokenizer._encode_text(text) ⇒ <code> Array </code>","local":"mariantokenizerencodetexttext--code-array-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.AutoTokenizer","local":"tokenizersautotokenizer","sections":[{"title":"new AutoTokenizer()","local":"new-autotokenizer","sections":[],"depth":3},{"title":"AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ <code> Promise. < PreTrainedTokenizer > </code>","local":"autotokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers.is_chinese_char(cp) ⇒ <code> boolean </code>","local":"tokenizersischinesecharcp--code-boolean-code","sections":[],"depth":2},{"title":"tokenizers~AddedToken","local":"tokenizersaddedtoken","sections":[{"title":"new AddedToken(config)","local":"new-addedtokenconfig","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~WordPieceTokenizer ⇐ <code> TokenizerModel </code>","local":"tokenizerswordpiecetokenizer--code-tokenizermodel-code","sections":[{"title":"new WordPieceTokenizer(config)","local":"new-wordpiecetokenizerconfig","sections":[],"depth":3},{"title":"wordPieceTokenizer.tokens_to_ids : <code> Map. < string, number > </code>","local":"wordpiecetokenizertokenstoids--code-map--string-number--code","sections":[],"depth":3},{"title":"wordPieceTokenizer.unk_token_id : <code> number </code>","local":"wordpiecetokenizerunktokenid--code-number-code","sections":[],"depth":3},{"title":"wordPieceTokenizer.unk_token : <code> string </code>","local":"wordpiecetokenizerunktoken--code-string-code","sections":[],"depth":3},{"title":"wordPieceTokenizer.max_input_chars_per_word : <code> number </code>","local":"wordpiecetokenizermaxinputcharsperword--code-number-code","sections":[],"depth":3},{"title":"wordPieceTokenizer.vocab : <code> Array. < string > </code>","local":"wordpiecetokenizervocab--code-array--string--code","sections":[],"depth":3},{"title":"wordPieceTokenizer.encode(tokens) ⇒ <code> Array. < string > </code>","local":"wordpiecetokenizerencodetokens--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Unigram ⇐ <code> TokenizerModel </code>","local":"tokenizersunigram--code-tokenizermodel-code","sections":[{"title":"new Unigram(config, moreConfig)","local":"new-unigramconfig-moreconfig","sections":[],"depth":3},{"title":"unigram.populateNodes(lattice)","local":"unigrampopulatenodeslattice","sections":[],"depth":3},{"title":"unigram.tokenize(normalized) ⇒ <code> Array. < string > </code>","local":"unigramtokenizenormalized--code-array--string--code","sections":[],"depth":3},{"title":"unigram.encode(tokens) ⇒ <code> Array. < string > </code>","local":"unigramencodetokens--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~BPE ⇐ <code> TokenizerModel </code>","local":"tokenizersbpe--code-tokenizermodel-code","sections":[{"title":"new BPE(config)","local":"new-bpeconfig","sections":[],"depth":3},{"title":"bpE.tokens_to_ids : <code> Map. < string, number > </code>","local":"bpetokenstoids--code-map--string-number--code","sections":[],"depth":3},{"title":"bpE.merges : <code> * </code>","local":"bpemerges--code--code","sections":[{"title":"merges.config.merges : <code> * </code>","local":"mergesconfigmerges--code--code","sections":[],"depth":4}],"depth":3},{"title":"bpE.cache : <code> Map. < string, Array < string > > </code>","local":"bpecache--code-map--string-array--string---code","sections":[],"depth":3},{"title":"bpE.bpe(token) ⇒ <code> Array. < string > </code>","local":"bpebpetoken--code-array--string--code","sections":[],"depth":3},{"title":"bpE.encode(tokens) ⇒ <code> Array. < string > </code>","local":"bpeencodetokens--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~LegacyTokenizerModel","local":"tokenizerslegacytokenizermodel","sections":[{"title":"new LegacyTokenizerModel(config, moreConfig)","local":"new-legacytokenizermodelconfig-moreconfig","sections":[],"depth":3},{"title":"legacyTokenizerModel.tokens_to_ids : <code> Map. < string, number > </code>","local":"legacytokenizermodeltokenstoids--code-map--string-number--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Normalizer","local":"tokenizersnormalizer","sections":[{"title":"new Normalizer(config)","local":"new-normalizerconfig","sections":[],"depth":3},{"title":"normalizer.normalize(text) ⇒ <code> string </code>","local":"normalizernormalizetext--code-string-code","sections":[],"depth":3},{"title":"normalizer._call(text) ⇒ <code> string </code>","local":"normalizercalltext--code-string-code","sections":[],"depth":3},{"title":"Normalizer.fromConfig(config) ⇒ <code> Normalizer </code>","local":"normalizerfromconfigconfig--code-normalizer-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Replace ⇐ <code> Normalizer </code>","local":"tokenizersreplace--code-normalizer-code","sections":[{"title":"replace.normalize(text) ⇒ <code> string </code>","local":"replacenormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~NFC ⇐ <code> Normalizer </code>","local":"tokenizersnfc--code-normalizer-code","sections":[{"title":"nfC.normalize(text) ⇒ <code> string </code>","local":"nfcnormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~NFKC ⇐ <code> Normalizer </code>","local":"tokenizersnfkc--code-normalizer-code","sections":[{"title":"nfkC.normalize(text) ⇒ <code> string </code>","local":"nfkcnormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~NFKD ⇐ <code> Normalizer </code>","local":"tokenizersnfkd--code-normalizer-code","sections":[{"title":"nfkD.normalize(text) ⇒ <code> string </code>","local":"nfkdnormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~StripNormalizer","local":"tokenizersstripnormalizer","sections":[{"title":"stripNormalizer.normalize(text) ⇒ <code> string </code>","local":"stripnormalizernormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~StripAccents ⇐ <code> Normalizer </code>","local":"tokenizersstripaccents--code-normalizer-code","sections":[{"title":"stripAccents.normalize(text) ⇒ <code> string </code>","local":"stripaccentsnormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Lowercase ⇐ <code> Normalizer </code>","local":"tokenizerslowercase--code-normalizer-code","sections":[{"title":"lowercase.normalize(text) ⇒ <code> string </code>","local":"lowercasenormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Prepend ⇐ <code> Normalizer </code>","local":"tokenizersprepend--code-normalizer-code","sections":[{"title":"prepend.normalize(text) ⇒ <code> string </code>","local":"prependnormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~NormalizerSequence ⇐ <code> Normalizer </code>","local":"tokenizersnormalizersequence--code-normalizer-code","sections":[{"title":"new NormalizerSequence(config)","local":"new-normalizersequenceconfig","sections":[],"depth":3},{"title":"normalizerSequence.normalize(text) ⇒ <code> string </code>","local":"normalizersequencenormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~BertNormalizer ⇐ <code> Normalizer </code>","local":"tokenizersbertnormalizer--code-normalizer-code","sections":[{"title":"bertNormalizer._tokenize_chinese_chars(text) ⇒ <code> string </code>","local":"bertnormalizertokenizechinesecharstext--code-string-code","sections":[],"depth":3},{"title":"bertNormalizer.stripAccents(text) ⇒ <code> string </code>","local":"bertnormalizerstripaccentstext--code-string-code","sections":[],"depth":3},{"title":"bertNormalizer.normalize(text) ⇒ <code> string </code>","local":"bertnormalizernormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~PreTokenizer ⇐ <code> Callable </code>","local":"tokenizerspretokenizer--code-callable-code","sections":[{"title":"preTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"pretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3},{"title":"preTokenizer.pre_tokenize(text, [options]) ⇒ <code> Array. < string > </code>","local":"pretokenizerpretokenizetext-options--code-array--string--code","sections":[],"depth":3},{"title":"preTokenizer._call(text, [options]) ⇒ <code> Array. < string > </code>","local":"pretokenizercalltext-options--code-array--string--code","sections":[],"depth":3},{"title":"PreTokenizer.fromConfig(config) ⇒ <code> PreTokenizer </code>","local":"pretokenizerfromconfigconfig--code-pretokenizer-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~BertPreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizersbertpretokenizer--code-pretokenizer-code","sections":[{"title":"new BertPreTokenizer(config)","local":"new-bertpretokenizerconfig","sections":[],"depth":3},{"title":"bertPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"bertpretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~ByteLevelPreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizersbytelevelpretokenizer--code-pretokenizer-code","sections":[{"title":"new ByteLevelPreTokenizer(config)","local":"new-bytelevelpretokenizerconfig","sections":[],"depth":3},{"title":"byteLevelPreTokenizer.add_prefix_space : <code> boolean </code>","local":"bytelevelpretokenizeraddprefixspace--code-boolean-code","sections":[],"depth":3},{"title":"byteLevelPreTokenizer.trim_offsets : <code> boolean </code>","local":"bytelevelpretokenizertrimoffsets--code-boolean-code","sections":[],"depth":3},{"title":"byteLevelPreTokenizer.use_regex : <code> boolean </code>","local":"bytelevelpretokenizeruseregex--code-boolean-code","sections":[],"depth":3},{"title":"byteLevelPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"bytelevelpretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~SplitPreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizerssplitpretokenizer--code-pretokenizer-code","sections":[{"title":"new SplitPreTokenizer(config)","local":"new-splitpretokenizerconfig","sections":[],"depth":3},{"title":"splitPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"splitpretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~PunctuationPreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizerspunctuationpretokenizer--code-pretokenizer-code","sections":[{"title":"new PunctuationPreTokenizer(config)","local":"new-punctuationpretokenizerconfig","sections":[],"depth":3},{"title":"punctuationPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"punctuationpretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~DigitsPreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizersdigitspretokenizer--code-pretokenizer-code","sections":[{"title":"new DigitsPreTokenizer(config)","local":"new-digitspretokenizerconfig","sections":[],"depth":3},{"title":"digitsPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"digitspretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~PostProcessor ⇐ <code> Callable </code>","local":"tokenizerspostprocessor--code-callable-code","sections":[{"title":"new PostProcessor(config)","local":"new-postprocessorconfig","sections":[],"depth":3},{"title":"postProcessor.post_process(tokens, ...args) ⇒ <code> PostProcessedOutput </code>","local":"postprocessorpostprocesstokens-args--code-postprocessedoutput-code","sections":[],"depth":3},{"title":"postProcessor._call(tokens, ...args) ⇒ <code> PostProcessedOutput </code>","local":"postprocessorcalltokens-args--code-postprocessedoutput-code","sections":[],"depth":3},{"title":"PostProcessor.fromConfig(config) ⇒ <code> PostProcessor </code>","local":"postprocessorfromconfigconfig--code-postprocessor-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~BertProcessing","local":"tokenizersbertprocessing","sections":[{"title":"new BertProcessing(config)","local":"new-bertprocessingconfig","sections":[],"depth":3},{"title":"bertProcessing.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>","local":"bertprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~TemplateProcessing ⇐ <code> PostProcessor </code>","local":"tokenizerstemplateprocessing--code-postprocessor-code","sections":[{"title":"new TemplateProcessing(config)","local":"new-templateprocessingconfig","sections":[],"depth":3},{"title":"templateProcessing.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>","local":"templateprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~ByteLevelPostProcessor ⇐ <code> PostProcessor </code>","local":"tokenizersbytelevelpostprocessor--code-postprocessor-code","sections":[{"title":"byteLevelPostProcessor.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>","local":"bytelevelpostprocessorpostprocesstokens-tokenspair--code-postprocessedoutput-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~PostProcessorSequence","local":"tokenizerspostprocessorsequence","sections":[{"title":"new PostProcessorSequence(config)","local":"new-postprocessorsequenceconfig","sections":[],"depth":3},{"title":"postProcessorSequence.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code>","local":"postprocessorsequencepostprocesstokens-tokenspair--code-postprocessedoutput-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Decoder ⇐ <code> Callable </code>","local":"tokenizersdecoder--code-callable-code","sections":[{"title":"new Decoder(config)","local":"new-decoderconfig","sections":[],"depth":3},{"title":"decoder.added_tokens : <code> Array. < AddedToken > </code>","local":"decoderaddedtokens--code-array--addedtoken--code","sections":[],"depth":3},{"title":"decoder._call(tokens) ⇒ <code> string </code>","local":"decodercalltokens--code-string-code","sections":[],"depth":3},{"title":"decoder.decode(tokens) ⇒ <code> string </code>","local":"decoderdecodetokens--code-string-code","sections":[],"depth":3},{"title":"decoder.decode_chain(tokens) ⇒ <code> Array. < string > </code>","local":"decoderdecodechaintokens--code-array--string--code","sections":[],"depth":3},{"title":"Decoder.fromConfig(config) ⇒ <code> Decoder </code>","local":"decoderfromconfigconfig--code-decoder-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~FuseDecoder","local":"tokenizersfusedecoder","sections":[{"title":"fuseDecoder.decode_chain() : <code> * </code>","local":"fusedecoderdecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~WordPieceDecoder ⇐ <code> Decoder </code>","local":"tokenizerswordpiecedecoder--code-decoder-code","sections":[{"title":"new WordPieceDecoder(config)","local":"new-wordpiecedecoderconfig","sections":[],"depth":3},{"title":"wordPieceDecoder.decode_chain() : <code> * </code>","local":"wordpiecedecoderdecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~ByteLevelDecoder ⇐ <code> Decoder </code>","local":"tokenizersbyteleveldecoder--code-decoder-code","sections":[{"title":"new ByteLevelDecoder(config)","local":"new-byteleveldecoderconfig","sections":[],"depth":3},{"title":"byteLevelDecoder.convert_tokens_to_string(tokens) ⇒ <code> string </code>","local":"byteleveldecoderconverttokenstostringtokens--code-string-code","sections":[],"depth":3},{"title":"byteLevelDecoder.decode_chain() : <code> * </code>","local":"byteleveldecoderdecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~CTCDecoder","local":"tokenizersctcdecoder","sections":[{"title":"ctcDecoder.convert_tokens_to_string(tokens) ⇒ <code> string </code>","local":"ctcdecoderconverttokenstostringtokens--code-string-code","sections":[],"depth":3},{"title":"ctcDecoder.decode_chain() : <code> * </code>","local":"ctcdecoderdecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~DecoderSequence ⇐ <code> Decoder </code>","local":"tokenizersdecodersequence--code-decoder-code","sections":[{"title":"new DecoderSequence(config)","local":"new-decodersequenceconfig","sections":[],"depth":3},{"title":"decoderSequence.decode_chain() : <code> * </code>","local":"decodersequencedecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~MetaspacePreTokenizer ⇐ <code> PreTokenizer </code>","local":"tokenizersmetaspacepretokenizer--code-pretokenizer-code","sections":[{"title":"new MetaspacePreTokenizer(config)","local":"new-metaspacepretokenizerconfig","sections":[],"depth":3},{"title":"metaspacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"metaspacepretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~MetaspaceDecoder ⇐ <code> Decoder </code>","local":"tokenizersmetaspacedecoder--code-decoder-code","sections":[{"title":"new MetaspaceDecoder(config)","local":"new-metaspacedecoderconfig","sections":[],"depth":3},{"title":"metaspaceDecoder.decode_chain() : <code> * </code>","local":"metaspacedecoderdecodechain--code--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~Precompiled ⇐ <code> Normalizer </code>","local":"tokenizersprecompiled--code-normalizer-code","sections":[{"title":"new Precompiled(config)","local":"new-precompiledconfig","sections":[],"depth":3},{"title":"precompiled.normalize(text) ⇒ <code> string </code>","local":"precompilednormalizetext--code-string-code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~PreTokenizerSequence ⇐ <code> PreTokenizer </code>","local":"tokenizerspretokenizersequence--code-pretokenizer-code","sections":[{"title":"new PreTokenizerSequence(config)","local":"new-pretokenizersequenceconfig","sections":[],"depth":3},{"title":"preTokenizerSequence.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"pretokenizersequencepretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~WhitespacePreTokenizer","local":"tokenizerswhitespacepretokenizer","sections":[{"title":"new WhitespacePreTokenizer(config)","local":"new-whitespacepretokenizerconfig","sections":[],"depth":3},{"title":"whitespacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"whitespacepretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~WhitespaceSplit ⇐ <code> PreTokenizer </code>","local":"tokenizerswhitespacesplit--code-pretokenizer-code","sections":[{"title":"new WhitespaceSplit(config)","local":"new-whitespacesplitconfig","sections":[],"depth":3},{"title":"whitespaceSplit.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"whitespacesplitpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~ReplacePreTokenizer","local":"tokenizersreplacepretokenizer","sections":[{"title":"new ReplacePreTokenizer(config)","local":"new-replacepretokenizerconfig","sections":[],"depth":3},{"title":"replacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code>","local":"replacepretokenizerpretokenizetexttext-options--code-array--string--code","sections":[],"depth":3}],"depth":2},{"title":"tokenizers~BYTES_TO_UNICODE ⇒ <code> Object </code>","local":"tokenizersbytestounicode--code-object-code","sections":[],"depth":2},{"title":"tokenizers~loadTokenizer(pretrained_model_name_or_path, options) ⇒ <code> Promise. < Array < any > > </code>","local":"tokenizersloadtokenizerpretrainedmodelnameorpath-options--code-promise--array--any---code","sections":[],"depth":2},{"title":"tokenizers~regexSplit(text, regex) ⇒ <code> Array. < string > </code>","local":"tokenizersregexsplittext-regex--code-array--string--code","sections":[],"depth":2},{"title":"tokenizers~createPattern(pattern, invert) ⇒ <code> RegExp </code> | <code> null </code>","local":"tokenizerscreatepatternpattern-invert--code-regexp-code--code-null-code","sections":[],"depth":2},{"title":"tokenizers~objectToMap(obj) ⇒ <code> Map. < string, any > </code>","local":"tokenizersobjecttomapobj--code-map--string-any--code","sections":[],"depth":2},{"title":"tokenizers~prepareTensorForDecode(tensor) ⇒ <code> Array. < number > </code>","local":"tokenizerspreparetensorfordecodetensor--code-array--number--code","sections":[],"depth":2},{"title":"tokenizers~clean_up_tokenization(text) ⇒ <code> string </code>","local":"tokenizerscleanuptokenizationtext--code-string-code","sections":[],"depth":2},{"title":"tokenizers~remove_accents(text) ⇒ <code> string </code>","local":"tokenizersremoveaccentstext--code-string-code","sections":[],"depth":2},{"title":"tokenizers~lowercase_and_remove_accent(text) ⇒ <code> string </code>","local":"tokenizerslowercaseandremoveaccenttext--code-string-code","sections":[],"depth":2},{"title":"tokenizers~whitespace_split(text) ⇒ <code> Array. < string > </code>","local":"tokenizerswhitespacesplittext--code-array--string--code","sections":[],"depth":2},{"title":"tokenizers~PretrainedTokenizerOptions : <code> Object </code>","local":"tokenizerspretrainedtokenizeroptions--code-object-code","sections":[],"depth":2},{"title":"tokenizers~BPENode : <code> Object </code>","local":"tokenizersbpenode--code-object-code","sections":[],"depth":2},{"title":"tokenizers~SplitDelimiterBehavior : <code> ’ removed ’ </code> | <code> ’ isolated ’ </code> | <code> ’ mergedWithPrevious ’ </code> | <code> ’ mergedWithNext ’ </code> | <code> ’ contiguous ’ </code>","local":"tokenizerssplitdelimiterbehavior--code--removed--code--code--isolated--code--code--mergedwithprevious--code--code--mergedwithnext--code--code--contiguous--code","sections":[],"depth":2},{"title":"tokenizers~PostProcessedOutput : <code> Object </code>","local":"tokenizerspostprocessedoutput--code-object-code","sections":[],"depth":2},{"title":"tokenizers~EncodingSingle : <code> Object </code>","local":"tokenizersencodingsingle--code-object-code","sections":[],"depth":2},{"title":"tokenizers~Message : <code> Object </code>","local":"tokenizersmessage--code-object-code","sections":[],"depth":2},{"title":"tokenizers~BatchEncoding : <code> Array < number > </code> | <code> Array < Array < number > > </code> | <code> Tensor </code>","local":"tokenizersbatchencoding--code-array--number--code--code-array--array--number---code--code-tensor-code","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <a id="module_tokenizers" class="group"></a> <h1 class="relative group"><a id="tokenizers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers</span></h1> <p data-svelte-h="svelte-ost35k">Tokenizers are used to prepare textual inputs for a model.</p> <p data-svelte-h="svelte-gbl9g8"><strong>Example:</strong> Create an <code>AutoTokenizer</code> and use it to tokenize a sentence. | |
| This will automatically detect the tokenizer type based on the tokenizer class defined in <code>tokenizer.json</code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">AutoTokenizer</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">'@huggingface/transformers'</span>; | |
| <span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(<span class="hljs-string">'Xenova/bert-base-uncased'</span>); | |
| <span class="hljs-keyword">const</span> { input_ids } = <span class="hljs-keyword">await</span> <span class="hljs-title function_">tokenizer</span>(<span class="hljs-string">'I love transformers!'</span>); | |
| <span class="hljs-comment">// Tensor {</span> | |
| <span class="hljs-comment">// data: BigInt64Array(6) [101n, 1045n, 2293n, 19081n, 999n, 102n],</span> | |
| <span class="hljs-comment">// dims: [1, 6],</span> | |
| <span class="hljs-comment">// type: 'int64',</span> | |
| <span class="hljs-comment">// size: 6,</span> | |
| <span class="hljs-comment">// }</span><!-- HTML_TAG_END --></pre></div> <ul data-svelte-h="svelte-nkk3er"><li><a href="#module_tokenizers">tokenizers</a><ul><li><em>static</em><ul><li><a href="#module_tokenizers.TokenizerModel">.TokenizerModel</a> ⇐ <a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers.TokenizerModel_new"><code>new TokenizerModel(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers.TokenizerModel+vocab"><code>.vocab</code></a> : <code>Array.<string></code></li> <li><a href="#module_tokenizers.TokenizerModel+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.<string, number></code></li> <li><a href="#module_tokenizers.TokenizerModel+fuse_unk"><code>.fuse_unk</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers.TokenizerModel+_call"><code>._call(tokens)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers.TokenizerModel+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers.TokenizerModel+convert_tokens_to_ids"><code>.convert_tokens_to_ids(tokens)</code></a> ⇒ <code>Array.<number></code></li> <li><a href="#module_tokenizers.TokenizerModel+convert_ids_to_tokens"><code>.convert_ids_to_tokens(ids)</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers.TokenizerModel.fromConfig"><code>.fromConfig(config, ...args)</code></a> ⇒ <code>TokenizerModel</code></li></ul></li></ul></li> <li><a href="#module_tokenizers.PreTrainedTokenizer">.PreTrainedTokenizer</a><ul><li><a href="#new_module_tokenizers.PreTrainedTokenizer_new"><code>new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers.PreTrainedTokenizer+added_tokens"><code>.added_tokens</code></a> : <code>Array.<AddedToken></code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+remove_space"><code>.remove_space</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_call"><code>._call(text, options)</code></a> ⇒ <code>BatchEncoding</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_encode_text"><code>._encode_text(text)</code></a> ⇒ <code>Array<string></code> | <code>null</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_tokenize_helper"><code>._tokenize_helper(text, options)</code></a> ⇒ <code>*</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+tokenize"><code>.tokenize(text, options)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+encode"><code>.encode(text, options)</code></a> ⇒ <code>Array.<number></code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+batch_decode"><code>.batch_decode(batch, decode_args)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode"><code>.decode(token_ids, [decode_args])</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode_single"><code>.decode_single(token_ids, decode_args)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+get_chat_template"><code>.get_chat_template(options)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+apply_chat_template"><code>.apply_chat_template(conversation, options)</code></a> ⇒ <code>string</code> | <a href="#Tensor"><code>Tensor</code></a> | <code>Array<number></code> | <code>Array<Array<number>></code> | <code>BatchEncoding</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers.PreTrainedTokenizer.from_pretrained"><code>.from_pretrained(pretrained_model_name_or_path, options)</code></a> ⇒ <code>Promise.<PreTrainedTokenizer></code></li></ul></li></ul></li> <li><a href="#module_tokenizers.BertTokenizer">.BertTokenizer</a> ⇐ <code>PreTrainedTokenizer</code></li> <li><a href="#module_tokenizers.AlbertTokenizer">.AlbertTokenizer</a> ⇐ <code>PreTrainedTokenizer</code></li> <li><a href="#module_tokenizers.NllbTokenizer">.NllbTokenizer</a><ul><li><a href="#module_tokenizers.NllbTokenizer+_build_translation_inputs"><code>._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs)</code></a> ⇒ <code>Object</code></li></ul></li> <li><a href="#module_tokenizers.M2M100Tokenizer">.M2M100Tokenizer</a><ul><li><a href="#module_tokenizers.M2M100Tokenizer+_build_translation_inputs"><code>._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs)</code></a> ⇒ <code>Object</code></li></ul></li> <li><a href="#module_tokenizers.WhisperTokenizer">.WhisperTokenizer</a> ⇐ <code>PreTrainedTokenizer</code><ul><li><a href="#module_tokenizers.WhisperTokenizer+_decode_asr"><code>._decode_asr(sequences, options)</code></a> ⇒ <code>*</code></li> <li><a href="#module_tokenizers.WhisperTokenizer+decode"><code>.decode()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers.MarianTokenizer">.MarianTokenizer</a><ul><li><a href="#new_module_tokenizers.MarianTokenizer_new"><code>new MarianTokenizer(tokenizerJSON, tokenizerConfig)</code></a></li> <li><a href="#module_tokenizers.MarianTokenizer+_encode_text"><code>._encode_text(text)</code></a> ⇒ <code>Array</code></li></ul></li> <li><a href="#module_tokenizers.AutoTokenizer">.AutoTokenizer</a><ul><li><a href="#new_module_tokenizers.AutoTokenizer_new"><code>new AutoTokenizer()</code></a></li> <li><a href="#module_tokenizers.AutoTokenizer.from_pretrained"><code>.from_pretrained(pretrained_model_name_or_path, options)</code></a> ⇒ <code>Promise.<PreTrainedTokenizer></code></li></ul></li> <li><a href="#module_tokenizers.is_chinese_char"><code>.is_chinese_char(cp)</code></a> ⇒ <code>boolean</code></li></ul></li> <li><em>inner</em><ul><li><a href="#module_tokenizers..AddedToken">~AddedToken</a><ul><li><a href="#new_module_tokenizers..AddedToken_new"><code>new AddedToken(config)</code></a></li></ul></li> <li><a href="#module_tokenizers..WordPieceTokenizer">~WordPieceTokenizer</a> ⇐ <code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..WordPieceTokenizer_new"><code>new WordPieceTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..WordPieceTokenizer+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.<string, number></code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+unk_token_id"><code>.unk_token_id</code></a> : <code>number</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+unk_token"><code>.unk_token</code></a> : <code>string</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+max_input_chars_per_word"><code>.max_input_chars_per_word</code></a> : <code>number</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+vocab"><code>.vocab</code></a> : <code>Array.<string></code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..Unigram">~Unigram</a> ⇐ <code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..Unigram_new"><code>new Unigram(config, moreConfig)</code></a></li> <li><a href="#module_tokenizers..Unigram+populateNodes"><code>.populateNodes(lattice)</code></a></li> <li><a href="#module_tokenizers..Unigram+tokenize"><code>.tokenize(normalized)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers..Unigram+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..BPE">~BPE</a> ⇐ <code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..BPE_new"><code>new BPE(config)</code></a></li> <li><a href="#module_tokenizers..BPE+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.<string, number></code></li> <li><a href="#module_tokenizers..BPE+merges"><code>.merges</code></a> : <code>*</code><ul><li><a href="#module_tokenizers..BPE+merges.config.merges"><code>.config.merges</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..BPE+cache"><code>.cache</code></a> : <code>Map.<string, Array<string>></code></li> <li><a href="#module_tokenizers..BPE+bpe"><code>.bpe(token)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers..BPE+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..LegacyTokenizerModel">~LegacyTokenizerModel</a><ul><li><a href="#new_module_tokenizers..LegacyTokenizerModel_new"><code>new LegacyTokenizerModel(config, moreConfig)</code></a></li> <li><a href="#module_tokenizers..LegacyTokenizerModel+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.<string, number></code></li></ul></li> <li><em><a href="#module_tokenizers..Normalizer">~Normalizer</a></em><ul><li><em><a href="#new_module_tokenizers..Normalizer_new"><code>new Normalizer(config)</code></a></em></li> <li><em>instance</em><ul><li><strong><a href="#module_tokenizers..Normalizer+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></strong></li> <li><em><a href="#module_tokenizers..Normalizer+_call"><code>._call(text)</code></a> ⇒ <code>string</code></em></li></ul></li> <li><em>static</em><ul><li><em><a href="#module_tokenizers..Normalizer.fromConfig"><code>.fromConfig(config)</code></a> ⇒ <code>Normalizer</code></em></li></ul></li></ul></li> <li><a href="#module_tokenizers..Replace">~Replace</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..Replace+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..NFC">~NFC</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..NFC+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..NFKC">~NFKC</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..NFKC+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..NFKD">~NFKD</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..NFKD+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..StripNormalizer">~StripNormalizer</a><ul><li><a href="#module_tokenizers..StripNormalizer+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..StripAccents">~StripAccents</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..StripAccents+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..Lowercase">~Lowercase</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..Lowercase+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..Prepend">~Prepend</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..Prepend+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..NormalizerSequence">~NormalizerSequence</a> ⇐ <code>Normalizer</code><ul><li><a href="#new_module_tokenizers..NormalizerSequence_new"><code>new NormalizerSequence(config)</code></a></li> <li><a href="#module_tokenizers..NormalizerSequence+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..BertNormalizer">~BertNormalizer</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..BertNormalizer+_tokenize_chinese_chars"><code>._tokenize_chinese_chars(text)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..BertNormalizer+stripAccents"><code>.stripAccents(text)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..BertNormalizer+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..PreTokenizer">~PreTokenizer</a> ⇐ <a href="#Callable"><code>Callable</code></a><ul><li><em>instance</em><ul><li><em><a href="#module_tokenizers..PreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></em></li> <li><a href="#module_tokenizers..PreTokenizer+pre_tokenize"><code>.pre_tokenize(text, [options])</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers..PreTokenizer+_call"><code>._call(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers..PreTokenizer.fromConfig"><code>.fromConfig(config)</code></a> ⇒ <code>PreTokenizer</code></li></ul></li></ul></li> <li><a href="#module_tokenizers..BertPreTokenizer">~BertPreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..BertPreTokenizer_new"><code>new BertPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..BertPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer">~ByteLevelPreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..ByteLevelPreTokenizer_new"><code>new ByteLevelPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+add_prefix_space"><code>.add_prefix_space</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+trim_offsets"><code>.trim_offsets</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+use_regex"><code>.use_regex</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..SplitPreTokenizer">~SplitPreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..SplitPreTokenizer_new"><code>new SplitPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..SplitPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..PunctuationPreTokenizer">~PunctuationPreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..PunctuationPreTokenizer_new"><code>new PunctuationPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..PunctuationPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..DigitsPreTokenizer">~DigitsPreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..DigitsPreTokenizer_new"><code>new DigitsPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..DigitsPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..PostProcessor">~PostProcessor</a> ⇐ <a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers..PostProcessor_new"><code>new PostProcessor(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers..PostProcessor+post_process"><code>.post_process(tokens, ...args)</code></a> ⇒ <code>PostProcessedOutput</code></li> <li><a href="#module_tokenizers..PostProcessor+_call"><code>._call(tokens, ...args)</code></a> ⇒ <code>PostProcessedOutput</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers..PostProcessor.fromConfig"><code>.fromConfig(config)</code></a> ⇒ <code>PostProcessor</code></li></ul></li></ul></li> <li><a href="#module_tokenizers..BertProcessing">~BertProcessing</a><ul><li><a href="#new_module_tokenizers..BertProcessing_new"><code>new BertProcessing(config)</code></a></li> <li><a href="#module_tokenizers..BertProcessing+post_process"><code>.post_process(tokens, [tokens_pair])</code></a> ⇒ <code>PostProcessedOutput</code></li></ul></li> <li><a href="#module_tokenizers..TemplateProcessing">~TemplateProcessing</a> ⇐ <code>PostProcessor</code><ul><li><a href="#new_module_tokenizers..TemplateProcessing_new"><code>new TemplateProcessing(config)</code></a></li> <li><a href="#module_tokenizers..TemplateProcessing+post_process"><code>.post_process(tokens, [tokens_pair])</code></a> ⇒ <code>PostProcessedOutput</code></li></ul></li> <li><a href="#module_tokenizers..ByteLevelPostProcessor">~ByteLevelPostProcessor</a> ⇐ <code>PostProcessor</code><ul><li><a href="#module_tokenizers..ByteLevelPostProcessor+post_process"><code>.post_process(tokens, [tokens_pair])</code></a> ⇒ <code>PostProcessedOutput</code></li></ul></li> <li><a href="#module_tokenizers..PostProcessorSequence">~PostProcessorSequence</a><ul><li><a href="#new_module_tokenizers..PostProcessorSequence_new"><code>new PostProcessorSequence(config)</code></a></li> <li><a href="#module_tokenizers..PostProcessorSequence+post_process"><code>.post_process(tokens, [tokens_pair])</code></a> ⇒ <code>PostProcessedOutput</code></li></ul></li> <li><a href="#module_tokenizers..Decoder">~Decoder</a> ⇐ <a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers..Decoder_new"><code>new Decoder(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers..Decoder+added_tokens"><code>.added_tokens</code></a> : <code>Array.<AddedToken></code></li> <li><a href="#module_tokenizers..Decoder+_call"><code>._call(tokens)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..Decoder+decode"><code>.decode(tokens)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..Decoder+decode_chain"><code>.decode_chain(tokens)</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers..Decoder.fromConfig"><code>.fromConfig(config)</code></a> ⇒ <code>Decoder</code></li></ul></li></ul></li> <li><a href="#module_tokenizers..FuseDecoder">~FuseDecoder</a><ul><li><a href="#module_tokenizers..FuseDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..WordPieceDecoder">~WordPieceDecoder</a> ⇐ <code>Decoder</code><ul><li><a href="#new_module_tokenizers..WordPieceDecoder_new"><code>new WordPieceDecoder(config)</code></a></li> <li><a href="#module_tokenizers..WordPieceDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..ByteLevelDecoder">~ByteLevelDecoder</a> ⇐ <code>Decoder</code><ul><li><a href="#new_module_tokenizers..ByteLevelDecoder_new"><code>new ByteLevelDecoder(config)</code></a></li> <li><a href="#module_tokenizers..ByteLevelDecoder+convert_tokens_to_string"><code>.convert_tokens_to_string(tokens)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..ByteLevelDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..CTCDecoder">~CTCDecoder</a><ul><li><a href="#module_tokenizers..CTCDecoder+convert_tokens_to_string"><code>.convert_tokens_to_string(tokens)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..CTCDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..DecoderSequence">~DecoderSequence</a> ⇐ <code>Decoder</code><ul><li><a href="#new_module_tokenizers..DecoderSequence_new"><code>new DecoderSequence(config)</code></a></li> <li><a href="#module_tokenizers..DecoderSequence+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..MetaspacePreTokenizer">~MetaspacePreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..MetaspacePreTokenizer_new"><code>new MetaspacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..MetaspacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..MetaspaceDecoder">~MetaspaceDecoder</a> ⇐ <code>Decoder</code><ul><li><a href="#new_module_tokenizers..MetaspaceDecoder_new"><code>new MetaspaceDecoder(config)</code></a></li> <li><a href="#module_tokenizers..MetaspaceDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..Precompiled">~Precompiled</a> ⇐ <code>Normalizer</code><ul><li><a href="#new_module_tokenizers..Precompiled_new"><code>new Precompiled(config)</code></a></li> <li><a href="#module_tokenizers..Precompiled+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li> <li><a href="#module_tokenizers..PreTokenizerSequence">~PreTokenizerSequence</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..PreTokenizerSequence_new"><code>new PreTokenizerSequence(config)</code></a></li> <li><a href="#module_tokenizers..PreTokenizerSequence+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..WhitespacePreTokenizer">~WhitespacePreTokenizer</a><ul><li><a href="#new_module_tokenizers..WhitespacePreTokenizer_new"><code>new WhitespacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..WhitespacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..WhitespaceSplit">~WhitespaceSplit</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..WhitespaceSplit_new"><code>new WhitespaceSplit(config)</code></a></li> <li><a href="#module_tokenizers..WhitespaceSplit+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..ReplacePreTokenizer">~ReplacePreTokenizer</a><ul><li><a href="#new_module_tokenizers..ReplacePreTokenizer_new"><code>new ReplacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..ReplacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><a href="#module_tokenizers..BYTES_TO_UNICODE"><code>~BYTES_TO_UNICODE</code></a> ⇒ <code>Object</code></li> <li><a href="#module_tokenizers..loadTokenizer"><code>~loadTokenizer(pretrained_model_name_or_path, options)</code></a> ⇒ <code>Promise.<Array<any>></code></li> <li><a href="#module_tokenizers..regexSplit"><code>~regexSplit(text, regex)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers..createPattern"><code>~createPattern(pattern, invert)</code></a> ⇒ <code>RegExp</code> | <code>null</code></li> <li><a href="#module_tokenizers..objectToMap"><code>~objectToMap(obj)</code></a> ⇒ <code>Map.<string, any></code></li> <li><a href="#module_tokenizers..prepareTensorForDecode"><code>~prepareTensorForDecode(tensor)</code></a> ⇒ <code>Array.<number></code></li> <li><a href="#module_tokenizers..clean_up_tokenization"><code>~clean_up_tokenization(text)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..remove_accents"><code>~remove_accents(text)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..lowercase_and_remove_accent"><code>~lowercase_and_remove_accent(text)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..whitespace_split"><code>~whitespace_split(text)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers..PretrainedTokenizerOptions"><code>~PretrainedTokenizerOptions</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..BPENode"><code>~BPENode</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..SplitDelimiterBehavior"><code>~SplitDelimiterBehavior</code></a> : <code>’removed’</code> | <code>’isolated’</code> | <code>’mergedWithPrevious’</code> | <code>’mergedWithNext’</code> | <code>’contiguous’</code></li> <li><a href="#module_tokenizers..PostProcessedOutput"><code>~PostProcessedOutput</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..EncodingSingle"><code>~EncodingSingle</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..Message"><code>~Message</code></a> : <code>Object</code></li> <li><a href="#module_tokenizers..BatchEncoding"><code>~BatchEncoding</code></a> : <code>Array<number></code> | <code>Array<Array<number>></code> | <a href="#Tensor"><code>Tensor</code></a></li></ul></li></ul></li></ul> <hr> <a id="module_tokenizers.TokenizerModel" class="group"></a> <h2 class="relative group"><a id="tokenizerstokenizermodel--code-callable-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerstokenizermodel--code-callable-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.TokenizerModel ⇐ <code> Callable </code></span></h2> <p data-svelte-h="svelte-6wtrbd">Abstract base class for tokenizer models.</p> <p data-svelte-h="svelte-1ut0prd"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <a href="#Callable"><code>Callable</code></a></p> <ul data-svelte-h="svelte-1b39pjq"><li><a href="#module_tokenizers.TokenizerModel">.TokenizerModel</a> ⇐ <a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers.TokenizerModel_new"><code>new TokenizerModel(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers.TokenizerModel+vocab"><code>.vocab</code></a> : <code>Array.<string></code></li> <li><a href="#module_tokenizers.TokenizerModel+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.<string, number></code></li> <li><a href="#module_tokenizers.TokenizerModel+fuse_unk"><code>.fuse_unk</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers.TokenizerModel+_call"><code>._call(tokens)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers.TokenizerModel+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers.TokenizerModel+convert_tokens_to_ids"><code>.convert_tokens_to_ids(tokens)</code></a> ⇒ <code>Array.<number></code></li> <li><a href="#module_tokenizers.TokenizerModel+convert_ids_to_tokens"><code>.convert_ids_to_tokens(ids)</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers.TokenizerModel.fromConfig"><code>.fromConfig(config, ...args)</code></a> ⇒ <code>TokenizerModel</code></li></ul></li></ul></li></ul> <hr> <a id="new_module_tokenizers.TokenizerModel_new" class="group"></a> <h3 class="relative group"><a id="new-tokenizermodelconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-tokenizermodelconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new TokenizerModel(config)</span></h3> <p data-svelte-h="svelte-1c6qq54">Creates a new instance of TokenizerModel.</p> <table data-svelte-h="svelte-14oz1tm"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the TokenizerModel.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.TokenizerModel+vocab" class="group"></a> <h3 class="relative group"><a id="tokenizermodelvocab--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelvocab--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.vocab : <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-t0dblq"><strong>Kind</strong>: instance property of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a></p> <hr> <a id="module_tokenizers.TokenizerModel+tokens_to_ids" class="group"></a> <h3 class="relative group"><a id="tokenizermodeltokenstoids--code-map--string-number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodeltokenstoids--code-map--string-number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.tokens_to_ids : <code> Map. < string, number > </code></span></h3> <p data-svelte-h="svelte-186upcr">A mapping of tokens to ids.</p> <p data-svelte-h="svelte-t0dblq"><strong>Kind</strong>: instance property of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a></p> <hr> <a id="module_tokenizers.TokenizerModel+fuse_unk" class="group"></a> <h3 class="relative group"><a id="tokenizermodelfuseunk--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelfuseunk--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.fuse_unk : <code> boolean </code></span></h3> <p data-svelte-h="svelte-14z39rg">Whether to fuse unknown tokens when encoding. Defaults to false.</p> <p data-svelte-h="svelte-t0dblq"><strong>Kind</strong>: instance property of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a></p> <hr> <a id="module_tokenizers.TokenizerModel+_call" class="group"></a> <h3 class="relative group"><a id="tokenizermodelcalltokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelcalltokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel._call(tokens) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-72rkk4">Internal function to call the TokenizerModel instance.</p> <p data-svelte-h="svelte-ivoqgv"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a><br> <strong>Overrides</strong>: <a href="#Callable+_call"><code>_call</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - The encoded tokens.</p> <table data-svelte-h="svelte-aw190w"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>The tokens to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.TokenizerModel+encode" class="group"></a> <h3 class="relative group"><a id="tokenizermodelencodetokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelencodetokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.encode(tokens) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-n03492">Encodes a list of tokens into a list of token IDs.</p> <p data-svelte-h="svelte-c8nyr7"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - The encoded tokens.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-73au6u"><li>Will throw an error if not implemented in a subclass.</li></ul> <table data-svelte-h="svelte-aw190w"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>The tokens to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.TokenizerModel+convert_tokens_to_ids" class="group"></a> <h3 class="relative group"><a id="tokenizermodelconverttokenstoidstokens--code-array--number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelconverttokenstoidstokens--code-array--number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.convert_tokens_to_ids(tokens) ⇒ <code> Array. < number > </code></span></h3> <p data-svelte-h="svelte-kt6n9f">Converts a list of tokens into a list of token IDs.</p> <p data-svelte-h="svelte-2fhfjm"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a><br> <strong>Returns</strong>: <code>Array.<number></code> - The converted token IDs.</p> <table data-svelte-h="svelte-4i3luh"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>The tokens to convert.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.TokenizerModel+convert_ids_to_tokens" class="group"></a> <h3 class="relative group"><a id="tokenizermodelconvertidstotokensids--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelconvertidstotokensids--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizerModel.convert_ids_to_tokens(ids) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-1dytmg7">Converts a list of token IDs into a list of tokens.</p> <p data-svelte-h="svelte-kiokn"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - The converted tokens.</p> <table data-svelte-h="svelte-1fwo8eg"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>ids</td><td><code>Array<number></code> | <code>Array<bigint></code></td><td><p>The token IDs to convert.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.TokenizerModel.fromConfig" class="group"></a> <h3 class="relative group"><a id="tokenizermodelfromconfigconfig-args--code-tokenizermodel-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizermodelfromconfigconfig-args--code-tokenizermodel-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TokenizerModel.fromConfig(config, ...args) ⇒ <code> TokenizerModel </code></span></h3> <p data-svelte-h="svelte-owdb7i">Instantiates a new TokenizerModel instance based on the configuration object provided.</p> <p data-svelte-h="svelte-ni7m8l"><strong>Kind</strong>: static method of <a href="#module_tokenizers.TokenizerModel"><code>TokenizerModel</code></a><br> <strong>Returns</strong>: <code>TokenizerModel</code> - A new instance of a TokenizerModel.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1jzd58b"><li>Will throw an error if the TokenizerModel type in the config is not recognized.</li></ul> <table data-svelte-h="svelte-10vnea5"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the TokenizerModel.</p></td> </tr><tr><td>...args</td><td><code>*</code></td><td><p>Optional arguments to pass to the specific TokenizerModel constructor.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerspretrainedtokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspretrainedtokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.PreTrainedTokenizer</span></h2> <p data-svelte-h="svelte-wbz8zs"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-lojjgj"><li><a href="#module_tokenizers.PreTrainedTokenizer">.PreTrainedTokenizer</a><ul><li><a href="#new_module_tokenizers.PreTrainedTokenizer_new"><code>new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers.PreTrainedTokenizer+added_tokens"><code>.added_tokens</code></a> : <code>Array.<AddedToken></code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+remove_space"><code>.remove_space</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_call"><code>._call(text, options)</code></a> ⇒ <code>BatchEncoding</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_encode_text"><code>._encode_text(text)</code></a> ⇒ <code>Array<string></code> | <code>null</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+_tokenize_helper"><code>._tokenize_helper(text, options)</code></a> ⇒ <code>*</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+tokenize"><code>.tokenize(text, options)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+encode"><code>.encode(text, options)</code></a> ⇒ <code>Array.<number></code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+batch_decode"><code>.batch_decode(batch, decode_args)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode"><code>.decode(token_ids, [decode_args])</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+decode_single"><code>.decode_single(token_ids, decode_args)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+get_chat_template"><code>.get_chat_template(options)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers.PreTrainedTokenizer+apply_chat_template"><code>.apply_chat_template(conversation, options)</code></a> ⇒ <code>string</code> | <a href="#Tensor"><code>Tensor</code></a> | <code>Array<number></code> | <code>Array<Array<number>></code> | <code>BatchEncoding</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers.PreTrainedTokenizer.from_pretrained"><code>.from_pretrained(pretrained_model_name_or_path, options)</code></a> ⇒ <code>Promise.<PreTrainedTokenizer></code></li></ul></li></ul></li></ul> <hr> <a id="new_module_tokenizers.PreTrainedTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-pretrainedtokenizertokenizerjson-tokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-pretrainedtokenizertokenizerjson-tokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new PreTrainedTokenizer(tokenizerJSON, tokenizerConfig)</span></h3> <p data-svelte-h="svelte-1vkx5qp">Create a new PreTrainedTokenizer instance.</p> <table data-svelte-h="svelte-19pzyzr"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokenizerJSON</td><td><code>Object</code></td><td><p>The JSON of the tokenizer.</p></td> </tr><tr><td>tokenizerConfig</td><td><code>Object</code></td><td><p>The config of the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+added_tokens" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizeraddedtokens--code-array--addedtoken--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizeraddedtokens--code-array--addedtoken--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.added_tokens : <code> Array. < AddedToken > </code></span></h3> <p data-svelte-h="svelte-c201sq"><strong>Kind</strong>: instance property of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a></p> <hr> <a id="module_tokenizers.PreTrainedTokenizer+remove_space" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerremovespace--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerremovespace--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.remove_space : <code> boolean </code></span></h3> <p data-svelte-h="svelte-1p46594">Whether or not to strip the text when tokenizing (removing excess spaces before and after the string).</p> <p data-svelte-h="svelte-c201sq"><strong>Kind</strong>: instance property of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a></p> <hr> <a id="module_tokenizers.PreTrainedTokenizer+_call" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizercalltext-options--code-batchencoding-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizercalltext-options--code-batchencoding-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer._call(text, options) ⇒ <code> BatchEncoding </code></span></h3> <p data-svelte-h="svelte-1il2i3r">Encode/tokenize the given text(s).</p> <p data-svelte-h="svelte-17ot913"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>BatchEncoding</code> - Object to be passed to the model.</p> <table data-svelte-h="svelte-1tazz35"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code> | <code>Array<string></code></td><td></td><td><p>The text to tokenize.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.text_pair]</td><td><code>string</code> | <code>Array<string></code></td><td><code>null</code></td><td><p>Optional second sequence to be encoded. If set, must be the same type as text.</p></td> </tr><tr><td>[options.padding]</td><td><code>boolean</code> | <code>'max_length'</code></td><td><code>false</code></td><td><p>Whether to pad the input sequences.</p></td> </tr><tr><td>[options.add_special_tokens]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether or not to add the special tokens associated with the corresponding model.</p></td> </tr><tr><td>[options.truncation]</td><td><code>boolean</code></td><td><code></code></td><td><p>Whether to truncate the input sequences.</p></td> </tr><tr><td>[options.max_length]</td><td><code>number</code></td><td><code></code></td><td><p>Maximum length of the returned list and optionally padding length.</p></td> </tr><tr><td>[options.return_tensor]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether to return the results as Tensors or arrays.</p></td> </tr><tr><td>[options.return_token_type_ids]</td><td><code>boolean</code></td><td><code></code></td><td><p>Whether to return the token type ids.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+_encode_text" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerencodetexttext--code-array--string--code--code-null-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerencodetexttext--code-array--string--code--code-null-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer._encode_text(text) ⇒ <code> Array < string > </code> | <code> null </code></span></h3> <p data-svelte-h="svelte-hojn9c">Encodes a single text using the preprocessor pipeline of the tokenizer.</p> <p data-svelte-h="svelte-18rc32p"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array<string></code> | <code>null</code> - The encoded tokens.</p> <table data-svelte-h="svelte-x8hb9q"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code> | <code>null</code></td><td><p>The text to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+_tokenize_helper" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizertokenizehelpertext-options--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizertokenizehelpertext-options--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer._tokenize_helper(text, options) ⇒ <code> * </code></span></h3> <p data-svelte-h="svelte-1rkj6gn">Internal helper function to tokenize a text, and optionally a pair of texts.</p> <p data-svelte-h="svelte-g6epud"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>*</code> - An object containing the tokens and optionally the token type IDs.</p> <table data-svelte-h="svelte-c4289e"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td></td><td><p>The text to tokenize.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.pair]</td><td><code>string</code></td><td><code>null</code></td><td><p>The optional second text to tokenize.</p></td> </tr><tr><td>[options.add_special_tokens]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether or not to add the special tokens associated with the corresponding model.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+tokenize" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizertokenizetext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizertokenizetext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.tokenize(text, options) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-1tkmx0p">Converts a string into a sequence of tokens.</p> <p data-svelte-h="svelte-ukfz5o"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - The list of tokens.</p> <table data-svelte-h="svelte-azbat1"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td></td><td><p>The sequence to be encoded.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.pair]</td><td><code>string</code></td><td></td><td><p>A second sequence to be encoded with the first.</p></td> </tr><tr><td>[options.add_special_tokens]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether or not to add the special tokens associated with the corresponding model.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+encode" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerencodetext-options--code-array--number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerencodetext-options--code-array--number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.encode(text, options) ⇒ <code> Array. < number > </code></span></h3> <p data-svelte-h="svelte-sj7zk5">Encodes a single text or a pair of texts using the model’s tokenizer.</p> <p data-svelte-h="svelte-qcuvtk"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<number></code> - An array of token IDs representing the encoded text(s).</p> <table data-svelte-h="svelte-1gq42bo"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td></td><td><p>The text to encode.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.text_pair]</td><td><code>string</code></td><td><code>null</code></td><td><p>The optional second text to encode.</p></td> </tr><tr><td>[options.add_special_tokens]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether or not to add the special tokens associated with the corresponding model.</p></td> </tr><tr><td>[options.return_token_type_ids]</td><td><code>boolean</code></td><td><code></code></td><td><p>Whether to return token_type_ids.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+batch_decode" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerbatchdecodebatch-decodeargs--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerbatchdecodebatch-decodeargs--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.batch_decode(batch, decode_args) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-y8f29l">Decode a batch of tokenized sequences.</p> <p data-svelte-h="svelte-m72phd"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - List of decoded sequences.</p> <table data-svelte-h="svelte-mde7o5"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>batch</td><td><code>Array<Array<number>></code> | <code><a href="#Tensor">Tensor</a></code></td><td><p>List/Tensor of tokenized input sequences.</p></td> </tr><tr><td>decode_args</td><td><code>Object</code></td><td><p>(Optional) Object with decoding arguments.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+decode" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerdecodetokenids-decodeargs--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerdecodetokenids-decodeargs--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.decode(token_ids, [decode_args]) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-168jmgj">Decodes a sequence of token IDs back to a string.</p> <p data-svelte-h="svelte-1ferp3f"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1r89r1j"><li><code>Error</code> If `token_ids` is not a non-empty array of integers.</li></ul> <table data-svelte-h="svelte-nsz5oo"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>token_ids</td><td><code>Array<number></code> | <code>Array<bigint></code> | <code><a href="#Tensor">Tensor</a></code></td><td></td><td><p>List/Tensor of token IDs to decode.</p></td> </tr><tr><td>[decode_args]</td><td><code>Object</code></td><td><code>{}</code></td><td></td> </tr><tr><td>[decode_args.skip_special_tokens]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>If true, special tokens are removed from the output string.</p></td> </tr><tr><td>[decode_args.clean_up_tokenization_spaces]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>If true, spaces before punctuations and abbreviated forms are removed.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+decode_single" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerdecodesingletokenids-decodeargs--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerdecodesingletokenids-decodeargs--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.decode_single(token_ids, decode_args) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-19e6tws">Decode a single list of token ids to a string.</p> <p data-svelte-h="svelte-7zxcdh"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string</p> <table data-svelte-h="svelte-18jnxrz"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>token_ids</td><td><code>Array<number></code> | <code>Array<bigint></code></td><td></td><td><p>List of token ids to decode</p></td> </tr><tr><td>decode_args</td><td><code>Object</code></td><td></td><td><p>Optional arguments for decoding</p></td> </tr><tr><td>[decode_args.skip_special_tokens]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to skip special tokens during decoding</p></td> </tr><tr><td>[decode_args.clean_up_tokenization_spaces]</td><td><code>boolean</code></td><td><code></code></td><td><p>Whether to clean up tokenization spaces during decoding. | |
| If null, the value is set to <code>this.decoder.cleanup</code> if it exists, falling back to <code>this.clean_up_tokenization_spaces</code> if it exists, falling back to <code>true</code>.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+get_chat_template" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizergetchattemplateoptions--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizergetchattemplateoptions--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.get_chat_template(options) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1hrpjri">Retrieve the chat template string used for tokenizing chat messages. This template is used | |
| internally by the <code>apply_chat_template</code> method and can also be used externally to retrieve the model’s chat | |
| template for better generation tracking.</p> <p data-svelte-h="svelte-1xojn6p"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The chat template string.</p> <table data-svelte-h="svelte-1tql3fk"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.chat_template]</td><td><code>string</code></td><td><code>null</code></td><td><p>A Jinja template or the name of a template to use for this conversion. | |
| It is usually not necessary to pass anything to this argument, | |
| as the model's template will be used by default.</p></td> </tr><tr><td>[options.tools]</td><td><code>Array.<Object></code></td><td><code></code></td><td><p>A list of tools (callable functions) that will be accessible to the model. If the template does not | |
| support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema, | |
| giving the name, description and argument types for the tool. See our | |
| <a href="https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use">chat templating guide</a> | |
| for more information.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer+apply_chat_template" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerapplychattemplateconversation-options--code-string-code--code-tensor-code--code-array--number--code--code-array--array--number---code--code-batchencoding-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerapplychattemplateconversation-options--code-string-code--code-tensor-code--code-array--number--code--code-array--array--number---code--code-batchencoding-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTrainedTokenizer.apply_chat_template(conversation, options) ⇒ <code> string </code> | <code> Tensor </code> | <code> Array < number > </code> | <code> Array < Array < number > > </code> | <code> BatchEncoding </code></span></h3> <p data-svelte-h="svelte-qfc73q">Converts a list of message objects with <code>"role"</code> and <code>"content"</code> keys to a list of token | |
| ids. This method is intended for use with chat models, and will read the tokenizer’s chat_template attribute to | |
| determine the format and control tokens to use when converting.</p> <p data-svelte-h="svelte-aagj55">See <a href="https://huggingface.co/docs/transformers/chat_templating" rel="nofollow">here</a> for more information.</p> <p data-svelte-h="svelte-h5rmtt"><strong>Example:</strong> Applying a chat template to a conversation.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> { <span class="hljs-title class_">AutoTokenizer</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">"@huggingface/transformers"</span>; | |
| <span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(<span class="hljs-string">"Xenova/mistral-tokenizer-v1"</span>); | |
| <span class="hljs-keyword">const</span> chat = [ | |
| { <span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"Hello, how are you?"</span> }, | |
| { <span class="hljs-string">"role"</span>: <span class="hljs-string">"assistant"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'm doing great. How can I help you today?"</span> }, | |
| { <span class="hljs-string">"role"</span>: <span class="hljs-string">"user"</span>, <span class="hljs-string">"content"</span>: <span class="hljs-string">"I'd like to show off how chat templating works!"</span> }, | |
| ] | |
| <span class="hljs-keyword">const</span> text = tokenizer.<span class="hljs-title function_">apply_chat_template</span>(chat, { <span class="hljs-attr">tokenize</span>: <span class="hljs-literal">false</span> }); | |
| <span class="hljs-comment">// "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]"</span> | |
| <span class="hljs-keyword">const</span> input_ids = tokenizer.<span class="hljs-title function_">apply_chat_template</span>(chat, { <span class="hljs-attr">tokenize</span>: <span class="hljs-literal">true</span>, <span class="hljs-attr">return_tensor</span>: <span class="hljs-literal">false</span> }); | |
| <span class="hljs-comment">// [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-p0ysxs"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>string</code> | <a href="#Tensor"><code>Tensor</code></a> | <code>Array<number></code> | <code>Array<Array<number>></code> | <code>BatchEncoding</code> - The tokenized output.</p> <table data-svelte-h="svelte-1fytlyx"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>conversation</td><td><code>Array.<Message></code></td><td></td><td><p>A list of message objects with <code>"role"</code> and <code>"content"</code> keys, | |
| representing the chat history so far.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td></td><td><p>An optional object containing the following properties:</p></td> </tr><tr><td>[options.chat_template]</td><td><code>string</code></td><td><code>null</code></td><td><p>A Jinja template to use for this conversion. If | |
| this is not passed, the model's chat template will be used instead.</p></td> </tr><tr><td>[options.tools]</td><td><code>Array.<Object></code></td><td><code></code></td><td><p>A list of tools (callable functions) that will be accessible to the model. If the template does not | |
| support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema, | |
| giving the name, description and argument types for the tool. See our | |
| <a href="https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use">chat templating guide</a> | |
| for more information.</p></td> </tr><tr><td>[options.documents]</td><td><code>*</code></td><td><code></code></td><td><p>A list of dicts representing documents that will be accessible to the model if it is performing RAG | |
| (retrieval-augmented generation). If the template does not support RAG, this argument will have no | |
| effect. We recommend that each document should be a dict containing "title" and "text" keys. Please | |
| see the RAG section of the <a href="https://huggingface.co/docs/transformers/main/en/chat_templating#arguments-for-RAG">chat templating guide</a> | |
| for examples of passing documents with chat templates.</p></td> </tr><tr><td>[options.add_generation_prompt]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to end the prompt with the token(s) that indicate | |
| the start of an assistant message. This is useful when you want to generate a response from the model. | |
| Note that this argument will be passed to the chat template, and so it must be supported in the | |
| template for this argument to have any effect.</p></td> </tr><tr><td>[options.tokenize]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether to tokenize the output. If false, the output will be a string.</p></td> </tr><tr><td>[options.padding]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to pad sequences to the maximum length. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.truncation]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to truncate sequences to the maximum length. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.max_length]</td><td><code>number</code></td><td><code></code></td><td><p>Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false. | |
| If not specified, the tokenizer's <code>max_length</code> attribute will be used as a default.</p></td> </tr><tr><td>[options.return_tensor]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.return_dict]</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether to return a dictionary with named outputs. Has no effect if tokenize is false.</p></td> </tr><tr><td>[options.tokenizer_kwargs]</td><td><code>Object</code></td><td><code>{}</code></td><td><p>Additional options to pass to the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.PreTrainedTokenizer.from_pretrained" class="group"></a> <h3 class="relative group"><a id="pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretrainedtokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTrainedTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ <code> Promise. < PreTrainedTokenizer > </code></span></h3> <p data-svelte-h="svelte-1fjlsit">Loads a pre-trained tokenizer from the given <code>pretrained_model_name_or_path</code>.</p> <p data-svelte-h="svelte-3dl2f7"><strong>Kind</strong>: static method of <a href="#module_tokenizers.PreTrainedTokenizer"><code>PreTrainedTokenizer</code></a><br> <strong>Returns</strong>: <code>Promise.<PreTrainedTokenizer></code> - A new instance of the <code>PreTrainedTokenizer</code> class.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1ehlqi8"><li><code>Error</code> Throws an error if the tokenizer.json or tokenizer_config.json files are not found in the `pretrained_model_name_or_path`.</li></ul> <table data-svelte-h="svelte-i35q8n"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>pretrained_model_name_or_path</td><td><code>string</code></td><td><p>The path to the pre-trained tokenizer.</p></td> </tr><tr><td>options</td><td><code>PretrainedTokenizerOptions</code></td><td><p>Additional options for loading the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.BertTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersberttokenizer--code-pretrainedtokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersberttokenizer--code-pretrainedtokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.BertTokenizer ⇐ <code> PreTrainedTokenizer </code></span></h2> <p data-svelte-h="svelte-y7f9tl">BertTokenizer is a class used to tokenize text for BERT models.</p> <p data-svelte-h="svelte-1g9fk6q"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTrainedTokenizer</code></p> <hr> <a id="module_tokenizers.AlbertTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersalberttokenizer--code-pretrainedtokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersalberttokenizer--code-pretrainedtokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.AlbertTokenizer ⇐ <code> PreTrainedTokenizer </code></span></h2> <p data-svelte-h="svelte-1i5oqqh">Albert tokenizer</p> <p data-svelte-h="svelte-1g9fk6q"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTrainedTokenizer</code></p> <hr> <a id="module_tokenizers.NllbTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersnllbtokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnllbtokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.NllbTokenizer</span></h2> <p data-svelte-h="svelte-169llpf">The NllbTokenizer class is used to tokenize text for NLLB (“No Language Left Behind”) models.</p> <p data-svelte-h="svelte-5qkw4u">No Language Left Behind (NLLB) is a first-of-its-kind, AI breakthrough project | |
| that open-sources models capable of delivering high-quality translations directly | |
| between any pair of 200+ languages — including low-resource languages like Asturian, | |
| Luganda, Urdu and more. It aims to help people communicate with anyone, anywhere, | |
| regardless of their language preferences. For more information, check out their | |
| <a href="https://arxiv.org/abs/2207.04672" rel="nofollow">paper</a>.</p> <p data-svelte-h="svelte-1bnq2ss">For a list of supported languages (along with their language codes),</p> <p data-svelte-h="svelte-ixckw7"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>See</strong>: <a href="https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200" rel="nofollow">https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200</a></p> <hr> <a id="module_tokenizers.NllbTokenizer+_build_translation_inputs" class="group"></a> <h3 class="relative group"><a id="nllbtokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nllbtokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>nllbTokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ <code> Object </code></span></h3> <p data-svelte-h="svelte-17djevv">Helper function to build translation inputs for an <code>NllbTokenizer</code>.</p> <p data-svelte-h="svelte-151azqd"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.NllbTokenizer"><code>NllbTokenizer</code></a><br> <strong>Returns</strong>: <code>Object</code> - Object to be passed to the model.</p> <table data-svelte-h="svelte-nmwt0y"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>raw_inputs</td><td><code>string</code> | <code>Array<string></code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>tokenizer_options</td><td><code>Object</code></td><td><p>Options to be sent to the tokenizer</p></td> </tr><tr><td>generate_kwargs</td><td><code>Object</code></td><td><p>Generation options.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.M2M100Tokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersm2m100tokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersm2m100tokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.M2M100Tokenizer</span></h2> <p data-svelte-h="svelte-ggx2qy">The M2M100Tokenizer class is used to tokenize text for M2M100 (“Many-to-Many”) models.</p> <p data-svelte-h="svelte-matuab">M2M100 is a multilingual encoder-decoder (seq-to-seq) model trained for Many-to-Many | |
| multilingual translation. It was introduced in this <a href="https://arxiv.org/abs/2010.11125" rel="nofollow">paper</a> | |
| and first released in <a href="https://github.com/pytorch/fairseq/tree/master/examples/m2m_100" rel="nofollow">this</a> repository.</p> <p data-svelte-h="svelte-1bnq2ss">For a list of supported languages (along with their language codes),</p> <p data-svelte-h="svelte-17rdxyz"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>See</strong>: <a href="https://huggingface.co/facebook/m2m100_418M#languages-covered" rel="nofollow">https://huggingface.co/facebook/m2m100_418M#languages-covered</a></p> <hr> <a id="module_tokenizers.M2M100Tokenizer+_build_translation_inputs" class="group"></a> <h3 class="relative group"><a id="m2m100tokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#m2m100tokenizerbuildtranslationinputsrawinputs-tokenizeroptions-generatekwargs--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>m2M100Tokenizer._build_translation_inputs(raw_inputs, tokenizer_options, generate_kwargs) ⇒ <code> Object </code></span></h3> <p data-svelte-h="svelte-rixacq">Helper function to build translation inputs for an <code>M2M100Tokenizer</code>.</p> <p data-svelte-h="svelte-nwf7ij"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.M2M100Tokenizer"><code>M2M100Tokenizer</code></a><br> <strong>Returns</strong>: <code>Object</code> - Object to be passed to the model.</p> <table data-svelte-h="svelte-nmwt0y"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>raw_inputs</td><td><code>string</code> | <code>Array<string></code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>tokenizer_options</td><td><code>Object</code></td><td><p>Options to be sent to the tokenizer</p></td> </tr><tr><td>generate_kwargs</td><td><code>Object</code></td><td><p>Generation options.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.WhisperTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerswhispertokenizer--code-pretrainedtokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswhispertokenizer--code-pretrainedtokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.WhisperTokenizer ⇐ <code> PreTrainedTokenizer </code></span></h2> <p data-svelte-h="svelte-yc5dgi">WhisperTokenizer tokenizer</p> <p data-svelte-h="svelte-1g9fk6q"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTrainedTokenizer</code></p> <ul data-svelte-h="svelte-6hj9gd"><li><a href="#module_tokenizers.WhisperTokenizer">.WhisperTokenizer</a> ⇐ <code>PreTrainedTokenizer</code><ul><li><a href="#module_tokenizers.WhisperTokenizer+_decode_asr"><code>._decode_asr(sequences, options)</code></a> ⇒ <code>*</code></li> <li><a href="#module_tokenizers.WhisperTokenizer+decode"><code>.decode()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="module_tokenizers.WhisperTokenizer+_decode_asr" class="group"></a> <h3 class="relative group"><a id="whispertokenizerdecodeasrsequences-options--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#whispertokenizerdecodeasrsequences-options--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>whisperTokenizer._decode_asr(sequences, options) ⇒ <code> * </code></span></h3> <p data-svelte-h="svelte-1ldfc4">Decodes automatic speech recognition (ASR) sequences.</p> <p data-svelte-h="svelte-1dbusbf"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.WhisperTokenizer"><code>WhisperTokenizer</code></a><br> <strong>Returns</strong>: <code>*</code> - The decoded sequences.</p> <table data-svelte-h="svelte-16is2zf"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>sequences</td><td><code>*</code></td><td><p>The sequences to decode.</p></td> </tr><tr><td>options</td><td><code>Object</code></td><td><p>The options to use for decoding.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.WhisperTokenizer+decode" class="group"></a> <h3 class="relative group"><a id="whispertokenizerdecode--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#whispertokenizerdecode--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>whisperTokenizer.decode() : <code> * </code></span></h3> <p data-svelte-h="svelte-1d8g5ae"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.WhisperTokenizer"><code>WhisperTokenizer</code></a></p> <hr> <a id="module_tokenizers.MarianTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersmariantokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersmariantokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.MarianTokenizer</span></h2> <p data-svelte-h="svelte-18k70d9"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Todo</strong></p> <ul data-svelte-h="svelte-1y8rly"><li>This model is not yet supported by Hugging Face’s “fast” tokenizers library (<a href="https://github.com/huggingface/tokenizers" rel="nofollow">https://github.com/huggingface/tokenizers</a>). | |
| Therefore, this implementation (which is based on fast tokenizers) may produce slightly inaccurate results.</li></ul> <ul data-svelte-h="svelte-1ynv7b0"><li><a href="#module_tokenizers.MarianTokenizer">.MarianTokenizer</a><ul><li><a href="#new_module_tokenizers.MarianTokenizer_new"><code>new MarianTokenizer(tokenizerJSON, tokenizerConfig)</code></a></li> <li><a href="#module_tokenizers.MarianTokenizer+_encode_text"><code>._encode_text(text)</code></a> ⇒ <code>Array</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers.MarianTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-mariantokenizertokenizerjson-tokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-mariantokenizertokenizerjson-tokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new MarianTokenizer(tokenizerJSON, tokenizerConfig)</span></h3> <p data-svelte-h="svelte-12gr81">Create a new MarianTokenizer instance.</p> <table data-svelte-h="svelte-19pzyzr"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokenizerJSON</td><td><code>Object</code></td><td><p>The JSON of the tokenizer.</p></td> </tr><tr><td>tokenizerConfig</td><td><code>Object</code></td><td><p>The config of the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.MarianTokenizer+_encode_text" class="group"></a> <h3 class="relative group"><a id="mariantokenizerencodetexttext--code-array-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#mariantokenizerencodetexttext--code-array-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>marianTokenizer._encode_text(text) ⇒ <code> Array </code></span></h3> <p data-svelte-h="svelte-12lkjoz">Encodes a single text. Overriding this method is necessary since the language codes | |
| must be removed before encoding with sentencepiece model.</p> <p data-svelte-h="svelte-zlwn3v"><strong>Kind</strong>: instance method of <a href="#module_tokenizers.MarianTokenizer"><code>MarianTokenizer</code></a><br> <strong>Returns</strong>: <code>Array</code> - The encoded tokens.<br> <strong>See</strong>: <a href="https://github.com/huggingface/transformers/blob/12d51db243a00726a548a43cc333390ebae731e3/src/transformers/models/marian/tokenization_marian.py#L204-L213" rel="nofollow">https://github.com/huggingface/transformers/blob/12d51db243a00726a548a43cc333390ebae731e3/src/transformers/models/marian/tokenization_marian.py#L204-L213</a></p> <table data-svelte-h="svelte-x8hb9q"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code> | <code>null</code></td><td><p>The text to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.AutoTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersautotokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersautotokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.AutoTokenizer</span></h2> <p data-svelte-h="svelte-1hb1bdc">Helper class which is used to instantiate pretrained tokenizers with the <code>from_pretrained</code> function. | |
| The chosen tokenizer class is determined by the type specified in the tokenizer config.</p> <p data-svelte-h="svelte-wbz8zs"><strong>Kind</strong>: static class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-m4wzai"><li><a href="#module_tokenizers.AutoTokenizer">.AutoTokenizer</a><ul><li><a href="#new_module_tokenizers.AutoTokenizer_new"><code>new AutoTokenizer()</code></a></li> <li><a href="#module_tokenizers.AutoTokenizer.from_pretrained"><code>.from_pretrained(pretrained_model_name_or_path, options)</code></a> ⇒ <code>Promise.<PreTrainedTokenizer></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers.AutoTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-autotokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-autotokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new AutoTokenizer()</span></h3> <p data-svelte-h="svelte-1aswmub"><strong>Example</strong></p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(<span class="hljs-string">'Xenova/bert-base-uncased'</span>);<!-- HTML_TAG_END --></pre></div> <hr> <a id="module_tokenizers.AutoTokenizer.from_pretrained" class="group"></a> <h3 class="relative group"><a id="autotokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#autotokenizerfrompretrainedpretrainedmodelnameorpath-options--code-promise--pretrainedtokenizer--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>AutoTokenizer.from_pretrained(pretrained_model_name_or_path, options) ⇒ <code> Promise. < PreTrainedTokenizer > </code></span></h3> <p data-svelte-h="svelte-s7y3dm">Instantiate one of the tokenizer classes of the library from a pretrained model.</p> <p data-svelte-h="svelte-weoe0h">The tokenizer class to instantiate is selected based on the <code>tokenizer_class</code> property of the config object | |
| (either passed as an argument or loaded from <code>pretrained_model_name_or_path</code> if possible)</p> <p data-svelte-h="svelte-13ehx3q"><strong>Kind</strong>: static method of <a href="#module_tokenizers.AutoTokenizer"><code>AutoTokenizer</code></a><br> <strong>Returns</strong>: <code>Promise.<PreTrainedTokenizer></code> - A new instance of the PreTrainedTokenizer class.</p> <table data-svelte-h="svelte-1eafp23"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>pretrained_model_name_or_path</td><td><code>string</code></td><td><p>The name or path of the pretrained model. Can be either:</p> <ul><li>A string, the <em>model id</em> of a pretrained tokenizer hosted inside a model repo on huggingface.co. | |
| Valid model ids can be located at the root-level, like <code>bert-base-uncased</code>, or namespaced under a | |
| user or organization name, like <code>dbmdz/bert-base-german-cased</code>.</li> <li>A path to a <em>directory</em> containing tokenizer files, e.g., <code>./my_model_directory/</code>.</li></ul></td> </tr><tr><td>options</td><td><code>PretrainedTokenizerOptions</code></td><td><p>Additional options for loading the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers.is_chinese_char" class="group"></a> <h2 class="relative group"><a id="tokenizersischinesecharcp--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersischinesecharcp--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers.is_chinese_char(cp) ⇒ <code> boolean </code></span></h2> <p data-svelte-h="svelte-arkn8x">Checks whether the given Unicode codepoint represents a CJK (Chinese, Japanese, or Korean) character.</p> <p data-svelte-h="svelte-1crm30w">A “chinese character” is defined as anything in the CJK Unicode block: | |
| <a href="https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)" rel="nofollow">https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)</a></p> <p data-svelte-h="svelte-1vcx9k0">Note that the CJK Unicode block is NOT all Japanese and Korean characters, despite its name. | |
| The modern Korean Hangul alphabet is a different block, as is Japanese Hiragana and Katakana. | |
| Those alphabets are used to write space-separated words, so they are not treated specially | |
| and are handled like all other languages.</p> <p data-svelte-h="svelte-10qiwse"><strong>Kind</strong>: static method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>boolean</code> - True if the codepoint represents a CJK character, false otherwise.</p> <table data-svelte-h="svelte-oxo98v"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>cp</td><td><code>number</code> | <code>bigint</code></td><td><p>The Unicode codepoint to check.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..AddedToken" class="group"></a> <h2 class="relative group"><a id="tokenizersaddedtoken" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersaddedtoken"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~AddedToken</span></h2> <p data-svelte-h="svelte-bxhodd">Represent a token added by the user on top of the existing Model vocabulary. | |
| AddedToken can be configured to specify the behavior they should have in various situations like:</p> <ul data-svelte-h="svelte-uthgcg"><li>Whether they should only match single words</li> <li>Whether to include any whitespace on its left or right</li></ul> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="new_module_tokenizers..AddedToken_new" class="group"></a> <h3 class="relative group"><a id="new-addedtokenconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-addedtokenconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new AddedToken(config)</span></h3> <p data-svelte-h="svelte-1jn9qcj">Creates a new instance of AddedToken.</p> <table data-svelte-h="svelte-lcj2ea"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td></td><td><p>Added token configuration object.</p></td> </tr><tr><td>config.content</td><td><code>string</code></td><td></td><td><p>The content of the added token.</p></td> </tr><tr><td>config.id</td><td><code>number</code></td><td></td><td><p>The id of the added token.</p></td> </tr><tr><td>[config.single_word]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether this token must be a single word or can break words.</p></td> </tr><tr><td>[config.lstrip]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether this token should strip whitespaces on its left.</p></td> </tr><tr><td>[config.rstrip]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether this token should strip whitespaces on its right.</p></td> </tr><tr><td>[config.normalized]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether this token should be normalized.</p></td> </tr><tr><td>[config.special]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether this token is special.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WordPieceTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerswordpiecetokenizer--code-tokenizermodel-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswordpiecetokenizer--code-tokenizermodel-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~WordPieceTokenizer ⇐ <code> TokenizerModel </code></span></h2> <p data-svelte-h="svelte-os971x">A subclass of TokenizerModel that uses WordPiece encoding to encode tokens.</p> <p data-svelte-h="svelte-66xmtz"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>TokenizerModel</code></p> <ul data-svelte-h="svelte-j7wu8g"><li><a href="#module_tokenizers..WordPieceTokenizer">~WordPieceTokenizer</a> ⇐ <code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..WordPieceTokenizer_new"><code>new WordPieceTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..WordPieceTokenizer+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.<string, number></code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+unk_token_id"><code>.unk_token_id</code></a> : <code>number</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+unk_token"><code>.unk_token</code></a> : <code>string</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+max_input_chars_per_word"><code>.max_input_chars_per_word</code></a> : <code>number</code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+vocab"><code>.vocab</code></a> : <code>Array.<string></code></li> <li><a href="#module_tokenizers..WordPieceTokenizer+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..WordPieceTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-wordpiecetokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-wordpiecetokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new WordPieceTokenizer(config)</span></h3> <table data-svelte-h="svelte-mywrud"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td></td><td><p>The configuration object.</p></td> </tr><tr><td>config.vocab</td><td><code>Object</code></td><td></td><td><p>A mapping of tokens to ids.</p></td> </tr><tr><td>config.unk_token</td><td><code>string</code></td><td></td><td><p>The unknown token string.</p></td> </tr><tr><td>config.continuing_subword_prefix</td><td><code>string</code></td><td></td><td><p>The prefix to use for continuing subwords.</p></td> </tr><tr><td>[config.max_input_chars_per_word]</td><td><code>number</code></td><td><code>100</code></td><td><p>The maximum number of characters per word.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WordPieceTokenizer+tokens_to_ids" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizertokenstoids--code-map--string-number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizertokenstoids--code-map--string-number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.tokens_to_ids : <code> Map. < string, number > </code></span></h3> <p data-svelte-h="svelte-186upcr">A mapping of tokens to ids.</p> <p data-svelte-h="svelte-hi5a3m"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a></p> <hr> <a id="module_tokenizers..WordPieceTokenizer+unk_token_id" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizerunktokenid--code-number-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizerunktokenid--code-number-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.unk_token_id : <code> number </code></span></h3> <p data-svelte-h="svelte-16n6zoj">The id of the unknown token.</p> <p data-svelte-h="svelte-hi5a3m"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a></p> <hr> <a id="module_tokenizers..WordPieceTokenizer+unk_token" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizerunktoken--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizerunktoken--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.unk_token : <code> string </code></span></h3> <p data-svelte-h="svelte-1wc0kv1">The unknown token string.</p> <p data-svelte-h="svelte-hi5a3m"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a></p> <hr> <a id="module_tokenizers..WordPieceTokenizer+max_input_chars_per_word" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizermaxinputcharsperword--code-number-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizermaxinputcharsperword--code-number-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.max_input_chars_per_word : <code> number </code></span></h3> <p data-svelte-h="svelte-1tq6mkg">The maximum number of characters allowed per word.</p> <p data-svelte-h="svelte-hi5a3m"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a></p> <hr> <a id="module_tokenizers..WordPieceTokenizer+vocab" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizervocab--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizervocab--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.vocab : <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-ps7hhj">An array of tokens.</p> <p data-svelte-h="svelte-hi5a3m"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a></p> <hr> <a id="module_tokenizers..WordPieceTokenizer+encode" class="group"></a> <h3 class="relative group"><a id="wordpiecetokenizerencodetokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecetokenizerencodetokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceTokenizer.encode(tokens) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-hvbi1z">Encodes an array of tokens using WordPiece encoding.</p> <p data-svelte-h="svelte-1exk1td"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..WordPieceTokenizer"><code>WordPieceTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of encoded tokens.</p> <table data-svelte-h="svelte-aw190w"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>The tokens to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Unigram" class="group"></a> <h2 class="relative group"><a id="tokenizersunigram--code-tokenizermodel-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersunigram--code-tokenizermodel-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Unigram ⇐ <code> TokenizerModel </code></span></h2> <p data-svelte-h="svelte-168cdr8">Class representing a Unigram tokenizer model.</p> <p data-svelte-h="svelte-66xmtz"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>TokenizerModel</code></p> <ul data-svelte-h="svelte-cjspr2"><li><a href="#module_tokenizers..Unigram">~Unigram</a> ⇐ <code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..Unigram_new"><code>new Unigram(config, moreConfig)</code></a></li> <li><a href="#module_tokenizers..Unigram+populateNodes"><code>.populateNodes(lattice)</code></a></li> <li><a href="#module_tokenizers..Unigram+tokenize"><code>.tokenize(normalized)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers..Unigram+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..Unigram_new" class="group"></a> <h3 class="relative group"><a id="new-unigramconfig-moreconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-unigramconfig-moreconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new Unigram(config, moreConfig)</span></h3> <p data-svelte-h="svelte-k7r2g2">Create a new Unigram tokenizer model.</p> <table data-svelte-h="svelte-1t06ejl"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the Unigram model.</p></td> </tr><tr><td>config.unk_id</td><td><code>number</code></td><td><p>The ID of the unknown token</p></td> </tr><tr><td>config.vocab</td><td><code>Array.<Array<any>></code></td><td><p>A 2D array representing a mapping of tokens to scores.</p></td> </tr><tr><td>moreConfig</td><td><code>Object</code></td><td><p>Additional configuration object for the Unigram model.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Unigram+populateNodes" class="group"></a> <h3 class="relative group"><a id="unigrampopulatenodeslattice" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#unigrampopulatenodeslattice"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>unigram.populateNodes(lattice)</span></h3> <p data-svelte-h="svelte-13ypkqy">Populates lattice nodes.</p> <p data-svelte-h="svelte-1wapgb2"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Unigram"><code>Unigram</code></a></p> <table data-svelte-h="svelte-1djdg5g"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>lattice</td><td><code>TokenLattice</code></td><td><p>The token lattice to populate with nodes.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Unigram+tokenize" class="group"></a> <h3 class="relative group"><a id="unigramtokenizenormalized--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#unigramtokenizenormalized--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>unigram.tokenize(normalized) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-s5rlnu">Encodes an array of tokens into an array of subtokens using the unigram model.</p> <p data-svelte-h="svelte-1txbxlz"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Unigram"><code>Unigram</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of subtokens obtained by encoding the input tokens using the unigram model.</p> <table data-svelte-h="svelte-14yooq"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>normalized</td><td><code>string</code></td><td><p>The normalized string.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Unigram+encode" class="group"></a> <h3 class="relative group"><a id="unigramencodetokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#unigramencodetokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>unigram.encode(tokens) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-1igffgy">Encodes an array of tokens using Unigram encoding.</p> <p data-svelte-h="svelte-onyfvx"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Unigram"><code>Unigram</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of encoded tokens.</p> <table data-svelte-h="svelte-aw190w"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>The tokens to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BPE" class="group"></a> <h2 class="relative group"><a id="tokenizersbpe--code-tokenizermodel-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbpe--code-tokenizermodel-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BPE ⇐ <code> TokenizerModel </code></span></h2> <p data-svelte-h="svelte-1ki6zy5">BPE class for encoding text into Byte-Pair-Encoding (BPE) tokens.</p> <p data-svelte-h="svelte-66xmtz"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>TokenizerModel</code></p> <ul data-svelte-h="svelte-tgr5re"><li><a href="#module_tokenizers..BPE">~BPE</a> ⇐ <code>TokenizerModel</code><ul><li><a href="#new_module_tokenizers..BPE_new"><code>new BPE(config)</code></a></li> <li><a href="#module_tokenizers..BPE+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.<string, number></code></li> <li><a href="#module_tokenizers..BPE+merges"><code>.merges</code></a> : <code>*</code><ul><li><a href="#module_tokenizers..BPE+merges.config.merges"><code>.config.merges</code></a> : <code>*</code></li></ul></li> <li><a href="#module_tokenizers..BPE+cache"><code>.cache</code></a> : <code>Map.<string, Array<string>></code></li> <li><a href="#module_tokenizers..BPE+bpe"><code>.bpe(token)</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers..BPE+encode"><code>.encode(tokens)</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..BPE_new" class="group"></a> <h3 class="relative group"><a id="new-bpeconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-bpeconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new BPE(config)</span></h3> <p data-svelte-h="svelte-1hluawr">Create a BPE instance.</p> <table data-svelte-h="svelte-1pjqs45"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td></td><td><p>The configuration object for BPE.</p></td> </tr><tr><td>config.vocab</td><td><code>Object</code></td><td></td><td><p>A mapping of tokens to ids.</p></td> </tr><tr><td>config.merges</td><td><code>*</code></td><td></td><td><p>An array of BPE merges as strings.</p></td> </tr><tr><td>config.unk_token</td><td><code>string</code></td><td></td><td><p>The unknown token used for out of vocabulary words.</p></td> </tr><tr><td>config.end_of_word_suffix</td><td><code>string</code></td><td></td><td><p>The suffix to place at the end of each word.</p></td> </tr><tr><td>[config.continuing_subword_suffix]</td><td><code>string</code></td><td></td><td><p>The suffix to insert between words.</p></td> </tr><tr><td>[config.byte_fallback]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether to use spm byte-fallback trick (defaults to False)</p></td> </tr><tr><td>[config.ignore_merges]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether or not to match tokens with the vocab before using merges.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BPE+tokens_to_ids" class="group"></a> <h3 class="relative group"><a id="bpetokenstoids--code-map--string-number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bpetokenstoids--code-map--string-number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bpE.tokens_to_ids : <code> Map. < string, number > </code></span></h3> <p data-svelte-h="svelte-vbft5q"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..BPE"><code>BPE</code></a></p> <hr> <a id="module_tokenizers..BPE+merges" class="group"></a> <h3 class="relative group"><a id="bpemerges--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bpemerges--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bpE.merges : <code> * </code></span></h3> <p data-svelte-h="svelte-vbft5q"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..BPE"><code>BPE</code></a></p> <hr> <a id="module_tokenizers..BPE+merges.config.merges" class="group"></a> <h4 class="relative group"><a id="mergesconfigmerges--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#mergesconfigmerges--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>merges.config.merges : <code> * </code></span></h4> <p data-svelte-h="svelte-1nha7op"><strong>Kind</strong>: static property of <a href="#module_tokenizers..BPE+merges"><code>merges</code></a></p> <hr> <a id="module_tokenizers..BPE+cache" class="group"></a> <h3 class="relative group"><a id="bpecache--code-map--string-array--string---code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bpecache--code-map--string-array--string---code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bpE.cache : <code> Map. < string, Array < string > > </code></span></h3> <p data-svelte-h="svelte-vbft5q"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..BPE"><code>BPE</code></a></p> <hr> <a id="module_tokenizers..BPE+bpe" class="group"></a> <h3 class="relative group"><a id="bpebpetoken--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bpebpetoken--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bpE.bpe(token) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-a8vvsp">Apply Byte-Pair-Encoding (BPE) to a given token. Efficient heap-based priority | |
| queue implementation adapted from <a href="https://github.com/belladoreai/llama-tokenizer-js" rel="nofollow">https://github.com/belladoreai/llama-tokenizer-js</a>.</p> <p data-svelte-h="svelte-vghqp2"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BPE"><code>BPE</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - The BPE encoded tokens.</p> <table data-svelte-h="svelte-ef47i8"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>token</td><td><code>string</code></td><td><p>The token to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BPE+encode" class="group"></a> <h3 class="relative group"><a id="bpeencodetokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bpeencodetokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bpE.encode(tokens) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-1gsp3x1">Encodes the input sequence of tokens using the BPE algorithm and returns the resulting subword tokens.</p> <p data-svelte-h="svelte-1xk2nb1"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BPE"><code>BPE</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - The resulting subword tokens after applying the BPE algorithm to the input sequence of tokens.</p> <table data-svelte-h="svelte-170f8q"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>The input sequence of tokens to encode.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..LegacyTokenizerModel" class="group"></a> <h2 class="relative group"><a id="tokenizerslegacytokenizermodel" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerslegacytokenizermodel"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~LegacyTokenizerModel</span></h2> <p data-svelte-h="svelte-aoayky">Legacy tokenizer class for tokenizers with only a vocabulary.</p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-1wfo8qw"><li><a href="#module_tokenizers..LegacyTokenizerModel">~LegacyTokenizerModel</a><ul><li><a href="#new_module_tokenizers..LegacyTokenizerModel_new"><code>new LegacyTokenizerModel(config, moreConfig)</code></a></li> <li><a href="#module_tokenizers..LegacyTokenizerModel+tokens_to_ids"><code>.tokens_to_ids</code></a> : <code>Map.<string, number></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..LegacyTokenizerModel_new" class="group"></a> <h3 class="relative group"><a id="new-legacytokenizermodelconfig-moreconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-legacytokenizermodelconfig-moreconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new LegacyTokenizerModel(config, moreConfig)</span></h3> <p data-svelte-h="svelte-190m3yr">Create a LegacyTokenizerModel instance.</p> <table data-svelte-h="svelte-6s4tsa"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for LegacyTokenizerModel.</p></td> </tr><tr><td>config.vocab</td><td><code>Object</code></td><td><p>A (possibly nested) mapping of tokens to ids.</p></td> </tr><tr><td>moreConfig</td><td><code>Object</code></td><td><p>Additional configuration object for the LegacyTokenizerModel model.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..LegacyTokenizerModel+tokens_to_ids" class="group"></a> <h3 class="relative group"><a id="legacytokenizermodeltokenstoids--code-map--string-number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#legacytokenizermodeltokenstoids--code-map--string-number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>legacyTokenizerModel.tokens_to_ids : <code> Map. < string, number > </code></span></h3> <p data-svelte-h="svelte-w0hzr2"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..LegacyTokenizerModel"><code>LegacyTokenizerModel</code></a></p> <hr> <a id="module_tokenizers..Normalizer" class="group"></a> <h2 class="relative group"><a id="tokenizersnormalizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnormalizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Normalizer</span></h2> <p data-svelte-h="svelte-10jww09">A base class for text normalization.</p> <p data-svelte-h="svelte-jwwxx4"><strong>Kind</strong>: inner abstract class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-130c6f2"><li><em><a href="#module_tokenizers..Normalizer">~Normalizer</a></em><ul><li><em><a href="#new_module_tokenizers..Normalizer_new"><code>new Normalizer(config)</code></a></em></li> <li><em>instance</em><ul><li><strong><a href="#module_tokenizers..Normalizer+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></strong></li> <li><em><a href="#module_tokenizers..Normalizer+_call"><code>._call(text)</code></a> ⇒ <code>string</code></em></li></ul></li> <li><em>static</em><ul><li><em><a href="#module_tokenizers..Normalizer.fromConfig"><code>.fromConfig(config)</code></a> ⇒ <code>Normalizer</code></em></li></ul></li></ul></li></ul> <hr> <a id="new_module_tokenizers..Normalizer_new" class="group"></a> <h3 class="relative group"><a id="new-normalizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-normalizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new Normalizer(config)</span></h3> <table data-svelte-h="svelte-tpsyfn"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the normalizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Normalizer+normalize" class="group"></a> <h3 class="relative group"><a id="normalizernormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#normalizernormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>normalizer.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-ax8bnv">Normalize the input text.</p> <p data-svelte-h="svelte-lf79s"><strong>Kind</strong>: instance abstract method of <a href="#module_tokenizers..Normalizer"><code>Normalizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1ceb94n"><li><code>Error</code> If this method is not implemented in a subclass.</li></ul> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Normalizer+_call" class="group"></a> <h3 class="relative group"><a id="normalizercalltext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#normalizercalltext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>normalizer._call(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-13yzawo">Alias for <a href="Normalizer#normalize">Normalizer#normalize</a>.</p> <p data-svelte-h="svelte-hak1rq"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Normalizer"><code>Normalizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Normalizer.fromConfig" class="group"></a> <h3 class="relative group"><a id="normalizerfromconfigconfig--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#normalizerfromconfigconfig--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Normalizer.fromConfig(config) ⇒ <code> Normalizer </code></span></h3> <p data-svelte-h="svelte-1drayrb">Factory method for creating normalizers from config objects.</p> <p data-svelte-h="svelte-1v9m58b"><strong>Kind</strong>: static method of <a href="#module_tokenizers..Normalizer"><code>Normalizer</code></a><br> <strong>Returns</strong>: <code>Normalizer</code> - A Normalizer object.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-h8agyt"><li><code>Error</code> If an unknown Normalizer type is specified in the config.</li></ul> <table data-svelte-h="svelte-tpsyfn"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the normalizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Replace" class="group"></a> <h2 class="relative group"><a id="tokenizersreplace--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersreplace--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Replace ⇐ <code> Normalizer </code></span></h2> <p data-svelte-h="svelte-mx5gat">Replace normalizer that replaces occurrences of a pattern with a given string or regular expression.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..Replace+normalize" class="group"></a> <h3 class="relative group"><a id="replacenormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#replacenormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>replace.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-z30qdq">Normalize the input text by replacing the pattern with the content.</p> <p data-svelte-h="svelte-6bh9xi"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Replace"><code>Replace</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text after replacing the pattern with the content.</p> <table data-svelte-h="svelte-ci8if4"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The input text to be normalized.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..NFC" class="group"></a> <h2 class="relative group"><a id="tokenizersnfc--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnfc--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~NFC ⇐ <code> Normalizer </code></span></h2> <p data-svelte-h="svelte-v0gnhd">A normalizer that applies Unicode normalization form C (NFC) to the input text.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..NFC+normalize" class="group"></a> <h3 class="relative group"><a id="nfcnormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nfcnormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>nfC.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1wx1017">Normalize the input text by applying Unicode normalization form C (NFC).</p> <p data-svelte-h="svelte-1ysyaya"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..NFC"><code>NFC</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-ci8if4"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The input text to be normalized.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..NFKC" class="group"></a> <h2 class="relative group"><a id="tokenizersnfkc--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnfkc--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~NFKC ⇐ <code> Normalizer </code></span></h2> <p data-svelte-h="svelte-rjq8j">NFKC Normalizer.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..NFKC+normalize" class="group"></a> <h3 class="relative group"><a id="nfkcnormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nfkcnormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>nfkC.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1e9f3f3">Normalize text using NFKC normalization.</p> <p data-svelte-h="svelte-1xa1tv2"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..NFKC"><code>NFKC</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1n56lec"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to be normalized.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..NFKD" class="group"></a> <h2 class="relative group"><a id="tokenizersnfkd--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnfkd--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~NFKD ⇐ <code> Normalizer </code></span></h2> <p data-svelte-h="svelte-1k1jv4k">NFKD Normalizer.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..NFKD+normalize" class="group"></a> <h3 class="relative group"><a id="nfkdnormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nfkdnormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>nfkD.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-ezhhkq">Normalize text using NFKD normalization.</p> <p data-svelte-h="svelte-11z17sm"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..NFKD"><code>NFKD</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1n56lec"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to be normalized.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..StripNormalizer" class="group"></a> <h2 class="relative group"><a id="tokenizersstripnormalizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersstripnormalizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~StripNormalizer</span></h2> <p data-svelte-h="svelte-4eye56">A normalizer that strips leading and/or trailing whitespace from the input text.</p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="module_tokenizers..StripNormalizer+normalize" class="group"></a> <h3 class="relative group"><a id="stripnormalizernormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stripnormalizernormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>stripNormalizer.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-vhri9a">Strip leading and/or trailing whitespace from the input text.</p> <p data-svelte-h="svelte-zw21ea"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..StripNormalizer"><code>StripNormalizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-u57eej"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The input text.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..StripAccents" class="group"></a> <h2 class="relative group"><a id="tokenizersstripaccents--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersstripaccents--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~StripAccents ⇐ <code> Normalizer </code></span></h2> <p data-svelte-h="svelte-1laj15h">StripAccents normalizer removes all accents from the text.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..StripAccents+normalize" class="group"></a> <h3 class="relative group"><a id="stripaccentsnormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stripaccentsnormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>stripAccents.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1j27yzo">Remove all accents from the text.</p> <p data-svelte-h="svelte-13bo68j"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..StripAccents"><code>StripAccents</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text without accents.</p> <table data-svelte-h="svelte-u57eej"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The input text.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Lowercase" class="group"></a> <h2 class="relative group"><a id="tokenizerslowercase--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerslowercase--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Lowercase ⇐ <code> Normalizer </code></span></h2> <p data-svelte-h="svelte-1njfgof">A Normalizer that lowercases the input string.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..Lowercase+normalize" class="group"></a> <h3 class="relative group"><a id="lowercasenormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lowercasenormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>lowercase.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1d19cn0">Lowercases the input string.</p> <p data-svelte-h="svelte-1h5axm"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Lowercase"><code>Lowercase</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Prepend" class="group"></a> <h2 class="relative group"><a id="tokenizersprepend--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersprepend--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Prepend ⇐ <code> Normalizer </code></span></h2> <p data-svelte-h="svelte-sliawd">A Normalizer that prepends a string to the input string.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <hr> <a id="module_tokenizers..Prepend+normalize" class="group"></a> <h3 class="relative group"><a id="prependnormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#prependnormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>prepend.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1elztq5">Prepends the input string.</p> <p data-svelte-h="svelte-131i334"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Prepend"><code>Prepend</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..NormalizerSequence" class="group"></a> <h2 class="relative group"><a id="tokenizersnormalizersequence--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersnormalizersequence--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~NormalizerSequence ⇐ <code> Normalizer </code></span></h2> <p data-svelte-h="svelte-1752gus">A Normalizer that applies a sequence of Normalizers.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <ul data-svelte-h="svelte-17ix58l"><li><a href="#module_tokenizers..NormalizerSequence">~NormalizerSequence</a> ⇐ <code>Normalizer</code><ul><li><a href="#new_module_tokenizers..NormalizerSequence_new"><code>new NormalizerSequence(config)</code></a></li> <li><a href="#module_tokenizers..NormalizerSequence+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..NormalizerSequence_new" class="group"></a> <h3 class="relative group"><a id="new-normalizersequenceconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-normalizersequenceconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new NormalizerSequence(config)</span></h3> <p data-svelte-h="svelte-11kq2wb">Create a new instance of NormalizerSequence.</p> <table data-svelte-h="svelte-5ym4bl"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td> </tr><tr><td>config.normalizers</td><td><code>Array.<Object></code></td><td><p>An array of Normalizer configuration objects.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..NormalizerSequence+normalize" class="group"></a> <h3 class="relative group"><a id="normalizersequencenormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#normalizersequencenormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>normalizerSequence.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1edabpq">Apply a sequence of Normalizers to the input text.</p> <p data-svelte-h="svelte-pymlm6"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..NormalizerSequence"><code>NormalizerSequence</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertNormalizer" class="group"></a> <h2 class="relative group"><a id="tokenizersbertnormalizer--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbertnormalizer--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BertNormalizer ⇐ <code> Normalizer </code></span></h2> <p data-svelte-h="svelte-1l2tjxd">A class representing a normalizer used in BERT tokenization.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <ul data-svelte-h="svelte-ld6a7d"><li><a href="#module_tokenizers..BertNormalizer">~BertNormalizer</a> ⇐ <code>Normalizer</code><ul><li><a href="#module_tokenizers..BertNormalizer+_tokenize_chinese_chars"><code>._tokenize_chinese_chars(text)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..BertNormalizer+stripAccents"><code>.stripAccents(text)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..BertNormalizer+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li></ul> <hr> <a id="module_tokenizers..BertNormalizer+_tokenize_chinese_chars" class="group"></a> <h3 class="relative group"><a id="bertnormalizertokenizechinesecharstext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bertnormalizertokenizechinesecharstext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bertNormalizer._tokenize_chinese_chars(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1bijj0e">Adds whitespace around any CJK (Chinese, Japanese, or Korean) character in the input text.</p> <p data-svelte-h="svelte-185sdhq"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BertNormalizer"><code>BertNormalizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The tokenized text with whitespace added around CJK characters.</p> <table data-svelte-h="svelte-cxfvn5"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The input text to tokenize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertNormalizer+stripAccents" class="group"></a> <h3 class="relative group"><a id="bertnormalizerstripaccentstext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bertnormalizerstripaccentstext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bertNormalizer.stripAccents(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1bhx3l9">Strips accents from the given text.</p> <p data-svelte-h="svelte-1dzzyok"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BertNormalizer"><code>BertNormalizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The text with accents removed.</p> <table data-svelte-h="svelte-o2vd1j"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to strip accents from.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertNormalizer+normalize" class="group"></a> <h3 class="relative group"><a id="bertnormalizernormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bertnormalizernormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bertNormalizer.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-8ayr2g">Normalizes the given text based on the configuration.</p> <p data-svelte-h="svelte-1j7ytyy"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BertNormalizer"><code>BertNormalizer</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerspretokenizer--code-callable-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspretokenizer--code-callable-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PreTokenizer ⇐ <code> Callable </code></span></h2> <p data-svelte-h="svelte-1jqub8o">A callable class representing a pre-tokenizer used in tokenization. Subclasses | |
| should implement the <code>pre_tokenize_text</code> method to define the specific pre-tokenization logic.</p> <p data-svelte-h="svelte-1aafbib"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <a href="#Callable"><code>Callable</code></a></p> <ul data-svelte-h="svelte-1bn4cl6"><li><a href="#module_tokenizers..PreTokenizer">~PreTokenizer</a> ⇐ <a href="#Callable"><code>Callable</code></a><ul><li><em>instance</em><ul><li><em><a href="#module_tokenizers..PreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></em></li> <li><a href="#module_tokenizers..PreTokenizer+pre_tokenize"><code>.pre_tokenize(text, [options])</code></a> ⇒ <code>Array.<string></code></li> <li><a href="#module_tokenizers..PreTokenizer+_call"><code>._call(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers..PreTokenizer.fromConfig"><code>.fromConfig(config)</code></a> ⇒ <code>PreTokenizer</code></li></ul></li></ul></li></ul> <hr> <a id="module_tokenizers..PreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="pretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-19addtz">Method that should be implemented by subclasses to define the specific pre-tokenization logic.</p> <p data-svelte-h="svelte-1spca8v"><strong>Kind</strong>: instance abstract method of <a href="#module_tokenizers..PreTokenizer"><code>PreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - The pre-tokenized text.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-1q3adi"><li><code>Error</code> If the method is not implemented in the subclass.</li></ul> <table data-svelte-h="svelte-zcvat0"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to pre-tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizer+pre_tokenize" class="group"></a> <h3 class="relative group"><a id="pretokenizerpretokenizetext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretokenizerpretokenizetext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTokenizer.pre_tokenize(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-1oc7xq7">Tokenizes the given text into pre-tokens.</p> <p data-svelte-h="svelte-mvzmzf"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PreTokenizer"><code>PreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of pre-tokens.</p> <table data-svelte-h="svelte-1q2ym19"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code> | <code>Array<string></code></td><td><p>The text or array of texts to pre-tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizer+_call" class="group"></a> <h3 class="relative group"><a id="pretokenizercalltext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretokenizercalltext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTokenizer._call(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-baebiw">Alias for <a href="PreTokenizer#pre_tokenize">PreTokenizer#pre_tokenize</a>.</p> <p data-svelte-h="svelte-m5jkl3"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PreTokenizer"><code>PreTokenizer</code></a><br> <strong>Overrides</strong>: <a href="#Callable+_call"><code>_call</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of pre-tokens.</p> <table data-svelte-h="svelte-1q2ym19"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code> | <code>Array<string></code></td><td><p>The text or array of texts to pre-tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizer.fromConfig" class="group"></a> <h3 class="relative group"><a id="pretokenizerfromconfigconfig--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretokenizerfromconfigconfig--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PreTokenizer.fromConfig(config) ⇒ <code> PreTokenizer </code></span></h3> <p data-svelte-h="svelte-redbex">Factory method that returns an instance of a subclass of <code>PreTokenizer</code> based on the provided configuration.</p> <p data-svelte-h="svelte-1tdb68h"><strong>Kind</strong>: static method of <a href="#module_tokenizers..PreTokenizer"><code>PreTokenizer</code></a><br> <strong>Returns</strong>: <code>PreTokenizer</code> - An instance of a subclass of <code>PreTokenizer</code>.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-rns9ge"><li><code>Error</code> If the provided configuration object does not correspond to any known pre-tokenizer.</li></ul> <table data-svelte-h="svelte-1ty8cz"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>A configuration object for the pre-tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertPreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersbertpretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbertpretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BertPreTokenizer ⇐ <code> PreTokenizer </code></span></h2> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-1a68u8"><li><a href="#module_tokenizers..BertPreTokenizer">~BertPreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..BertPreTokenizer_new"><code>new BertPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..BertPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..BertPreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-bertpretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-bertpretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new BertPreTokenizer(config)</span></h3> <p data-svelte-h="svelte-gtgeht">A PreTokenizer that splits text into wordpieces using a basic tokenization scheme | |
| similar to that used in the original implementation of BERT.</p> <table data-svelte-h="svelte-tworoc"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertPreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="bertpretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bertpretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bertPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-tgmicg">Tokenizes a single text using the BERT pre-tokenization scheme.</p> <p data-svelte-h="svelte-16xyhnz"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BertPreTokenizer"><code>BertPreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of tokens.</p> <table data-svelte-h="svelte-61uqmw"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ByteLevelPreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersbytelevelpretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbytelevelpretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~ByteLevelPreTokenizer ⇐ <code> PreTokenizer </code></span></h2> <p data-svelte-h="svelte-34r8p">A pre-tokenizer that splits text into Byte-Pair-Encoding (BPE) subwords.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-1owpbdc"><li><a href="#module_tokenizers..ByteLevelPreTokenizer">~ByteLevelPreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..ByteLevelPreTokenizer_new"><code>new ByteLevelPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+add_prefix_space"><code>.add_prefix_space</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+trim_offsets"><code>.trim_offsets</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+use_regex"><code>.use_regex</code></a> : <code>boolean</code></li> <li><a href="#module_tokenizers..ByteLevelPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..ByteLevelPreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-bytelevelpretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-bytelevelpretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new ByteLevelPreTokenizer(config)</span></h3> <p data-svelte-h="svelte-7elsye">Creates a new instance of the <code>ByteLevelPreTokenizer</code> class.</p> <table data-svelte-h="svelte-tworoc"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ByteLevelPreTokenizer+add_prefix_space" class="group"></a> <h3 class="relative group"><a id="bytelevelpretokenizeraddprefixspace--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bytelevelpretokenizeraddprefixspace--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelPreTokenizer.add_prefix_space : <code> boolean </code></span></h3> <p data-svelte-h="svelte-141kpye">Whether to add a leading space to the first word.This allows to treat the leading word just as any other word.</p> <p data-svelte-h="svelte-c8mfrk"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..ByteLevelPreTokenizer"><code>ByteLevelPreTokenizer</code></a></p> <hr> <a id="module_tokenizers..ByteLevelPreTokenizer+trim_offsets" class="group"></a> <h3 class="relative group"><a id="bytelevelpretokenizertrimoffsets--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bytelevelpretokenizertrimoffsets--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelPreTokenizer.trim_offsets : <code> boolean </code></span></h3> <p data-svelte-h="svelte-1pv6ugb">Whether the post processing step should trim offsetsto avoid including whitespaces.</p> <p data-svelte-h="svelte-1jwkwcb"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..ByteLevelPreTokenizer"><code>ByteLevelPreTokenizer</code></a><br> <strong>Todo</strong></p> <ul data-svelte-h="svelte-1tkofaw"><li>Use this in the pretokenization step.</li></ul> <hr> <a id="module_tokenizers..ByteLevelPreTokenizer+use_regex" class="group"></a> <h3 class="relative group"><a id="bytelevelpretokenizeruseregex--code-boolean-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bytelevelpretokenizeruseregex--code-boolean-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelPreTokenizer.use_regex : <code> boolean </code></span></h3> <p data-svelte-h="svelte-1o4txfk">Whether to use the standard GPT2 regex for whitespace splitting.Set it to False if you want to use your own splitting. Defaults to true.</p> <p data-svelte-h="svelte-c8mfrk"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..ByteLevelPreTokenizer"><code>ByteLevelPreTokenizer</code></a></p> <hr> <a id="module_tokenizers..ByteLevelPreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="bytelevelpretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bytelevelpretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-dyllm1">Tokenizes a single piece of text using byte-level tokenization.</p> <p data-svelte-h="svelte-9ja9mh"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..ByteLevelPreTokenizer"><code>ByteLevelPreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of tokens.</p> <table data-svelte-h="svelte-61uqmw"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..SplitPreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerssplitpretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerssplitpretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~SplitPreTokenizer ⇐ <code> PreTokenizer </code></span></h2> <p data-svelte-h="svelte-6fldli">Splits text using a given pattern.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-19mec1z"><li><a href="#module_tokenizers..SplitPreTokenizer">~SplitPreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..SplitPreTokenizer_new"><code>new SplitPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..SplitPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..SplitPreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-splitpretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-splitpretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new SplitPreTokenizer(config)</span></h3> <table data-svelte-h="svelte-18pcmyh"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration options for the pre-tokenizer.</p></td> </tr><tr><td>config.pattern</td><td><code>Object</code></td><td><p>The pattern used to split the text. Can be a string or a regex object.</p></td> </tr><tr><td>config.pattern.String</td><td><code>string</code> | <code>undefined</code></td><td><p>The string to use for splitting. Only defined if the pattern is a string.</p></td> </tr><tr><td>config.pattern.Regex</td><td><code>string</code> | <code>undefined</code></td><td><p>The regex to use for splitting. Only defined if the pattern is a regex.</p></td> </tr><tr><td>config.behavior</td><td><code>SplitDelimiterBehavior</code></td><td><p>The behavior to use when splitting.</p></td> </tr><tr><td>config.invert</td><td><code>boolean</code></td><td><p>Whether to split (invert=false) or match (invert=true) the pattern.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..SplitPreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="splitpretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#splitpretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>splitPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-1e9v9pn">Tokenizes text by splitting it using the given pattern.</p> <p data-svelte-h="svelte-1nb2x3d"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..SplitPreTokenizer"><code>SplitPreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of tokens.</p> <table data-svelte-h="svelte-61uqmw"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PunctuationPreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerspunctuationpretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspunctuationpretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PunctuationPreTokenizer ⇐ <code> PreTokenizer </code></span></h2> <p data-svelte-h="svelte-o97k8y">Splits text based on punctuation.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-4kya8j"><li><a href="#module_tokenizers..PunctuationPreTokenizer">~PunctuationPreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..PunctuationPreTokenizer_new"><code>new PunctuationPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..PunctuationPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..PunctuationPreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-punctuationpretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-punctuationpretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new PunctuationPreTokenizer(config)</span></h3> <table data-svelte-h="svelte-1t0eat8"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration options for the pre-tokenizer.</p></td> </tr><tr><td>config.behavior</td><td><code>SplitDelimiterBehavior</code></td><td><p>The behavior to use when splitting.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PunctuationPreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="punctuationpretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#punctuationpretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>punctuationPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-1e9v9pn">Tokenizes text by splitting it using the given pattern.</p> <p data-svelte-h="svelte-pgfgex"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PunctuationPreTokenizer"><code>PunctuationPreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of tokens.</p> <table data-svelte-h="svelte-61uqmw"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..DigitsPreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersdigitspretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersdigitspretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~DigitsPreTokenizer ⇐ <code> PreTokenizer </code></span></h2> <p data-svelte-h="svelte-1i900bk">Splits text based on digits.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-fuzbfp"><li><a href="#module_tokenizers..DigitsPreTokenizer">~DigitsPreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..DigitsPreTokenizer_new"><code>new DigitsPreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..DigitsPreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..DigitsPreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-digitspretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-digitspretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new DigitsPreTokenizer(config)</span></h3> <table data-svelte-h="svelte-1rz32no"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration options for the pre-tokenizer.</p></td> </tr><tr><td>config.individual_digits</td><td><code>boolean</code></td><td><p>Whether to split on individual digits.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..DigitsPreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="digitspretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#digitspretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>digitsPreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-1e9v9pn">Tokenizes text by splitting it using the given pattern.</p> <p data-svelte-h="svelte-1k2jvw7"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..DigitsPreTokenizer"><code>DigitsPreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of tokens.</p> <table data-svelte-h="svelte-61uqmw"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessor" class="group"></a> <h2 class="relative group"><a id="tokenizerspostprocessor--code-callable-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspostprocessor--code-callable-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PostProcessor ⇐ <code> Callable </code></span></h2> <p data-svelte-h="svelte-1aafbib"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <a href="#Callable"><code>Callable</code></a></p> <ul data-svelte-h="svelte-er11m4"><li><a href="#module_tokenizers..PostProcessor">~PostProcessor</a> ⇐ <a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers..PostProcessor_new"><code>new PostProcessor(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers..PostProcessor+post_process"><code>.post_process(tokens, ...args)</code></a> ⇒ <code>PostProcessedOutput</code></li> <li><a href="#module_tokenizers..PostProcessor+_call"><code>._call(tokens, ...args)</code></a> ⇒ <code>PostProcessedOutput</code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers..PostProcessor.fromConfig"><code>.fromConfig(config)</code></a> ⇒ <code>PostProcessor</code></li></ul></li></ul></li></ul> <hr> <a id="new_module_tokenizers..PostProcessor_new" class="group"></a> <h3 class="relative group"><a id="new-postprocessorconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-postprocessorconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new PostProcessor(config)</span></h3> <table data-svelte-h="svelte-m3g71k"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration for the post-processor.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessor+post_process" class="group"></a> <h3 class="relative group"><a id="postprocessorpostprocesstokens-args--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#postprocessorpostprocesstokens-args--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>postProcessor.post_process(tokens, ...args) ⇒ <code> PostProcessedOutput </code></span></h3> <p data-svelte-h="svelte-14whgj2">Method to be implemented in subclass to apply post-processing on the given tokens.</p> <p data-svelte-h="svelte-ufqaef"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PostProcessor"><code>PostProcessor</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - The post-processed tokens.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-6hfrhb"><li><code>Error</code> If the method is not implemented in subclass.</li></ul> <table data-svelte-h="svelte-x1f9dp"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array</code></td><td><p>The input tokens to be post-processed.</p></td> </tr><tr><td>...args</td><td><code>*</code></td><td><p>Additional arguments required by the post-processing logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessor+_call" class="group"></a> <h3 class="relative group"><a id="postprocessorcalltokens-args--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#postprocessorcalltokens-args--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>postProcessor._call(tokens, ...args) ⇒ <code> PostProcessedOutput </code></span></h3> <p data-svelte-h="svelte-1h8wzy0">Alias for <a href="PostProcessor#post_process">PostProcessor#post_process</a>.</p> <p data-svelte-h="svelte-pyh1lv"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PostProcessor"><code>PostProcessor</code></a><br> <strong>Overrides</strong>: <a href="#Callable+_call"><code>_call</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - The post-processed tokens.</p> <table data-svelte-h="svelte-bnut61"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array</code></td><td><p>The text or array of texts to post-process.</p></td> </tr><tr><td>...args</td><td><code>*</code></td><td><p>Additional arguments required by the post-processing logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessor.fromConfig" class="group"></a> <h3 class="relative group"><a id="postprocessorfromconfigconfig--code-postprocessor-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#postprocessorfromconfigconfig--code-postprocessor-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PostProcessor.fromConfig(config) ⇒ <code> PostProcessor </code></span></h3> <p data-svelte-h="svelte-44djt6">Factory method to create a PostProcessor object from a configuration object.</p> <p data-svelte-h="svelte-1b7ak99"><strong>Kind</strong>: static method of <a href="#module_tokenizers..PostProcessor"><code>PostProcessor</code></a><br> <strong>Returns</strong>: <code>PostProcessor</code> - A PostProcessor object created from the given configuration.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-13th0qu"><li><code>Error</code> If an unknown PostProcessor type is encountered.</li></ul> <table data-svelte-h="svelte-v4jm0i"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>Configuration object representing a PostProcessor.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertProcessing" class="group"></a> <h2 class="relative group"><a id="tokenizersbertprocessing" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbertprocessing"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BertProcessing</span></h2> <p data-svelte-h="svelte-jv2j77">A post-processor that adds special tokens to the beginning and end of the input.</p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-1fzs36r"><li><a href="#module_tokenizers..BertProcessing">~BertProcessing</a><ul><li><a href="#new_module_tokenizers..BertProcessing_new"><code>new BertProcessing(config)</code></a></li> <li><a href="#module_tokenizers..BertProcessing+post_process"><code>.post_process(tokens, [tokens_pair])</code></a> ⇒ <code>PostProcessedOutput</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..BertProcessing_new" class="group"></a> <h3 class="relative group"><a id="new-bertprocessingconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-bertprocessingconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new BertProcessing(config)</span></h3> <table data-svelte-h="svelte-1bxe8xu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration for the post-processor.</p></td> </tr><tr><td>config.cls</td><td><code>Array.<string></code></td><td><p>The special tokens to add to the beginning of the input.</p></td> </tr><tr><td>config.sep</td><td><code>Array.<string></code></td><td><p>The special tokens to add to the end of the input.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BertProcessing+post_process" class="group"></a> <h3 class="relative group"><a id="bertprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bertprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>bertProcessing.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code></span></h3> <p data-svelte-h="svelte-jf8fq9">Adds the special tokens to the beginning and end of the input.</p> <p data-svelte-h="svelte-wj6rsa"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..BertProcessing"><code>BertProcessing</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - The post-processed tokens with the special tokens added to the beginning and end.</p> <table data-svelte-h="svelte-1t1br86"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td></td><td><p>The input tokens.</p></td> </tr><tr><td>[tokens_pair]</td><td><code>Array.<string></code></td><td><code></code></td><td><p>An optional second set of input tokens.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..TemplateProcessing" class="group"></a> <h2 class="relative group"><a id="tokenizerstemplateprocessing--code-postprocessor-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerstemplateprocessing--code-postprocessor-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~TemplateProcessing ⇐ <code> PostProcessor </code></span></h2> <p data-svelte-h="svelte-1byklnf">Post processor that replaces special tokens in a template with actual tokens.</p> <p data-svelte-h="svelte-109ectr"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PostProcessor</code></p> <ul data-svelte-h="svelte-18at4lt"><li><a href="#module_tokenizers..TemplateProcessing">~TemplateProcessing</a> ⇐ <code>PostProcessor</code><ul><li><a href="#new_module_tokenizers..TemplateProcessing_new"><code>new TemplateProcessing(config)</code></a></li> <li><a href="#module_tokenizers..TemplateProcessing+post_process"><code>.post_process(tokens, [tokens_pair])</code></a> ⇒ <code>PostProcessedOutput</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..TemplateProcessing_new" class="group"></a> <h3 class="relative group"><a id="new-templateprocessingconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-templateprocessingconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new TemplateProcessing(config)</span></h3> <p data-svelte-h="svelte-ggislo">Creates a new instance of <code>TemplateProcessing</code>.</p> <table data-svelte-h="svelte-1jenfln"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration options for the post processor.</p></td> </tr><tr><td>config.single</td><td><code>Array</code></td><td><p>The template for a single sequence of tokens.</p></td> </tr><tr><td>config.pair</td><td><code>Array</code></td><td><p>The template for a pair of sequences of tokens.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..TemplateProcessing+post_process" class="group"></a> <h3 class="relative group"><a id="templateprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#templateprocessingpostprocesstokens-tokenspair--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>templateProcessing.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code></span></h3> <p data-svelte-h="svelte-14th5ew">Replaces special tokens in the template with actual tokens.</p> <p data-svelte-h="svelte-ayon61"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..TemplateProcessing"><code>TemplateProcessing</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - An object containing the list of tokens with the special tokens replaced with actual tokens.</p> <table data-svelte-h="svelte-bwvwli"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td></td><td><p>The list of tokens for the first sequence.</p></td> </tr><tr><td>[tokens_pair]</td><td><code>Array.<string></code></td><td><code></code></td><td><p>The list of tokens for the second sequence (optional).</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ByteLevelPostProcessor" class="group"></a> <h2 class="relative group"><a id="tokenizersbytelevelpostprocessor--code-postprocessor-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbytelevelpostprocessor--code-postprocessor-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~ByteLevelPostProcessor ⇐ <code> PostProcessor </code></span></h2> <p data-svelte-h="svelte-1vpbvt9">A PostProcessor that returns the given tokens as is.</p> <p data-svelte-h="svelte-109ectr"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PostProcessor</code></p> <hr> <a id="module_tokenizers..ByteLevelPostProcessor+post_process" class="group"></a> <h3 class="relative group"><a id="bytelevelpostprocessorpostprocesstokens-tokenspair--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bytelevelpostprocessorpostprocesstokens-tokenspair--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelPostProcessor.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code></span></h3> <p data-svelte-h="svelte-1f82mhb">Post process the given tokens.</p> <p data-svelte-h="svelte-1vf7qcj"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..ByteLevelPostProcessor"><code>ByteLevelPostProcessor</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - An object containing the post-processed tokens.</p> <table data-svelte-h="svelte-bwvwli"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td></td><td><p>The list of tokens for the first sequence.</p></td> </tr><tr><td>[tokens_pair]</td><td><code>Array.<string></code></td><td><code></code></td><td><p>The list of tokens for the second sequence (optional).</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessorSequence" class="group"></a> <h2 class="relative group"><a id="tokenizerspostprocessorsequence" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspostprocessorsequence"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PostProcessorSequence</span></h2> <p data-svelte-h="svelte-jlzqc1">A post-processor that applies multiple post-processors in sequence.</p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-160svsg"><li><a href="#module_tokenizers..PostProcessorSequence">~PostProcessorSequence</a><ul><li><a href="#new_module_tokenizers..PostProcessorSequence_new"><code>new PostProcessorSequence(config)</code></a></li> <li><a href="#module_tokenizers..PostProcessorSequence+post_process"><code>.post_process(tokens, [tokens_pair])</code></a> ⇒ <code>PostProcessedOutput</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..PostProcessorSequence_new" class="group"></a> <h3 class="relative group"><a id="new-postprocessorsequenceconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-postprocessorsequenceconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new PostProcessorSequence(config)</span></h3> <p data-svelte-h="svelte-1i7r42h">Creates a new instance of PostProcessorSequence.</p> <table data-svelte-h="svelte-9gd1xf"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td> </tr><tr><td>config.processors</td><td><code>Array.<Object></code></td><td><p>The list of post-processors to apply.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PostProcessorSequence+post_process" class="group"></a> <h3 class="relative group"><a id="postprocessorsequencepostprocesstokens-tokenspair--code-postprocessedoutput-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#postprocessorsequencepostprocesstokens-tokenspair--code-postprocessedoutput-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>postProcessorSequence.post_process(tokens, [tokens_pair]) ⇒ <code> PostProcessedOutput </code></span></h3> <p data-svelte-h="svelte-1f82mhb">Post process the given tokens.</p> <p data-svelte-h="svelte-iyek03"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PostProcessorSequence"><code>PostProcessorSequence</code></a><br> <strong>Returns</strong>: <code>PostProcessedOutput</code> - An object containing the post-processed tokens.</p> <table data-svelte-h="svelte-bwvwli"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td></td><td><p>The list of tokens for the first sequence.</p></td> </tr><tr><td>[tokens_pair]</td><td><code>Array.<string></code></td><td><code></code></td><td><p>The list of tokens for the second sequence (optional).</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Decoder" class="group"></a> <h2 class="relative group"><a id="tokenizersdecoder--code-callable-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersdecoder--code-callable-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Decoder ⇐ <code> Callable </code></span></h2> <p data-svelte-h="svelte-155b6hh">The base class for token decoders.</p> <p data-svelte-h="svelte-1aafbib"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <a href="#Callable"><code>Callable</code></a></p> <ul data-svelte-h="svelte-13kifg7"><li><a href="#module_tokenizers..Decoder">~Decoder</a> ⇐ <a href="#Callable"><code>Callable</code></a><ul><li><a href="#new_module_tokenizers..Decoder_new"><code>new Decoder(config)</code></a></li> <li><em>instance</em><ul><li><a href="#module_tokenizers..Decoder+added_tokens"><code>.added_tokens</code></a> : <code>Array.<AddedToken></code></li> <li><a href="#module_tokenizers..Decoder+_call"><code>._call(tokens)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..Decoder+decode"><code>.decode(tokens)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..Decoder+decode_chain"><code>.decode_chain(tokens)</code></a> ⇒ <code>Array.<string></code></li></ul></li> <li><em>static</em><ul><li><a href="#module_tokenizers..Decoder.fromConfig"><code>.fromConfig(config)</code></a> ⇒ <code>Decoder</code></li></ul></li></ul></li></ul> <hr> <a id="new_module_tokenizers..Decoder_new" class="group"></a> <h3 class="relative group"><a id="new-decoderconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-decoderconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new Decoder(config)</span></h3> <p data-svelte-h="svelte-1bygwbp">Creates an instance of <code>Decoder</code>.</p> <table data-svelte-h="svelte-tworoc"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Decoder+added_tokens" class="group"></a> <h3 class="relative group"><a id="decoderaddedtokens--code-array--addedtoken--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decoderaddedtokens--code-array--addedtoken--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>decoder.added_tokens : <code> Array. < AddedToken > </code></span></h3> <p data-svelte-h="svelte-1wueo48"><strong>Kind</strong>: instance property of <a href="#module_tokenizers..Decoder"><code>Decoder</code></a></p> <hr> <a id="module_tokenizers..Decoder+_call" class="group"></a> <h3 class="relative group"><a id="decodercalltokens--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decodercalltokens--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>decoder._call(tokens) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1qx0w3k">Calls the <code>decode</code> method.</p> <p data-svelte-h="svelte-j6vdfv"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Decoder"><code>Decoder</code></a><br> <strong>Overrides</strong>: <a href="#Callable+_call"><code>_call</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string.</p> <table data-svelte-h="svelte-1kujoeu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>The list of tokens.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Decoder+decode" class="group"></a> <h3 class="relative group"><a id="decoderdecodetokens--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decoderdecodetokens--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>decoder.decode(tokens) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1hqb24l">Decodes a list of tokens.</p> <p data-svelte-h="svelte-10rmqcn"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Decoder"><code>Decoder</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string.</p> <table data-svelte-h="svelte-1kujoeu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>The list of tokens.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Decoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="decoderdecodechaintokens--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decoderdecodechaintokens--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>decoder.decode_chain(tokens) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-wbd0b6">Apply the decoder to a list of tokens.</p> <p data-svelte-h="svelte-jth48o"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Decoder"><code>Decoder</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - The decoded list of tokens.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-5buzwk"><li><code>Error</code> If the `decode_chain` method is not implemented in the subclass.</li></ul> <table data-svelte-h="svelte-1kujoeu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>The list of tokens.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Decoder.fromConfig" class="group"></a> <h3 class="relative group"><a id="decoderfromconfigconfig--code-decoder-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decoderfromconfigconfig--code-decoder-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Decoder.fromConfig(config) ⇒ <code> Decoder </code></span></h3> <p data-svelte-h="svelte-zfgnbx">Creates a decoder instance based on the provided configuration.</p> <p data-svelte-h="svelte-1tr8nt"><strong>Kind</strong>: static method of <a href="#module_tokenizers..Decoder"><code>Decoder</code></a><br> <strong>Returns</strong>: <code>Decoder</code> - A decoder instance.<br> <strong>Throws</strong>:</p> <ul data-svelte-h="svelte-lb27nh"><li><code>Error</code> If an unknown decoder type is provided.</li></ul> <table data-svelte-h="svelte-tworoc"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..FuseDecoder" class="group"></a> <h2 class="relative group"><a id="tokenizersfusedecoder" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersfusedecoder"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~FuseDecoder</span></h2> <p data-svelte-h="svelte-f5h399">Fuse simply fuses all tokens into one big string. | |
| It’s usually the last decoding step anyway, but this decoder | |
| exists incase some decoders need to happen after that step</p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="module_tokenizers..FuseDecoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="fusedecoderdecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#fusedecoderdecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>fuseDecoder.decode_chain() : <code> * </code></span></h3> <p data-svelte-h="svelte-1hzjpri"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..FuseDecoder"><code>FuseDecoder</code></a></p> <hr> <a id="module_tokenizers..WordPieceDecoder" class="group"></a> <h2 class="relative group"><a id="tokenizerswordpiecedecoder--code-decoder-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswordpiecedecoder--code-decoder-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~WordPieceDecoder ⇐ <code> Decoder </code></span></h2> <p data-svelte-h="svelte-1m2xybh">A decoder that decodes a list of WordPiece tokens into a single string.</p> <p data-svelte-h="svelte-nbdqst"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Decoder</code></p> <ul data-svelte-h="svelte-1p8wow"><li><a href="#module_tokenizers..WordPieceDecoder">~WordPieceDecoder</a> ⇐ <code>Decoder</code><ul><li><a href="#new_module_tokenizers..WordPieceDecoder_new"><code>new WordPieceDecoder(config)</code></a></li> <li><a href="#module_tokenizers..WordPieceDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..WordPieceDecoder_new" class="group"></a> <h3 class="relative group"><a id="new-wordpiecedecoderconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-wordpiecedecoderconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new WordPieceDecoder(config)</span></h3> <p data-svelte-h="svelte-emne3c">Creates a new instance of WordPieceDecoder.</p> <table data-svelte-h="svelte-6ky2kn"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td> </tr><tr><td>config.prefix</td><td><code>string</code></td><td><p>The prefix used for WordPiece encoding.</p></td> </tr><tr><td>config.cleanup</td><td><code>boolean</code></td><td><p>Whether to cleanup the decoded string.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WordPieceDecoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="wordpiecedecoderdecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#wordpiecedecoderdecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>wordPieceDecoder.decode_chain() : <code> * </code></span></h3> <p data-svelte-h="svelte-100h0ya"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..WordPieceDecoder"><code>WordPieceDecoder</code></a></p> <hr> <a id="module_tokenizers..ByteLevelDecoder" class="group"></a> <h2 class="relative group"><a id="tokenizersbyteleveldecoder--code-decoder-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbyteleveldecoder--code-decoder-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~ByteLevelDecoder ⇐ <code> Decoder </code></span></h2> <p data-svelte-h="svelte-1312arw">Byte-level decoder for tokenization output. Inherits from the <code>Decoder</code> class.</p> <p data-svelte-h="svelte-nbdqst"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Decoder</code></p> <ul data-svelte-h="svelte-pxh1vl"><li><a href="#module_tokenizers..ByteLevelDecoder">~ByteLevelDecoder</a> ⇐ <code>Decoder</code><ul><li><a href="#new_module_tokenizers..ByteLevelDecoder_new"><code>new ByteLevelDecoder(config)</code></a></li> <li><a href="#module_tokenizers..ByteLevelDecoder+convert_tokens_to_string"><code>.convert_tokens_to_string(tokens)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..ByteLevelDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..ByteLevelDecoder_new" class="group"></a> <h3 class="relative group"><a id="new-byteleveldecoderconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-byteleveldecoderconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new ByteLevelDecoder(config)</span></h3> <p data-svelte-h="svelte-1sje6rv">Create a <code>ByteLevelDecoder</code> object.</p> <table data-svelte-h="svelte-kkg20v"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>Configuration object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ByteLevelDecoder+convert_tokens_to_string" class="group"></a> <h3 class="relative group"><a id="byteleveldecoderconverttokenstostringtokens--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#byteleveldecoderconverttokenstostringtokens--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelDecoder.convert_tokens_to_string(tokens) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1b6r5pz">Convert an array of tokens to string by decoding each byte.</p> <p data-svelte-h="svelte-1vs4rgl"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..ByteLevelDecoder"><code>ByteLevelDecoder</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string.</p> <table data-svelte-h="svelte-unswmu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>Array of tokens to be decoded.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ByteLevelDecoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="byteleveldecoderdecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#byteleveldecoderdecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>byteLevelDecoder.decode_chain() : <code> * </code></span></h3> <p data-svelte-h="svelte-1dwsqre"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..ByteLevelDecoder"><code>ByteLevelDecoder</code></a></p> <hr> <a id="module_tokenizers..CTCDecoder" class="group"></a> <h2 class="relative group"><a id="tokenizersctcdecoder" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersctcdecoder"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~CTCDecoder</span></h2> <p data-svelte-h="svelte-zf06vq">The CTC (Connectionist Temporal Classification) decoder. | |
| See <a href="https://github.com/huggingface/tokenizers/blob/bb38f390a61883fc2f29d659af696f428d1cda6b/tokenizers/src/decoders/ctc.rs" rel="nofollow">https://github.com/huggingface/tokenizers/blob/bb38f390a61883fc2f29d659af696f428d1cda6b/tokenizers/src/decoders/ctc.rs</a></p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-1lodx12"><li><a href="#module_tokenizers..CTCDecoder">~CTCDecoder</a><ul><li><a href="#module_tokenizers..CTCDecoder+convert_tokens_to_string"><code>.convert_tokens_to_string(tokens)</code></a> ⇒ <code>string</code></li> <li><a href="#module_tokenizers..CTCDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="module_tokenizers..CTCDecoder+convert_tokens_to_string" class="group"></a> <h3 class="relative group"><a id="ctcdecoderconverttokenstostringtokens--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ctcdecoderconverttokenstostringtokens--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ctcDecoder.convert_tokens_to_string(tokens) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1gchkl">Converts a connectionist-temporal-classification (CTC) output tokens into a single string.</p> <p data-svelte-h="svelte-pchset"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..CTCDecoder"><code>CTCDecoder</code></a><br> <strong>Returns</strong>: <code>string</code> - The decoded string.</p> <table data-svelte-h="svelte-unswmu"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>Array of tokens to be decoded.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..CTCDecoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="ctcdecoderdecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ctcdecoderdecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ctcDecoder.decode_chain() : <code> * </code></span></h3> <p data-svelte-h="svelte-jnewq"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..CTCDecoder"><code>CTCDecoder</code></a></p> <hr> <a id="module_tokenizers..DecoderSequence" class="group"></a> <h2 class="relative group"><a id="tokenizersdecodersequence--code-decoder-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersdecodersequence--code-decoder-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~DecoderSequence ⇐ <code> Decoder </code></span></h2> <p data-svelte-h="svelte-16p2zks">Apply a sequence of decoders.</p> <p data-svelte-h="svelte-nbdqst"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Decoder</code></p> <ul data-svelte-h="svelte-19pe06l"><li><a href="#module_tokenizers..DecoderSequence">~DecoderSequence</a> ⇐ <code>Decoder</code><ul><li><a href="#new_module_tokenizers..DecoderSequence_new"><code>new DecoderSequence(config)</code></a></li> <li><a href="#module_tokenizers..DecoderSequence+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..DecoderSequence_new" class="group"></a> <h3 class="relative group"><a id="new-decodersequenceconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-decodersequenceconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new DecoderSequence(config)</span></h3> <p data-svelte-h="svelte-1gk4xdv">Creates a new instance of DecoderSequence.</p> <table data-svelte-h="svelte-1ip0x80"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object.</p></td> </tr><tr><td>config.decoders</td><td><code>Array.<Object></code></td><td><p>The list of decoders to apply.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..DecoderSequence+decode_chain" class="group"></a> <h3 class="relative group"><a id="decodersequencedecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#decodersequencedecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>decoderSequence.decode_chain() : <code> * </code></span></h3> <p data-svelte-h="svelte-1jmxaf6"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..DecoderSequence"><code>DecoderSequence</code></a></p> <hr> <a id="module_tokenizers..MetaspacePreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersmetaspacepretokenizer--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersmetaspacepretokenizer--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~MetaspacePreTokenizer ⇐ <code> PreTokenizer </code></span></h2> <p data-svelte-h="svelte-o07wl1">This PreTokenizer replaces spaces with the given replacement character, adds a prefix space if requested, | |
| and returns a list of tokens.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-xjpnfg"><li><a href="#module_tokenizers..MetaspacePreTokenizer">~MetaspacePreTokenizer</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..MetaspacePreTokenizer_new"><code>new MetaspacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..MetaspacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..MetaspacePreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-metaspacepretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-metaspacepretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new MetaspacePreTokenizer(config)</span></h3> <table data-svelte-h="svelte-1sg6gza"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td></td><td><p>The configuration object for the MetaspacePreTokenizer.</p></td> </tr><tr><td>config.add_prefix_space</td><td><code>boolean</code></td><td></td><td><p>Whether to add a prefix space to the first token.</p></td> </tr><tr><td>config.replacement</td><td><code>string</code></td><td></td><td><p>The character to replace spaces with.</p></td> </tr><tr><td>[config.str_rep]</td><td><code>string</code></td><td><code>"config.replacement"</code></td><td><p>An optional string representation of the replacement character.</p></td> </tr><tr><td>[config.prepend_scheme]</td><td><code>'first'</code> | <code>'never'</code> | <code>'always'</code></td><td><code>'always'</code></td><td><p>The metaspace prepending scheme.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..MetaspacePreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="metaspacepretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#metaspacepretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>metaspacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-i68fsj">This method takes a string, replaces spaces with the replacement character, | |
| adds a prefix space if requested, and returns a new list of tokens.</p> <p data-svelte-h="svelte-8kprtf"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..MetaspacePreTokenizer"><code>MetaspacePreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - A new list of pre-tokenized tokens.</p> <table data-svelte-h="svelte-mb6l9"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to pre-tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>The options for the pre-tokenization.</p></td> </tr><tr><td>[options.section_index]</td><td><code>number</code></td><td><p>The index of the section to pre-tokenize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..MetaspaceDecoder" class="group"></a> <h2 class="relative group"><a id="tokenizersmetaspacedecoder--code-decoder-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersmetaspacedecoder--code-decoder-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~MetaspaceDecoder ⇐ <code> Decoder </code></span></h2> <p data-svelte-h="svelte-kf7suv">MetaspaceDecoder class extends the Decoder class and decodes Metaspace tokenization.</p> <p data-svelte-h="svelte-nbdqst"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Decoder</code></p> <ul data-svelte-h="svelte-5xe7kn"><li><a href="#module_tokenizers..MetaspaceDecoder">~MetaspaceDecoder</a> ⇐ <code>Decoder</code><ul><li><a href="#new_module_tokenizers..MetaspaceDecoder_new"><code>new MetaspaceDecoder(config)</code></a></li> <li><a href="#module_tokenizers..MetaspaceDecoder+decode_chain"><code>.decode_chain()</code></a> : <code>*</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..MetaspaceDecoder_new" class="group"></a> <h3 class="relative group"><a id="new-metaspacedecoderconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-metaspacedecoderconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new MetaspaceDecoder(config)</span></h3> <p data-svelte-h="svelte-44mrh1">Constructs a new MetaspaceDecoder object.</p> <table data-svelte-h="svelte-669i62"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the MetaspaceDecoder.</p></td> </tr><tr><td>config.add_prefix_space</td><td><code>boolean</code></td><td><p>Whether to add a prefix space to the decoded string.</p></td> </tr><tr><td>config.replacement</td><td><code>string</code></td><td><p>The string to replace spaces with.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..MetaspaceDecoder+decode_chain" class="group"></a> <h3 class="relative group"><a id="metaspacedecoderdecodechain--code--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#metaspacedecoderdecodechain--code--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>metaspaceDecoder.decode_chain() : <code> * </code></span></h3> <p data-svelte-h="svelte-hmubey"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..MetaspaceDecoder"><code>MetaspaceDecoder</code></a></p> <hr> <a id="module_tokenizers..Precompiled" class="group"></a> <h2 class="relative group"><a id="tokenizersprecompiled--code-normalizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersprecompiled--code-normalizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Precompiled ⇐ <code> Normalizer </code></span></h2> <p data-svelte-h="svelte-obvisk">A normalizer that applies a precompiled charsmap. | |
| This is useful for applying complex normalizations in C++ and exposing them to JavaScript.</p> <p data-svelte-h="svelte-s0e1a"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>Normalizer</code></p> <ul data-svelte-h="svelte-1go7hlr"><li><a href="#module_tokenizers..Precompiled">~Precompiled</a> ⇐ <code>Normalizer</code><ul><li><a href="#new_module_tokenizers..Precompiled_new"><code>new Precompiled(config)</code></a></li> <li><a href="#module_tokenizers..Precompiled+normalize"><code>.normalize(text)</code></a> ⇒ <code>string</code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..Precompiled_new" class="group"></a> <h3 class="relative group"><a id="new-precompiledconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-precompiledconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new Precompiled(config)</span></h3> <p data-svelte-h="svelte-vphs3k">Create a new instance of Precompiled normalizer.</p> <table data-svelte-h="svelte-1fprcm1"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the Precompiled normalizer.</p></td> </tr><tr><td>config.precompiled_charsmap</td><td><code>Object</code></td><td><p>The precompiled charsmap object.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Precompiled+normalize" class="group"></a> <h3 class="relative group"><a id="precompilednormalizetext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#precompilednormalizetext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>precompiled.normalize(text) ⇒ <code> string </code></span></h3> <p data-svelte-h="svelte-1kg0a1i">Normalizes the given text by applying the precompiled charsmap.</p> <p data-svelte-h="svelte-1cck924"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..Precompiled"><code>Precompiled</code></a><br> <strong>Returns</strong>: <code>string</code> - The normalized text.</p> <table data-svelte-h="svelte-1x3bnnd"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to normalize.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizerSequence" class="group"></a> <h2 class="relative group"><a id="tokenizerspretokenizersequence--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspretokenizersequence--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PreTokenizerSequence ⇐ <code> PreTokenizer </code></span></h2> <p data-svelte-h="svelte-1hneoxf">A pre-tokenizer that applies a sequence of pre-tokenizers to the input text.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-1wq9tve"><li><a href="#module_tokenizers..PreTokenizerSequence">~PreTokenizerSequence</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..PreTokenizerSequence_new"><code>new PreTokenizerSequence(config)</code></a></li> <li><a href="#module_tokenizers..PreTokenizerSequence+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..PreTokenizerSequence_new" class="group"></a> <h3 class="relative group"><a id="new-pretokenizersequenceconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-pretokenizersequenceconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new PreTokenizerSequence(config)</span></h3> <p data-svelte-h="svelte-f6z5j5">Creates an instance of PreTokenizerSequence.</p> <table data-svelte-h="svelte-3gpd27"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the pre-tokenizer sequence.</p></td> </tr><tr><td>config.pretokenizers</td><td><code>Array.<Object></code></td><td><p>An array of pre-tokenizer configurations.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PreTokenizerSequence+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="pretokenizersequencepretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pretokenizersequencepretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>preTokenizerSequence.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-cexh8w">Applies each pre-tokenizer in the sequence to the input text in turn.</p> <p data-svelte-h="svelte-v7wifj"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..PreTokenizerSequence"><code>PreTokenizerSequence</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - The pre-tokenized text.</p> <table data-svelte-h="svelte-zcvat0"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to pre-tokenize.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WhitespacePreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizerswhitespacepretokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswhitespacepretokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~WhitespacePreTokenizer</span></h2> <p data-svelte-h="svelte-1xsklq1">Splits on word boundaries (using the following regular expression: <code>\w+|[^\w\s]+</code>).</p> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-1pxidrf"><li><a href="#module_tokenizers..WhitespacePreTokenizer">~WhitespacePreTokenizer</a><ul><li><a href="#new_module_tokenizers..WhitespacePreTokenizer_new"><code>new WhitespacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..WhitespacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..WhitespacePreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-whitespacepretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-whitespacepretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new WhitespacePreTokenizer(config)</span></h3> <p data-svelte-h="svelte-1eny2m5">Creates an instance of WhitespacePreTokenizer.</p> <table data-svelte-h="svelte-q9tfiz"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the pre-tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WhitespacePreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="whitespacepretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#whitespacepretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>whitespacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-j2p7wj">Pre-tokenizes the input text by splitting it on word boundaries.</p> <p data-svelte-h="svelte-rjqdv6"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..WhitespacePreTokenizer"><code>WhitespacePreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of tokens produced by splitting the input text on whitespace.</p> <table data-svelte-h="svelte-12f9sp1"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to be pre-tokenized.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WhitespaceSplit" class="group"></a> <h2 class="relative group"><a id="tokenizerswhitespacesplit--code-pretokenizer-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswhitespacesplit--code-pretokenizer-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~WhitespaceSplit ⇐ <code> PreTokenizer </code></span></h2> <p data-svelte-h="svelte-6e66pa">Splits a string of text by whitespace characters into individual tokens.</p> <p data-svelte-h="svelte-7m4c3f"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Extends</strong>: <code>PreTokenizer</code></p> <ul data-svelte-h="svelte-i2abig"><li><a href="#module_tokenizers..WhitespaceSplit">~WhitespaceSplit</a> ⇐ <code>PreTokenizer</code><ul><li><a href="#new_module_tokenizers..WhitespaceSplit_new"><code>new WhitespaceSplit(config)</code></a></li> <li><a href="#module_tokenizers..WhitespaceSplit+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..WhitespaceSplit_new" class="group"></a> <h3 class="relative group"><a id="new-whitespacesplitconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-whitespacesplitconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new WhitespaceSplit(config)</span></h3> <p data-svelte-h="svelte-19yr1r7">Creates an instance of WhitespaceSplit.</p> <table data-svelte-h="svelte-q9tfiz"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration object for the pre-tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..WhitespaceSplit+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="whitespacesplitpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#whitespacesplitpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>whitespaceSplit.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-s2po1q">Pre-tokenizes the input text by splitting it on whitespace characters.</p> <p data-svelte-h="svelte-gmi99a"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..WhitespaceSplit"><code>WhitespaceSplit</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of tokens produced by splitting the input text on whitespace.</p> <table data-svelte-h="svelte-12f9sp1"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to be pre-tokenized.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ReplacePreTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersreplacepretokenizer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersreplacepretokenizer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~ReplacePreTokenizer</span></h2> <p data-svelte-h="svelte-134ima0"><strong>Kind</strong>: inner class of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <ul data-svelte-h="svelte-n2rage"><li><a href="#module_tokenizers..ReplacePreTokenizer">~ReplacePreTokenizer</a><ul><li><a href="#new_module_tokenizers..ReplacePreTokenizer_new"><code>new ReplacePreTokenizer(config)</code></a></li> <li><a href="#module_tokenizers..ReplacePreTokenizer+pre_tokenize_text"><code>.pre_tokenize_text(text, [options])</code></a> ⇒ <code>Array.<string></code></li></ul></li></ul> <hr> <a id="new_module_tokenizers..ReplacePreTokenizer_new" class="group"></a> <h3 class="relative group"><a id="new-replacepretokenizerconfig" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#new-replacepretokenizerconfig"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>new ReplacePreTokenizer(config)</span></h3> <table data-svelte-h="svelte-8tvu8r"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>config</td><td><code>Object</code></td><td><p>The configuration options for the pre-tokenizer.</p></td> </tr><tr><td>config.pattern</td><td><code>Object</code></td><td><p>The pattern used to split the text. Can be a string or a regex object.</p></td> </tr><tr><td>config.content</td><td><code>string</code></td><td><p>What to replace the pattern with.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..ReplacePreTokenizer+pre_tokenize_text" class="group"></a> <h3 class="relative group"><a id="replacepretokenizerpretokenizetexttext-options--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#replacepretokenizerpretokenizetexttext-options--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>replacePreTokenizer.pre_tokenize_text(text, [options]) ⇒ <code> Array. < string > </code></span></h3> <p data-svelte-h="svelte-167jpma">Pre-tokenizes the input text by replacing certain characters.</p> <p data-svelte-h="svelte-1605wdl"><strong>Kind</strong>: instance method of <a href="#module_tokenizers..ReplacePreTokenizer"><code>ReplacePreTokenizer</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - An array of tokens produced by replacing certain characters.</p> <table data-svelte-h="svelte-12f9sp1"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to be pre-tokenized.</p></td> </tr><tr><td>[options]</td><td><code>Object</code></td><td><p>Additional options for the pre-tokenization logic.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BYTES_TO_UNICODE" class="group"></a> <h2 class="relative group"><a id="tokenizersbytestounicode--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbytestounicode--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BYTES_TO_UNICODE ⇒ <code> Object </code></span></h2> <p data-svelte-h="svelte-9bnea6">Returns list of utf-8 byte and a mapping to unicode strings. | |
| Specifically avoids mapping to whitespace/control characters the BPE code barfs on.</p> <p data-svelte-h="svelte-6ckyyb"><strong>Kind</strong>: inner constant of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Object</code> - Object with utf-8 byte keys and unicode string values.</p> <hr> <a id="module_tokenizers..loadTokenizer" class="group"></a> <h2 class="relative group"><a id="tokenizersloadtokenizerpretrainedmodelnameorpath-options--code-promise--array--any---code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersloadtokenizerpretrainedmodelnameorpath-options--code-promise--array--any---code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~loadTokenizer(pretrained_model_name_or_path, options) ⇒ <code> Promise. < Array < any > > </code></span></h2> <p data-svelte-h="svelte-reckhh">Loads a tokenizer from the specified path.</p> <p data-svelte-h="svelte-13ziayt"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Promise.<Array<any>></code> - A promise that resolves with information about the loaded tokenizer.</p> <table data-svelte-h="svelte-1p1jwnz"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>pretrained_model_name_or_path</td><td><code>string</code></td><td><p>The path to the tokenizer directory.</p></td> </tr><tr><td>options</td><td><code>PretrainedTokenizerOptions</code></td><td><p>Additional options for loading the tokenizer.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..regexSplit" class="group"></a> <h2 class="relative group"><a id="tokenizersregexsplittext-regex--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersregexsplittext-regex--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~regexSplit(text, regex) ⇒ <code> Array. < string > </code></span></h2> <p data-svelte-h="svelte-tsn1ig">Helper function to split a string on a regex, but keep the delimiters. | |
| This is required, because the JavaScript <code>.split()</code> method does not keep the delimiters, | |
| and wrapping in a capturing group causes issues with existing capturing groups (due to nesting).</p> <p data-svelte-h="svelte-j4end5"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - The split string.</p> <table data-svelte-h="svelte-guhl6k"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to split.</p></td> </tr><tr><td>regex</td><td><code>RegExp</code></td><td><p>The regex to split on.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..createPattern" class="group"></a> <h2 class="relative group"><a id="tokenizerscreatepatternpattern-invert--code-regexp-code--code-null-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerscreatepatternpattern-invert--code-regexp-code--code-null-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~createPattern(pattern, invert) ⇒ <code> RegExp </code> | <code> null </code></span></h2> <p data-svelte-h="svelte-9yqxaa">Helper method to construct a pattern from a config object.</p> <p data-svelte-h="svelte-1tcd95m"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>RegExp</code> | <code>null</code> - The compiled pattern.</p> <table data-svelte-h="svelte-2irxm"><thead><tr><th>Param</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>pattern</td><td><code>Object</code></td><td></td><td><p>The pattern object.</p></td> </tr><tr><td>invert</td><td><code>boolean</code></td><td><code>true</code></td><td><p>Whether to invert the pattern.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..objectToMap" class="group"></a> <h2 class="relative group"><a id="tokenizersobjecttomapobj--code-map--string-any--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersobjecttomapobj--code-map--string-any--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~objectToMap(obj) ⇒ <code> Map. < string, any > </code></span></h2> <p data-svelte-h="svelte-y4nvw8">Helper function to convert an Object to a Map</p> <p data-svelte-h="svelte-1oim1d9"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Map.<string, any></code> - The map.</p> <table data-svelte-h="svelte-1ha9dpj"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>obj</td><td><code>Object</code></td><td><p>The object to convert.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..prepareTensorForDecode" class="group"></a> <h2 class="relative group"><a id="tokenizerspreparetensorfordecodetensor--code-array--number--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspreparetensorfordecodetensor--code-array--number--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~prepareTensorForDecode(tensor) ⇒ <code> Array. < number > </code></span></h2> <p data-svelte-h="svelte-1sig5im">Helper function to convert a tensor to a list before decoding.</p> <p data-svelte-h="svelte-1qz3zie"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Array.<number></code> - The tensor as a list.</p> <table data-svelte-h="svelte-1kahhga"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tensor</td><td><code><a href="#Tensor">Tensor</a></code></td><td><p>The tensor to convert.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..clean_up_tokenization" class="group"></a> <h2 class="relative group"><a id="tokenizerscleanuptokenizationtext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerscleanuptokenizationtext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~clean_up_tokenization(text) ⇒ <code> string </code></span></h2> <p data-svelte-h="svelte-1n3aqy7">Clean up a list of simple English tokenization artifacts like spaces before punctuations and abbreviated forms</p> <p data-svelte-h="svelte-157j3gz"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>string</code> - The cleaned up text.</p> <table data-svelte-h="svelte-my2gd4"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to clean up.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..remove_accents" class="group"></a> <h2 class="relative group"><a id="tokenizersremoveaccentstext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersremoveaccentstext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~remove_accents(text) ⇒ <code> string </code></span></h2> <p data-svelte-h="svelte-b1xq8m">Helper function to remove accents from a string.</p> <p data-svelte-h="svelte-10igiq5"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>string</code> - The text with accents removed.</p> <table data-svelte-h="svelte-96bvb"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to remove accents from.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..lowercase_and_remove_accent" class="group"></a> <h2 class="relative group"><a id="tokenizerslowercaseandremoveaccenttext--code-string-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerslowercaseandremoveaccenttext--code-string-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~lowercase_and_remove_accent(text) ⇒ <code> string </code></span></h2> <p data-svelte-h="svelte-1kajtfy">Helper function to lowercase a string and remove accents.</p> <p data-svelte-h="svelte-15y2bq4"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>string</code> - The lowercased text with accents removed.</p> <table data-svelte-h="svelte-usftcj"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to lowercase and remove accents from.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..whitespace_split" class="group"></a> <h2 class="relative group"><a id="tokenizerswhitespacesplittext--code-array--string--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerswhitespacesplittext--code-array--string--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~whitespace_split(text) ⇒ <code> Array. < string > </code></span></h2> <p data-svelte-h="svelte-1x0opp4">Split a string on whitespace.</p> <p data-svelte-h="svelte-j4end5"><strong>Kind</strong>: inner method of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Returns</strong>: <code>Array.<string></code> - The split string.</p> <table data-svelte-h="svelte-h36eua"><thead><tr><th>Param</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>text</td><td><code>string</code></td><td><p>The text to split.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..PretrainedTokenizerOptions" class="group"></a> <h2 class="relative group"><a id="tokenizerspretrainedtokenizeroptions--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspretrainedtokenizeroptions--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PretrainedTokenizerOptions : <code> Object </code></span></h2> <p data-svelte-h="svelte-3nuv1e">Additional tokenizer-specific properties.</p> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-1me0ii7"><thead><tr><th>Name</th><th>Type</th><th>Default</th><th>Description</th></tr></thead> <tbody><tr><td>[legacy]</td><td><code>boolean</code></td><td><code>false</code></td><td><p>Whether or not the <code>legacy</code> behavior of the tokenizer should be used.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BPENode" class="group"></a> <h2 class="relative group"><a id="tokenizersbpenode--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbpenode--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BPENode : <code> Object </code></span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-1ou5uv8"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>token</td><td><code>string</code></td><td><p>The token associated with the node</p></td> </tr><tr><td>bias</td><td><code>number</code></td><td><p>A positional bias for the node.</p></td> </tr><tr><td>[score]</td><td><code>number</code></td><td><p>The score of the node.</p></td> </tr><tr><td>[prev]</td><td><code>BPENode</code></td><td><p>The previous node in the linked list.</p></td> </tr><tr><td>[next]</td><td><code>BPENode</code></td><td><p>The next node in the linked list.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..SplitDelimiterBehavior" class="group"></a> <h2 class="relative group"><a id="tokenizerssplitdelimiterbehavior--code--removed--code--code--isolated--code--code--mergedwithprevious--code--code--mergedwithnext--code--code--contiguous--code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerssplitdelimiterbehavior--code--removed--code--code--isolated--code--code--mergedwithprevious--code--code--mergedwithnext--code--code--contiguous--code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~SplitDelimiterBehavior : <code> ’ removed ’ </code> | <code> ’ isolated ’ </code> | <code> ’ mergedWithPrevious ’ </code> | <code> ’ mergedWithNext ’ </code> | <code> ’ contiguous ’ </code></span></h2> <p data-svelte-h="svelte-ec8jqd"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a></p> <hr> <a id="module_tokenizers..PostProcessedOutput" class="group"></a> <h2 class="relative group"><a id="tokenizerspostprocessedoutput--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizerspostprocessedoutput--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~PostProcessedOutput : <code> Object </code></span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-eksz4k"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>tokens</td><td><code>Array.<string></code></td><td><p>List of token produced by the post-processor.</p></td> </tr><tr><td>[token_type_ids]</td><td><code>Array.<number></code></td><td><p>List of token type ids produced by the post-processor.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..EncodingSingle" class="group"></a> <h2 class="relative group"><a id="tokenizersencodingsingle--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersencodingsingle--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~EncodingSingle : <code> Object </code></span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-dv15ku"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>input_ids</td><td><code>Array.<number></code></td><td><p>List of token ids to be fed to a model.</p></td> </tr><tr><td>attention_mask</td><td><code>Array.<number></code></td><td><p>List of token type ids to be fed to a model</p></td> </tr><tr><td>[token_type_ids]</td><td><code>Array.<number></code></td><td><p>List of indices specifying which tokens should be attended to by the model</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..Message" class="group"></a> <h2 class="relative group"><a id="tokenizersmessage--code-object-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersmessage--code-object-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~Message : <code> Object </code></span></h2> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-sjyk18"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>role</td><td><code>string</code></td><td><p>The role of the message (e.g., "user" or "assistant" or "system").</p></td> </tr><tr><td>content</td><td><code>string</code></td><td><p>The content of the message.</p></td></tr></tbody></table> <hr> <a id="module_tokenizers..BatchEncoding" class="group"></a> <h2 class="relative group"><a id="tokenizersbatchencoding--code-array--number--code--code-array--array--number---code--code-tensor-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tokenizersbatchencoding--code-array--number--code--code-array--array--number---code--code-tensor-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>tokenizers~BatchEncoding : <code> Array < number > </code> | <code> Array < Array < number > > </code> | <code> Tensor </code></span></h2> <p data-svelte-h="svelte-1d7gg97">Holds the output of the tokenizer’s call function.</p> <p data-svelte-h="svelte-e78esz"><strong>Kind</strong>: inner typedef of <a href="#module_tokenizers"><code>tokenizers</code></a><br> <strong>Properties</strong></p> <table data-svelte-h="svelte-6ozwz5"><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead> <tbody><tr><td>input_ids</td><td><code>BatchEncodingItem</code></td><td><p>List of token ids to be fed to a model.</p></td> </tr><tr><td>attention_mask</td><td><code>BatchEncodingItem</code></td><td><p>List of indices specifying which tokens should be attended to by the model.</p></td> </tr><tr><td>[token_type_ids]</td><td><code>BatchEncodingItem</code></td><td><p>List of token type ids to be fed to a model.</p></td></tr></tbody></table> <hr> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers.js/blob/main/docs/source/api/tokenizers.md" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_kuyevp = { | |
| assets: "/docs/transformers.js/pr_1113/en", | |
| base: "/docs/transformers.js/pr_1113/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/transformers.js/pr_1113/en/_app/immutable/entry/start.88a6e140.js"), | |
| import("/docs/transformers.js/pr_1113/en/_app/immutable/entry/app.0003020d.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 14], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 487 kB
- Xet hash:
- 883755823a4afc7d21241afd5101240bcab3358cb987ae3cfcbfb0dfc3448815
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.