| name: bpe_tokenizer | |
| config_type: preprocessor | |
| truncation_side: right | |
| padding_side: right | |
| stride: 0 | |
| pad_to_multiple_of: 0 | |
| pad_token_type_id: 0 | |
| bos_token: <s> | |
| eos_token: </s> | |
| unk_token: <unk> | |
| sep_token: <sep> | |
| pad_token: <pad> | |
| cls_token: <cls> | |
| mask_token: <mask> | |
| continuing_subword_prefix: '' | |
| end_of_word_suffix: '' | |
| fuse_unk: false | |
| vocab_size: 42000 | |
| min_frequency: 2 | |
| limit_alphabet: 1000 | |
| initial_alphabet: [] | |
| show_progress: true | |