| name: whisper_bpe_tokenizer | |
| config_type: preprocessor | |
| truncation_side: right | |
| padding_side: right | |
| stride: 0 | |
| pad_to_multiple_of: 0 | |
| pad_token_type_id: 0 | |
| bos_token: <|startoftranscript|> | |
| eos_token: <|endoftext|> | |
| unk_token: <|endoftext|> | |
| sep_token: <sep> | |
| pad_token: <|endoftext|> | |
| cls_token: <cls> | |
| mask_token: <mask> | |
| additional_special_tokens: | |
| - <|endoftext|> | |
| - <|endoftext|> | |
| - <|startoftranscript|> | |
| - <|en|> | |
| - <|zh|> | |
| - <|de|> | |
| - <|es|> | |
| - <|ru|> | |
| - <|ko|> | |
| - <|fr|> | |
| - <|ja|> | |
| - <|pt|> | |
| - <|tr|> | |
| - <|pl|> | |
| - <|ca|> | |
| - <|nl|> | |
| - <|ar|> | |
| - <|sv|> | |
| - <|it|> | |
| - <|id|> | |
| - <|hi|> | |
| - <|fi|> | |
| - <|vi|> | |
| - <|he|> | |
| - <|uk|> | |
| - <|el|> | |
| - <|ms|> | |
| - <|cs|> | |
| - <|ro|> | |
| - <|da|> | |
| - <|hu|> | |
| - <|ta|> | |
| - <|no|> | |
| - <|th|> | |
| - <|ur|> | |
| - <|hr|> | |
| - <|bg|> | |
| - <|lt|> | |
| - <|la|> | |
| - <|mi|> | |
| - <|ml|> | |
| - <|cy|> | |
| - <|sk|> | |
| - <|te|> | |
| - <|fa|> | |
| - <|lv|> | |
| - <|bn|> | |
| - <|sr|> | |
| - <|az|> | |
| - <|sl|> | |
| - <|kn|> | |
| - <|et|> | |
| - <|mk|> | |
| - <|br|> | |
| - <|eu|> | |
| - <|is|> | |
| - <|hy|> | |
| - <|ne|> | |
| - <|mn|> | |
| - <|bs|> | |
| - <|kk|> | |
| - <|sq|> | |
| - <|sw|> | |
| - <|gl|> | |
| - <|mr|> | |
| - <|pa|> | |
| - <|si|> | |
| - <|km|> | |
| - <|sn|> | |
| - <|yo|> | |
| - <|so|> | |
| - <|af|> | |
| - <|oc|> | |
| - <|ka|> | |
| - <|be|> | |
| - <|tg|> | |
| - <|sd|> | |
| - <|gu|> | |
| - <|am|> | |
| - <|yi|> | |
| - <|lo|> | |
| - <|uz|> | |
| - <|fo|> | |
| - <|ht|> | |
| - <|ps|> | |
| - <|tk|> | |
| - <|nn|> | |
| - <|mt|> | |
| - <|sa|> | |
| - <|lb|> | |
| - <|my|> | |
| - <|bo|> | |
| - <|tl|> | |
| - <|mg|> | |
| - <|as|> | |
| - <|tt|> | |
| - <|haw|> | |
| - <|ln|> | |
| - <|ha|> | |
| - <|ba|> | |
| - <|jw|> | |
| - <|su|> | |
| - <|translate|> | |
| - <|transcribe|> | |
| - <|startoflm|> | |
| - <|startofprev|> | |
| - <|nocaptions|> | |
| - <|notimestamps|> | |
| continuing_subword_prefix: '' | |
| end_of_word_suffix: '' | |
| fuse_unk: false | |
| vocab_size: 50364 | |
| min_frequency: 2 | |
| limit_alphabet: 1000 | |
| initial_alphabet: [] | |
| show_progress: true | |
| translate_token: <|translate|> | |
| transcribe_token: <|transcribe|> | |
| notimestamps_token: <|notimestamps|> | |
| add_prefix_space: false | |
| add_bos_token: false | |
| model_max_length: 1024 | |
| language: persian | |
| predict_timestamps: false | |