File size: 1,768 Bytes
370d26b 73ab749 f7da8bd 370d26b dbc5dc8 54b8211 370d26b f7da8bd 54b8211 f7da8bd 54b8211 370d26b 54b8211 370d26b 562d414 370d26b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | name: whisper_bpe_tokenizer
# Settings for a tokenizer preprocessor. This file holds data only; how each
# key is consumed depends on the loader, which is not visible here.
config_type: preprocessor

# --- Truncation and padding ---
# Truncation and padding are both configured for the right-hand side.
truncation_side: right
padding_side: right
# NOTE(review): stride and pad_to_multiple_of are 0 — presumably "disabled";
# confirm against the loader.
stride: 0
pad_to_multiple_of: 0
pad_token_type_id: 0

# --- Named special tokens ---
# eos_token, unk_token and pad_token all map to the same `<|endoftext|>` string.
# NOTE(review): `<sep>`, `<cls>` and `<mask>` do not appear in
# additional_special_tokens below — verify they exist in the vocabulary or are
# simply unused by this tokenizer.
bos_token: <|startoftranscript|>
eos_token: <|endoftext|>
unk_token: <|endoftext|>
sep_token: <sep>
pad_token: <|endoftext|>
cls_token: <cls>
mask_token: <mask>

# --- Additional special tokens ---
# NOTE(review): order may be significant if token ids are assigned by list
# position — confirm before reordering or removing any entry.
additional_special_tokens:
- <|endoftext|>
# NOTE(review): `<|endoftext|>` is listed twice in a row. Confirm whether the
# duplicate is intentional (e.g. to keep ids aligned) or an export artifact
# before removing it.
- <|endoftext|>
- <|startoftranscript|>
# The next 99 entries have the form <|xx|> and appear to be per-language tags
# (assumption based on their shape; the loader defines their actual meaning).
- <|en|>
- <|zh|>
- <|de|>
- <|es|>
- <|ru|>
- <|ko|>
- <|fr|>
- <|ja|>
- <|pt|>
- <|tr|>
- <|pl|>
- <|ca|>
- <|nl|>
- <|ar|>
- <|sv|>
- <|it|>
- <|id|>
- <|hi|>
- <|fi|>
- <|vi|>
- <|he|>
- <|uk|>
- <|el|>
- <|ms|>
- <|cs|>
- <|ro|>
- <|da|>
- <|hu|>
- <|ta|>
- <|no|>
- <|th|>
- <|ur|>
- <|hr|>
- <|bg|>
- <|lt|>
- <|la|>
- <|mi|>
- <|ml|>
- <|cy|>
- <|sk|>
- <|te|>
- <|fa|>
- <|lv|>
- <|bn|>
- <|sr|>
- <|az|>
- <|sl|>
- <|kn|>
- <|et|>
- <|mk|>
- <|br|>
- <|eu|>
- <|is|>
- <|hy|>
- <|ne|>
- <|mn|>
- <|bs|>
- <|kk|>
- <|sq|>
- <|sw|>
- <|gl|>
- <|mr|>
- <|pa|>
- <|si|>
- <|km|>
- <|sn|>
- <|yo|>
- <|so|>
- <|af|>
- <|oc|>
- <|ka|>
- <|be|>
- <|tg|>
- <|sd|>
- <|gu|>
- <|am|>
- <|yi|>
- <|lo|>
- <|uz|>
- <|fo|>
- <|ht|>
- <|ps|>
- <|tk|>
- <|nn|>
- <|mt|>
- <|sa|>
- <|lb|>
- <|my|>
- <|bo|>
- <|tl|>
- <|mg|>
- <|as|>
- <|tt|>
- <|haw|>
- <|ln|>
- <|ha|>
- <|ba|>
- <|jw|>
- <|su|>
# The remaining entries are not language-shaped; presumably task/control
# markers. Three of them are referenced again by the *_token keys further down.
- <|translate|>
- <|transcribe|>
- <|startoflm|>
- <|startofprev|>
- <|nocaptions|>
- <|notimestamps|>

# --- Subword and vocabulary options ---
# Both the subword prefix and the end-of-word suffix are empty strings.
continuing_subword_prefix: ''
end_of_word_suffix: ''
fuse_unk: false
# NOTE(review): vocab_size, min_frequency, limit_alphabet, initial_alphabet and
# show_progress look like tokenizer-training options — confirm whether they
# have any effect when loading an already-trained tokenizer.
vocab_size: 50364
min_frequency: 2
limit_alphabet: 1000
initial_alphabet: []
show_progress: true

# --- Task tokens ---
# These three values repeat strings already listed in additional_special_tokens.
translate_token: <|translate|>
transcribe_token: <|transcribe|>
notimestamps_token: <|notimestamps|>

# --- Encoding options ---
# Both add_* flags are disabled.
add_prefix_space: false
add_bos_token: false
model_max_length: 1024
# NOTE(review): `language` is spelled out ("persian") while the list above uses
# code-style tags; presumably the loader resolves it to `<|fa|>` — confirm.
language: persian
predict_timestamps: false
|