| { | |
| "add_bos_token": false, | |
| "add_prefix_space": false, | |
| "added_tokens_decoder": { | |
| "4197": { | |
| "content": "<|endoftext|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "4198": { | |
| "content": "<|startoftranscript|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "4199": { | |
| "content": "<|en|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "4200": { | |
| "content": "<|ru|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "4201": { | |
| "content": "<|translate|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "4202": { | |
| "content": "<|transcribe|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "4203": { | |
| "content": "<|startoflm|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "4204": { | |
| "content": "<|startofprev|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "4205": { | |
| "content": "<|nocaptions|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "4206": { | |
| "content": "<|notimestamps|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| } | |
| }, | |
| "additional_special_tokens": [ | |
| "<|endoftext|>", | |
| "<|startoftranscript|>", | |
| "<|en|>", | |
| "<|ru|>", | |
| "<|translate|>", | |
| "<|transcribe|>", | |
| "<|startoflm|>", | |
| "<|startofprev|>", | |
| "<|nocaptions|>", | |
| "<|notimestamps|>" | |
| ], | |
| "bos_token": "<|endoftext|>", | |
| "clean_up_tokenization_spaces": true, | |
| "eos_token": "<|endoftext|>", | |
| "errors": "replace", | |
| "model_max_length": 1024, | |
| "pad_token": "<|endoftext|>", | |
| "processor_class": "WhisperProcessor", | |
| "return_attention_mask": false, | |
| "tokenizer_class": "WhisperTokenizer", | |
| "unk_token": "<|endoftext|>" | |
| } |