{
  "added_tokens_decoder": {
    "15": {
      "content": "'y",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "17": {
      "content": "kp",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "18": {
      "content": "gb",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "29": {
      "content": "ny",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "33": {
      "content": "'d",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "54": {
      "content": "'b",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "64": {
      "content": "...",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "65": {
      "content": "[UNK]",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "66": {
      "content": "[PAD]",
      "lstrip": true,
      "normalized": false,
      "rstrip": true,
      "single_word": false,
      "special": false
    },
    "67": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "68": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": false,
  "do_lower_case": false,
  "eos_token": "</s>",
  "extra_special_tokens": {},
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "[PAD]",
  "replace_word_delimiter_char": " ",
  "target_lang": "ukv",
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
  "unk_token": "[UNK]",
  "word_delimiter_token": "|"
}