| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "<blank>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 1, | |
| "content": "<unk>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 2, | |
| "content": "<pad>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 3, | |
| "content": "<sos>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 4, | |
| "content": "<eos>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 5, | |
| "content": "en", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 6, | |
| "content": "es", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 7, | |
| "content": "fr", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 8, | |
| "content": "zh", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 9, | |
| "content": "other", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 10, | |
| "content": "xinan", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 11, | |
| "content": "ja", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 12, | |
| "content": "ko", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 13, | |
| "content": "ru", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 14, | |
| "content": "mandarin", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 15, | |
| "content": "min", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 16, | |
| "content": "wu", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 17, | |
| "content": "xiang", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 18, | |
| "content": "yue", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 19, | |
| "content": "north", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 20, | |
| "content": "de", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 21, | |
| "content": "pt", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 22, | |
| "content": "ab", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 23, | |
| "content": "af", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 24, | |
| "content": "am", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 25, | |
| "content": "ar", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 26, | |
| "content": "as", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 27, | |
| "content": "az", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 28, | |
| "content": "ba", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 29, | |
| "content": "be", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 30, | |
| "content": "bg", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 31, | |
| "content": "bn", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 32, | |
| "content": "br", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 33, | |
| "content": "ca", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 34, | |
| "content": "cs", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 35, | |
| "content": "cy", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 36, | |
| "content": "da", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 37, | |
| "content": "el", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 38, | |
| "content": "eo", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 39, | |
| "content": "et", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 40, | |
| "content": "eu", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 41, | |
| "content": "fa", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 42, | |
| "content": "gl", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 43, | |
| "content": "gn", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 44, | |
| "content": "ha", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 45, | |
| "content": "iw", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 46, | |
| "content": "hi", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 47, | |
| "content": "ht", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 48, | |
| "content": "hu", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 49, | |
| "content": "hy", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 50, | |
| "content": "ia", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 51, | |
| "content": "id", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 52, | |
| "content": "is", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 53, | |
| "content": "it", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 54, | |
| "content": "ka", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 55, | |
| "content": "kk", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 56, | |
| "content": "lo", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 57, | |
| "content": "lt", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 58, | |
| "content": "lv", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 59, | |
| "content": "mk", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 60, | |
| "content": "ml", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 61, | |
| "content": "mn", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 62, | |
| "content": "mr", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 63, | |
| "content": "mt", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 64, | |
| "content": "no", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 65, | |
| "content": "ne", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 66, | |
| "content": "nl", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 67, | |
| "content": "nn", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 68, | |
| "content": "oc", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 69, | |
| "content": "pa", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 70, | |
| "content": "pl", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 71, | |
| "content": "ps", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 72, | |
| "content": "ro", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 73, | |
| "content": "sd", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 74, | |
| "content": "sk", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 75, | |
| "content": "sl", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 76, | |
| "content": "sq", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 77, | |
| "content": "sr", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 78, | |
| "content": "sv", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 79, | |
| "content": "sw", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 80, | |
| "content": "ta", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 81, | |
| "content": "te", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 82, | |
| "content": "tg", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 83, | |
| "content": "th", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 84, | |
| "content": "tk", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 85, | |
| "content": "tr", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 86, | |
| "content": "tt", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 87, | |
| "content": "uk", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 88, | |
| "content": "ur", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 89, | |
| "content": "uz", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 90, | |
| "content": "vi", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 91, | |
| "content": "yi", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 92, | |
| "content": "yo", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 93, | |
| "content": "kn", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 94, | |
| "content": "so", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 95, | |
| "content": "ceb", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 96, | |
| "content": "jw", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 97, | |
| "content": "mi", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 98, | |
| "content": "hr", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 99, | |
| "content": "bs", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 100, | |
| "content": "tl", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 101, | |
| "content": "ln", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 102, | |
| "content": "my", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 103, | |
| "content": "fi", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 104, | |
| "content": "sn", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 105, | |
| "content": "lb", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 106, | |
| "content": "gu", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 107, | |
| "content": "ms", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 108, | |
| "content": "km", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 109, | |
| "content": "bo", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 110, | |
| "content": "fo", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 111, | |
| "content": "gv", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 112, | |
| "content": "haw", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 113, | |
| "content": "la", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 114, | |
| "content": "mg", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 115, | |
| "content": "sa", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 116, | |
| "content": "sco", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 117, | |
| "content": "si", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 118, | |
| "content": "su", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| }, | |
| { | |
| "id": 119, | |
| "content": "war", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": false | |
| } | |
| ], | |
| "normalizer": null, | |
| "pre_tokenizer": { | |
| "type": "WhitespaceSplit" | |
| }, | |
| "post_processor": null, | |
| "decoder": null, | |
| "model": { | |
| "type": "WordLevel", | |
| "vocab": { | |
| "<blank>": 0, | |
| "<unk>": 1, | |
| "<pad>": 2, | |
| "<sos>": 3, | |
| "<eos>": 4, | |
| "en": 5, | |
| "es": 6, | |
| "fr": 7, | |
| "zh": 8, | |
| "other": 9, | |
| "xinan": 10, | |
| "ja": 11, | |
| "ko": 12, | |
| "ru": 13, | |
| "mandarin": 14, | |
| "min": 15, | |
| "wu": 16, | |
| "xiang": 17, | |
| "yue": 18, | |
| "north": 19, | |
| "de": 20, | |
| "pt": 21, | |
| "ab": 22, | |
| "af": 23, | |
| "am": 24, | |
| "ar": 25, | |
| "as": 26, | |
| "az": 27, | |
| "ba": 28, | |
| "be": 29, | |
| "bg": 30, | |
| "bn": 31, | |
| "br": 32, | |
| "ca": 33, | |
| "cs": 34, | |
| "cy": 35, | |
| "da": 36, | |
| "el": 37, | |
| "eo": 38, | |
| "et": 39, | |
| "eu": 40, | |
| "fa": 41, | |
| "gl": 42, | |
| "gn": 43, | |
| "ha": 44, | |
| "iw": 45, | |
| "hi": 46, | |
| "ht": 47, | |
| "hu": 48, | |
| "hy": 49, | |
| "ia": 50, | |
| "id": 51, | |
| "is": 52, | |
| "it": 53, | |
| "ka": 54, | |
| "kk": 55, | |
| "lo": 56, | |
| "lt": 57, | |
| "lv": 58, | |
| "mk": 59, | |
| "ml": 60, | |
| "mn": 61, | |
| "mr": 62, | |
| "mt": 63, | |
| "no": 64, | |
| "ne": 65, | |
| "nl": 66, | |
| "nn": 67, | |
| "oc": 68, | |
| "pa": 69, | |
| "pl": 70, | |
| "ps": 71, | |
| "ro": 72, | |
| "sd": 73, | |
| "sk": 74, | |
| "sl": 75, | |
| "sq": 76, | |
| "sr": 77, | |
| "sv": 78, | |
| "sw": 79, | |
| "ta": 80, | |
| "te": 81, | |
| "tg": 82, | |
| "th": 83, | |
| "tk": 84, | |
| "tr": 85, | |
| "tt": 86, | |
| "uk": 87, | |
| "ur": 88, | |
| "uz": 89, | |
| "vi": 90, | |
| "yi": 91, | |
| "yo": 92, | |
| "kn": 93, | |
| "so": 94, | |
| "ceb": 95, | |
| "jw": 96, | |
| "mi": 97, | |
| "hr": 98, | |
| "bs": 99, | |
| "tl": 100, | |
| "ln": 101, | |
| "my": 102, | |
| "fi": 103, | |
| "sn": 104, | |
| "lb": 105, | |
| "gu": 106, | |
| "ms": 107, | |
| "km": 108, | |
| "bo": 109, | |
| "fo": 110, | |
| "gv": 111, | |
| "haw": 112, | |
| "la": 113, | |
| "mg": 114, | |
| "sa": 115, | |
| "sco": 116, | |
| "si": 117, | |
| "su": 118, | |
| "war": 119 | |
| }, | |
| "unk_token": "<unk>" | |
| } | |
| } |