{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "en", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 6, "content": "es", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 7, "content": "fr", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 8, "content": "zh", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 9, "content": "other", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 10, "content": "xinan", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 11, "content": "ja", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 12, "content": "ko", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 13, "content": "ru", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 14, "content": "mandarin", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 15, "content": "min", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 16, "content": "wu", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 17, "content": "xiang", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 18, "content": "yue", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 19, "content": "north", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 20, "content": "de", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 21, "content": "pt", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 22, "content": "ab", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 23, "content": "af", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 24, "content": "am", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 25, "content": "ar", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 26, "content": "as", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 27, "content": "az", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 28, "content": "ba", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 29, "content": "be", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 30, "content": "bg", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 31, "content": "bn", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 32, "content": "br", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 33, "content": "ca", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 34, "content": "cs", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 35, "content": "cy", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 36, "content": "da", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 37, "content": "el", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 38, "content": "eo", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 39, "content": "et", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 40, "content": "eu", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 41, "content": "fa", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 42, "content": "gl", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 43, "content": "gn", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 44, "content": "ha", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 45, "content": "iw", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 46, "content": "hi", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 47, "content": "ht", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 48, "content": "hu", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 49, "content": "hy", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 50, "content": "ia", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 51, "content": "id", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 52, "content": "is", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 53, "content": "it", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 54, "content": "ka", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 55, "content": "kk", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 56, "content": "lo", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 57, "content": "lt", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 58, "content": "lv", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 59, "content": "mk", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 60, "content": "ml", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 61, "content": "mn", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 62, "content": "mr", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 63, "content": "mt", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 64, "content": "no", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 65, "content": "ne", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 66, "content": "nl", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 67, "content": "nn", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 68, "content": "oc", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 69, "content": "pa", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 70, "content": "pl", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 71, "content": "ps", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 72, "content": "ro", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 73, "content": "sd", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 74, "content": "sk", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 75, "content": "sl", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 76, "content": "sq", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 77, "content": "sr", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 78, "content": "sv", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 79, "content": "sw", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 80, "content": "ta", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 81, "content": "te", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 82, "content": "tg", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 83, "content": "th", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 84, "content": "tk", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 85, "content": "tr", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 86, "content": "tt", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 87, "content": "uk", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 88, "content": "ur", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 89, "content": "uz", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 90, "content": "vi", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 91, "content": "yi", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 92, "content": "yo", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 93, "content": "kn", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 94, "content": "so", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 95, "content": "ceb", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 96, "content": "jw", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 97, "content": "mi", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 98, "content": "hr", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 99, "content": "bs", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 100, "content": "tl", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 101, "content": "ln", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 102, "content": "my", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 103, "content": "fi", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 104, "content": "sn", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 105, "content": "lb", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 106, "content": "gu", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 107, "content": "ms", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 108, "content": "km", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 109, "content": "bo", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 110, "content": "fo", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 111, "content": "gv", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 112, "content": "haw", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 113, "content": "la", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 114, "content": "mg", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 115, "content": "sa", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 116, "content": "sco", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 117, "content": "si", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 118, "content": "su", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 119, "content": "war", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false } ], "normalizer": null, "pre_tokenizer": { "type": "WhitespaceSplit" }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "en": 5, "es": 6, "fr": 7, "zh": 8, "other": 9, "xinan": 10, "ja": 11, "ko": 12, "ru": 13, "mandarin": 14, "min": 15, "wu": 16, "xiang": 17, "yue": 18, "north": 19, "de": 20, "pt": 21, "ab": 22, "af": 23, "am": 24, "ar": 25, "as": 26, "az": 27, "ba": 28, "be": 29, "bg": 30, "bn": 31, "br": 32, "ca": 33, "cs": 34, "cy": 35, "da": 36, "el": 37, "eo": 38, "et": 39, "eu": 40, "fa": 41, "gl": 42, "gn": 43, "ha": 44, "iw": 45, "hi": 46, "ht": 47, "hu": 48, "hy": 49, "ia": 50, "id": 51, "is": 52, "it": 53, "ka": 54, "kk": 55, "lo": 56, "lt": 57, "lv": 58, "mk": 59, "ml": 60, "mn": 61, "mr": 62, "mt": 63, "no": 64, "ne": 65, "nl": 66, "nn": 67, "oc": 68, "pa": 69, "pl": 70, "ps": 71, "ro": 72, "sd": 73, "sk": 74, "sl": 75, "sq": 76, "sr": 77, "sv": 78, "sw": 79, "ta": 80, "te": 81, "tg": 82, "th": 83, "tk": 84, "tr": 85, "tt": 86, "uk": 87, "ur": 88, "uz": 89, "vi": 90, "yi": 91, "yo": 92, "kn": 93, "so": 94, "ceb": 95, "jw": 96, "mi": 97, "hr": 98, "bs": 99, "tl": 100, "ln": 101, "my": 102, "fi": 103, "sn": 104, "lb": 105, "gu": 106, "ms": 107, "km": 108, "bo": 109, "fo": 110, "gv": 111, "haw": 112, "la": 113, "mg": 114, "sa": 115, "sco": 116, "si": 117, "su": 118, "war": 119 }, "unk_token": "" } }