FireRedLID-vllm / tokenizer.json
PatchyTisa's picture
Add files using upload-large-folder tool
1d67dac verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<blank>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<sos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "<eos>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 5,
"content": "en",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 6,
"content": "es",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 7,
"content": "fr",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 8,
"content": "zh",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 9,
"content": "other",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 10,
"content": "xinan",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 11,
"content": "ja",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 12,
"content": "ko",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 13,
"content": "ru",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 14,
"content": "mandarin",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 15,
"content": "min",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 16,
"content": "wu",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 17,
"content": "xiang",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 18,
"content": "yue",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 19,
"content": "north",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 20,
"content": "de",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 21,
"content": "pt",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 22,
"content": "ab",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 23,
"content": "af",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 24,
"content": "am",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 25,
"content": "ar",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 26,
"content": "as",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 27,
"content": "az",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 28,
"content": "ba",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 29,
"content": "be",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 30,
"content": "bg",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 31,
"content": "bn",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 32,
"content": "br",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 33,
"content": "ca",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 34,
"content": "cs",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 35,
"content": "cy",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 36,
"content": "da",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 37,
"content": "el",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 38,
"content": "eo",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 39,
"content": "et",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 40,
"content": "eu",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 41,
"content": "fa",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 42,
"content": "gl",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 43,
"content": "gn",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 44,
"content": "ha",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 45,
"content": "iw",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 46,
"content": "hi",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 47,
"content": "ht",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 48,
"content": "hu",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 49,
"content": "hy",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 50,
"content": "ia",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 51,
"content": "id",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 52,
"content": "is",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 53,
"content": "it",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 54,
"content": "ka",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 55,
"content": "kk",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 56,
"content": "lo",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 57,
"content": "lt",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 58,
"content": "lv",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 59,
"content": "mk",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 60,
"content": "ml",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 61,
"content": "mn",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 62,
"content": "mr",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 63,
"content": "mt",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 64,
"content": "no",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 65,
"content": "ne",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 66,
"content": "nl",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 67,
"content": "nn",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 68,
"content": "oc",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 69,
"content": "pa",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 70,
"content": "pl",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 71,
"content": "ps",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 72,
"content": "ro",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 73,
"content": "sd",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 74,
"content": "sk",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 75,
"content": "sl",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 76,
"content": "sq",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 77,
"content": "sr",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 78,
"content": "sv",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 79,
"content": "sw",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 80,
"content": "ta",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 81,
"content": "te",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 82,
"content": "tg",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 83,
"content": "th",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 84,
"content": "tk",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 85,
"content": "tr",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 86,
"content": "tt",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 87,
"content": "uk",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 88,
"content": "ur",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 89,
"content": "uz",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 90,
"content": "vi",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 91,
"content": "yi",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 92,
"content": "yo",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 93,
"content": "kn",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 94,
"content": "so",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 95,
"content": "ceb",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 96,
"content": "jw",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 97,
"content": "mi",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 98,
"content": "hr",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 99,
"content": "bs",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 100,
"content": "tl",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 101,
"content": "ln",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 102,
"content": "my",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 103,
"content": "fi",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 104,
"content": "sn",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 105,
"content": "lb",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 106,
"content": "gu",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 107,
"content": "ms",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 108,
"content": "km",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 109,
"content": "bo",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 110,
"content": "fo",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 111,
"content": "gv",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 112,
"content": "haw",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 113,
"content": "la",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 114,
"content": "mg",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 115,
"content": "sa",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 116,
"content": "sco",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 117,
"content": "si",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 118,
"content": "su",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
},
{
"id": 119,
"content": "war",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": false
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "WhitespaceSplit"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "WordLevel",
"vocab": {
"<blank>": 0,
"<unk>": 1,
"<pad>": 2,
"<sos>": 3,
"<eos>": 4,
"en": 5,
"es": 6,
"fr": 7,
"zh": 8,
"other": 9,
"xinan": 10,
"ja": 11,
"ko": 12,
"ru": 13,
"mandarin": 14,
"min": 15,
"wu": 16,
"xiang": 17,
"yue": 18,
"north": 19,
"de": 20,
"pt": 21,
"ab": 22,
"af": 23,
"am": 24,
"ar": 25,
"as": 26,
"az": 27,
"ba": 28,
"be": 29,
"bg": 30,
"bn": 31,
"br": 32,
"ca": 33,
"cs": 34,
"cy": 35,
"da": 36,
"el": 37,
"eo": 38,
"et": 39,
"eu": 40,
"fa": 41,
"gl": 42,
"gn": 43,
"ha": 44,
"iw": 45,
"hi": 46,
"ht": 47,
"hu": 48,
"hy": 49,
"ia": 50,
"id": 51,
"is": 52,
"it": 53,
"ka": 54,
"kk": 55,
"lo": 56,
"lt": 57,
"lv": 58,
"mk": 59,
"ml": 60,
"mn": 61,
"mr": 62,
"mt": 63,
"no": 64,
"ne": 65,
"nl": 66,
"nn": 67,
"oc": 68,
"pa": 69,
"pl": 70,
"ps": 71,
"ro": 72,
"sd": 73,
"sk": 74,
"sl": 75,
"sq": 76,
"sr": 77,
"sv": 78,
"sw": 79,
"ta": 80,
"te": 81,
"tg": 82,
"th": 83,
"tk": 84,
"tr": 85,
"tt": 86,
"uk": 87,
"ur": 88,
"uz": 89,
"vi": 90,
"yi": 91,
"yo": 92,
"kn": 93,
"so": 94,
"ceb": 95,
"jw": 96,
"mi": 97,
"hr": 98,
"bs": 99,
"tl": 100,
"ln": 101,
"my": 102,
"fi": 103,
"sn": 104,
"lb": 105,
"gu": 106,
"ms": 107,
"km": 108,
"bo": 109,
"fo": 110,
"gv": 111,
"haw": 112,
"la": 113,
"mg": 114,
"sa": 115,
"sco": 116,
"si": 117,
"su": 118,
"war": 119
},
"unk_token": "<unk>"
}
}