{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "_", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[BOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFD" }, { "type": "StripAccents" }, { "type": "Lowercase" } ] }, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "_": 0, "[PAD]": 1, "[UNK]": 2, "[BOS]": 3, "[EOS]": 4, "a": 5, "b": 6, "c": 7, "d": 8, "e": 9, "f": 10, "g": 11, "h": 12, "i": 13, "j": 14, "k": 15, "l": 16, "m": 17, "n": 18, "o": 19, "p": 20, "q": 21, "r": 22, "s": 23, "t": 24, "u": 25, "v": 26, "w": 27, "x": 28, "y": 29, "z": 30, "0": 31, "1": 32, "2": 33, "3": 34, "4": 35, "5": 36, "6": 37, "7": 38, "8": 39, "9": 40, "10": 41, "11": 42, "12": 43, "13": 44, "14": 45, "15": 46, "16": 47, "17": 48, "18": 49, "19": 50, "20": 51, "21": 52, "22": 53, "23": 54, "24": 55, "25": 56, "26": 57, "27": 58, "28": 59, "29": 60, "30": 61, "31": 62, "32": 63, "33": 64, "34": 65, "35": 66, "36": 67, "37": 68, "38": 69, "39": 70, "40": 71, "41": 72, "42": 73, "43": 74, "44": 75, "45": 76, "46": 77, "47": 78, "48": 79, "49": 80, "50": 81, "51": 82, "52": 83, "53": 84, "54": 85, "55": 86, "56": 87, "57": 88, "58": 89, "59": 90, "60": 91, "61": 92, "62": 93, "63": 94, "64": 95, "65": 96, "66": 97, "67": 98, "68": 99, "69": 100, "70": 101, "71": 102, "72": 103, "73": 104, "74": 105, "75": 106, "76": 107, "77": 108, "78": 109, "79": 110, "80": 111, "81": 112, "82": 113, "83": 114, "84": 115, "85": 116, "86": 117, "87": 118, "88": 119, "89": 120, "90": 121, "91": 122, "92": 123, "93": 124, "94": 125, "95": 126, "96": 127, "97": 128, "98": 129, "99": 130, "100": 131, "101": 132, "102": 133, "103": 134, "104": 135, "105": 136, "106": 137, "107": 138, "108": 139, "109": 140, "110": 141, "111": 142, "112": 143, "113": 144, "114": 145, "115": 146, "116": 147, "117": 148, "118": 149, "119": 150, "120": 151, "121": 152, "122": 153, "123": 154, "124": 155, "125": 156, "126": 157, "127": 158, "128": 159, "129": 160, "130": 161, "131": 162, "132": 163, "133": 164, "134": 165, "135": 166, "136": 167, "137": 168, "138": 169, "139": 170, "140": 171, "141": 172, "142": 173, "143": 174, "144": 175, "145": 176, "146": 177, "147": 178, "148": 179, "149": 180, "150": 181, "|": 182, "?": 183 }, "unk_token": "[UNK]" } }