Add tokenizer files
681f54a verified | | { |
| | "version": "1.0", |
| | "truncation": null, |
| | "padding": null, |
| | "added_tokens": [ |
| | { |
| | "id": 0, |
| | "content": "_", |
| | "single_word": false, |
| | "lstrip": false, |
| | "rstrip": false, |
| | "normalized": false, |
| | "special": true |
| | }, |
| | { |
| | "id": 1, |
| | "content": "[PAD]", |
| | "single_word": false, |
| | "lstrip": false, |
| | "rstrip": false, |
| | "normalized": false, |
| | "special": true |
| | }, |
| | { |
| | "id": 2, |
| | "content": "[UNK]", |
| | "single_word": false, |
| | "lstrip": false, |
| | "rstrip": false, |
| | "normalized": false, |
| | "special": true |
| | }, |
| | { |
| | "id": 3, |
| | "content": "[BOS]", |
| | "single_word": false, |
| | "lstrip": false, |
| | "rstrip": false, |
| | "normalized": false, |
| | "special": true |
| | }, |
| | { |
| | "id": 4, |
| | "content": "[EOS]", |
| | "single_word": false, |
| | "lstrip": false, |
| | "rstrip": false, |
| | "normalized": false, |
| | "special": true |
| | } |
| | ], |
| | "normalizer": { |
| | "type": "Sequence", |
| | "normalizers": [ |
| | { |
| | "type": "NFD" |
| | }, |
| | { |
| | "type": "StripAccents" |
| | }, |
| | { |
| | "type": "Lowercase" |
| | } |
| | ] |
| | }, |
| | "pre_tokenizer": { |
| | "type": "Whitespace" |
| | }, |
| | "post_processor": null, |
| | "decoder": null, |
| | "model": { |
| | "type": "WordLevel", |
| | "vocab": { |
| | "_": 0, |
| | "[PAD]": 1, |
| | "[UNK]": 2, |
| | "[BOS]": 3, |
| | "[EOS]": 4, |
| | "a": 5, |
| | "b": 6, |
| | "c": 7, |
| | "d": 8, |
| | "e": 9, |
| | "f": 10, |
| | "g": 11, |
| | "h": 12, |
| | "i": 13, |
| | "j": 14, |
| | "k": 15, |
| | "l": 16, |
| | "m": 17, |
| | "n": 18, |
| | "o": 19, |
| | "p": 20, |
| | "q": 21, |
| | "r": 22, |
| | "s": 23, |
| | "t": 24, |
| | "u": 25, |
| | "v": 26, |
| | "w": 27, |
| | "x": 28, |
| | "y": 29, |
| | "z": 30, |
| | "0": 31, |
| | "1": 32, |
| | "2": 33, |
| | "3": 34, |
| | "4": 35, |
| | "5": 36, |
| | "6": 37, |
| | "7": 38, |
| | "8": 39, |
| | "9": 40, |
| | "10": 41, |
| | "11": 42, |
| | "12": 43, |
| | "13": 44, |
| | "14": 45, |
| | "15": 46, |
| | "16": 47, |
| | "17": 48, |
| | "18": 49, |
| | "19": 50, |
| | "20": 51, |
| | "21": 52, |
| | "22": 53, |
| | "23": 54, |
| | "24": 55, |
| | "25": 56, |
| | "26": 57, |
| | "27": 58, |
| | "28": 59, |
| | "29": 60, |
| | "30": 61, |
| | "31": 62, |
| | "32": 63, |
| | "33": 64, |
| | "34": 65, |
| | "35": 66, |
| | "36": 67, |
| | "37": 68, |
| | "38": 69, |
| | "39": 70, |
| | "40": 71, |
| | "41": 72, |
| | "42": 73, |
| | "43": 74, |
| | "44": 75, |
| | "45": 76, |
| | "46": 77, |
| | "47": 78, |
| | "48": 79, |
| | "49": 80, |
| | "50": 81, |
| | "51": 82, |
| | "52": 83, |
| | "53": 84, |
| | "54": 85, |
| | "55": 86, |
| | "56": 87, |
| | "57": 88, |
| | "58": 89, |
| | "59": 90, |
| | "60": 91, |
| | "61": 92, |
| | "62": 93, |
| | "63": 94, |
| | "64": 95, |
| | "65": 96, |
| | "66": 97, |
| | "67": 98, |
| | "68": 99, |
| | "69": 100, |
| | "70": 101, |
| | "71": 102, |
| | "72": 103, |
| | "73": 104, |
| | "74": 105, |
| | "75": 106, |
| | "76": 107, |
| | "77": 108, |
| | "78": 109, |
| | "79": 110, |
| | "80": 111, |
| | "81": 112, |
| | "82": 113, |
| | "83": 114, |
| | "84": 115, |
| | "85": 116, |
| | "86": 117, |
| | "87": 118, |
| | "88": 119, |
| | "89": 120, |
| | "90": 121, |
| | "91": 122, |
| | "92": 123, |
| | "93": 124, |
| | "94": 125, |
| | "95": 126, |
| | "96": 127, |
| | "97": 128, |
| | "98": 129, |
| | "99": 130, |
| | "100": 131, |
| | "101": 132, |
| | "102": 133, |
| | "103": 134, |
| | "104": 135, |
| | "105": 136, |
| | "106": 137, |
| | "107": 138, |
| | "108": 139, |
| | "109": 140, |
| | "110": 141, |
| | "111": 142, |
| | "112": 143, |
| | "113": 144, |
| | "114": 145, |
| | "115": 146, |
| | "116": 147, |
| | "117": 148, |
| | "118": 149, |
| | "119": 150, |
| | "120": 151, |
| | "121": 152, |
| | "122": 153, |
| | "123": 154, |
| | "124": 155, |
| | "125": 156, |
| | "126": 157, |
| | "127": 158, |
| | "128": 159, |
| | "129": 160, |
| | "130": 161, |
| | "131": 162, |
| | "132": 163, |
| | "133": 164, |
| | "134": 165, |
| | "135": 166, |
| | "136": 167, |
| | "137": 168, |
| | "138": 169, |
| | "139": 170, |
| | "140": 171, |
| | "141": 172, |
| | "142": 173, |
| | "143": 174, |
| | "144": 175, |
| | "145": 176, |
| | "146": 177, |
| | "147": 178, |
| | "148": 179, |
| | "149": 180, |
| | "150": 181, |
| | "|": 182, |
| | "?": 183 |
| | }, |
| | "unk_token": "[UNK]" |
| | } |
| | } |