Add tokenizer files
4652187 verified | | { |
| | "version": "1.0", |
| | "truncation": null, |
| | "padding": null, |
| | "added_tokens": [ |
| | { |
| | "id": 0, |
| | "content": "_", |
| | "single_word": false, |
| | "lstrip": false, |
| | "rstrip": false, |
| | "normalized": false, |
| | "special": true |
| | }, |
| | { |
| | "id": 1, |
| | "content": "[PAD]", |
| | "single_word": false, |
| | "lstrip": false, |
| | "rstrip": false, |
| | "normalized": false, |
| | "special": true |
| | }, |
| | { |
| | "id": 2, |
| | "content": "[UNK]", |
| | "single_word": false, |
| | "lstrip": false, |
| | "rstrip": false, |
| | "normalized": false, |
| | "special": true |
| | }, |
| | { |
| | "id": 3, |
| | "content": "[BOS]", |
| | "single_word": false, |
| | "lstrip": false, |
| | "rstrip": false, |
| | "normalized": false, |
| | "special": true |
| | }, |
| | { |
| | "id": 4, |
| | "content": "[EOS]", |
| | "single_word": false, |
| | "lstrip": false, |
| | "rstrip": false, |
| | "normalized": false, |
| | "special": true |
| | } |
| | ], |
| | "normalizer": { |
| | "type": "Sequence", |
| | "normalizers": [ |
| | { |
| | "type": "NFD" |
| | }, |
| | { |
| | "type": "StripAccents" |
| | }, |
| | { |
| | "type": "Lowercase" |
| | } |
| | ] |
| | }, |
| | "pre_tokenizer": { |
| | "type": "Whitespace" |
| | }, |
| | "post_processor": null, |
| | "decoder": null, |
| | "model": { |
| | "type": "WordLevel", |
| | "vocab": { |
| | "_": 0, |
| | "[PAD]": 1, |
| | "[UNK]": 2, |
| | "[BOS]": 3, |
| | "[EOS]": 4, |
| | "a": 5, |
| | "b": 6, |
| | "c": 7, |
| | "d": 8, |
| | "e": 9, |
| | "f": 10, |
| | "g": 11, |
| | "h": 12, |
| | "i": 13, |
| | "j": 14, |
| | "k": 15, |
| | "l": 16, |
| | "m": 17, |
| | "n": 18, |
| | "o": 19, |
| | "p": 20, |
| | "q": 21, |
| | "r": 22, |
| | "s": 23, |
| | "t": 24, |
| | "u": 25, |
| | "v": 26, |
| | "w": 27, |
| | "x": 28, |
| | "y": 29, |
| | "z": 30, |
| | "0": 31, |
| | "1": 32, |
| | "2": 33, |
| | "3": 34, |
| | "4": 35, |
| | "5": 36, |
| | "6": 37, |
| | "7": 38, |
| | "8": 39, |
| | "9": 40, |
| | "10": 41, |
| | "11": 42, |
| | "12": 43, |
| | "13": 44, |
| | "14": 45, |
| | "15": 46, |
| | "16": 47, |
| | "17": 48, |
| | "18": 49, |
| | "19": 50, |
| | "20": 51, |
| | "21": 52, |
| | "22": 53, |
| | "23": 54, |
| | "24": 55, |
| | "25": 56, |
| | "26": 57, |
| | "27": 58, |
| | "28": 59, |
| | "29": 60, |
| | "30": 61, |
| | "31": 62, |
| | "32": 63, |
| | "33": 64, |
| | "34": 65, |
| | "35": 66, |
| | "36": 67, |
| | "37": 68, |
| | "38": 69, |
| | "39": 70, |
| | "40": 71, |
| | "41": 72, |
| | "42": 73, |
| | "43": 74, |
| | "44": 75, |
| | "45": 76, |
| | "46": 77, |
| | "47": 78, |
| | "48": 79, |
| | "49": 80, |
| | "50": 81, |
| | "51": 82, |
| | "52": 83, |
| | "53": 84, |
| | "54": 85, |
| | "55": 86, |
| | "56": 87, |
| | "57": 88, |
| | "58": 89, |
| | "59": 90, |
| | "60": 91, |
| | "61": 92, |
| | "62": 93, |
| | "63": 94, |
| | "64": 95, |
| | "65": 96, |
| | "66": 97, |
| | "67": 98, |
| | "68": 99, |
| | "69": 100, |
| | "70": 101, |
| | "71": 102, |
| | "72": 103, |
| | "73": 104, |
| | "74": 105, |
| | "75": 106, |
| | "|": 107, |
| | "?": 108 |
| | }, |
| | "unk_token": "[UNK]" |
| | } |
| | } |