| { |
| "version": "1.0", |
| "truncation": null, |
| "padding": null, |
| "added_tokens": [ |
| { |
| "id": 0, |
| "content": "<start>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 1, |
| "content": "<end>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 2, |
| "content": "<pad>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| } |
| ], |
| "normalizer": null, |
| "pre_tokenizer": { |
| "type": "ByteLevel", |
| "add_prefix_space": false, |
| "trim_offsets": true, |
| "use_regex": true |
| }, |
| "post_processor": { |
| "type": "ByteLevel", |
| "add_prefix_space": true, |
| "trim_offsets": false, |
| "use_regex": true |
| }, |
| "decoder": { |
| "type": "ByteLevel", |
| "add_prefix_space": true, |
| "trim_offsets": true, |
| "use_regex": true |
| }, |
| "model": { |
| "type": "BPE", |
| "dropout": null, |
| "unk_token": null, |
| "continuing_subword_prefix": null, |
| "end_of_word_suffix": null, |
| "fuse_unk": false, |
| "byte_fallback": false, |
| "vocab": { |
| "<start>": 0, |
| "<end>": 1, |
| "<pad>": 2, |
| "\"": 3, |
| "'": 4, |
| ",": 5, |
| "-": 6, |
| ".": 7, |
| "<": 8, |
| ">": 9, |
| "A": 10, |
| "B": 11, |
| "C": 12, |
| "D": 13, |
| "G": 14, |
| "H": 15, |
| "I": 16, |
| "M": 17, |
| "N": 18, |
| "O": 19, |
| "S": 20, |
| "a": 21, |
| "b": 22, |
| "c": 23, |
| "d": 24, |
| "e": 25, |
| "f": 26, |
| "g": 27, |
| "h": 28, |
| "i": 29, |
| "j": 30, |
| "k": 31, |
| "l": 32, |
| "m": 33, |
| "n": 34, |
| "o": 35, |
| "p": 36, |
| "r": 37, |
| "s": 38, |
| "t": 39, |
| "u": 40, |
| "w": 41, |
| "x": 42, |
| "y": 43, |
| "²": 44, |
| "Ì": 45, |
| "Ġ": 46, |
| "st": 47, |
| "nd": 48, |
| "ar": 49, |
| "end": 50, |
| "Ġ<": 51, |
| "star": 52, |
| "start": 53, |
| "hl": 54, |
| "Ì²": 55, |
| "wi": 56, |
| "ii": 57, |
| "Ġg": 58, |
| "aa": 59, |
| "oo": 60, |
| "Ġn": 61, |
| "Ġwi": 62, |
| "Ġ'": 63, |
| "Ġii": 64, |
| "an": 65, |
| "Ġy": 66, |
| "Ġl": 67, |
| "Ii": 68, |
| "ĠIi": 69, |
| "oohl": 70, |
| "ee": 71, |
| "im": 72, |
| "Ġwil": 73, |
| "Ġh": 74, |
| "whl": 75, |
| "Ġhl": 76, |
| "ag": 77, |
| "dii": 78, |
| "nii": 79, |
| "ts": 80, |
| "xwi": 81 |
| }, |
| "merges": [ |
| "s t", |
| "n d", |
| "a r", |
| "e nd", |
| "Ġ <", |
| "st ar", |
| "star t", |
| "h l", |
| "Ì ²", |
| "w i", |
| "i i", |
| "Ġ g", |
| "a a", |
| "o o", |
| "Ġ n", |
| "Ġ wi", |
| "Ġ '", |
| "Ġ ii", |
| "a n", |
| "Ġ y", |
| "Ġ l", |
| "I i", |
| "Ġ Ii", |
| "oo hl", |
| "e e", |
| "i m", |
| "Ġwi l", |
| "Ġ h", |
| "w hl", |
| "Ġ hl", |
| "a g", |
| "d ii", |
| "n ii", |
| "t s", |
| "x wi" |
| ] |
| } |
| } |