| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "([bos])", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 1, | |
| "content": "([eos])", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 2, | |
| "content": "([unk])", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 3, | |
| "content": "([pad])", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 4, | |
| "content": "([mask])", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| } | |
| ], | |
| "normalizer": null, | |
| "pre_tokenizer": { | |
| "type": "ByteLevel", | |
| "add_prefix_space": true, | |
| "trim_offsets": true, | |
| "use_regex": true | |
| }, | |
| "post_processor": { | |
| "type": "TemplateProcessing", | |
| "single": [ | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "SpecialToken": { | |
| "id": "([eos])", | |
| "type_id": 0 | |
| } | |
| } | |
| ], | |
| "pair": [ | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "SpecialToken": { | |
| "id": "([eos])", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "B", | |
| "type_id": 1 | |
| } | |
| }, | |
| { | |
| "SpecialToken": { | |
| "id": "([eos])", | |
| "type_id": 1 | |
| } | |
| } | |
| ], | |
| "special_tokens": { | |
| "([bos])": { | |
| "id": "([bos])", | |
| "ids": [ | |
| 0 | |
| ], | |
| "tokens": [ | |
| "([bos])" | |
| ] | |
| }, | |
| "([eos])": { | |
| "id": "([eos])", | |
| "ids": [ | |
| 1 | |
| ], | |
| "tokens": [ | |
| "([eos])" | |
| ] | |
| } | |
| } | |
| }, | |
| "decoder": { | |
| "type": "ByteLevel", | |
| "add_prefix_space": true, | |
| "trim_offsets": true, | |
| "use_regex": true | |
| }, | |
| "model": { | |
| "type": "BPE", | |
| "dropout": null, | |
| "unk_token": null, | |
| "continuing_subword_prefix": null, | |
| "end_of_word_suffix": null, | |
| "fuse_unk": false, | |
| "byte_fallback": false, | |
| "vocab": { | |
| "([bos])": 0, | |
| "([eos])": 1, | |
| "([unk])": 2, | |
| "([pad])": 3, | |
| "([mask])": 4, | |
| "'": 5, | |
| "a": 6, | |
| "b": 7, | |
| "c": 8, | |
| "d": 9, | |
| "e": 10, | |
| "f": 11, | |
| "g": 12, | |
| "h": 13, | |
| "i": 14, | |
| "j": 15, | |
| "k": 16, | |
| "l": 17, | |
| "m": 18, | |
| "n": 19, | |
| "o": 20, | |
| "p": 21, | |
| "q": 22, | |
| "r": 23, | |
| "s": 24, | |
| "t": 25, | |
| "u": 26, | |
| "v": 27, | |
| "w": 28, | |
| "x": 29, | |
| "y": 30, | |
| "z": 31, | |
| "Ġ": 32, | |
| "Ġt": 33, | |
| "he": 34, | |
| "Ġa": 35, | |
| "Ġthe": 36, | |
| "in": 37, | |
| "Ġs": 38, | |
| "Ġw": 39, | |
| "Ġo": 40, | |
| "re": 41, | |
| "nd": 42, | |
| "Ġb": 43, | |
| "Ġh": 44, | |
| "er": 45, | |
| "Ġm": 46, | |
| "Ġi": 47, | |
| "ou": 48, | |
| "Ġc": 49, | |
| "Ġf": 50, | |
| "at": 51, | |
| "ed": 52, | |
| "Ġand": 53, | |
| "en": 54, | |
| "Ġto": 55, | |
| "Ġof": 56, | |
| "on": 57, | |
| "is": 58, | |
| "Ġd": 59, | |
| "ing": 60, | |
| "Ġth": 61, | |
| "Ġp": 62, | |
| "Ġhe": 63, | |
| "or": 64, | |
| "Ġl": 65, | |
| "es": 66, | |
| "Ġin": 67, | |
| "ll": 68, | |
| "it": 69, | |
| "ar": 70, | |
| "as": 71, | |
| "an": 72, | |
| "Ġn": 73, | |
| "Ġg": 74, | |
| "om": 75, | |
| "Ġbe": 76, | |
| "Ġha": 77, | |
| "Ġe": 78, | |
| "le": 79, | |
| "ot": 80, | |
| "Ġy": 81, | |
| "ut": 82, | |
| "ow": 83, | |
| "ic": 84, | |
| "Ġwh": 85, | |
| "Ġit": 86, | |
| "ld": 87, | |
| "ve": 88, | |
| "Ġthat": 89, | |
| "ly": 90, | |
| "Ġwas": 91, | |
| "id": 92, | |
| "se": 93, | |
| "st": 94, | |
| "Ġon": 95, | |
| "gh": 96, | |
| "ent": 97, | |
| "Ġre": 98, | |
| "Ġyou": 99 | |
| }, | |
| "merges": [ | |
| "Ġ t", | |
| "h e", | |
| "Ġ a", | |
| "Ġt he", | |
| "i n", | |
| "Ġ s", | |
| "Ġ w", | |
| "Ġ o", | |
| "r e", | |
| "n d", | |
| "Ġ b", | |
| "Ġ h", | |
| "e r", | |
| "Ġ m", | |
| "Ġ i", | |
| "o u", | |
| "Ġ c", | |
| "Ġ f", | |
| "a t", | |
| "e d", | |
| "Ġa nd", | |
| "e n", | |
| "Ġt o", | |
| "Ġo f", | |
| "o n", | |
| "i s", | |
| "Ġ d", | |
| "in g", | |
| "Ġt h", | |
| "Ġ p", | |
| "Ġ he", | |
| "o r", | |
| "Ġ l", | |
| "e s", | |
| "Ġ in", | |
| "l l", | |
| "i t", | |
| "a r", | |
| "a s", | |
| "a n", | |
| "Ġ n", | |
| "Ġ g", | |
| "o m", | |
| "Ġb e", | |
| "Ġh a", | |
| "Ġ e", | |
| "l e", | |
| "o t", | |
| "Ġ y", | |
| "u t", | |
| "o w", | |
| "i c", | |
| "Ġw h", | |
| "Ġi t", | |
| "l d", | |
| "v e", | |
| "Ġth at", | |
| "l y", | |
| "Ġw as", | |
| "i d", | |
| "s e", | |
| "s t", | |
| "Ġo n", | |
| "g h", | |
| "en t", | |
| "Ġ re", | |
| "Ġy ou" | |
| ] | |
| } | |
| } |