{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[BOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "[PAD]": 0, "[BOS]": 1, "[EOS]": 2, "!": 3, "'": 4, ",": 5, "?": 6, "H": 7, "I": 8, "a": 9, "d": 10, "e": 11, "g": 12, "h": 13, "i": 14, "k": 15, "l": 16, "m": 17, "n": 18, "o": 19, "r": 20, "s": 21, "t": 22, "u": 23, "w": 24, "y": 25, "re": 26, "He": 27, "an": 28, "at": 29, "are": 30, "do": 31, "gre": 32, "ho": 33, "han": 34, "in": 35, "ks": 36, "ll": 37, "ou": 38, "than": 39, "you": 40, "Hell": 41, "doin": 42, "great": 43, "how": 44, "thanks": 45, "Hello": 46, "doing": 47 }, "merges": [ "r e", "H e", "a n", "a t", "a re", "d o", "g re", "h o", "h an", "i n", "k s", "l l", "o u", "t han", "y ou", "He ll", "do in", "gre at", "ho w", "than ks", "Hell o", "doin g" ] } }