| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "<pad>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 1, | |
| "content": "<|start|>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 2, | |
| "content": "<|end|>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 3, | |
| "content": "<|return|>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 4, | |
| "content": "<|call|>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 5, | |
| "content": "<|message|>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 6, | |
| "content": "<|channel|>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| } | |
| ], | |
| "normalizer": { | |
| "type": "Sequence", | |
| "normalizers": [ | |
| { | |
| "type": "Prepend", | |
| "prepend": "▁" | |
| }, | |
| { | |
| "type": "Replace", | |
| "pattern": { | |
| "String": " " | |
| }, | |
| "content": "▁" | |
| } | |
| ] | |
| }, | |
| "pre_tokenizer": { | |
| "type": "Metaspace", | |
| "replacement": "▁", | |
| "add_prefix_space": true, | |
| "split": true | |
| }, | |
| "post_processor": { | |
| "type": "TemplateProcessing", | |
| "single": [ | |
| { | |
| "SpecialToken": { | |
| "id": "<|start|>", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| } | |
| ], | |
| "pair": [ | |
| { | |
| "SpecialToken": { | |
| "id": "<|start|>", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "B", | |
| "type_id": 1 | |
| } | |
| } | |
| ], | |
| "special_tokens": { | |
| "<|start|>": { | |
| "id": "<|start|>", | |
| "ids": [1], | |
| "tokens": ["<|start|>"] | |
| } | |
| } | |
| }, | |
| "decoder": { | |
| "type": "Metaspace", | |
| "replacement": "▁", | |
| "add_prefix_space": true, | |
| "split": true | |
| }, | |
| "model": { | |
| "type": "BPE", | |
| "dropout": null, | |
| "unk_token": null, | |
| "continuing_subword_prefix": "", | |
| "end_of_word_suffix": "", | |
| "fuse_unk": false, | |
| "byte_fallback": true, | |
| "vocab": { | |
| "<pad>": 0, | |
| "<|start|>": 1, | |
| "<|end|>": 2, | |
| "<|return|>": 3, | |
| "<|call|>": 4, | |
| "<|message|>": 5, | |
| "<|channel|>": 6, | |
| "▁": 7, | |
| "▁the": 8, | |
| "▁a": 9, | |
| "▁to": 10, | |
| "▁of": 11, | |
| "▁and": 12, | |
| "▁in": 13, | |
| "▁is": 14, | |
| "▁for": 15, | |
| "▁that": 16, | |
| "▁on": 17, | |
| "▁with": 18, | |
| "▁as": 19, | |
| "▁it": 20, | |
| "▁at": 21, | |
| "▁by": 22, | |
| "▁from": 23, | |
| "▁or": 24, | |
| "▁an": 25, | |
| "▁this": 26, | |
| "▁be": 27, | |
| "▁are": 28, | |
| "▁was": 29, | |
| "▁not": 30, | |
| "▁you": 31, | |
| "▁have": 32, | |
| "▁can": 33, | |
| "▁will": 34, | |
| "▁we": 35, | |
| "▁but": 36, | |
| "▁all": 37, | |
| "▁they": 38, | |
| "▁has": 39, | |
| "▁one": 40, | |
| "▁more": 41, | |
| "▁would": 42, | |
| "▁if": 43, | |
| "▁there": 44, | |
| "▁their": 45, | |
| "▁which": 46, | |
| "▁about": 47, | |
| "▁when": 48, | |
| "▁than": 49, | |
| "▁these": 50, | |
| "▁some": 51, | |
| "▁time": 52, | |
| "▁into": 53, | |
| "▁just": 54, | |
| "▁its": 55, | |
| "▁do": 56, | |
| "▁out": 57, | |
| "▁them": 58, | |
| "▁up": 59, | |
| "▁may": 60, | |
| "▁what": 61, | |
| "▁been": 62, | |
| "▁like": 63, | |
| "▁other": 64, | |
| "▁so": 65, | |
| "▁how": 66, | |
| "▁who": 67, | |
| "▁two": 68, | |
| "▁my": 69, | |
| "▁use": 70, | |
| "▁get": 71, | |
| "▁she": 72, | |
| "▁also": 73, | |
| "▁because": 74, | |
| "▁then": 75, | |
| "▁now": 76, | |
| "▁first": 77, | |
| "▁only": 78, | |
| "▁make": 79, | |
| "▁know": 80, | |
| "▁people": 81, | |
| "▁said": 82, | |
| "▁where": 83, | |
| "▁very": 84, | |
| "▁over": 85, | |
| "▁such": 86, | |
| "▁see": 87, | |
| "▁him": 88, | |
| "▁way": 89, | |
| "▁many": 90, | |
| "▁most": 91, | |
| "▁could": 92, | |
| "▁should": 93, | |
| "▁after": 94, | |
| "▁well": 95, | |
| "▁your": 96, | |
| "▁through": 97, | |
| "▁back": 98, | |
| "▁any": 99, | |
| "▁our": 100 | |
| }, | |
| "merges": [ | |
| "▁ t", | |
| "h e", | |
| "▁t he", | |
| "▁ a", | |
| "▁ to", | |
| "o f", | |
| "▁ of", | |
| "a n", | |
| "▁a nd", | |
| "i n", | |
| "▁ in", | |
| "i s", | |
| "▁ is", | |
| "▁ for", | |
| "t h", | |
| "▁th at", | |
| "o n", | |
| "▁ on", | |
| "w i", | |
| "▁wi th", | |
| "a s", | |
| "▁ as", | |
| "i t", | |
| "▁ it", | |
| "a t", | |
| "▁ at", | |
| "b y", | |
| "▁ by", | |
| "f r", | |
| "▁fr om", | |
| "o r", | |
| "▁ or", | |
| "▁a n", | |
| "▁th is", | |
| "b e", | |
| "▁ be", | |
| "a r", | |
| "▁ar e", | |
| "w a", | |
| "▁wa s", | |
| "n o", | |
| "▁no t", | |
| "y o", | |
| "▁yo u", | |
| "h a", | |
| "▁ha ve", | |
| "c a", | |
| "▁ca n", | |
| "w il", | |
| "▁wil l", | |
| "▁ we", | |
| "▁b ut", | |
| "al l", | |
| "▁ all", | |
| "th e", | |
| "▁the y", | |
| "▁ha s", | |
| "on e", | |
| "▁ one", | |
| "m or", | |
| "▁mor e", | |
| "w oul", | |
| "▁woul d", | |
| "▁ if", | |
| "th er", | |
| "▁ther e", | |
| "th ei", | |
| "▁thei r", | |
| "wh i", | |
| "▁whi ch", | |
| "ab ou", | |
| "▁abou t", | |
| "wh en", | |
| "▁ when", | |
| "th an", | |
| "▁ than", | |
| "th es", | |
| "▁thes e", | |
| "s om", | |
| "▁som e", | |
| "t im", | |
| "▁tim e", | |
| "in to", | |
| "▁ into", | |
| "j us", | |
| "▁jus t", | |
| "it s", | |
| "▁ its", | |
| "d o", | |
| "▁ do", | |
| "ou t", | |
| "▁ out", | |
| "th em", | |
| "▁ them", | |
| "u p", | |
| "▁ up" | |
| ] | |
| } | |
| } |