{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "NFD" }, { "type": "Lowercase" }, { "type": "StripAccents" } ] }, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 0 ], "tokens": [ "" ] }, "": { "id": "", "ids": [ 1 ], "tokens": [ "" ] } } }, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, ",": 4, ".": 5, "a": 6, "b": 7, "c": 8, "d": 9, "e": 10, "f": 11, "g": 12, "h": 13, "i": 14, "j": 15, "k": 16, "l": 17, "m": 18, "n": 19, "o": 20, "p": 21, "r": 22, "s": 23, "t": 24, "u": 25, "v": 26, "w": 27, "y": 28, "—": 29, "’": 30, "“": 31, "”": 32, "re": 33, "an": 34, "in": 35, "is": 36, "al": 37, "ea": 38, "on": 39, "sp": 40, "th": 41, "tu": 42, "and": 43, "am": 44, "bo": 45, "bu": 46, "co": 47, "de": 48, "el": 49, "fr": 50, "ion": 51, "us": 52, "we": 53, "ai": 54, "are": 55, "ca": 56, "ce": 57, "ed": 58, "en": 59, "eed": 60, "fu": 61, "ig": 62, "il": 63, "lo": 64, "lig": 65, "no": 66, "om": 67, "or": 68, "ri": 69, "sc": 70, "va": 71, "ve": 72, "all": 73, "ear": 74, "speed": 75, "the": 76, "ture": 77, "buil": 78, "from": 79, "ence": 80, "future": 81, ".”": 82, "ah": 83, "abo": 84, "ade": 85, "afr": 86, "ava": 87, "be": 88, "by": 89, "bel": 90, "ch": 91, "can": 92, "dre": 93, "din": 94, "es": 95, "et": 96, "ean": 97, "eve": 98, "for": 99, "hn": 100, "his": 101, "hel": 102, "hear": 103, "it": 104, "ith": 105, "ica": 106, "iri": 107, "ience": 108, "ieve": 109, "java": 110, "jes": 111, "kr": 112, "ks": 113, "lea": 114, "let": 115, "my": 116, "made": 117, "mean": 118, "mfor": 119, "ntu": 120, "nam": 121, "nit": 122, "ou": 123, "owe": 124, "oks": 125, "pt": 126, "py": 127, "pre": 128, "powe": 129, "rn": 130, "rs": 131, "rus": 132, "ss": 133, "sear": 134, "to": 135, "tal": 136, "tel": 137, "tion": 138, "tor": 139, "ut": 140, "ure": 141, "ubu": 142, "unit": 143, "vis": 144, "with": 145, "relig": 146, "inno": 147, "intel": 148, "ishn": 149, "eak": 150, "spiri": 151, "speak": 152, "thon": 153, "tual": 154, "books": 155, "born": 156, "code": 157, "codin": 158, "comfor": 159, "ders": 160, "use": 161, "cause": 162, "love": 163, "ligence": 164, "not": 165, "ript": 166, "science": 167, "script": 168, "vation": 169, "allah": 170, "build": 171, "built": 172, "about": 173, "africa": 174, "because": 175, "believe": 176, "dream": 177, "histor": 178, "hello": 179, "heart": 180, "javascript": 181, "jesus": 182, "krishn": 183, "leaders": 184, "means": 185, "name": 186, "our": 187, "python": 188, "press": 189, "power": 190, "rust": 191, "search": 192, "talk": 193, "ubuntu": 194, "unity": 195, "vision": 196, "religion": 197, "innovation": 198, "intelligence": 199, "spiritual": 200, "coding": 201, "comfort": 202, "history": 203, "krishna": 204, "pressure": 205 }, "merges": [ [ "r", "e" ], [ "a", "n" ], [ "i", "n" ], [ "i", "s" ], [ "a", "l" ], [ "e", "a" ], [ "o", "n" ], [ "s", "p" ], [ "t", "h" ], [ "t", "u" ], [ "an", "d" ], [ "a", "m" ], [ "b", "o" ], [ "b", "u" ], [ "c", "o" ], [ "d", "e" ], [ "e", "l" ], [ "f", "r" ], [ "i", "on" ], [ "u", "s" ], [ "w", "e" ], [ "a", "i" ], [ "a", "re" ], [ "c", "a" ], [ "c", "e" ], [ "e", "d" ], [ "e", "n" ], [ "e", "ed" ], [ "f", "u" ], [ "i", "g" ], [ "i", "l" ], [ "l", "o" ], [ "l", "ig" ], [ "n", "o" ], [ "o", "m" ], [ "o", "r" ], [ "r", "i" ], [ "s", "c" ], [ "v", "a" ], [ "v", "e" ], [ "al", "l" ], [ "ea", "r" ], [ "sp", "eed" ], [ "th", "e" ], [ "tu", "re" ], [ "bu", "il" ], [ "fr", "om" ], [ "en", "ce" ], [ "fu", "ture" ], [ ".", "”" ], [ "a", "h" ], [ "a", "bo" ], [ "a", "de" ], [ "a", "fr" ], [ "a", "va" ], [ "b", "e" ], [ "b", "y" ], [ "b", "el" ], [ "c", "h" ], [ "c", "an" ], [ "d", "re" ], [ "d", "in" ], [ "e", "s" ], [ "e", "t" ], [ "e", "an" ], [ "e", "ve" ], [ "f", "or" ], [ "h", "n" ], [ "h", "is" ], [ "h", "el" ], [ "h", "ear" ], [ "i", "t" ], [ "i", "th" ], [ "i", "ca" ], [ "i", "ri" ], [ "i", "ence" ], [ "i", "eve" ], [ "j", "ava" ], [ "j", "es" ], [ "k", "r" ], [ "k", "s" ], [ "l", "ea" ], [ "l", "et" ], [ "m", "y" ], [ "m", "ade" ], [ "m", "ean" ], [ "m", "for" ], [ "n", "tu" ], [ "n", "am" ], [ "n", "it" ], [ "o", "u" ], [ "o", "we" ], [ "o", "ks" ], [ "p", "t" ], [ "p", "y" ], [ "p", "re" ], [ "p", "owe" ], [ "r", "n" ], [ "r", "s" ], [ "r", "us" ], [ "s", "s" ], [ "s", "ear" ], [ "t", "o" ], [ "t", "al" ], [ "t", "el" ], [ "t", "ion" ], [ "t", "or" ], [ "u", "t" ], [ "u", "re" ], [ "u", "bu" ], [ "u", "nit" ], [ "v", "is" ], [ "w", "ith" ], [ "re", "lig" ], [ "in", "no" ], [ "in", "tel" ], [ "is", "hn" ], [ "ea", "k" ], [ "sp", "iri" ], [ "sp", "eak" ], [ "th", "on" ], [ "tu", "al" ], [ "bo", "oks" ], [ "bo", "rn" ], [ "co", "de" ], [ "co", "din" ], [ "co", "mfor" ], [ "de", "rs" ], [ "us", "e" ], [ "ca", "use" ], [ "lo", "ve" ], [ "lig", "ence" ], [ "no", "t" ], [ "ri", "pt" ], [ "sc", "ience" ], [ "sc", "ript" ], [ "va", "tion" ], [ "all", "ah" ], [ "buil", "d" ], [ "buil", "t" ], [ "abo", "ut" ], [ "afr", "ica" ], [ "be", "cause" ], [ "bel", "ieve" ], [ "dre", "am" ], [ "his", "tor" ], [ "hel", "lo" ], [ "hear", "t" ], [ "java", "script" ], [ "jes", "us" ], [ "kr", "ishn" ], [ "lea", "ders" ], [ "mean", "s" ], [ "nam", "e" ], [ "ou", "r" ], [ "py", "thon" ], [ "pre", "ss" ], [ "powe", "r" ], [ "rus", "t" ], [ "sear", "ch" ], [ "tal", "k" ], [ "ubu", "ntu" ], [ "unit", "y" ], [ "vis", "ion" ], [ "relig", "ion" ], [ "inno", "vation" ], [ "intel", "ligence" ], [ "spiri", "tual" ], [ "codin", "g" ], [ "comfor", "t" ], [ "histor", "y" ], [ "krishn", "a" ], [ "press", "ure" ] ] } }