{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[BOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "### User:", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 7, "content": "### AWA:", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "[PAD]": 0, "[UNK]": 1, "[BOS]": 2, "[EOS]": 3, "": 4, "": 5, "### User:": 6, "### AWA:": 7, "!": 8, "\"": 9, "#": 10, "%": 11, "'": 12, "(": 13, ")": 14, "*": 15, "+": 16, ",": 17, "-": 18, ".": 19, "/": 20, "0": 21, "1": 22, "2": 23, "3": 24, "4": 25, "5": 26, "6": 27, "7": 28, "8": 29, "9": 30, ":": 31, ";": 32, "<": 33, "=": 34, ">": 35, "?": 36, "A": 37, "B": 38, "C": 39, "D": 40, "E": 41, "F": 42, "G": 43, "H": 44, "I": 45, "J": 46, "K": 47, "L": 48, "M": 49, "N": 50, "O": 51, "P": 52, "Q": 53, "R": 54, "S": 55, "T": 56, "U": 57, "V": 58, "W": 59, "X": 60, "Y": 61, "Z": 62, "[": 63, "]": 64, "a": 65, "b": 66, "c": 67, "d": 68, "e": 69, "f": 70, "g": 71, "h": 72, "i": 73, "j": 74, "k": 75, "l": 76, "m": 77, "n": 78, "o": 79, "p": 80, "q": 81, "r": 82, "s": 83, "t": 84, "u": 85, "v": 86, "w": 87, "x": 88, "y": 89, "z": 90, "~": 91, "¡": 92, "¢": 93, "£": 94, "¤": 95, "¥": 96, "¦": 97, "§": 98, "¨": 99, "©": 100, "ª": 101, "«": 102, "¬": 103, "®": 104, "¯": 105, "°": 106, "±": 107, "²": 108, "³": 109, "´": 110, "µ": 111, "¶": 112, "·": 113, "¸": 114, "¹": 115, "º": 116, "»": 117, "¼": 118, "½": 119, "¿": 120, "Â": 121, "Ã": 122, "Ä": 123, "Ç": 124, "Ï": 125, "â": 126, "ã": 127, "æ": 128, "ç": 129, "ï": 130, "ð": 131, "Ċ": 132, "Ġ": 133, "Ģ": 134, "ģ": 135, "Ĥ": 136, "ĥ": 137, "Ħ": 138, "ħ": 139, "Ĩ": 140, "ĩ": 141, "Ī": 142, "ī": 143, "Ĭ": 144, "ĭ": 145, "Į": 146, "į": 147, "İ": 148, "ı": 149, "IJ": 150, "ij": 151, "Ĵ": 152, "ĵ": 153, "Ķ": 154, "ķ": 155, "ĸ": 156, "Ĺ": 157, "ĺ": 158, "Ļ": 159, "ļ": 160, "Ľ": 161, "ľ": 162, "Ŀ": 163, "ŀ": 164, "Ł": 165, "ł": 166, "Ń": 167, "an": 168, "er": 169, "ang": 170, "ah": 171, "ka": 172, "Ġm": 173, "Ġd": 174, "en": 175, "Ġb": 176, "##": 177, "ak": 178, "at": 179, "Ġs": 180, "ar": 181, "Ġt": 182, "ĠA": 183, "am": 184, "Ġk": 185, "ya": 186, "in": 187, "Ġp": 188, "al": 189, "ad": 190, "un": 191, "yang": 192, "###": 193, "em": 194, "ap": 195, "Ġyang": 196, "lah": 197, "Ġka": 198, "Ġke": 199, "it": 200, "as": 201, "Ġ(": 202, "eng": 203, "kan": 204, "Ġdi": 205, "WA": 206, "ĠAWA": 207, "Ġse": 208, "or": 209, "eka": 210, "Ġber": 211, "Ġdan": 212, "Ġl": 213, "mu": 214, "ereka": 215, "orang": 216, "uh": 217, "Ġmereka": 218, "ĠU": 219, "ser": 220, "Ġj": 221, "el": 222, "ĠUser": 223, "ĠM": 224, "