{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "([bos])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "([eos])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "([unk])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "([pad])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "([mask])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 247, "content": "(LNG)", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 248, "content": "(UNK)", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false }, { "id": 249, "content": "(SPN)", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "([eos])", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "([eos])", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "([eos])", "type_id": 1 } } ], "special_tokens": { "([bos])": { "id": "([bos])", "ids": [ 0 ], "tokens": [ "([bos])" ] }, "([eos])": { "id": "([eos])", "ids": [ 1 ], "tokens": [ "([eos])" ] } } }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": "([unk])", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "([bos])": 0, "([eos])": 1, "([unk])": 2, "([pad])": 3, "([mask])": 4, "!": 5, "%": 6, "'": 7, "(": 8, ")": 9, "*": 10, "+": 11, ",": 12, "-": 13, ".": 14, "/": 15, "0": 16, "1": 17, "2": 18, "3": 19, "4": 20, "5": 21, "6": 22, "7": 23, "8": 24, "9": 25, ":": 26, ";": 27, "?": 28, "A": 29, "B": 30, "C": 31, "D": 32, "E": 33, "F": 34, "G": 35, "H": 36, "I": 37, "J": 38, "K": 39, "L": 40, "M": 41, "N": 42, "O": 43, "P": 44, "Q": 45, "R": 46, "S": 47, "T": 48, "U": 49, "V": 50, "W": 51, "X": 52, "Y": 53, "Z": 54, "a": 55, "b": 56, "c": 57, "d": 58, "e": 59, "f": 60, "g": 61, "h": 62, "i": 63, "j": 64, "k": 65, "l": 66, "m": 67, "n": 68, "o": 69, "p": 70, "q": 71, "r": 72, "s": 73, "t": 74, "u": 75, "v": 76, "w": 77, "x": 78, "y": 79, "z": 80, "¡": 81, "¢": 82, "£": 83, "¤": 84, "¥": 85, "§": 86, "¨": 87, "©": 88, "ª": 89, "«": 90, "¬": 91, "®": 92, "¯": 93, "°": 94, "±": 95, "²": 96, "³": 97, "´": 98, "µ": 99, "¶": 100, "·": 101, "¸": 102, "º": 103, "¼": 104, "½": 105, "¾": 106, "Â": 107, "Ã": 108, "Ä": 109, "Å": 110, "È": 111, "â": 112, "Ġ": 113, "Ģ": 114, "ģ": 115, "Ĥ": 116, "ĥ": 117, "Ħ": 118, "ħ": 119, "ĩ": 120, "ī": 121, "Į": 122, "į": 123, "İ": 124, "IJ": 125, "ķ": 126, "ĸ": 127, "Ĺ": 128, "ĺ": 129, "Ļ": 130, "Ľ": 131, "ľ": 132, "Ł": 133, "ł": 134, "Ń": 135, "se": 136, "Ġk": 137, "Ġ,": 138, "st": 139, "Ġ.": 140, "le": 141, "ä": 142, "Ġt": 143, "Ġm": 144, "Ġo": 145, "õ": 146, "Ġe": 147, "id": 148, "in": 149, "Ġp": 150, "Ġv": 151, "ja": 152, "Ġs": 153, "da": 154, "li": 155, "Ġse": 156, "ma": 157, "me": 158, "Ġa": 159, "Ġn": 160, "oo": 161, "it": 162, "ü": 163, "is": 164, "Ġon": 165, "ga": 166, "ud": 167, "Ġja": 168, "ra": 169, "ks": 170, "Ġme": 171, "us": 172, "te": 173, "va": 174, "ta": 175, "ik": 176, "Ġte": 177, "ur": 178, "Ġka": 179, "en": 180, "Ġet": 181, "Ġva": 182, "la": 183, "Ġko": 184, "si": 185, "lle": 186, "es": 187, "aa": 188, "ust": 189, "lt": 190, "na": 191, "õi": 192, "mi": 193, "ri": 194, "use": 195, "Ġh": 196, "Ġj": 197, "pa": 198, "ö": 199, "ge": 200, "gi": 201, "ne": 202, "Ġku": 203, "ee": 204, "Ġ(": 205, "lu": 206, "ea": 207, "il": 208, "Ġselle": 209, "Ġpa": 210, "Ġü": 211, "de": 212, "ĠE": 213, "gu": 214, "Ġole": 215, "Ġr": 216, "Ġsee": 217, "Ġvä": 218, "uroo": 219, "sta": 220, "nd": 221, "ine": 222, "ku": 223, "Ġta": 224, "uroopa": 225, "ti": 226, "Ġei": 227, "ĠEuroopa": 228, "är": 229, "ida": 230, "ko": 231, "ha": 232, "Ġl": 233, "el": 234, "ii": 235, "Ġpea": 236, "Ġtu": 237, "ni": 238, "vad": 239, "tud": 240, "Ġsu": 241, "Ġsaa": 242, "Ġtä": 243, "est": 244, "Ġsi": 245, "Ġma": 246 }, "merges": [ "s e", "Ġ k", "Ġ ,", "s t", "Ġ .", "l e", "à ¤", "Ġ t", "Ġ m", "Ġ o", "à µ", "Ġ e", "i d", "i n", "Ġ p", "Ġ v", "j a", "Ġ s", "d a", "l i", "Ġ se", "m a", "m e", "Ġ a", "Ġ n", "o o", "i t", "à ¼", "i s", "Ġo n", "g a", "u d", "Ġ ja", "r a", "k s", "Ġm e", "u s", "t e", "v a", "t a", "i k", "Ġt e", "u r", "Ġk a", "e n", "Ġe t", "Ġv a", "l a", "Ġk o", "s i", "l le", "e s", "a a", "u st", "l t", "n a", "õ i", "m i", "r i", "u se", "Ġ h", "Ġ j", "p a", "à ¶", "g e", "g i", "n e", "Ġk u", "e e", "Ġ (", "l u", "e a", "i l", "Ġse lle", "Ġp a", "Ġ ü", "d e", "Ġ E", "g u", "Ġo le", "Ġ r", "Ġse e", "Ġv ä", "ur oo", "st a", "n d", "in e", "k u", "Ġt a", "uroo pa", "t i", "Ġe i", "ĠE uroopa", "ä r", "id a", "k o", "h a", "Ġ l", "e l", "i i", "Ġp ea", "Ġt u", "n i", "va d", "t ud", "Ġs u", "Ġs aa", "Ġt ä", "e st", "Ġs i", "Ġm a" ] } }