| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "<start>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 1, | |
| "content": "<end>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 2, | |
| "content": "<pad>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| } | |
| ], | |
| "normalizer": null, | |
| "pre_tokenizer": { | |
| "type": "ByteLevel", | |
| "add_prefix_space": false, | |
| "trim_offsets": true, | |
| "use_regex": true | |
| }, | |
| "post_processor": { | |
| "type": "ByteLevel", | |
| "add_prefix_space": true, | |
| "trim_offsets": false, | |
| "use_regex": true | |
| }, | |
| "decoder": { | |
| "type": "ByteLevel", | |
| "add_prefix_space": true, | |
| "trim_offsets": true, | |
| "use_regex": true | |
| }, | |
| "model": { | |
| "type": "BPE", | |
| "dropout": null, | |
| "unk_token": null, | |
| "continuing_subword_prefix": null, | |
| "end_of_word_suffix": null, | |
| "fuse_unk": false, | |
| "byte_fallback": false, | |
| "vocab": { | |
| "<start>": 0, | |
| "<end>": 1, | |
| "<pad>": 2, | |
| "-": 3, | |
| ".": 4, | |
| "1": 5, | |
| "2": 6, | |
| "3": 7, | |
| "<": 8, | |
| ">": 9, | |
| "A": 10, | |
| "B": 11, | |
| "C": 12, | |
| "D": 13, | |
| "E": 14, | |
| "F": 15, | |
| "G": 16, | |
| "I": 17, | |
| "J": 18, | |
| "L": 19, | |
| "M": 20, | |
| "N": 21, | |
| "O": 22, | |
| "P": 23, | |
| "R": 24, | |
| "S": 25, | |
| "T": 26, | |
| "U": 27, | |
| "V": 28, | |
| "W": 29, | |
| "X": 30, | |
| "Z": 31, | |
| "a": 32, | |
| "b": 33, | |
| "c": 34, | |
| "d": 35, | |
| "e": 36, | |
| "f": 37, | |
| "g": 38, | |
| "h": 39, | |
| "i": 40, | |
| "k": 41, | |
| "l": 42, | |
| "m": 43, | |
| "n": 44, | |
| "o": 45, | |
| "p": 46, | |
| "r": 47, | |
| "s": 48, | |
| "t": 49, | |
| "u": 50, | |
| "v": 51, | |
| "w": 52, | |
| "y": 53, | |
| "Ġ": 54, | |
| "ar": 55, | |
| "nd": 56, | |
| "st": 57, | |
| "art": 58, | |
| "end": 59, | |
| "Ġ<": 60, | |
| "start": 61, | |
| "CN": 62, | |
| "II": 63, | |
| "ĠC": 64, | |
| "CNJ": 65, | |
| "ĠCCNJ": 66, | |
| "SG": 67, | |
| "ĠL": 68, | |
| "OC": 69, | |
| "ou": 70, | |
| "ĠI": 71, | |
| "on": 72, | |
| "PL": 73, | |
| "ĠP": 74, | |
| "ĠLOC": 75, | |
| "ho": 76, | |
| "Ġg": 77, | |
| "Ġs": 78, | |
| "in": 79, | |
| "AS": 80, | |
| "Ġgo": 81, | |
| "ĠN": 82, | |
| "ĠD": 83, | |
| "Ġw": 84, | |
| "ĠPR": 85, | |
| "Ġt": 86, | |
| "ĠIN": 87, | |
| "Ġ1": 88, | |
| "Ġp": 89, | |
| "SP": 90, | |
| "MP": 91, | |
| "OMP": 92, | |
| "ĠCOMP": 93, | |
| "Ġh": 94, | |
| "TR": 95, | |
| "EP": 96, | |
| "le": 97, | |
| "ck": 98, | |
| "Ġl": 99, | |
| "OSP": 100, | |
| "ĠPROSP": 101, | |
| "ke": 102, | |
| "ll": 103, | |
| "se": 104, | |
| "ID": 105, | |
| "re": 106, | |
| "Ġar": 107, | |
| "VB": 108, | |
| "ĠLVB": 109, | |
| "ay": 110, | |
| "Ġin": 111, | |
| "out": 112, | |
| "CCNJ": 113, | |
| "or": 114, | |
| "CEP": 115, | |
| "ĠINCEP": 116, | |
| "EG": 117, | |
| "FOC": 118, | |
| "te": 119, | |
| "ĠNEG": 120, | |
| "ake": 121, | |
| "ound": 122, | |
| "Ġaround": 123, | |
| "WID": 124, | |
| "Ġf": 125, | |
| "ĠDWID": 126, | |
| "Ġon": 127, | |
| "PAS": 128, | |
| "me": 129, | |
| "PASS": 130, | |
| "an": 131, | |
| "ee": 132, | |
| "pe": 133, | |
| "EAS": 134, | |
| "REAS": 135, | |
| "Ġb": 136, | |
| "ĠREAS": 137, | |
| "ouse": 138, | |
| "Ġpi": 139, | |
| "ĠS": 140, | |
| "co": 141, | |
| "LZ": 142, | |
| "MLZ": 143, | |
| "ĠNMLZ": 144, | |
| "ork": 145, | |
| "it": 146, | |
| "Ġm": 147, | |
| "BL": 148, | |
| "FV": 149, | |
| "OBL": 150, | |
| "PFV": 151, | |
| "ĠIPFV": 152, | |
| "AN": 153, | |
| "mp": 154, | |
| "to": 155, | |
| "Ġwho": 156, | |
| "Ġhouse": 157, | |
| "Ġpipe": 158, | |
| "comp": 159, | |
| "Ġhit": 160, | |
| "ac": 161, | |
| "ain": 162, | |
| "all": 163, | |
| "Ġtr": 164, | |
| "ĠINS": 165, | |
| "ear": 166, | |
| "ow": 167, | |
| "oll": 168, | |
| "Ġtake": 169, | |
| "Ġfoll": 170, | |
| "Ġfollow": 171, | |
| "RR": 172, | |
| "Ġout": 173, | |
| "AU": 174, | |
| "ĠOBL": 175, | |
| "AUS": 176, | |
| "ri": 177, | |
| "ong": 178, | |
| "SX": 179, | |
| "ack": 180, | |
| "lm": 181, | |
| "ve": 182, | |
| "Ġo": 183, | |
| "holm": 184, | |
| "Ġsee": 185, | |
| "ckholm": 186, | |
| "ĠSto": 187, | |
| "ĠStockholm": 188, | |
| "CAUS": 189, | |
| "IP": 190, | |
| "TIP": 191, | |
| "ly": 192, | |
| "od": 193, | |
| "par": 194, | |
| "Ġcomp": 195, | |
| "Ġgood": 196, | |
| "Ġwork": 197, | |
| "Ġth": 198, | |
| "lete": 199, | |
| "ANTIP": 200, | |
| "Ġcomplete": 201, | |
| "Ġcompletely": 202, | |
| "ER": 203, | |
| "VER": 204, | |
| "ĠVER": 205, | |
| "Ġsay": 206, | |
| "DM": 207, | |
| "IS": 208, | |
| "he": 209, | |
| "ir": 210, | |
| "rn": 211, | |
| "rt": 212, | |
| "tu": 213, | |
| "Ġ3": 214, | |
| "III": 215, | |
| "hort": 216, | |
| "Ġshort": 217, | |
| "Ġli": 218, | |
| "Ġmake": 219, | |
| "Ġtry": 220, | |
| "turn": 221, | |
| "ab": 222, | |
| "un": 223, | |
| "Ġlay": 224, | |
| "able": 225, | |
| "AR": 226, | |
| "ca": 227, | |
| "do": 228, | |
| "way": 229, | |
| "Ġdo": 230, | |
| "ĠIRR": 231, | |
| "ĠPAR": 232, | |
| "SPT": 233, | |
| "Ġdoor": 234, | |
| "ĠPART": 235, | |
| "DE": 236, | |
| "OX": 237, | |
| "PR": 238, | |
| "work": 239, | |
| "Ġre": 240, | |
| "home": 241, | |
| "Ġwh": 242, | |
| "DEM": 243, | |
| "PROX": 244, | |
| "ate": 245, | |
| "no": 246, | |
| "so": 247, | |
| "Ġno": 248, | |
| "rive": 249, | |
| "Ġnot": 250, | |
| "Ġlong": 251, | |
| "Ġlack": 252, | |
| "PN": 253, | |
| "AX": 254, | |
| "EL": 255, | |
| "EV": 256, | |
| "IRR": 257, | |
| "MAN": 258, | |
| "ad": 259, | |
| "ag": 260, | |
| "at": 261, | |
| "con": 262, | |
| "de": 263, | |
| "ike": 264, | |
| "ian": 265, | |
| "ide": 266, | |
| "long": 267, | |
| "like": 268, | |
| "mall": 269, | |
| "os": 270, | |
| "ole": 271, | |
| "pre": 272, | |
| "pouse": 273, | |
| "ros": 274, | |
| "side": 275, | |
| "tin": 276, | |
| "uall": 277, | |
| "year": 278, | |
| "ĠCN": 279, | |
| "Ġho": 280, | |
| "Ġac": 281, | |
| "ĠAX": 282, | |
| "Ġcon": 283, | |
| "Ġyear": 284, | |
| "ĠCan": 285, | |
| "ĠPCNJ": 286, | |
| "Ġgir": 287, | |
| "Ġsmall": 288, | |
| "Ġspouse": 289, | |
| "ĠDM": 290, | |
| "ĠDIS": 291, | |
| "Ġwee": 292, | |
| "Ġpee": 293, | |
| "Ġpole": 294, | |
| "ree": 295, | |
| "rest": 296, | |
| "Ġinside": 297, | |
| "any": 298, | |
| "Ġpick": 299, | |
| "ĠSEL": 300, | |
| "Ġman": 301, | |
| "company": 302, | |
| "acros": 303, | |
| "Ġtrain": 304, | |
| "pare": 305, | |
| "Ġthree": 306, | |
| "heart": 307, | |
| "Ġlie": 308, | |
| "case": 309, | |
| "Ġreturn": 310, | |
| "Ġwhat": 311, | |
| "EVID": 312, | |
| "MANR": 313, | |
| "adian": 314, | |
| "again": 315, | |
| "prepare": 316, | |
| "tinuall": 317, | |
| "ĠCNTR": 318, | |
| "Ġhole": 319, | |
| "Ġaccompany": 320, | |
| "Ġcontinuall": 321, | |
| "ĠCanadian": 322, | |
| "Ġgirl": 323, | |
| "ĠDISTR": 324, | |
| "Ġweek": 325, | |
| "ĠSELF": 326, | |
| "across": 327, | |
| "Ġcontinually": 328, | |
| "ES": 329, | |
| "ak": 330, | |
| "eri": 331, | |
| "epar": 332, | |
| "gh": 333, | |
| "ig": 334, | |
| "ind": 335, | |
| "Ġun": 336, | |
| "ough": 337, | |
| "Ġsepar": 338, | |
| "ĠDES": 339, | |
| "Ġperi": 340, | |
| "Ġhear": 341, | |
| "reak": 342, | |
| "Ġarrive": 343, | |
| "ter": 344, | |
| "Ġfind": 345, | |
| "Ġone": 346, | |
| "meter": 347, | |
| "Ġback": 348, | |
| "Ġbig": 349, | |
| "Ġbreak": 350, | |
| "Ġoh": 351, | |
| "Ġthough": 352, | |
| "Ġunable": 353, | |
| "Ġseparate": 354, | |
| "Ġperimeter": 355, | |
| "Ġthought": 356, | |
| "fir": 357, | |
| "ime": 358, | |
| "lo": 359, | |
| "pain": 360, | |
| "run": 361, | |
| "the": 362, | |
| "time": 363, | |
| "Ġco": 364, | |
| "Ġall": 365, | |
| "ĠSPT": 366, | |
| "Ġrun": 367, | |
| "ĠPREP": 368, | |
| "EPIS": 369, | |
| "Ġfear": 370, | |
| "pen": 371, | |
| "Ġblo": 372, | |
| "ĠSpain": 373, | |
| "Ġopen": 374, | |
| "Ġlive": 375, | |
| "first": 376, | |
| "Ġcome": 377, | |
| "Ġblock": 378, | |
| "eca": 379, | |
| "use": 380, | |
| "Ġbeca": 381, | |
| "Ġbecause": 382, | |
| "AL": 383, | |
| "BM": 384, | |
| "VAL": 385, | |
| "ai": 386, | |
| "as": 387, | |
| "ame": 388, | |
| "ce": 389, | |
| "en": 390, | |
| "ep": 391, | |
| "ff": 392, | |
| "ite": 393, | |
| "ice": 394, | |
| "lac": 395, | |
| "mar": 396, | |
| "name": 397, | |
| "prive": 398, | |
| "rs": 399, | |
| "sit": 400, | |
| "uprive": 401, | |
| "was": 402, | |
| "ĠCAUS": 403, | |
| "ĠIBM": 404, | |
| "Ġsle": 405, | |
| "ĠDen": 406, | |
| "Ġwate": 407, | |
| "Ġplac": 408, | |
| "pers": 409, | |
| "Ġmai": 410, | |
| "ach": 411, | |
| "Ġoff": 412, | |
| "Ġreach": 413, | |
| "Ġwhite": 414, | |
| "mark": 415, | |
| "upriver": 416, | |
| "wash": 417, | |
| "Ġsleep": 418, | |
| "ĠDenmark": 419, | |
| "Ġwater": 420, | |
| "Ġplace": 421, | |
| "person": 422, | |
| "Ġmail": 423, | |
| "Ġoffice": 424 | |
| }, | |
| "merges": [ | |
| "a r", | |
| "n d", | |
| "s t", | |
| "ar t", | |
| "e nd", | |
| "Ġ <", | |
| "st art", | |
| "C N", | |
| "I I", | |
| "Ġ C", | |
| "CN J", | |
| "ĠC CNJ", | |
| "S G", | |
| "Ġ L", | |
| "O C", | |
| "o u", | |
| "Ġ I", | |
| "o n", | |
| "P L", | |
| "Ġ P", | |
| "ĠL OC", | |
| "h o", | |
| "Ġ g", | |
| "Ġ s", | |
| "i n", | |
| "A S", | |
| "Ġg o", | |
| "Ġ N", | |
| "Ġ D", | |
| "Ġ w", | |
| "ĠP R", | |
| "Ġ t", | |
| "ĠI N", | |
| "Ġ 1", | |
| "Ġ p", | |
| "S P", | |
| "M P", | |
| "O MP", | |
| "ĠC OMP", | |
| "Ġ h", | |
| "T R", | |
| "E P", | |
| "l e", | |
| "c k", | |
| "Ġ l", | |
| "O SP", | |
| "ĠPR OSP", | |
| "k e", | |
| "l l", | |
| "s e", | |
| "I D", | |
| "r e", | |
| "Ġ ar", | |
| "V B", | |
| "ĠL VB", | |
| "a y", | |
| "Ġ in", | |
| "ou t", | |
| "C CNJ", | |
| "o r", | |
| "C EP", | |
| "ĠIN CEP", | |
| "E G", | |
| "F OC", | |
| "t e", | |
| "ĠN EG", | |
| "a ke", | |
| "ou nd", | |
| "Ġar ound", | |
| "W ID", | |
| "Ġ f", | |
| "ĠD WID", | |
| "Ġ on", | |
| "P AS", | |
| "m e", | |
| "PAS S", | |
| "a n", | |
| "e e", | |
| "p e", | |
| "E AS", | |
| "R EAS", | |
| "Ġ b", | |
| "Ġ REAS", | |
| "ou se", | |
| "Ġp i", | |
| "Ġ S", | |
| "c o", | |
| "L Z", | |
| "M LZ", | |
| "ĠN MLZ", | |
| "or k", | |
| "i t", | |
| "Ġ m", | |
| "B L", | |
| "F V", | |
| "O BL", | |
| "P FV", | |
| "ĠI PFV", | |
| "A N", | |
| "m p", | |
| "t o", | |
| "Ġw ho", | |
| "Ġh ouse", | |
| "Ġpi pe", | |
| "co mp", | |
| "Ġh it", | |
| "a c", | |
| "a in", | |
| "a ll", | |
| "Ġt r", | |
| "ĠIN S", | |
| "e ar", | |
| "o w", | |
| "o ll", | |
| "Ġt ake", | |
| "Ġf oll", | |
| "Ġfoll ow", | |
| "R R", | |
| "Ġ out", | |
| "A U", | |
| "Ġ OBL", | |
| "AU S", | |
| "r i", | |
| "on g", | |
| "S X", | |
| "a ck", | |
| "l m", | |
| "v e", | |
| "Ġ o", | |
| "ho lm", | |
| "Ġs ee", | |
| "ck holm", | |
| "ĠS to", | |
| "ĠSto ckholm", | |
| "C AUS", | |
| "I P", | |
| "T IP", | |
| "l y", | |
| "o d", | |
| "p ar", | |
| "Ġ comp", | |
| "Ġgo od", | |
| "Ġw ork", | |
| "Ġt h", | |
| "le te", | |
| "AN TIP", | |
| "Ġcomp lete", | |
| "Ġcomplete ly", | |
| "E R", | |
| "V ER", | |
| "Ġ VER", | |
| "Ġs ay", | |
| "D M", | |
| "I S", | |
| "h e", | |
| "i r", | |
| "r n", | |
| "r t", | |
| "t u", | |
| "Ġ 3", | |
| "II I", | |
| "ho rt", | |
| "Ġs hort", | |
| "Ġl i", | |
| "Ġm ake", | |
| "Ġtr y", | |
| "tu rn", | |
| "a b", | |
| "u n", | |
| "Ġl ay", | |
| "ab le", | |
| "A R", | |
| "c a", | |
| "d o", | |
| "w ay", | |
| "Ġ do", | |
| "ĠI RR", | |
| "ĠP AR", | |
| "SP T", | |
| "Ġdo or", | |
| "ĠPAR T", | |
| "D E", | |
| "O X", | |
| "P R", | |
| "w ork", | |
| "Ġ re", | |
| "ho me", | |
| "Ġw h", | |
| "DE M", | |
| "PR OX", | |
| "a te", | |
| "n o", | |
| "s o", | |
| "Ġ no", | |
| "ri ve", | |
| "Ġno t", | |
| "Ġl ong", | |
| "Ġl ack", | |
| "P N", | |
| "A X", | |
| "E L", | |
| "E V", | |
| "I RR", | |
| "M AN", | |
| "a d", | |
| "a g", | |
| "a t", | |
| "c on", | |
| "d e", | |
| "i ke", | |
| "i an", | |
| "i de", | |
| "l ong", | |
| "l ike", | |
| "m all", | |
| "o s", | |
| "o le", | |
| "p re", | |
| "p ouse", | |
| "r os", | |
| "s ide", | |
| "t in", | |
| "u all", | |
| "y ear", | |
| "Ġ CN", | |
| "Ġ ho", | |
| "Ġ ac", | |
| "Ġ AX", | |
| "Ġ con", | |
| "Ġ year", | |
| "ĠC an", | |
| "ĠP CNJ", | |
| "Ġg ir", | |
| "Ġs mall", | |
| "Ġs pouse", | |
| "ĠD M", | |
| "ĠD IS", | |
| "Ġw ee", | |
| "Ġp ee", | |
| "Ġp ole", | |
| "re e", | |
| "re st", | |
| "Ġin side", | |
| "an y", | |
| "Ġpi ck", | |
| "ĠS EL", | |
| "Ġm an", | |
| "comp any", | |
| "ac ros", | |
| "Ġtr ain", | |
| "par e", | |
| "Ġth ree", | |
| "he art", | |
| "Ġli e", | |
| "ca se", | |
| "Ġre turn", | |
| "Ġwh at", | |
| "EV ID", | |
| "MAN R", | |
| "ad ian", | |
| "ag ain", | |
| "pre pare", | |
| "tin uall", | |
| "ĠCN TR", | |
| "Ġho le", | |
| "Ġac company", | |
| "Ġcon tinuall", | |
| "ĠCan adian", | |
| "Ġgir l", | |
| "ĠDIS TR", | |
| "Ġwee k", | |
| "ĠSEL F", | |
| "acros s", | |
| "Ġcontinuall y", | |
| "E S", | |
| "a k", | |
| "e ri", | |
| "e par", | |
| "g h", | |
| "i g", | |
| "i nd", | |
| "Ġ un", | |
| "ou gh", | |
| "Ġs epar", | |
| "ĠD ES", | |
| "Ġp eri", | |
| "Ġh ear", | |
| "re ak", | |
| "Ġar rive", | |
| "te r", | |
| "Ġf ind", | |
| "Ġon e", | |
| "me ter", | |
| "Ġb ack", | |
| "Ġb ig", | |
| "Ġb reak", | |
| "Ġo h", | |
| "Ġth ough", | |
| "Ġun able", | |
| "Ġsepar ate", | |
| "Ġperi meter", | |
| "Ġthough t", | |
| "f ir", | |
| "i me", | |
| "l o", | |
| "p ain", | |
| "r un", | |
| "t he", | |
| "t ime", | |
| "Ġ co", | |
| "Ġ all", | |
| "Ġ SPT", | |
| "Ġ run", | |
| "ĠPR EP", | |
| "EP IS", | |
| "Ġf ear", | |
| "pe n", | |
| "Ġb lo", | |
| "ĠS pain", | |
| "Ġo pen", | |
| "Ġli ve", | |
| "fir st", | |
| "Ġco me", | |
| "Ġblo ck", | |
| "e ca", | |
| "u se", | |
| "Ġb eca", | |
| "Ġbeca use", | |
| "A L", | |
| "B M", | |
| "V AL", | |
| "a i", | |
| "a s", | |
| "a me", | |
| "c e", | |
| "e n", | |
| "e p", | |
| "f f", | |
| "i te", | |
| "i ce", | |
| "l ac", | |
| "m ar", | |
| "n ame", | |
| "p rive", | |
| "r s", | |
| "s it", | |
| "u prive", | |
| "w as", | |
| "ĠC AUS", | |
| "ĠI BM", | |
| "Ġs le", | |
| "ĠD en", | |
| "Ġw ate", | |
| "Ġp lac", | |
| "pe rs", | |
| "Ġm ai", | |
| "ac h", | |
| "Ġo ff", | |
| "Ġre ach", | |
| "Ġwh ite", | |
| "mar k", | |
| "uprive r", | |
| "was h", | |
| "Ġsle ep", | |
| "ĠDen mark", | |
| "Ġwate r", | |
| "Ġplac e", | |
| "pers on", | |
| "Ġmai l", | |
| "Ġoff ice" | |
| ] | |
| } | |
| } |