Mirror from hf-internal-testing/tiny-random-CohereForCausalLM

Browse files

Files changed (7) hide show

config.json +30 -0
generation_config.json +7 -0
model.safetensors +3 -0
onnx/model.onnx +3 -0
special_tokens_map.json +23 -0
tokenizer.json +2026 -0
tokenizer_config.json +112 -0

config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "architectures": [
+    "CohereForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 5,
+  "eos_token_id": 8,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 32,
+  "initializer_range": 0.02,
+  "intermediate_size": 37,
+  "is_decoder": true,
+  "layer_norm_eps": 1e-05,
+  "logit_scale": 0.0625,
+  "max_position_embeddings": 512,
+  "model_type": "cohere",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 4,
+  "pad_token_id": 0,
+  "rope_theta": 10000.0,
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.0.dev0",
+  "type_vocab_size": 16,
+  "use_cache": true,
+  "vocab_size": 1024
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 5,
+  "eos_token_id": 8,
+  "pad_token_id": 0,
+  "transformers_version": "4.39.0.dev0"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c28934e124dc44f6cc66752ebeff3d6d56b70cfac4837b54f86a0643f33c8d52
+size 194488

onnx/model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7fb0d546a95c0ebeadd666a78e4999c46c06ec1d20adc243c547861c35becb7d
+size 276787

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<BOS_TOKEN>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|END_OF_TURN_TOKEN|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<PAD>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,2026 @@

+{
+  "version": "1.0",
+  "truncation": null,
+  "padding": null,
+  "added_tokens": [
+    {
+      "id": 0,
+      "content": "<PAD>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 1,
+      "content": "<UNK>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 2,
+      "content": "<CLS>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 3,
+      "content": "<SEP>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 4,
+      "content": "<MASK_TOKEN>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 5,
+      "content": "<BOS_TOKEN>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 6,
+      "content": "<EOS_TOKEN>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 7,
+      "content": "<EOP_TOKEN>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 8,
+      "content": "<|END_OF_TURN_TOKEN|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
+  "normalizer": {
+    "type": "NFC"
+  },
+  "pre_tokenizer": {
+    "type": "Sequence",
+    "pretokenizers": [
+      {
+        "type": "Digits",
+        "individual_digits": true
+      },
+      {
+        "type": "ByteLevel",
+        "add_prefix_space": false,
+        "trim_offsets": true,
+        "use_regex": true
+      }
+    ]
+  },
+  "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": "<BOS_TOKEN>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "SpecialToken": {
+          "id": "<BOS_TOKEN>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "<BOS_TOKEN>",
+          "type_id": 1
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {
+      "<BOS_TOKEN>": {
+        "id": "<BOS_TOKEN>",
+        "ids": [
+          5
+        ],
+        "tokens": [
+          "<BOS_TOKEN>"
+        ]
+      }
+    }
+  },
+  "decoder": {
+    "type": "ByteLevel",
+    "add_prefix_space": true,
+    "trim_offsets": true,
+    "use_regex": true
+  },
+  "model": {
+    "type": "BPE",
+    "dropout": null,
+    "unk_token": null,
+    "continuing_subword_prefix": null,
+    "end_of_word_suffix": null,
+    "fuse_unk": false,
+    "byte_fallback": false,
+    "ignore_merges": false,
+    "vocab": {
+      "<PAD>": 0,
+      "<UNK>": 1,
+      "<CLS>": 2,
+      "<SEP>": 3,
+      "<MASK_TOKEN>": 4,
+      "<BOS_TOKEN>": 5,
+      "<EOS_TOKEN>": 6,
+      "<EOP_TOKEN>": 7,
+      "<|END_OF_TURN_TOKEN|>": 8,
+      "!": 9,
+      "\"": 10,
+      "#": 11,
+      "$": 12,
+      "%": 13,
+      "&": 14,
+      "'": 15,
+      "(": 16,
+      ")": 17,
+      "*": 18,
+      "+": 19,
+      ",": 20,
+      "-": 21,
+      ".": 22,
+      "/": 23,
+      "0": 24,
+      "1": 25,
+      "2": 26,
+      "3": 27,
+      "4": 28,
+      "5": 29,
+      "6": 30,
+      "7": 31,
+      "8": 32,
+      "9": 33,
+      ":": 34,
+      ";": 35,
+      "<": 36,
+      "=": 37,
+      ">": 38,
+      "?": 39,
+      "@": 40,
+      "A": 41,
+      "B": 42,
+      "C": 43,
+      "D": 44,
+      "E": 45,
+      "F": 46,
+      "G": 47,
+      "H": 48,
+      "I": 49,
+      "J": 50,
+      "K": 51,
+      "L": 52,
+      "M": 53,
+      "N": 54,
+      "O": 55,
+      "P": 56,
+      "Q": 57,
+      "R": 58,
+      "S": 59,
+      "T": 60,
+      "U": 61,
+      "V": 62,
+      "W": 63,
+      "X": 64,
+      "Y": 65,
+      "Z": 66,
+      "[": 67,
+      "\\": 68,
+      "]": 69,
+      "^": 70,
+      "_": 71,
+      "`": 72,
+      "a": 73,
+      "b": 74,
+      "c": 75,
+      "d": 76,
+      "e": 77,
+      "f": 78,
+      "g": 79,
+      "h": 80,
+      "i": 81,
+      "j": 82,
+      "k": 83,
+      "l": 84,
+      "m": 85,
+      "n": 86,
+      "o": 87,
+      "p": 88,
+      "q": 89,
+      "r": 90,
+      "s": 91,
+      "t": 92,
+      "u": 93,
+      "v": 94,
+      "w": 95,
+      "x": 96,
+      "y": 97,
+      "z": 98,
+      "|": 99,
+      "}": 100,
+      "~": 101,
+      "¡": 102,
+      "¢": 103,
+      "£": 104,
+      "¤": 105,
+      "¥": 106,
+      "¦": 107,
+      "§": 108,
+      "¨": 109,
+      "©": 110,
+      "ª": 111,
+      "«": 112,
+      "¬": 113,
+      "®": 114,
+      "¯": 115,
+      "°": 116,
+      "±": 117,
+      "²": 118,
+      "³": 119,
+      "´": 120,
+      "µ": 121,
+      "¶": 122,
+      "·": 123,
+      "¸": 124,
+      "¹": 125,
+      "º": 126,
+      "»": 127,
+      "¼": 128,
+      "½": 129,
+      "¾": 130,
+      "¿": 131,
+      "Â": 132,
+      "Ã": 133,
+      "Ä": 134,
+      "Å": 135,
+      "Æ": 136,
+      "Ç": 137,
+      "È": 138,
+      "É": 139,
+      "Ê": 140,
+      "Ë": 141,
+      "Ì": 142,
+      "Í": 143,
+      "Î": 144,
+      "Ï": 145,
+      "Ð": 146,
+      "Ñ": 147,
+      "Ö": 148,
+      "×": 149,
+      "Ø": 150,
+      "Ù": 151,
+      "Ü": 152,
+      "à": 153,
+      "á": 154,
+      "â": 155,
+      "ã": 156,
+      "ä": 157,
+      "å": 158,
+      "æ": 159,
+      "ç": 160,
+      "è": 161,
+      "é": 162,
+      "ë": 163,
+      "ì": 164,
+      "ï": 165,
+      "Ċ": 166,
+      "Ġ": 167,
+      "Ģ": 168,
+      "ģ": 169,
+      "Ĥ": 170,
+      "ĥ": 171,
+      "Ħ": 172,
+      "ħ": 173,
+      "Ĩ": 174,
+      "ĩ": 175,
+      "Ī": 176,
+      "ī": 177,
+      "Ĭ": 178,
+      "ĭ": 179,
+      "Į": 180,
+      "į": 181,
+      "İ": 182,
+      "ı": 183,
+      "Ĳ": 184,
+      "ĳ": 185,
+      "Ĵ": 186,
+      "ĵ": 187,
+      "Ķ": 188,
+      "ķ": 189,
+      "ĸ": 190,
+      "Ĺ": 191,
+      "ĺ": 192,
+      "Ļ": 193,
+      "ļ": 194,
+      "Ľ": 195,
+      "ľ": 196,
+      "Ŀ": 197,
+      "ŀ": 198,
+      "Ł": 199,
+      "ł": 200,
+      "Ń": 201,
+      "Ġt": 202,
+      "he": 203,
+      "Ġa": 204,
+      "in": 205,
+      "Ġthe": 206,
+      "er": 207,
+      "on": 208,
+      "Ġ,": 209,
+      "re": 210,
+      "Ġs": 211,
+      "ed": 212,
+      "Ġo": 213,
+      "Ġw": 214,
+      "nd": 215,
+      "at": 216,
+      "Ġ.": 217,
+      "or": 218,
+      "it": 219,
+      "Ġc": 220,
+      "en": 221,
+      "Ġf": 222,
+      "is": 223,
+      "es": 224,
+      "ar": 225,
+      "Ġof": 226,
+      "Ġb": 227,
+      "an": 228,
+      "Ġin": 229,
+      "al": 230,
+      "ing": 231,
+      "Ġp": 232,
+      "Ġand": 233,
+      "as": 234,
+      "Ġto": 235,
+      "ro": 236,
+      "ic": 237,
+      "Ġm": 238,
+      "Ġd": 239,
+      "Ġh": 240,
+      "ion": 241,
+      "le": 242,
+      "ou": 243,
+      "ĠT": 244,
+      "Ġre": 245,
+      "Ġ=": 246,
+      "Ġ\"": 247,
+      "ĠA": 248,
+      "ĠS": 249,
+      "ent": 250,
+      "il": 251,
+      "Ġth": 252,
+      "st": 253,
+      "ĠC": 254,
+      "el": 255,
+      "om": 256,
+      "Ġl": 257,
+      "am": 258,
+      "ĠĊ": 259,
+      "Ġe": 260,
+      "Ġn": 261,
+      "Ġ@": 262,
+      "ad": 263,
+      "ac": 264,
+      "Ġwas": 265,
+      "ĠM": 266,
+      "ur": 267,
+      "ĠThe": 268,
+      "ec": 269,
+      "Ġon": 270,
+      "ly": 271,
+      "ĠB": 272,
+      "ĠI": 273,
+      "Ġg": 274,
+      "Ġ'": 275,
+      "et": 276,
+      "ol": 277,
+      "id": 278,
+      "iv": 279,
+      "im": 280,
+      "Ġfor": 281,
+      "ir": 282,
+      "-@": 283,
+      "Ġ@-@": 284,
+      "ig": 285,
+      "ot": 286,
+      "ter": 287,
+      "Ġas": 288,
+      "ĠH": 289,
+      "us": 290,
+      "ow": 291,
+      "Ġst": 292,
+      "ut": 293,
+      "ith": 294,
+      "ay": 295,
+      "ĠP": 296,
+      "ation": 297,
+      "ver": 298,
+      "Ġbe": 299,
+      "her": 300,
+      "Ġthat": 301,
+      "Ġwith": 302,
+      "ĠR": 303,
+      "ce": 304,
+      "th": 305,
+      "ĠD": 306,
+      "Ġis": 307,
+      "un": 308,
+      "em": 309,
+      "ĠF": 310,
+      "Ġwh": 311,
+      "ul": 312,
+      "Ġby": 313,
+      "Ġal": 314,
+      "ch": 315,
+      "Ġ)": 316,
+      "Ġ(": 317,
+      "ĠW": 318,
+      "Ġcon": 319,
+      "ra": 320,
+      "ĠG": 321,
+      "os": 322,
+      "ĠL": 323,
+      "ĠN": 324,
+      "Ġat": 325,
+      "ers": 326,
+      "ct": 327,
+      "Ġit": 328,
+      "rom": 329,
+      "and": 330,
+      "Ġan": 331,
+      "um": 332,
+      "est": 333,
+      "ĠJ": 334,
+      "ag": 335,
+      "Ġhe": 336,
+      "ist": 337,
+      "ain": 338,
+      "od": 339,
+      "av": 340,
+      "ri": 341,
+      "ĠE": 342,
+      "ĠO": 343,
+      "Ġfrom": 344,
+      "Ġcom": 345,
+      "Ġhis": 346,
+      "op": 347,
+      "Ġpro": 348,
+      "res": 349,
+      "ies": 350,
+      "if": 351,
+      "Ġv": 352,
+      "ort": 353,
+      "ere": 354,
+      "ill": 355,
+      "ld": 356,
+      "Ġde": 357,
+      "pp": 358,
+      "Ġsu": 359,
+      "ore": 360,
+      "ĠIn": 361,
+      "Ġr": 362,
+      "Ġse": 363,
+      "Ġwere": 364,
+      "ew": 365,
+      "ong": 366,
+      "igh": 367,
+      "ard": 368,
+      "ate": 369,
+      "all": 370,
+      "art": 371,
+      "ak": 372,
+      "ich": 373,
+      "Ġch": 374,
+      "Ġor": 375,
+      "ab": 376,
+      "ant": 377,
+      "ud": 378,
+      "oc": 379,
+      "ber": 380,
+      "Ġex": 381,
+      "gh": 382,
+      "ity": 383,
+      "ated": 384,
+      "pt": 385,
+      "ess": 386,
+      "ear": 387,
+      "ĠK": 388,
+      "Ġpl": 389,
+      "ame": 390,
+      "qu": 391,
+      "ive": 392,
+      "rou": 393,
+      "Ġare": 394,
+      "Ġâ": 395,
+      "Ġsh": 396,
+      "Ġk": 397,
+      "ack": 398,
+      "ect": 399,
+      "ĠâĢ": 400,
+      "ĠU": 401,
+      "Ġhad": 402,
+      "se": 403,
+      "Ġwhich": 404,
+      "red": 405,
+      "ov": 406,
+      "ĠSt": 407,
+      "ast": 408,
+      "Ġsp": 409,
+      "ian": 410,
+      "Ġy": 411,
+      "ment": 412,
+      "Ġle": 413,
+      "Ġnot": 414,
+      "ge": 415,
+      "ord": 416,
+      "rit": 417,
+      "ip": 418,
+      "ine": 419,
+      "ell": 420,
+      "ally": 421,
+      "our": 422,
+      "ost": 423,
+      "ight": 424,
+      "ther": 425,
+      "ap": 426,
+      "Ġu": 427,
+      "ish": 428,
+      "ĠCh": 429,
+      "oun": 430,
+      "ia": 431,
+      "ave": 432,
+      "ary": 433,
+      "ust": 434,
+      "og": 435,
+      "Ġun": 436,
+      "ous": 437,
+      "irst": 438,
+      "ĠV": 439,
+      "cc": 440,
+      "Ġinc": 441,
+      "Ġ;": 442,
+      "Ġcomp": 443,
+      "ru": 444,
+      "ions": 445,
+      "Ġtheir": 446,
+      "Ġbut": 447,
+      "ide": 448,
+      "ure": 449,
+      "so": 450,
+      "Ġcont": 451,
+      "Ġint": 452,
+      "fter": 453,
+      "ical": 454,
+      "ial": 455,
+      "Ġar": 456,
+      "Ġfirst": 457,
+      "ould": 458,
+      "Ġits": 459,
+      "hed": 460,
+      "ĠâĢĵ": 461,
+      "Ġwhe": 462,
+      "wo": 463,
+      "out": 464,
+      "ub": 465,
+      "ff": 466,
+      "Ġ:": 467,
+      "ue": 468,
+      "Ġher": 469,
+      "own": 470,
+      "ok": 471,
+      "Ġalso": 472,
+      "Ġcl": 473,
+      "per": 474,
+      "ign": 475,
+      "ater": 476,
+      "ran": 477,
+      "orm": 478,
+      "ie": 479,
+      "ome": 480,
+      "ork": 481,
+      "ass": 482,
+      "ire": 483,
+      "end": 484,
+      "Ġres": 485,
+      "Ġab": 486,
+      "Ġad": 487,
+      "Ġus": 488,
+      "ry": 489,
+      "Ġrec": 490,
+      "Ġhave": 491,
+      "age": 492,
+      "ĠHe": 493,
+      "Ġro": 494,
+      "mer": 495,
+      "Ġone": 496,
+      "ond": 497,
+      "low": 498,
+      "Ġhas": 499,
+      "ĠTh": 500,
+      "du": 501,
+      "Ġper": 502,
+      "Ġbeen": 503,
+      "ime": 504,
+      "Ġtwo": 505,
+      "ence": 506,
+      "land": 507,
+      ".@": 508,
+      "Ġ@.@": 509,
+      "ult": 510,
+      "ree": 511,
+      "ough": 512,
+      "ile": 513,
+      "Ġwho": 514,
+      "ĠAl": 515,
+      "Ġsc": 516,
+      "uring": 517,
+      "pl": 518,
+      "ory": 519,
+      "ition": 520,
+      "ric": 521,
+      "ations": 522,
+      "Ġdis": 523,
+      "Ġthis": 524,
+      "Ġbec": 525,
+      "Ġapp": 526,
+      "iz": 527,
+      "ĠIt": 528,
+      "are": 529,
+      "ach": 530,
+      "lud": 531,
+      "ade": 532,
+      "Ġplay": 533,
+      "Ġj": 534,
+      "Ġman": 535,
+      "act": 536,
+      "ely": 537,
+      "Ġpart": 538,
+      "Ġdes": 539,
+      "Ġag": 540,
+      "Ġthey": 541,
+      "Ġyear": 542,
+      "ount": 543,
+      "Ġover": 544,
+      "Ġother": 545,
+      "ound": 546,
+      "Ġafter": 547,
+      "ib": 548,
+      "over": 549,
+      "Ġser": 550,
+      "Ġen": 551,
+      "Ġoff": 552,
+      "Ġim": 553,
+      "ction": 554,
+      "ĠY": 555,
+      "ke": 556,
+      "ite": 557,
+      ",@": 558,
+      "Ġ@,@": 559,
+      "te": 560,
+      "urn": 561,
+      "Ġinclud": 562,
+      "ress": 563,
+      "ance": 564,
+      "ang": 565,
+      "Ġatt": 566,
+      "ice": 567,
+      "ace": 568,
+      "ark": 569,
+      "Ġout": 570,
+      "wn": 571,
+      "ph": 572,
+      "ember": 573,
+      "Ġpre": 574,
+      "Ġup": 575,
+      "ens": 576,
+      "man": 577,
+      "Ġev": 578,
+      "Ġtime": 579,
+      "nder": 580,
+      "rough": 581,
+      "ced": 582,
+      "Ġfin": 583,
+      "Ġinto": 584,
+      "one": 585,
+      "port": 586,
+      "round": 587,
+      "we": 588,
+      "ren": 589,
+      "les": 590,
+      "int": 591,
+      "ĠOn": 592,
+      "vel": 593,
+      "Ġcomm": 594,
+      "Ġshe": 595,
+      "ason": 596,
+      "amp": 597,
+      "Ġte": 598,
+      "Ġwould": 599,
+      "ward": 600,
+      "Ġmore": 601,
+      "ied": 602,
+      "ose": 603,
+      "rib": 604,
+      "ĠUn": 605,
+      "Ġall": 606,
+      "ings": 607,
+      "tern": 608,
+      "ces": 609,
+      "able": 610,
+      "Ġwe": 611,
+      "ited": 612,
+      "ever": 613,
+      "ents": 614,
+      "Ġhim": 615,
+      "ased": 616,
+      "ors": 617,
+      "oy": 618,
+      "ood": 619,
+      "Ġcent": 620,
+      "ix": 621,
+      "ase": 622,
+      "ild": 623,
+      "ĠAn": 624,
+      "Ġwork": 625,
+      "ates": 626,
+      "ious": 627,
+      "ath": 628,
+      "Ġpo": 629,
+      "rop": 630,
+      "old": 631,
+      "als": 632,
+      "iss": 633,
+      "ey": 634,
+      "ict": 635,
+      "Ġfe": 636,
+      "Ġthem": 637,
+      "gan": 638,
+      "Ġsec": 639,
+      "Ġbet": 640,
+      "Ġwhen": 641,
+      "Ġsong": 642,
+      "Ġrem": 643,
+      "ep": 644,
+      "form": 645,
+      "ail": 646,
+      "fer": 647,
+      "Ġear": 648,
+      "ubl": 649,
+      "aw": 650,
+      "Ġkn": 651,
+      "ake": 652,
+      "aus": 653,
+      "Ġmost": 654,
+      "Ġcons": 655,
+      "Ġduring": 656,
+      "ĠAs": 657,
+      "orth": 658,
+      "Ġnew": 659,
+      "ered": 660,
+      "ilm": 661,
+      "ved": 662,
+      "att": 663,
+      "Ġonly": 664,
+      "Ġdec": 665,
+      "ick": 666,
+      "Ġgame": 667,
+      "ons": 668,
+      "ug": 669,
+      "Ġtr": 670,
+      "ft": 671,
+      "oth": 672,
+      "ook": 673,
+      "ĠMar": 674,
+      "reat": 675,
+      "way": 676,
+      "Ġcan": 677,
+      "ollow": 678,
+      "outh": 679,
+      "ween": 680,
+      "ĠEn": 681,
+      "ters": 682,
+      "Ġrel": 683,
+      "ind": 684,
+      "Ġabout": 685,
+      "Ġseason": 686,
+      "Ġagain": 687,
+      "ral": 688,
+      "Ġthree": 689,
+      "ational": 690,
+      "Ġunder": 691,
+      "ular": 692,
+      "Ġme": 693,
+      "Ġthan": 694,
+      "ĠCom": 695,
+      "ĠAr": 696,
+      "hip": 697,
+      "ob": 698,
+      "Ġne": 699,
+      "Ġbetween": 700,
+      "Ġfl": 701,
+      "hn": 702,
+      "ve": 703,
+      "Ġchar": 704,
+      "Ġcol": 705,
+      "Ġrecord": 706,
+      "iew": 707,
+      "ron": 708,
+      "fore": 709,
+      "Ġthrough": 710,
+      "ision": 711,
+      "orn": 712,
+      "ock": 713,
+      "Ġver": 714,
+      "Ġlater": 715,
+      "Ġnum": 716,
+      "Ġend": 717,
+      "olog": 718,
+      "ames": 719,
+      "Ġpos": 720,
+      "Ġwrit": 721,
+      "Ġprodu": 722,
+      "Ġwhile": 723,
+      "Ġact": 724,
+      "Ġrele": 725,
+      "Ġfilm": 726,
+      "ished": 727,
+      "Ġpr": 728,
+      "ans": 729,
+      "Ġreg": 730,
+      "Ġform": 731,
+      "Ġass": 732,
+      "ĠSe": 733,
+      "ury": 734,
+      "ted": 735,
+      "ts": 736,
+      "Ġmade": 737,
+      "Ġsub": 738,
+      "Ġpe": 739,
+      "Ġso": 740,
+      "orld": 741,
+      "Ġret": 742,
+      "ĠNew": 743,
+      "Ġspec": 744,
+      "Ġacc": 745,
+      "Ġqu": 746,
+      "Ġwhere": 747,
+      "ener": 748,
+      "Ġmov": 749,
+      "hes": 750,
+      "meric": 751,
+      "ating": 752,
+      "Ġinter": 753,
+      "ĠLe": 754,
+      "ĠAmeric": 755,
+      "Ġra": 756,
+      "Ġsome": 757,
+      "Ġco": 758,
+      "Ġlar": 759,
+      "Ġbu": 760,
+      "Ġdef": 761,
+      "bum": 762,
+      "Ġac": 763,
+      "Ġmus": 764,
+      "Ġfollow": 765,
+      "ĠAt": 766,
+      "ins": 767,
+      "ived": 768,
+      "ific": 769,
+      "ual": 770,
+      "Ġam": 771,
+      "Ġsuch": 772,
+      "Ġsecond": 773,
+      "ike": 774,
+      "Ġfour": 775,
+      "Ġind": 776,
+      "ann": 777,
+      "hen": 778,
+      "Ġused": 779,
+      "ĠRe": 780,
+      "ics": 781,
+      "lect": 782,
+      "Ġday": 783,
+      "iel": 784,
+      "ily": 785,
+      "ĠThis": 786,
+      "Ġpubl": 787,
+      "Ġcall": 788,
+      "ĠJo": 789,
+      "ll": 790,
+      "Ġalbum": 791,
+      "rans": 792,
+      "Ġdo": 793,
+      "any": 794,
+      "Ġbefore": 795,
+      "ros": 796,
+      "ĠSh": 797,
+      "Ġsy": 798,
+      "aid": 799,
+      "ĠEng": 800,
+      "Ġbeing": 801,
+      "uc": 802,
+      "Ġep": 803,
+      "Ġsupp": 804,
+      "Ġthere": 805,
+      "Ġyears": 806,
+      "ars": 807,
+      "owever": 808,
+      "Ġent": 809,
+      "ife": 810,
+      "Ġhigh": 811,
+      "Ġfound": 812,
+      "ird": 813,
+      "Ġno": 814,
+      "Ġset": 815,
+      "ines": 816,
+      "iver": 817,
+      "io": 818,
+      "other": 819,
+      "ject": 820,
+      "Ġsur": 821,
+      "aj": 822,
+      "ten": 823,
+      "Ġtra": 824,
+      "ised": 825,
+      "ities": 826,
+      "velop": 827,
+      "Ġbl": 828,
+      "ale": 829,
+      "Ġseries": 830,
+      "Ġloc": 831,
+      "Ġnumber": 832,
+      "Ġpres": 833,
+      "ane": 834,
+      "ause": 835,
+      "ode": 836,
+      "ek": 837,
+      "ton": 838,
+      "ĠSc": 839,
+      "ier": 840,
+      "ise": 841,
+      "Ġsever": 842,
+      "ince": 843,
+      "Ġboth": 844,
+      "ank": 845,
+      "row": 846,
+      "irect": 847,
+      "son": 848,
+      "Ġthen": 849,
+      "ĠBrit": 850,
+      "iet": 851,
+      "Ġepis": 852,
+      "Ġincluding": 853,
+      "its": 854,
+      "igin": 855,
+      "pr": 856,
+      "Ġ/": 857,
+      "Ġagainst": 858,
+      "Ġwell": 859,
+      "Ġbecame": 860,
+      "Ġexp": 861,
+      "Ġknown": 862,
+      "Ġtrans": 863,
+      "Ġcharac": 864,
+      "ĠâĢĶ": 865,
+      "ram": 866,
+      "Ġback": 867,
+      "Ġadd": 868,
+      "Ġpop": 869,
+      "Ġgo": 870,
+      "urch": 871,
+      "Ġdesc": 872,
+      "Ġsing": 873,
+      "ield": 874,
+      "Ġperform": 875,
+      "ained": 876,
+      "Ġrece": 877,
+      "ident": 878,
+      "Ġem": 879,
+      "ert": 880,
+      "ures": 881,
+      "Ġinv": 882,
+      "Ġdep": 883,
+      "air": 884,
+      "ern": 885,
+      "ather": 886,
+      "ful": 887,
+      "ĠZ": 888,
+      "Ġmon": 889,
+      "Ġmany": 890,
+      "Ġmain": 891,
+      "Ġstud": 892,
+      "Ġlong": 893,
+      "inn": 894,
+      "though": 895,
+      "up": 896,
+      "ool": 897,
+      "ĠUnited": 898,
+      "led": 899,
+      "ement": 900,
+      "ower": 901,
+      "ĠJohn": 902,
+      "Ġop": 903,
+      "ined": 904,
+      "Ġmet": 905,
+      "ober": 906,
+      "ley": 907,
+      "Ġcentury": 908,
+      "Ġteam": 909,
+      "Ġest": 910,
+      "ĠAfter": 911,
+      "yl": 912,
+      "Ġmin": 913,
+      "uch": 914,
+      "ute": 915,
+      "Ġdevelop": 916,
+      "ĠShe": 917,
+      "iam": 918,
+      "Ġshow": 919,
+      "elf": 920,
+      "Ġrep": 921,
+      "Ġconc": 922,
+      "ative": 923,
+      "Ġcre": 924,
+      "overn": 925,
+      "ared": 926,
+      "Ġorigin": 927,
+      "Ġsm": 928,
+      "ivers": 929,
+      "az": 930,
+      "Ġlead": 931,
+      "Ġseveral": 932,
+      "ah": 933,
+      "Ġob": 934,
+      "Ġrev": 935,
+      "Ġmill": 936,
+      "erm": 937,
+      "ually": 938,
+      "oot": 939,
+      "Ġbegan": 940,
+      "ired": 941,
+      "Ġdif": 942,
+      "Ġcontin": 943,
+      "Ġsign": 944,
+      "ik": 945,
+      "ĠInd": 946,
+      "ments": 947,
+      "ized": 948,
+      "Ġdirect": 949,
+      "au": 950,
+      "Ġext": 951,
+      "ross": 952,
+      "emb": 953,
+      "der": 954,
+      "Ġpol": 955,
+      "Ġmay": 956,
+      "apt": 957,
+      "els": 958,
+      "ĠWh": 959,
+      "Ġcomple": 960,
+      "Ġart": 961,
+      "ĠBr": 962,
+      "ĠIs": 963,
+      "une": 964,
+      "til": 965,
+      "Ġcrit": 966,
+      "Ġhist": 967,
+      "Ġearly": 968,
+      "Ġcould": 969,
+      "ĠCon": 970,
+      "Ġdid": 971,
+      "Ġbel": 972,
+      "Ġcalled": 973,
+      "ued": 974,
+      "Ġnear": 975,
+      "Ġepisode": 976,
+      "yp": 977,
+      "Ġdescrib": 978,
+      "imes": 979,
+      "Ġbro": 980,
+      "roup": 981,
+      "ople": 982,
+      "Ġdown": 983,
+      "Ġrun": 984,
+      "aking": 985,
+      "Ġdisc": 986,
+      "Ġalong": 987,
+      "ccess": 988,
+      "ĠMay": 989,
+      "Ġfeat": 990,
+      "Ġinst": 991,
+      "ĠAmerican": 992,
+      "Ġsaid": 993,
+      "Ġdist": 994,
+      "Ġinf": 995,
+      "Ġimp": 996,
+      "ink": 997,
+      "Ġins": 998,
+      "ty": 999,
+      "Ġthese": 1000,
+      "Ġmil": 1001,
+      "ven": 1002,
+      "ĠGer": 1003,
+      "gest": 1004,
+      "ium": 1005,
+      "enn": 1006,
+      "ĠBl": 1007,
+      "ĠAust": 1008,
+      "Ġresp": 1009,
+      "ull": 1010,
+      "ruct": 1011,
+      "ured": 1012,
+      "Ġthough": 1013,
+      "Ġship": 1014,
+      "ove": 1015,
+      "Ġoper": 1016,
+      "ĠBritish": 1017,
+      "Ã©": 1018,
+      "Ġuntil": 1019,
+      "Ġprev": 1020,
+      "itt": 1021,
+      "amed": 1022,
+      "Ġcar": 1023
+    },
+    "merges": [
+      "Ġ t",
+      "h e",
+      "Ġ a",
+      "i n",
+      "Ġt he",
+      "e r",
+      "o n",
+      "Ġ ,",
+      "r e",
+      "Ġ s",
+      "e d",
+      "Ġ o",
+      "Ġ w",
+      "n d",
+      "a t",
+      "Ġ .",
+      "o r",
+      "i t",
+      "Ġ c",
+      "e n",
+      "Ġ f",
+      "i s",
+      "e s",
+      "a r",
+      "Ġo f",
+      "Ġ b",
+      "a n",
+      "Ġ in",
+      "a l",
+      "in g",
+      "Ġ p",
+      "Ġa nd",
+      "a s",
+      "Ġt o",
+      "r o",
+      "i c",
+      "Ġ m",
+      "Ġ d",
+      "Ġ h",
+      "i on",
+      "l e",
+      "o u",
+      "Ġ T",
+      "Ġ re",
+      "Ġ =",
+      "Ġ \"",
+      "Ġ A",
+      "Ġ S",
+      "en t",
+      "i l",
+      "Ġt h",
+      "s t",
+      "Ġ C",
+      "e l",
+      "o m",
+      "Ġ l",
+      "a m",
+      "Ġ Ċ",
+      "Ġ e",
+      "Ġ n",
+      "Ġ @",
+      "a d",
+      "a c",
+      "Ġw as",
+      "Ġ M",
+      "u r",
+      "ĠT he",
+      "e c",
+      "Ġ on",
+      "l y",
+      "Ġ B",
+      "Ġ I",
+      "Ġ g",
+      "Ġ '",
+      "e t",
+      "o l",
+      "i d",
+      "i v",
+      "i m",
+      "Ġf or",
+      "i r",
+      "- @",
+      "Ġ@ -@",
+      "i g",
+      "o t",
+      "t er",
+      "Ġa s",
+      "Ġ H",
+      "u s",
+      "o w",
+      "Ġs t",
+      "u t",
+      "it h",
+      "a y",
+      "Ġ P",
+      "at ion",
+      "v er",
+      "Ġb e",
+      "he r",
+      "Ġth at",
+      "Ġw ith",
+      "Ġ R",
+      "c e",
+      "t h",
+      "Ġ D",
+      "Ġ is",
+      "u n",
+      "e m",
+      "Ġ F",
+      "Ġw h",
+      "u l",
+      "Ġb y",
+      "Ġa l",
+      "c h",
+      "Ġ )",
+      "Ġ (",
+      "Ġ W",
+      "Ġc on",
+      "r a",
+      "Ġ G",
+      "o s",
+      "Ġ L",
+      "Ġ N",
+      "Ġa t",
+      "er s",
+      "c t",
+      "Ġ it",
+      "ro m",
+      "a nd",
+      "Ġa n",
+      "u m",
+      "es t",
+      "Ġ J",
+      "a g",
+      "Ġ he",
+      "is t",
+      "a in",
+      "o d",
+      "a v",
+      "r i",
+      "Ġ E",
+      "Ġ O",
+      "Ġf rom",
+      "Ġc om",
+      "Ġh is",
+      "o p",
+      "Ġp ro",
+      "re s",
+      "i es",
+      "i f",
+      "Ġ v",
+      "or t",
+      "er e",
+      "il l",
+      "l d",
+      "Ġd e",
+      "p p",
+      "Ġs u",
+      "o re",
+      "ĠI n",
+      "Ġ r",
+      "Ġs e",
+      "Ġw ere",
+      "e w",
+      "on g",
+      "ig h",
+      "ar d",
+      "at e",
+      "al l",
+      "ar t",
+      "a k",
+      "ic h",
+      "Ġc h",
+      "Ġo r",
+      "a b",
+      "an t",
+      "u d",
+      "o c",
+      "b er",
+      "Ġe x",
+      "g h",
+      "it y",
+      "at ed",
+      "p t",
+      "es s",
+      "e ar",
+      "Ġ K",
+      "Ġp l",
+      "am e",
+      "q u",
+      "iv e",
+      "ro u",
+      "Ġa re",
+      "Ġ â",
+      "Ġs h",
+      "Ġ k",
+      "ac k",
+      "ec t",
+      "Ġâ Ģ",
+      "Ġ U",
+      "Ġh ad",
+      "s e",
+      "Ġwh ich",
+      "re d",
+      "o v",
+      "ĠS t",
+      "as t",
+      "Ġs p",
+      "i an",
+      "Ġ y",
+      "m ent",
+      "Ġ le",
+      "Ġn ot",
+      "g e",
+      "or d",
+      "r it",
+      "i p",
+      "in e",
+      "el l",
+      "al ly",
+      "ou r",
+      "o st",
+      "igh t",
+      "t her",
+      "a p",
+      "Ġ u",
+      "is h",
+      "ĠC h",
+      "ou n",
+      "i a",
+      "av e",
+      "ar y",
+      "u st",
+      "o g",
+      "Ġ un",
+      "ou s",
+      "ir st",
+      "Ġ V",
+      "c c",
+      "Ġin c",
+      "Ġ ;",
+      "Ġcom p",
+      "r u",
+      "ion s",
+      "Ġthe ir",
+      "Ġb ut",
+      "id e",
+      "u re",
+      "s o",
+      "Ġcon t",
+      "Ġin t",
+      "f ter",
+      "ic al",
+      "i al",
+      "Ġa r",
+      "Ġf irst",
+      "ou ld",
+      "Ġit s",
+      "he d",
+      "ĠâĢ ĵ",
+      "Ġw he",
+      "w o",
+      "ou t",
+      "u b",
+      "f f",
+      "Ġ :",
+      "u e",
+      "Ġ her",
+      "ow n",
+      "o k",
+      "Ġal so",
+      "Ġc l",
+      "p er",
+      "ig n",
+      "at er",
+      "r an",
+      "or m",
+      "i e",
+      "om e",
+      "or k",
+      "as s",
+      "i re",
+      "e nd",
+      "Ġre s",
+      "Ġa b",
+      "Ġa d",
+      "Ġ us",
+      "r y",
+      "Ġre c",
+      "Ġh ave",
+      "ag e",
+      "ĠH e",
+      "Ġ ro",
+      "m er",
+      "Ġon e",
+      "on d",
+      "l ow",
+      "Ġh as",
+      "ĠT h",
+      "d u",
+      "Ġp er",
+      "Ġbe en",
+      "im e",
+      "Ġt wo",
+      "en ce",
+      "l and",
+      ". @",
+      "Ġ@ .@",
+      "ul t",
+      "re e",
+      "ou gh",
+      "i le",
+      "Ġwh o",
+      "ĠA l",
+      "Ġs c",
+      "ur ing",
+      "p l",
+      "or y",
+      "it ion",
+      "r ic",
+      "ation s",
+      "Ġd is",
+      "Ġth is",
+      "Ġb ec",
+      "Ġa pp",
+      "i z",
+      "ĠI t",
+      "a re",
+      "ac h",
+      "l ud",
+      "ad e",
+      "Ġpl ay",
+      "Ġ j",
+      "Ġm an",
+      "ac t",
+      "el y",
+      "Ġp art",
+      "Ġd es",
+      "Ġa g",
+      "Ġthe y",
+      "Ġy ear",
+      "oun t",
+      "Ġo ver",
+      "Ġo ther",
+      "ou nd",
+      "Ġa fter",
+      "i b",
+      "o ver",
+      "Ġs er",
+      "Ġ en",
+      "Ġof f",
+      "Ġ im",
+      "ct ion",
+      "Ġ Y",
+      "k e",
+      "it e",
+      ", @",
+      "Ġ@ ,@",
+      "t e",
+      "ur n",
+      "Ġinc lud",
+      "res s",
+      "an ce",
+      "an g",
+      "Ġat t",
+      "ic e",
+      "ac e",
+      "ar k",
+      "Ġo ut",
+      "w n",
+      "p h",
+      "em ber",
+      "Ġp re",
+      "Ġu p",
+      "en s",
+      "m an",
+      "Ġe v",
+      "Ġt ime",
+      "nd er",
+      "rou gh",
+      "c ed",
+      "Ġf in",
+      "Ġint o",
+      "on e",
+      "p ort",
+      "rou nd",
+      "w e",
+      "re n",
+      "l es",
+      "in t",
+      "ĠO n",
+      "v el",
+      "Ġcom m",
+      "Ġs he",
+      "as on",
+      "am p",
+      "Ġt e",
+      "Ġw ould",
+      "w ard",
+      "Ġm ore",
+      "i ed",
+      "os e",
+      "ri b",
+      "ĠU n",
+      "Ġal l",
+      "ing s",
+      "ter n",
+      "c es",
+      "ab le",
+      "Ġw e",
+      "it ed",
+      "e ver",
+      "ent s",
+      "Ġh im",
+      "as ed",
+      "or s",
+      "o y",
+      "o od",
+      "Ġc ent",
+      "i x",
+      "as e",
+      "il d",
+      "ĠA n",
+      "Ġw ork",
+      "at es",
+      "i ous",
+      "at h",
+      "Ġp o",
+      "ro p",
+      "ol d",
+      "al s",
+      "is s",
+      "e y",
+      "ic t",
+      "Ġf e",
+      "Ġthe m",
+      "g an",
+      "Ġs ec",
+      "Ġb et",
+      "Ġwhe n",
+      "Ġs ong",
+      "Ġre m",
+      "e p",
+      "f orm",
+      "a il",
+      "f er",
+      "Ġe ar",
+      "ub l",
+      "a w",
+      "Ġk n",
+      "ak e",
+      "a us",
+      "Ġm ost",
+      "Ġcon s",
+      "Ġd uring",
+      "ĠA s",
+      "or th",
+      "Ġn ew",
+      "er ed",
+      "il m",
+      "v ed",
+      "at t",
+      "Ġon ly",
+      "Ġd ec",
+      "ic k",
+      "Ġg ame",
+      "on s",
+      "u g",
+      "Ġt r",
+      "f t",
+      "ot h",
+      "o ok",
+      "ĠM ar",
+      "re at",
+      "w ay",
+      "Ġc an",
+      "ol low",
+      "ou th",
+      "we en",
+      "ĠE n",
+      "ter s",
+      "Ġre l",
+      "in d",
+      "Ġab out",
+      "Ġse ason",
+      "Ġag ain",
+      "r al",
+      "Ġth ree",
+      "ation al",
+      "Ġu nder",
+      "ul ar",
+      "Ġm e",
+      "Ġth an",
+      "ĠC om",
+      "ĠA r",
+      "h ip",
+      "o b",
+      "Ġn e",
+      "Ġbet ween",
+      "Ġf l",
+      "h n",
+      "v e",
+      "Ġch ar",
+      "Ġc ol",
+      "Ġrec ord",
+      "i ew",
+      "r on",
+      "f ore",
+      "Ġth rough",
+      "is ion",
+      "or n",
+      "oc k",
+      "Ġ ver",
+      "Ġl ater",
+      "Ġn um",
+      "Ġe nd",
+      "ol og",
+      "am es",
+      "Ġp os",
+      "Ġw rit",
+      "Ġpro du",
+      "Ġwh ile",
+      "Ġa ct",
+      "Ġre le",
+      "Ġf ilm",
+      "is hed",
+      "Ġp r",
+      "an s",
+      "Ġre g",
+      "Ġfor m",
+      "Ġas s",
+      "ĠS e",
+      "ur y",
+      "t ed",
+      "t s",
+      "Ġm ade",
+      "Ġsu b",
+      "Ġp e",
+      "Ġs o",
+      "or ld",
+      "Ġre t",
+      "ĠN ew",
+      "Ġsp ec",
+      "Ġa cc",
+      "Ġ qu",
+      "Ġwhe re",
+      "en er",
+      "Ġm ov",
+      "he s",
+      "mer ic",
+      "at ing",
+      "Ġin ter",
+      "ĠL e",
+      "ĠA meric",
+      "Ġ ra",
+      "Ġs ome",
+      "Ġc o",
+      "Ġl ar",
+      "Ġb u",
+      "Ġde f",
+      "b um",
+      "Ġa c",
+      "Ġm us",
+      "Ġf ollow",
+      "ĠA t",
+      "in s",
+      "iv ed",
+      "if ic",
+      "u al",
+      "Ġa m",
+      "Ġsu ch",
+      "Ġsec ond",
+      "i ke",
+      "Ġf our",
+      "Ġin d",
+      "an n",
+      "he n",
+      "Ġus ed",
+      "ĠR e",
+      "ic s",
+      "le ct",
+      "Ġd ay",
+      "i el",
+      "il y",
+      "ĠTh is",
+      "Ġp ubl",
+      "Ġc all",
+      "ĠJ o",
+      "l l",
+      "Ġal bum",
+      "ran s",
+      "Ġd o",
+      "an y",
+      "Ġbe fore",
+      "ro s",
+      "ĠS h",
+      "Ġs y",
+      "a id",
+      "ĠEn g",
+      "Ġbe ing",
+      "u c",
+      "Ġe p",
+      "Ġsu pp",
+      "Ġthe re",
+      "Ġyear s",
+      "ar s",
+      "ow ever",
+      "Ġ ent",
+      "if e",
+      "Ġh igh",
+      "Ġf ound",
+      "ir d",
+      "Ġn o",
+      "Ġs et",
+      "in es",
+      "iv er",
+      "i o",
+      "ot her",
+      "j ect",
+      "Ġs ur",
+      "a j",
+      "t en",
+      "Ġt ra",
+      "is ed",
+      "it ies",
+      "vel op",
+      "Ġb l",
+      "al e",
+      "Ġser ies",
+      "Ġl oc",
+      "Ġnum ber",
+      "Ġp res",
+      "an e",
+      "aus e",
+      "od e",
+      "e k",
+      "t on",
+      "ĠS c",
+      "i er",
+      "is e",
+      "Ġse ver",
+      "in ce",
+      "Ġb oth",
+      "an k",
+      "ro w",
+      "ire ct",
+      "s on",
+      "Ġthe n",
+      "ĠB rit",
+      "i et",
+      "Ġep is",
+      "Ġinclud ing",
+      "it s",
+      "ig in",
+      "p r",
+      "Ġ /",
+      "Ġagain st",
+      "Ġw ell",
+      "Ġbec ame",
+      "Ġex p",
+      "Ġkn own",
+      "Ġt rans",
+      "Ġchar ac",
+      "ĠâĢ Ķ",
+      "r am",
+      "Ġb ack",
+      "Ġad d",
+      "Ġp op",
+      "Ġg o",
+      "ur ch",
+      "Ġdes c",
+      "Ġs ing",
+      "iel d",
+      "Ġper form",
+      "ain ed",
+      "Ġre ce",
+      "id ent",
+      "Ġe m",
+      "er t",
+      "u res",
+      "Ġin v",
+      "Ġde p",
+      "a ir",
+      "er n",
+      "at her",
+      "f ul",
+      "Ġ Z",
+      "Ġm on",
+      "Ġman y",
+      "Ġm ain",
+      "Ġst ud",
+      "Ġl ong",
+      "in n",
+      "th ough",
+      "u p",
+      "o ol",
+      "ĠUn ited",
+      "l ed",
+      "em ent",
+      "ow er",
+      "ĠJo hn",
+      "Ġo p",
+      "in ed",
+      "Ġm et",
+      "o ber",
+      "le y",
+      "Ġcent ury",
+      "Ġte am",
+      "Ġ est",
+      "ĠA fter",
+      "y l",
+      "Ġm in",
+      "u ch",
+      "ut e",
+      "Ġde velop",
+      "ĠS he",
+      "i am",
+      "Ġsh ow",
+      "el f",
+      "Ġre p",
+      "Ġcon c",
+      "at ive",
+      "Ġc re",
+      "over n",
+      "a red",
+      "Ġor igin",
+      "Ġs m",
+      "iv ers",
+      "a z",
+      "Ġle ad",
+      "Ġsever al",
+      "a h",
+      "Ġo b",
+      "Ġre v",
+      "Ġm ill",
+      "er m",
+      "u ally",
+      "o ot",
+      "Ġbe gan",
+      "i red",
+      "Ġd if",
+      "Ġcont in",
+      "Ġs ign",
+      "i k",
+      "ĠI nd",
+      "ment s",
+      "iz ed",
+      "Ġd irect",
+      "a u",
+      "Ġex t",
+      "ros s",
+      "em b",
+      "d er",
+      "Ġp ol",
+      "Ġm ay",
+      "a pt",
+      "el s",
+      "ĠW h",
+      "Ġcomp le",
+      "Ġar t",
+      "ĠB r",
+      "ĠI s",
+      "un e",
+      "t il",
+      "Ġc rit",
+      "Ġh ist",
+      "Ġear ly",
+      "Ġc ould",
+      "ĠC on",
+      "Ġd id",
+      "Ġb el",
+      "Ġcall ed",
+      "u ed",
+      "Ġn ear",
+      "Ġepis ode",
+      "y p",
+      "Ġdesc rib",
+      "im es",
+      "Ġb ro",
+      "rou p",
+      "op le",
+      "Ġd own",
+      "Ġr un",
+      "ak ing",
+      "Ġdis c",
+      "Ġal ong",
+      "cc ess",
+      "ĠM ay",
+      "Ġfe at",
+      "Ġin st",
+      "ĠAmeric an",
+      "Ġs aid",
+      "Ġd ist",
+      "Ġin f",
+      "Ġim p",
+      "in k",
+      "Ġin s",
+      "t y",
+      "Ġthe se",
+      "Ġm il",
+      "v en",
+      "ĠG er",
+      "g est",
+      "i um",
+      "en n",
+      "ĠB l",
+      "ĠA ust",
+      "Ġres p",
+      "ul l",
+      "ru ct",
+      "u red",
+      "Ġth ough",
+      "Ġsh ip",
+      "ov e",
+      "Ġo per",
+      "ĠBrit ish",
+      "Ã ©",
+      "Ġun til",
+      "Ġpre v",
+      "it t",
+      "am ed",
+      "Ġc ar"
+    ]
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<PAD>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<UNK>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<CLS>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<SEP>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<MASK_TOKEN>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<BOS_TOKEN>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<EOS_TOKEN>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<EOP_TOKEN>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<|END_OF_TURN_TOKEN|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "auto_map": {
+    "AutoTokenizer": [
+      null,
+      "CohereForAI/c4ai-command-r-v01--tokenization_cohere_fast.CohereTokenizerFast"
+    ]
+  },
+  "bos_token": "<BOS_TOKEN>",
+  "chat_template": [
+    {
+      "name": "default",
+      "template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}"
+    },
+    {
+      "name": "tool_use",
+      "template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = '## Task and Context\\nYou help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user\\'s needs as best you can, which will be wide-ranging.\\n\\n## Style Guide\\nUnless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.' %}{% endif %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' }}{{ '# Safety Preamble' }}{{ '\nThe instructions in this section override those in the task description and style guide sections. Don\\'t answer questions that are harmful or immoral.' }}{{ '\n\n# System Preamble' }}{{ '\n## Basic Rules' }}{{ '\nYou are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user\\'s requests, you cite your sources in your answers, according to those instructions.' }}{{ '\n\n# User Preamble' }}{{ '\n' + system_message }}{{'\n\n## Available Tools\nHere is a list of tools that you have available to you:\n\n'}}{% for tool in tools %}{% if loop.index0 != 0 %}{{ '\n\n'}}{% endif %}{{'```python\ndef ' + tool.name + '('}}{% for param_name, param_fields in tool.parameter_definitions.items() %}{% if loop.index0 != 0 %}{{ ', '}}{% endif %}{{param_name}}: {% if not param_fields.required %}{{'Optional[' + param_fields.type + '] = None'}}{% else %}{{ param_fields.type }}{% endif %}{% endfor %}{{ ') -> List[Dict]:\n    \"\"\"'}}{{ tool.description }}{% if tool.parameter_definitions|length != 0 %}{{ '\n\n    Args:\n        '}}{% for param_name, param_fields in tool.parameter_definitions.items() %}{% if loop.index0 != 0 %}{{ '\n        ' }}{% endif %}{{ param_name + ' ('}}{% if not param_fields.required %}{{'Optional[' + param_fields.type + ']'}}{% else %}{{ param_fields.type }}{% endif %}{{ '): ' + param_fields.description }}{% endfor %}{% endif %}{{ '\n    \"\"\"\n    pass\n```' }}{% endfor %}{{ '<|END_OF_TURN_TOKEN|>'}}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'system' %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{{'<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write \\'Action:\\' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user\\'s last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example:\n```json\n[\n    {\n        \"tool_name\": title of the tool in the specification,\n        \"parameters\": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters\n    }\n]```<|END_OF_TURN_TOKEN|>'}}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}"
+    },
+    {
+      "name": "rag",
+      "template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = '## Task and Context\\nYou help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user\\'s needs as best you can, which will be wide-ranging.\\n\\n## Style Guide\\nUnless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.' %}{% endif %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' }}{{ '# Safety Preamble' }}{{ '\nThe instructions in this section override those in the task description and style guide sections. Don\\'t answer questions that are harmful or immoral.' }}{{ '\n\n# System Preamble' }}{{ '\n## Basic Rules' }}{{ '\nYou are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user\\'s requests, you cite your sources in your answers, according to those instructions.' }}{{ '\n\n# User Preamble' }}{{ '\n' + system_message }}{{ '<|END_OF_TURN_TOKEN|>'}}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'system' %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>'}}{{ '<results>' }}{% for document in documents %}{{ '\nDocument: ' }}{{ loop.index0 }}\n{% for key, value in document.items() %}{{ key }}: {{value}}\n{% endfor %}{% endfor %}{{ '</results>'}}{{ '<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' }}{{ 'Carefully perform the following instructions, in order, starting each with a new line.\n' }}{{ 'Firstly, Decide which of the retrieved documents are relevant to the user\\'s last input by writing \\'Relevant Documents:\\' followed by comma-separated list of document numbers. If none are relevant, you should instead write \\'None\\'.\n' }}{{ 'Secondly, Decide which of the retrieved documents contain facts that should be cited in a good answer to the user\\'s last input by writing \\'Cited Documents:\\' followed a comma-separated list of document numbers. If you dont want to cite any of them, you should instead write \\'None\\'.\n' }}{% if citation_mode=='accurate' %}{{ 'Thirdly, Write \\'Answer:\\' followed by a response to the user\\'s last input in high quality natural english. Use the retrieved documents to help you. Do not insert any citations or grounding markup.\n' }}{% endif %}{{ 'Finally, Write \\'Grounded answer:\\' followed by a response to the user\\'s last input in high quality natural english. Use the symbols <co: doc> and </co: doc> to indicate when a fact comes from a document in the search result, e.g <co: 0>my fact</co: 0> for a fact from document 0.' }}{{ '<|END_OF_TURN_TOKEN|>' }}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}"
+    }
+  ],
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|END_OF_TURN_TOKEN|>",
+  "legacy": true,
+  "merges_file": null,
+  "model_max_length": 512,
+  "pad_token": "<PAD>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "CohereTokenizer",
+  "unk_token": null,
+  "use_default_system_prompt": false,
+  "vocab_file": null
+}