hindi_chat_gpt / tokenizer.json
Ssid7647's picture
Upload 10 files
f4c6ad2 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<\\s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "<pad>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "<UNK>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 329,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 330,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": false
},
"post_processor": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<UNK>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"<s>": 0,
"<\\s>": 1,
"<pad>": 2,
"<UNK>": 3,
"!": 4,
"%": 5,
"&": 6,
"(": 7,
")": 8,
"*": 9,
"+": 10,
",": 11,
"-": 12,
".": 13,
"/": 14,
"0": 15,
"1": 16,
"2": 17,
"3": 18,
"4": 19,
"5": 20,
"6": 21,
"7": 22,
"8": 23,
"9": 24,
":": 25,
";": 26,
"<": 27,
"=": 28,
">": 29,
"?": 30,
"A": 31,
"B": 32,
"C": 33,
"D": 34,
"E": 35,
"F": 36,
"G": 37,
"H": 38,
"I": 39,
"J": 40,
"K": 41,
"L": 42,
"M": 43,
"N": 44,
"O": 45,
"P": 46,
"Q": 47,
"R": 48,
"S": 49,
"T": 50,
"U": 51,
"V": 52,
"W": 53,
"X": 54,
"Y": 55,
"Z": 56,
"[": 57,
"\\": 58,
"]": 59,
"_": 60,
"`": 61,
"a": 62,
"b": 63,
"c": 64,
"d": 65,
"e": 66,
"f": 67,
"g": 68,
"h": 69,
"i": 70,
"j": 71,
"k": 72,
"l": 73,
"m": 74,
"n": 75,
"o": 76,
"p": 77,
"q": 78,
"r": 79,
"s": 80,
"t": 81,
"u": 82,
"v": 83,
"w": 84,
"x": 85,
"y": 86,
"z": 87,
"|": 88,
"~": 89,
"¡": 90,
"¢": 91,
"£": 92,
"¤": 93,
"¥": 94,
"¦": 95,
"§": 96,
"¨": 97,
"©": 98,
"ª": 99,
"«": 100,
"¬": 101,
"®": 102,
"¯": 103,
"°": 104,
"±": 105,
"²": 106,
"³": 107,
"µ": 108,
"¶": 109,
"·": 110,
"¸": 111,
"¹": 112,
"¼": 113,
"½": 114,
"¾": 115,
"¿": 116,
"Â": 117,
"Ã": 118,
"Ë": 119,
"à": 120,
"â": 121,
"Ġ": 122,
"Ģ": 123,
"ģ": 124,
"Ĥ": 125,
"ĥ": 126,
"Ħ": 127,
"ħ": 128,
"Ĩ": 129,
"ĩ": 130,
"Ī": 131,
"ī": 132,
"Ĭ": 133,
"ĭ": 134,
"Į": 135,
"į": 136,
"ı": 137,
"IJ": 138,
"ij": 139,
"Ĵ": 140,
"ĵ": 141,
"Ķ": 142,
"ķ": 143,
"ĸ": 144,
"Ĺ": 145,
"ĺ": 146,
"Ļ": 147,
"ļ": 148,
"Ľ": 149,
"ľ": 150,
"Ŀ": 151,
"ŀ": 152,
"Ł": 153,
"ł": 154,
"Ń": 155,
"à¤": 156,
"à¥": 157,
"à¦": 158,
"à¨": 159,
"âĢ": 160,
"à§": 161,
"à©": 162,
"s>": 163,
"<\\": 164,
"<p": 165,
"ad": 166,
"°": 167,
"²": 168,
"á": 169,
"è": 170,
"é": 171,
"÷": 172,
"ÃĹ": 173,
"Ëļ": 174,
"âĪ": 175,
"ड": 176,
"ढ": 177,
"ण": 178,
"त": 179,
"थ": 180,
"द": 181,
"ध": 182,
"न": 183,
"प": 184,
"फ": 185,
"ब": 186,
"म": 187,
"य": 188,
"र": 189,
"ल": 190,
"ळ": 191,
"व": 192,
"श": 193,
"ष": 194,
"स": 195,
"ह": 196,
"़": 197,
"ऽ": 198,
"ा": 199,
"ि": 200,
"à¤ģ": 201,
"à¤Ĥ": 202,
"à¤ĥ": 203,
"à¤ħ": 204,
"à¤Ĩ": 205,
"à¤ĩ": 206,
"à¤Ī": 207,
"à¤ī": 208,
"à¤Ĭ": 209,
"à¤ĭ": 210,
"à¤į": 211,
"à¤ı": 212,
"à¤IJ": 213,
"à¤ij": 214,
"à¤ĵ": 215,
"à¤Ķ": 216,
"à¤ķ": 217,
"à¤ĸ": 218,
"à¤Ĺ": 219,
"à¤ĺ": 220,
"à¤Ļ": 221,
"à¤ļ": 222,
"à¤Ľ": 223,
"à¤ľ": 224,
"à¤Ŀ": 225,
"à¤ŀ": 226,
"à¤Ł": 227,
"à¤ł": 228,
"à¤Ń": 229,
"।": 230,
"॥": 231,
"०": 232,
"१": 233,
"२": 234,
"३": 235,
"४": 236,
"५": 237,
"६": 238,
"८": 239,
"९": 240,
"à¥Ģ": 241,
"à¥ģ": 242,
"à¥Ĥ": 243,
"à¥ĥ": 244,
"à¥Ħ": 245,
"à¥ħ": 246,
"à¥ĩ": 247,
"à¥Ī": 248,
"à¥ī": 249,
"à¥Ĭ": 250,
"à¥ĭ": 251,
"à¥Į": 252,
"à¥į": 253,
"à¥IJ": 254,
"à¥ĺ": 255,
"à¥Ļ": 256,
"à¥ļ": 257,
"à¥Ľ": 258,
"à¥ľ": 259,
"à¥Ŀ": 260,
"à¥ŀ": 261,
"à¥Ł": 262,
"à¥ł": 263,
"à¥Ń": 264,
"ত": 265,
"থ": 266,
"ন": 267,
"প": 268,
"ব": 269,
"ম": 270,
"য": 271,
"ল": 272,
"স": 273,
"হ": 274,
"়": 275,
"া": 276,
"ি": 277,
"à¦Ĩ": 278,
"à¦ĩ": 279,
"à¦ı": 280,
"à¦ķ": 281,
"à¦Ĺ": 282,
"à¦ļ": 283,
"à¦Ľ": 284,
"à¦ľ": 285,
"à¦ł": 286,
"ਤ": 287,
"ਦ": 288,
"ਨ": 289,
"ਪ": 290,
"ਬ": 291,
"ਮ": 292,
"ਲ": 293,
"ਵ": 294,
"ਸ਼": 295,
"ਸ": 296,
"ਹ": 297,
"਼": 298,
"ਾ": 299,
"ਿ": 300,
"à¨ħ": 301,
"à¨Ĩ": 302,
"à¨ī": 303,
"à¨ķ": 304,
"à¨ĸ": 305,
"à¨ļ": 306,
"â̦": 307,
"âĢĮ": 308,
"âĢį": 309,
"âĢĵ": 310,
"âĢĺ": 311,
"âĢĻ": 312,
"âĢľ": 313,
"âĢĿ": 314,
"à§°": 315,
"à§±": 316,
"à§Ī": 317,
"à§ĭ": 318,
"à§į": 319,
"à§Ł": 320,
"à©°": 321,
"ੱ": 322,
"à©ģ": 323,
"à©Ĥ": 324,
"à©ĩ": 325,
"à©Ī": 326,
"<pad": 327,
"âĪĴ": 328
},
"merges": [
"à ¤",
"à ¥",
"à ¦",
"à ¨",
"â Ģ",
"à §",
"à ©",
"s >",
"< \\",
"< p",
"< s>",
"a d",
"Â °",
"Â ²",
"Ã ¡",
"Ã ¨",
"Ã ©",
"Ã ·",
"Ã Ĺ",
"Ë ļ",
"â Ī",
"ठ¡",
"ठ¢",
"ठ£",
"ठ¤",
"ठ¥",
"ठ¦",
"ठ§",
"ठ¨",
"ठª",
"ठ«",
"ठ¬",
"ठ®",
"ठ¯",
"ठ°",
"ठ²",
"ठ³",
"ठµ",
"ठ¶",
"ठ·",
"ठ¸",
"ठ¹",
"ठ¼",
"ठ½",
"ठ¾",
"ठ¿",
"ठģ",
"ठĤ",
"ठĥ",
"ठħ",
"ठĨ",
"ठĩ",
"ठĪ",
"ठī",
"ठĬ",
"ठĭ",
"ठį",
"ठı",
"ठIJ",
"ठij",
"ठĵ",
"ठĶ",
"ठķ",
"ठĸ",
"ठĹ",
"ठĺ",
"ठĻ",
"ठļ",
"ठĽ",
"ठľ",
"ठĿ",
"ठŀ",
"ठŁ",
"ठł",
"ठŃ",
"ॠ¤",
"ॠ¥",
"ॠ¦",
"ॠ§",
"ॠ¨",
"ॠ©",
"ॠª",
"ॠ«",
"ॠ¬",
"ॠ®",
"ॠ¯",
"ॠĢ",
"ॠģ",
"ॠĤ",
"ॠĥ",
"ॠĦ",
"ॠħ",
"ॠĩ",
"ॠĪ",
"ॠī",
"ॠĬ",
"ॠĭ",
"ॠĮ",
"ॠį",
"ॠIJ",
"ॠĺ",
"ॠĻ",
"ॠļ",
"ॠĽ",
"ॠľ",
"ॠĿ",
"ॠŀ",
"ॠŁ",
"ॠł",
"ॠŃ",
"ঠ¤",
"ঠ¥",
"ঠ¨",
"ঠª",
"ঠ¬",
"ঠ®",
"ঠ¯",
"ঠ²",
"ঠ¸",
"ঠ¹",
"ঠ¼",
"ঠ¾",
"ঠ¿",
"ঠĨ",
"ঠĩ",
"ঠı",
"ঠķ",
"ঠĹ",
"ঠļ",
"ঠĽ",
"ঠľ",
"ঠł",
"ਠ¤",
"ਠ¦",
"ਠ¨",
"ਠª",
"ਠ¬",
"ਠ®",
"ਠ²",
"ਠµ",
"ਠ¶",
"ਠ¸",
"ਠ¹",
"ਠ¼",
"ਠ¾",
"ਠ¿",
"ਠħ",
"ਠĨ",
"ਠī",
"ਠķ",
"ਠĸ",
"ਠļ",
"âĢ ¦",
"âĢ Į",
"âĢ į",
"âĢ ĵ",
"âĢ ĺ",
"âĢ Ļ",
"âĢ ľ",
"âĢ Ŀ",
"à§ °",
"à§ ±",
"à§ Ī",
"à§ ĭ",
"à§ į",
"à§ Ł",
"à© °",
"à© ±",
"à© ģ",
"à© Ĥ",
"à© ĩ",
"à© Ī",
"<\\ s>",
"<p ad",
"âĪ Ĵ",
"<pad >"
]
}
}