ULFBERTO commited on
Commit
f854063
·
verified ·
1 Parent(s): 0a98466

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +476 -0
tokenizer.json ADDED
@@ -0,0 +1,476 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "OxideLLM_TK_SSM_V1",
3
+ "iteration": 1200,
4
+ "vocab_size": 228,
5
+ "char2idx": {
6
+ "<|pad|>": 0,
7
+ "<|user|>": 1,
8
+ "<|assistant|>": 2,
9
+ "<|end|>": 3,
10
+ "\n": 4,
11
+ " ": 5,
12
+ "!": 6,
13
+ "\"": 7,
14
+ "#": 8,
15
+ "$": 9,
16
+ "%": 10,
17
+ "&": 11,
18
+ "'": 12,
19
+ "(": 13,
20
+ ")": 14,
21
+ "*": 15,
22
+ "+": 16,
23
+ ",": 17,
24
+ "-": 18,
25
+ ".": 19,
26
+ "/": 20,
27
+ "0": 21,
28
+ "1": 22,
29
+ "2": 23,
30
+ "3": 24,
31
+ "4": 25,
32
+ "5": 26,
33
+ "6": 27,
34
+ "7": 28,
35
+ "8": 29,
36
+ "9": 30,
37
+ ":": 31,
38
+ ";": 32,
39
+ "<": 33,
40
+ "=": 34,
41
+ ">": 35,
42
+ "?": 36,
43
+ "A": 37,
44
+ "B": 38,
45
+ "C": 39,
46
+ "D": 40,
47
+ "E": 41,
48
+ "F": 42,
49
+ "G": 43,
50
+ "H": 44,
51
+ "I": 45,
52
+ "J": 46,
53
+ "K": 47,
54
+ "L": 48,
55
+ "M": 49,
56
+ "N": 50,
57
+ "O": 51,
58
+ "P": 52,
59
+ "Q": 53,
60
+ "R": 54,
61
+ "S": 55,
62
+ "T": 56,
63
+ "U": 57,
64
+ "V": 58,
65
+ "W": 59,
66
+ "X": 60,
67
+ "Y": 61,
68
+ "Z": 62,
69
+ "[": 63,
70
+ "]": 64,
71
+ "^": 65,
72
+ "_": 66,
73
+ "a": 67,
74
+ "b": 68,
75
+ "c": 69,
76
+ "d": 70,
77
+ "e": 71,
78
+ "f": 72,
79
+ "g": 73,
80
+ "h": 74,
81
+ "i": 75,
82
+ "j": 76,
83
+ "k": 77,
84
+ "l": 78,
85
+ "m": 79,
86
+ "n": 80,
87
+ "o": 81,
88
+ "p": 82,
89
+ "q": 83,
90
+ "r": 84,
91
+ "s": 85,
92
+ "t": 86,
93
+ "u": 87,
94
+ "v": 88,
95
+ "w": 89,
96
+ "x": 90,
97
+ "y": 91,
98
+ "z": 92,
99
+ "|": 93,
100
+ "}": 94,
101
+ "¡": 95,
102
+ "£": 96,
103
+ "§": 97,
104
+ "©": 98,
105
+ "ª": 99,
106
+ "«": 100,
107
+ "°": 101,
108
+ "´": 102,
109
+ "·": 103,
110
+ "º": 104,
111
+ "»": 105,
112
+ "½": 106,
113
+ "¾": 107,
114
+ "¿": 108,
115
+ "Á": 109,
116
+ "Â": 110,
117
+ "Æ": 111,
118
+ "Ç": 112,
119
+ "È": 113,
120
+ "É": 114,
121
+ "Ê": 115,
122
+ "Í": 116,
123
+ "Î": 117,
124
+ "Ñ": 118,
125
+ "Ó": 119,
126
+ "Ú": 120,
127
+ "Ü": 121,
128
+ "à": 122,
129
+ "á": 123,
130
+ "â": 124,
131
+ "ä": 125,
132
+ "æ": 126,
133
+ "ç": 127,
134
+ "è": 128,
135
+ "é": 129,
136
+ "ê": 130,
137
+ "ë": 131,
138
+ "ì": 132,
139
+ "í": 133,
140
+ "î": 134,
141
+ "ï": 135,
142
+ "ñ": 136,
143
+ "ò": 137,
144
+ "ó": 138,
145
+ "ô": 139,
146
+ "ö": 140,
147
+ "ù": 141,
148
+ "ú": 142,
149
+ "û": 143,
150
+ "ü": 144,
151
+ "ā": 145,
152
+ "ē": 146,
153
+ "ě": 147,
154
+ "ī": 148,
155
+ "ō": 149,
156
+ "Œ": 150,
157
+ "œ": 151,
158
+ "̃": 152,
159
+ "Δ": 153,
160
+ "Ο": 154,
161
+ "Π": 155,
162
+ "Τ": 156,
163
+ "Φ": 157,
164
+ "ά": 158,
165
+ "έ": 159,
166
+ "ί": 160,
167
+ "α": 161,
168
+ "β": 162,
169
+ "γ": 163,
170
+ "δ": 164,
171
+ "ε": 165,
172
+ "η": 166,
173
+ "θ": 167,
174
+ "ι": 168,
175
+ "κ": 169,
176
+ "λ": 170,
177
+ "μ": 171,
178
+ "ν": 172,
179
+ "ξ": 173,
180
+ "ο": 174,
181
+ "π": 175,
182
+ "ρ": 176,
183
+ "ς": 177,
184
+ "σ": 178,
185
+ "τ": 179,
186
+ "υ": 180,
187
+ "χ": 181,
188
+ "ω": 182,
189
+ "ό": 183,
190
+ "ύ": 184,
191
+ "ώ": 185,
192
+ "ἀ": 186,
193
+ "ἄ": 187,
194
+ "ἐ": 188,
195
+ "ἔ": 189,
196
+ "ἠ": 190,
197
+ "ἡ": 191,
198
+ "ἤ": 192,
199
+ "ἦ": 193,
200
+ "ἰ": 194,
201
+ "ἵ": 195,
202
+ "ἶ": 196,
203
+ "ὁ": 197,
204
+ "ὄ": 198,
205
+ "ὅ": 199,
206
+ "ὐ": 200,
207
+ "ὑ": 201,
208
+ "ὰ": 202,
209
+ "ὲ": 203,
210
+ "ὴ": 204,
211
+ "ὶ": 205,
212
+ "ὸ": 206,
213
+ "ὺ": 207,
214
+ "ᾶ": 208,
215
+ "᾽": 209,
216
+ "ῆ": 210,
217
+ "ῖ": 211,
218
+ "ῦ": 212,
219
+ "ῶ": 213,
220
+ " ": 214,
221
+ "–": 215,
222
+ "—": 216,
223
+ "‘": 217,
224
+ "’": 218,
225
+ "“": 219,
226
+ "”": 220,
227
+ "•": 221,
228
+ "′": 222,
229
+ "™": 223,
230
+ "⅓": 224,
231
+ "○": 225,
232
+ "●": 226,
233
+ "": 227
234
+ },
235
+ "idx2char": {
236
+ "0": "<|pad|>",
237
+ "1": "<|user|>",
238
+ "2": "<|assistant|>",
239
+ "3": "<|end|>",
240
+ "4": "\n",
241
+ "5": " ",
242
+ "6": "!",
243
+ "7": "\"",
244
+ "8": "#",
245
+ "9": "$",
246
+ "10": "%",
247
+ "11": "&",
248
+ "12": "'",
249
+ "13": "(",
250
+ "14": ")",
251
+ "15": "*",
252
+ "16": "+",
253
+ "17": ",",
254
+ "18": "-",
255
+ "19": ".",
256
+ "20": "/",
257
+ "21": "0",
258
+ "22": "1",
259
+ "23": "2",
260
+ "24": "3",
261
+ "25": "4",
262
+ "26": "5",
263
+ "27": "6",
264
+ "28": "7",
265
+ "29": "8",
266
+ "30": "9",
267
+ "31": ":",
268
+ "32": ";",
269
+ "33": "<",
270
+ "34": "=",
271
+ "35": ">",
272
+ "36": "?",
273
+ "37": "A",
274
+ "38": "B",
275
+ "39": "C",
276
+ "40": "D",
277
+ "41": "E",
278
+ "42": "F",
279
+ "43": "G",
280
+ "44": "H",
281
+ "45": "I",
282
+ "46": "J",
283
+ "47": "K",
284
+ "48": "L",
285
+ "49": "M",
286
+ "50": "N",
287
+ "51": "O",
288
+ "52": "P",
289
+ "53": "Q",
290
+ "54": "R",
291
+ "55": "S",
292
+ "56": "T",
293
+ "57": "U",
294
+ "58": "V",
295
+ "59": "W",
296
+ "60": "X",
297
+ "61": "Y",
298
+ "62": "Z",
299
+ "63": "[",
300
+ "64": "]",
301
+ "65": "^",
302
+ "66": "_",
303
+ "67": "a",
304
+ "68": "b",
305
+ "69": "c",
306
+ "70": "d",
307
+ "71": "e",
308
+ "72": "f",
309
+ "73": "g",
310
+ "74": "h",
311
+ "75": "i",
312
+ "76": "j",
313
+ "77": "k",
314
+ "78": "l",
315
+ "79": "m",
316
+ "80": "n",
317
+ "81": "o",
318
+ "82": "p",
319
+ "83": "q",
320
+ "84": "r",
321
+ "85": "s",
322
+ "86": "t",
323
+ "87": "u",
324
+ "88": "v",
325
+ "89": "w",
326
+ "90": "x",
327
+ "91": "y",
328
+ "92": "z",
329
+ "93": "|",
330
+ "94": "}",
331
+ "95": "¡",
332
+ "96": "£",
333
+ "97": "§",
334
+ "98": "©",
335
+ "99": "ª",
336
+ "100": "«",
337
+ "101": "°",
338
+ "102": "´",
339
+ "103": "·",
340
+ "104": "º",
341
+ "105": "»",
342
+ "106": "½",
343
+ "107": "¾",
344
+ "108": "¿",
345
+ "109": "Á",
346
+ "110": "Â",
347
+ "111": "Æ",
348
+ "112": "Ç",
349
+ "113": "È",
350
+ "114": "É",
351
+ "115": "Ê",
352
+ "116": "Í",
353
+ "117": "Î",
354
+ "118": "Ñ",
355
+ "119": "Ó",
356
+ "120": "Ú",
357
+ "121": "Ü",
358
+ "122": "à",
359
+ "123": "á",
360
+ "124": "â",
361
+ "125": "ä",
362
+ "126": "æ",
363
+ "127": "ç",
364
+ "128": "è",
365
+ "129": "é",
366
+ "130": "ê",
367
+ "131": "ë",
368
+ "132": "ì",
369
+ "133": "í",
370
+ "134": "î",
371
+ "135": "ï",
372
+ "136": "ñ",
373
+ "137": "ò",
374
+ "138": "ó",
375
+ "139": "ô",
376
+ "140": "ö",
377
+ "141": "ù",
378
+ "142": "ú",
379
+ "143": "û",
380
+ "144": "ü",
381
+ "145": "ā",
382
+ "146": "ē",
383
+ "147": "ě",
384
+ "148": "ī",
385
+ "149": "ō",
386
+ "150": "Œ",
387
+ "151": "œ",
388
+ "152": "̃",
389
+ "153": "Δ",
390
+ "154": "Ο",
391
+ "155": "Π",
392
+ "156": "Τ",
393
+ "157": "Φ",
394
+ "158": "ά",
395
+ "159": "έ",
396
+ "160": "ί",
397
+ "161": "α",
398
+ "162": "β",
399
+ "163": "γ",
400
+ "164": "δ",
401
+ "165": "ε",
402
+ "166": "η",
403
+ "167": "θ",
404
+ "168": "ι",
405
+ "169": "κ",
406
+ "170": "λ",
407
+ "171": "μ",
408
+ "172": "ν",
409
+ "173": "ξ",
410
+ "174": "ο",
411
+ "175": "π",
412
+ "176": "ρ",
413
+ "177": "ς",
414
+ "178": "σ",
415
+ "179": "τ",
416
+ "180": "υ",
417
+ "181": "χ",
418
+ "182": "ω",
419
+ "183": "ό",
420
+ "184": "ύ",
421
+ "185": "ώ",
422
+ "186": "ἀ",
423
+ "187": "ἄ",
424
+ "188": "ἐ",
425
+ "189": "ἔ",
426
+ "190": "ἠ",
427
+ "191": "ἡ",
428
+ "192": "ἤ",
429
+ "193": "ἦ",
430
+ "194": "ἰ",
431
+ "195": "ἵ",
432
+ "196": "ἶ",
433
+ "197": "ὁ",
434
+ "198": "ὄ",
435
+ "199": "ὅ",
436
+ "200": "ὐ",
437
+ "201": "ὑ",
438
+ "202": "ὰ",
439
+ "203": "ὲ",
440
+ "204": "ὴ",
441
+ "205": "ὶ",
442
+ "206": "ὸ",
443
+ "207": "ὺ",
444
+ "208": "ᾶ",
445
+ "209": "᾽",
446
+ "210": "ῆ",
447
+ "211": "ῖ",
448
+ "212": "ῦ",
449
+ "213": "ῶ",
450
+ "214": " ",
451
+ "215": "–",
452
+ "216": "—",
453
+ "217": "‘",
454
+ "218": "’",
455
+ "219": "“",
456
+ "220": "”",
457
+ "221": "•",
458
+ "222": "′",
459
+ "223": "™",
460
+ "224": "⅓",
461
+ "225": "○",
462
+ "226": "●",
463
+ "227": ""
464
+ },
465
+ "special_tokens": [
466
+ "<|pad|>",
467
+ "<|user|>",
468
+ "<|assistant|>",
469
+ "<|end|>"
470
+ ],
471
+ "config": {
472
+ "dim": 128,
473
+ "state_dim": 16,
474
+ "n_layers": 4
475
+ }
476
+ }