b1n1yam commited on
Commit
bcb6aa8
·
verified ·
1 Parent(s): 6715680

Upload am_base256_tokenizer_sm.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. am_base256_tokenizer_sm.json +380 -0
am_base256_tokenizer_sm.json ADDED
@@ -0,0 +1,380 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "[STOP]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "[UNK]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "[SPACE]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ }
33
+ ],
34
+ "normalizer": null,
35
+ "pre_tokenizer": {
36
+ "type": "Whitespace"
37
+ },
38
+ "post_processor": null,
39
+ "decoder": null,
40
+ "model": {
41
+ "type": "BPE",
42
+ "dropout": null,
43
+ "unk_token": "[UNK]",
44
+ "continuing_subword_prefix": null,
45
+ "end_of_word_suffix": null,
46
+ "fuse_unk": false,
47
+ "byte_fallback": false,
48
+ "ignore_merges": false,
49
+ "vocab": {
50
+ "[STOP]": 0,
51
+ "[UNK]": 1,
52
+ "[SPACE]": 2,
53
+ "'": 3,
54
+ ".": 4,
55
+ "/": 5,
56
+ "0": 6,
57
+ "1": 7,
58
+ "2": 8,
59
+ "3": 9,
60
+ "4": 10,
61
+ "5": 11,
62
+ "6": 12,
63
+ "7": 13,
64
+ "8": 14,
65
+ "9": 15,
66
+ ":": 16,
67
+ "?": 17,
68
+ "A": 18,
69
+ "B": 19,
70
+ "C": 20,
71
+ "D": 21,
72
+ "E": 22,
73
+ "F": 23,
74
+ "G": 24,
75
+ "H": 25,
76
+ "I": 26,
77
+ "J": 27,
78
+ "L": 28,
79
+ "N": 29,
80
+ "O": 30,
81
+ "P": 31,
82
+ "Q": 32,
83
+ "R": 33,
84
+ "S": 34,
85
+ "T": 35,
86
+ "U": 36,
87
+ "V": 37,
88
+ "W": 38,
89
+ "Y": 39,
90
+ "a": 40,
91
+ "b": 41,
92
+ "c": 42,
93
+ "d": 43,
94
+ "e": 44,
95
+ "f": 45,
96
+ "g": 46,
97
+ "h": 47,
98
+ "i": 48,
99
+ "j": 49,
100
+ "k": 50,
101
+ "l": 51,
102
+ "m": 52,
103
+ "n": 53,
104
+ "o": 54,
105
+ "p": 55,
106
+ "q": 56,
107
+ "r": 57,
108
+ "s": 58,
109
+ "t": 59,
110
+ "u": 60,
111
+ "v": 61,
112
+ "w": 62,
113
+ "x": 63,
114
+ "y": 64,
115
+ "z": 65,
116
+ "{": 66,
117
+ "ü": 67,
118
+ "а": 68,
119
+ "т": 69,
120
+ "ш": 70,
121
+ "ق": 71,
122
+ "م": 72,
123
+ "و": 73,
124
+ "ሀ": 74,
125
+ "ሁ": 75,
126
+ "ሂ": 76,
127
+ "ሃ": 77,
128
+ "ሄ": 78,
129
+ "ህ": 79,
130
+ "ሆ": 80,
131
+ "ሇ": 81,
132
+ "ለ": 82,
133
+ "ሉ": 83,
134
+ "ሊ": 84,
135
+ "ላ": 85,
136
+ "ሌ": 86,
137
+ "ል": 87,
138
+ "ሎ": 88,
139
+ "ሏ": 89,
140
+ "ሐ": 90,
141
+ "ሑ": 91,
142
+ "ሓ": 92,
143
+ "ሔ": 93,
144
+ "ሕ": 94,
145
+ "መ": 95,
146
+ "ሙ": 96,
147
+ "ሚ": 97,
148
+ "ማ": 98,
149
+ "ሜ": 99,
150
+ "ም": 100,
151
+ "ሞ": 101,
152
+ "ሟ": 102,
153
+ "ሠ": 103,
154
+ "ሡ": 104,
155
+ "ሣ": 105,
156
+ "ሥ": 106,
157
+ "ሦ": 107,
158
+ "ረ": 108,
159
+ "ሩ": 109,
160
+ "ሪ": 110,
161
+ "ራ": 111,
162
+ "ሬ": 112,
163
+ "ር": 113,
164
+ "ሮ": 114,
165
+ "ሯ": 115,
166
+ "ሰ": 116,
167
+ "ሱ": 117,
168
+ "ሲ": 118,
169
+ "ሳ": 119,
170
+ "ሴ": 120,
171
+ "ስ": 121,
172
+ "ሶ": 122,
173
+ "ሷ": 123,
174
+ "ሸ": 124,
175
+ "ሹ": 125,
176
+ "ሺ": 126,
177
+ "ሻ": 127,
178
+ "ሼ": 128,
179
+ "ሽ": 129,
180
+ "ሾ": 130,
181
+ "ሿ": 131,
182
+ "ቀ": 132,
183
+ "ቁ": 133,
184
+ "ቂ": 134,
185
+ "ቃ": 135,
186
+ "ቄ": 136,
187
+ "ቅ": 137,
188
+ "ቆ": 138,
189
+ "ቋ": 139,
190
+ "በ": 140,
191
+ "ቡ": 141,
192
+ "ቢ": 142,
193
+ "ባ": 143,
194
+ "ቤ": 144,
195
+ "ብ": 145,
196
+ "ቦ": 146,
197
+ "ቧ": 147,
198
+ "ቨ": 148,
199
+ "ቩ": 149,
200
+ "ቪ": 150,
201
+ "ቫ": 151,
202
+ "ቬ": 152,
203
+ "ቭ": 153,
204
+ "ቮ": 154,
205
+ "ተ": 155,
206
+ "ቱ": 156,
207
+ "ቲ": 157,
208
+ "ታ": 158,
209
+ "ቴ": 159,
210
+ "ት": 160,
211
+ "ቶ": 161,
212
+ "ቷ": 162,
213
+ "ቸ": 163,
214
+ "ቹ": 164,
215
+ "ቺ": 165,
216
+ "ቻ": 166,
217
+ "ቼ": 167,
218
+ "ች": 168,
219
+ "ቾ": 169,
220
+ "ቿ": 170,
221
+ "ኃ": 171,
222
+ "ኅ": 172,
223
+ "ኋ": 173,
224
+ "ነ": 174,
225
+ "ኑ": 175,
226
+ "ኒ": 176,
227
+ "ና": 177,
228
+ "ኔ": 178,
229
+ "ን": 179,
230
+ "ኖ": 180,
231
+ "ኗ": 181,
232
+ "ኘ": 182,
233
+ "ኙ": 183,
234
+ "ኚ": 184,
235
+ "ኛ": 185,
236
+ "ኜ": 186,
237
+ "ኝ": 187,
238
+ "ኞ": 188,
239
+ "ኟ": 189,
240
+ "አ": 190,
241
+ "ኡ": 191,
242
+ "ኢ": 192,
243
+ "ኣ": 193,
244
+ "ኤ": 194,
245
+ "እ": 195,
246
+ "ኦ": 196,
247
+ "ኧ": 197,
248
+ "ከ": 198,
249
+ "ኩ": 199,
250
+ "ኪ": 200,
251
+ "ካ": 201,
252
+ "ኬ": 202,
253
+ "ክ": 203,
254
+ "ኮ": 204,
255
+ "ኳ": 205,
256
+ "ኸ": 206,
257
+ "ኽ": 207,
258
+ "ወ": 208,
259
+ "ዉ": 209,
260
+ "ዊ": 210,
261
+ "ዋ": 211,
262
+ "ዌ": 212,
263
+ "ው": 213,
264
+ "ዎ": 214,
265
+ "ዐ": 215,
266
+ "ዑ": 216,
267
+ "ዒ": 217,
268
+ "ዓ": 218,
269
+ "ዔ": 219,
270
+ "ዕ": 220,
271
+ "ዖ": 221,
272
+ "ዘ": 222,
273
+ "ዙ": 223,
274
+ "ዚ": 224,
275
+ "ዛ": 225,
276
+ "ዜ": 226,
277
+ "ዝ": 227,
278
+ "ዞ": 228,
279
+ "ዟ": 229,
280
+ "ዠ": 230,
281
+ "ዡ": 231,
282
+ "ዢ": 232,
283
+ "ዣ": 233,
284
+ "ዤ": 234,
285
+ "ዥ": 235,
286
+ "ዦ": 236,
287
+ "ዧ": 237,
288
+ "የ": 238,
289
+ "ዩ": 239,
290
+ "ዪ": 240,
291
+ "ያ": 241,
292
+ "ዬ": 242,
293
+ "ይ": 243,
294
+ "ዮ": 244,
295
+ "ደ": 245,
296
+ "ዱ": 246,
297
+ "ዲ": 247,
298
+ "ዳ": 248,
299
+ "ዴ": 249,
300
+ "ድ": 250,
301
+ "ዶ": 251,
302
+ "ዷ": 252,
303
+ "ጀ": 253,
304
+ "ጁ": 254,
305
+ "ጂ": 255,
306
+ "ጃ": 256,
307
+ "ጄ": 257,
308
+ "ጅ": 258,
309
+ "ጆ": 259,
310
+ "ጇ": 260,
311
+ "ገ": 261,
312
+ "ጉ": 262,
313
+ "ጊ": 263,
314
+ "ጋ": 264,
315
+ "ጌ": 265,
316
+ "ግ": 266,
317
+ "ጎ": 267,
318
+ "ጓ": 268,
319
+ "ጠ": 269,
320
+ "ጡ": 270,
321
+ "ጢ": 271,
322
+ "ጣ": 272,
323
+ "ጤ": 273,
324
+ "ጥ": 274,
325
+ "ጦ": 275,
326
+ "ጧ": 276,
327
+ "ጨ": 277,
328
+ "ጩ": 278,
329
+ "ጪ": 279,
330
+ "ጫ": 280,
331
+ "ጬ": 281,
332
+ "ጭ": 282,
333
+ "ጮ": 283,
334
+ "ጯ": 284,
335
+ "ጲ": 285,
336
+ "ጳ": 286,
337
+ "ጴ": 287,
338
+ "ጵ": 288,
339
+ "ጶ": 289,
340
+ "ጸ": 290,
341
+ "ጹ": 291,
342
+ "ጺ": 292,
343
+ "ጻ": 293,
344
+ "ጼ": 294,
345
+ "ጽ": 295,
346
+ "ጾ": 296,
347
+ "ጿ": 297,
348
+ "ፀ": 298,
349
+ "ፁ": 299,
350
+ "ፂ": 300,
351
+ "ፃ": 301,
352
+ "ፄ": 302,
353
+ "ፅ": 303,
354
+ "ፆ": 304,
355
+ "ፈ": 305,
356
+ "ፉ": 306,
357
+ "ፊ": 307,
358
+ "ፋ": 308,
359
+ "ፌ": 309,
360
+ "ፍ": 310,
361
+ "ፎ": 311,
362
+ "ፏ": 312,
363
+ "ፐ": 313,
364
+ "ፑ": 314,
365
+ "ፒ": 315,
366
+ "ፓ": 316,
367
+ "ፔ": 317,
368
+ "ፕ": 318,
369
+ "ፖ": 319,
370
+ "።": 320,
371
+ "፯": 321,
372
+ "፱": 322,
373
+ "፲": 323,
374
+ "፺": 324,
375
+ "፻": 325
376
+ },
377
+ "merges": [],
378
+ "language": "am"
379
+ }
380
+ }