gsaltintas committed on
Commit
1e1cb62
·
verified ·
1 Parent(s): 1f77e13

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +5 -1
  2. special_tokens_map.json +0 -4
  3. tokenizer.json +29 -47
  4. tokenizer_config.json +0 -20
  5. vocab.json +342 -342
README.md CHANGED
@@ -47,4 +47,8 @@ tokens = tokenizer.encode("Hello, world!")
47
  ## Sample Encoding
48
  | Text | Tokens | Token IDs |
49
  |------|--------|-----------|
50
- | `yirmi iki+dokuz=otuz bir\ntwenty two+nine=thirty one` | `y, i, r, m, i, Ġ, i, k, i, +, d, o, k, u, z, =, o, t, u, z` | `91, 75, 84, 79, 75, 223, 75, 77, 75, 3, 70, 81, 77, 87, 92, 4, 81, 86, 87, 92` |
 
 
 
 
 
47
  ## Sample Encoding
48
  | Text | Tokens | Token IDs |
49
  |------|--------|-----------|
50
+ | `yirmi iki+dokuz=otuz bir\ntwenty two+nine=thirty one` | `y, i, r, m, i, Ġ, i, k, i, +, d, o, k, u, z, =, o, t, u, z` | `91, 75, 84, 79, 75, 223, 75, 77, 75, 13, 70, 81, 77, 87, 92, 31, 81, 86, 87, 92` |
51
+
52
+ Command used to create this tokenizer:
53
+ ```bash
54
+ ['/home/gsa/tokenizers2/flexitok/tokenizer_training/train_tokenizers.py', 'algorithm=bpe', 'vocab_size=2000', 'langs=[spa_Latn]', 'data_dir=/scratch/gsa/data/multilingual-addition/', 'output_dir=/scratch/gsa/trained_tokenizers/multilingual_addition', 'pretokenizer=custom:addition', 'number_handling=ltr_3digit', 'add_numbers=false', 'handle_contractions=false', 'unicode_normalization=nfc', 'use_byte_level_regex=false', 'byte_fallback=false', 'strip_zero_width=false', 'cjk_char_split=false', 'add_cjk_chars=false', 'max_lines=-1', 'test_string=yirmi iki+dokuz=otuz bir\\ntwenty two+nine=thirty one', 'hf.publish_to_hf=true', 'hf_repo_prefix=flexitok/', 'hf.hf_repo_id=flexitok/maddition_spa_Latn_2000', 'hf.collections=[flexitok/multilingual_addition_tokenizers]']
special_tokens_map.json CHANGED
@@ -1,8 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- "+",
4
- "="
5
- ],
6
  "bos_token": "<s>",
7
  "eos_token": "</s>",
8
  "pad_token": "<pad>"
 
1
  {
 
 
 
 
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
  "pad_token": "<pad>"
tokenizer.json CHANGED
@@ -29,24 +29,6 @@
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
32
- },
33
- {
34
- "id": 3,
35
- "content": "+",
36
- "single_word": false,
37
- "lstrip": false,
38
- "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
- },
42
- {
43
- "id": 4,
44
- "content": "=",
45
- "single_word": false,
46
- "lstrip": false,
47
- "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
  }
51
  ],
52
  "normalizer": {
@@ -99,35 +81,35 @@
99
  "<s>": 0,
100
  "</s>": 1,
101
  "<pad>": 2,
102
- "+": 3,
103
- "=": 4,
104
- "!": 5,
105
- "\"": 6,
106
- "#": 7,
107
- "$": 8,
108
- "%": 9,
109
- "&": 10,
110
- "'": 11,
111
- "(": 12,
112
- ")": 13,
113
- "*": 14,
114
- ",": 15,
115
- "-": 16,
116
- ".": 17,
117
- "/": 18,
118
- "0": 19,
119
- "1": 20,
120
- "2": 21,
121
- "3": 22,
122
- "4": 23,
123
- "5": 24,
124
- "6": 25,
125
- "7": 26,
126
- "8": 27,
127
- "9": 28,
128
- ":": 29,
129
- ";": 30,
130
- "<": 31,
131
  ">": 32,
132
  "?": 33,
133
  "@": 34,
 
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
 
81
  "<s>": 0,
82
  "</s>": 1,
83
  "<pad>": 2,
84
+ "!": 3,
85
+ "\"": 4,
86
+ "#": 5,
87
+ "$": 6,
88
+ "%": 7,
89
+ "&": 8,
90
+ "'": 9,
91
+ "(": 10,
92
+ ")": 11,
93
+ "*": 12,
94
+ "+": 13,
95
+ ",": 14,
96
+ "-": 15,
97
+ ".": 16,
98
+ "/": 17,
99
+ "0": 18,
100
+ "1": 19,
101
+ "2": 20,
102
+ "3": 21,
103
+ "4": 22,
104
+ "5": 23,
105
+ "6": 24,
106
+ "7": 25,
107
+ "8": 26,
108
+ "9": 27,
109
+ ":": 28,
110
+ ";": 29,
111
+ "<": 30,
112
+ "=": 31,
113
  ">": 32,
114
  "?": 33,
115
  "@": 34,
tokenizer_config.json CHANGED
@@ -23,28 +23,8 @@
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
- },
27
- "3": {
28
- "content": "+",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "4": {
36
- "content": "=",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
  }
43
  },
44
- "additional_special_tokens": [
45
- "+",
46
- "="
47
- ],
48
  "bos_token": "<s>",
49
  "clean_up_tokenization_spaces": false,
50
  "eos_token": "</s>",
 
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  },
 
 
 
 
28
  "bos_token": "<s>",
29
  "clean_up_tokenization_spaces": false,
30
  "eos_token": "</s>",
vocab.json CHANGED
@@ -1,374 +1,374 @@
1
  {
2
- "os": 262,
3
- "veinticinco": 352,
4
- "ß": 158,
5
- "O": 49,
6
- "tenta": 294,
7
- "och": 271,
8
- "ć": 198,
9
- "đ": 208,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  "D": 38,
11
- "¸": 119,
12
- "Â": 129,
13
- "b": 68,
14
- "Ģ": 225,
15
- "co": 321,
16
- "mil": 285,
17
- "m": 79,
18
  "?": 33,
19
- "ciento": 320,
20
- "a": 67,
21
- "nce": 338,
22
- "t": 86,
23
- "µ": 116,
24
- "Ğ": 221,
25
  "catorce": 367,
 
 
 
 
 
 
 
 
26
  "Ð": 143,
27
- "cat": 335,
28
- "į": 238,
29
- "Ĕ": 211,
30
- "veintiuno": 351,
31
- "M": 47,
32
- "veinti": 327,
33
- "¥": 101,
34
- "ą": 196,
 
 
 
 
 
 
 
 
 
 
 
 
35
  "ä": 163,
36
- "cuatrocientos": 311,
37
- "Ì": 139,
38
- "^": 64,
39
- "Č": 203,
40
- "Ĭ": 235,
41
- "Ļ": 250,
42
- "h": 74,
43
- "¿": 126,
44
- "½": 124,
45
- "ù": 184,
46
- "doscientos": 312,
 
 
 
 
 
 
 
 
47
  "dieciséis": 360,
48
- "´": 115,
49
- "c": 69,
50
- ",": 15,
51
- "ento": 319,
52
- "ñ": 176,
53
- "ocientos": 277,
54
- "i": 75,
55
- "ci": 261,
56
- "E": 39,
57
- "ę": 216,
58
- "s": 85,
59
- "once": 340,
60
- "F": 40,
61
- "_": 65,
62
  "oc": 269,
63
- "sesenta": 298,
64
- "Ł": 256,
65
- "9": 28,
66
- "uno": 323,
67
- "ã": 162,
 
 
 
 
 
 
 
 
 
 
 
 
68
  "setenta": 299,
69
- "d": 70,
70
- "cinueve": 345,
71
- "Ã©": 330,
72
- "¶": 117,
73
- "veintiocho": 350,
74
- "te": 278,
 
 
 
 
75
  "Õ": 148,
76
- "cero": 371,
77
- "trescientos": 314,
78
- "+": 3,
79
- "K": 45,
80
- "¦": 102,
81
- "int": 276,
82
- "nt": 259,
83
- "tr": 266,
84
- "doc": 337,
85
- "¹": 120,
86
- "en": 368,
87
- "or": 339,
88
  "A": 35,
89
- "veintisiete": 361,
 
 
 
 
 
 
 
 
 
 
90
  "f": 72,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  "ueve": 295,
92
- "cinco": 324,
93
- "â": 161,
94
- "</s>": 1,
95
- "`": 66,
96
- "ô": 179,
97
- "qu": 288,
98
- "tre": 273,
99
- "J": 44,
100
- "4": 23,
101
- "§": 103,
102
- "iete": 315,
103
- "Ġ": 223,
104
- "Û": 154,
105
- "R": 52,
106
- "×": 150,
107
- "at": 334,
108
- "¼": 123,
109
- "ó": 178,
110
- "/": 18,
111
- "ï": 174,
112
- "treint": 308,
113
- "ċ": 202,
114
- "éis": 333,
115
- "o": 81,
116
- "veintiséis": 362,
117
- "-": 16,
118
- "Ą": 195,
119
- "x": 90,
120
- "1": 20,
121
- "Í": 140,
122
- "@": 34,
123
  "Ė": 213,
124
- "Ó": 146,
125
- "=": 4,
126
- "S": 53,
127
- "veintinueve": 349,
128
- "Ĵ": 243,
129
- "G": 41,
130
- "is": 275,
131
- "séis": 341,
132
- "cu": 268,
133
- "z": 92,
134
- "veintis": 332,
135
- "®": 109,
136
- "Ø": 151,
137
- "ĩ": 232,
138
- "Z": 60,
139
- "ľ": 253,
140
- "Ç": 134,
141
- "Ò": 145,
142
- "ĺ": 249,
143
- "ent": 260,
144
- "č": 204,
145
- "3": 22,
146
- "noventa": 306,
147
  "seiscientos": 313,
 
 
 
 
 
 
 
 
 
 
 
148
  "diecisiete": 359,
149
- "ļ": 251,
150
- "Ĥ": 227,
151
- "inueve": 346,
152
- "Ê": 137,
153
- "ÿ": 190,
154
- "©": 105,
155
- "p": 82,
156
- "Ĝ": 219,
157
- ")": 13,
158
- "Ô": 147,
159
- "trece": 344,
160
- "Æ": 133,
161
- "ė": 214,
162
- "²": 113,
163
- "diez": 356,
164
- "~": 96,
165
- "6": 25,
166
- "l": 78,
167
- "veinticuatro": 353,
168
- "eve": 290,
169
- "'": 11,
170
- "º": 121,
171
- "Ý": 156,
172
  "Ķ": 245,
173
- "<pad>": 2,
174
- "quinientos": 316,
175
- "ientos": 291,
176
- "U": 55,
177
- "senta": 293,
178
- "ò": 177,
179
- "Ã": 130,
180
- "N": 48,
181
- "ü": 187,
182
- "V": 56,
183
- "doce": 364,
184
- "¨": 104,
185
- "veintitrés": 366,
186
- "Ù": 152,
187
- "W": 57,
188
- "Ī": 233,
189
- "veint": 302,
190
- "ç": 166,
191
- "quince": 347,
192
- "I": 43,
193
- "w": 89,
194
- "ë": 170,
195
- "ö": 181,
196
- "³": 114,
197
- "ĵ": 244,
198
- "k": 77,
199
- "!": 5,
200
- "Ď": 205,
201
- "g": 73,
202
- ".": 17,
203
- "se": 267,
204
- "siete": 322,
205
- "y": 91,
206
  "ve": 270,
207
- "Þ": 157,
208
- "C": 37,
209
- "quin": 289,
210
- "cua": 274,
211
- "Ċ": 201,
212
- "¯": 110,
213
- "Ā": 191,
214
- "cator": 363,
215
- "P": 50,
216
- "ĝ": 220,
217
- "ie": 286,
218
- ";": 30,
219
- "ĕ": 212,
220
- "è": 167,
221
- "³s": 342,
222
- "·": 118,
223
- "cien": 369,
224
- "e": 71,
225
- "£": 99,
226
- "seis": 282,
227
- "Ö": 149,
228
- "ĥ": 228,
229
- ":": 29,
230
- "L": 46,
231
- "dos": 281,
232
- "7": 26,
233
- "nueve": 326,
234
- "à": 159,
235
- "*": 14,
236
- "j": 76,
237
  "veinte": 348,
238
- "¡": 97,
239
- "ocho": 303,
240
- "õ": 180,
241
- "ce": 329,
242
- "é": 168,
243
- "Ü": 155,
244
- "Î": 141,
245
- "È": 135,
246
- "ď": 206,
247
- "ā": 192,
248
- "2": 21,
249
- "ĭ": 236,
250
- "¢": 98,
251
- "cuatro": 325,
252
- "setecientos": 317,
253
  "T": 54,
254
- "ı": 240,
255
- "Ľ": 252,
256
- "Ń": 258,
257
- "ro": 370,
258
- "dieciocho": 358,
259
- "entos": 263,
260
- "ú": 185,
 
 
 
 
 
 
 
 
 
 
 
 
261
  "÷": 182,
262
- "ð": 175,
263
- "]": 63,
264
- "%": 9,
265
- "Ĩ": 231,
266
- "{": 93,
267
- "ħ": 230,
268
- "#": 7,
269
- "Ħ": 229,
270
- "Ē": 209,
271
  "¬": 108,
272
- "å": 164,
273
- "Ñ": 144,
274
- "cincuenta": 310,
275
- "ě": 218,
276
- "treinta": 318,
277
- "Ă": 193,
278
- "$": 8,
279
  "cuatr": 280,
280
- "8": 27,
281
- "diecinueve": 357,
282
- "¤": 100,
283
- "Ŀ": 254,
284
- "Ë": 138,
285
- "ª": 106,
286
- "«": 107,
287
- "Y": 59,
288
- "Đ": 207,
289
- "tres": 283,
290
- "Ĺ": 248,
291
  "°": 111,
292
- "Ä": 131,
293
- "enta": 265,
294
- "|": 94,
295
- "ij": 242,
296
- "ī": 234,
297
- "ŀ": 255,
298
- "X": 58,
299
- "Q": 51,
300
  "Å": 132,
301
- "ġ": 224,
302
- "}": 95,
303
- "(": 12,
304
- "±": 112,
305
- "0": 19,
306
- "v": 88,
307
- "Ę": 215,
308
- "Á": 128,
309
- "ì": 171,
310
- "veintidÃ": 354,
311
- "ĉ": 200,
312
- "r": 84,
313
- "Ć": 197,
314
- "İ": 239,
315
- "B": 36,
316
  "sete": 297,
317
- "Į": 237,
318
- "ochocientos": 305,
319
- "æ": 165,
320
- "þ": 189,
321
- "cuenta": 300,
322
- "É": 136,
323
- "5": 24,
 
 
 
 
 
 
 
 
 
 
 
324
  "no": 272,
325
- "ĸ": 247,
326
- "ý": 188,
327
- "Ě": 217,
328
- "Ĉ": 199,
329
- "&": 10,
330
- "Ú": 153,
331
- "î": 173,
332
- "vecientos": 301,
333
- "ă": 194,
334
- "\\": 62,
335
- "veintitré": 355,
336
- "À": 127,
337
- "[": 61,
338
- "IJ": 241,
339
- "<": 31,
340
- "ķ": 246,
 
 
 
 
 
 
 
 
341
  "Ï": 142,
342
- "n": 80,
343
- "\"": 6,
344
- "H": 42,
345
- "cuarenta": 309,
346
- "¾": 125,
347
  "ģ": 226,
348
- "í": 172,
349
- "in": 287,
350
- "cin": 279,
351
- "novecientos": 307,
 
 
352
  "veintidós": 365,
353
- ">": 32,
354
- "ē": 210,
355
- "ł": 257,
356
- "il": 284,
357
- "cientos": 264,
358
- "renta": 292,
359
- "ø": 183,
360
- "q": 83,
361
- "ğ": 222,
362
- "u": 87,
363
- "tré": 343,
364
- "»": 122,
365
- "venta": 296,
366
  "<s>": 0,
367
- "á": 160,
368
- "ê": 169,
369
- "die": 328,
370
- "û": 186,
371
- "ochenta": 304,
372
- "dÃ": 336,
373
- "dieci": 331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  }
 
1
  {
2
+ "ô": 179,
3
+ "F": 40,
4
+ "cien": 369,
5
+ "ã": 162,
6
+ "cat": 335,
7
+ "é": 168,
8
+ "entos": 263,
9
+ "ê": 169,
10
+ "Ì": 139,
11
+ "Ü": 155,
12
+ "diez": 356,
13
+ "x": 90,
14
+ "ç": 166,
15
+ "Ł": 256,
16
+ "Ń": 258,
17
+ "í": 172,
18
+ "X": 58,
19
+ "³": 114,
20
+ ":": 28,
21
+ "ĕ": 212,
22
+ "ł": 257,
23
+ "Ã©": 330,
24
+ "\\": 62,
25
+ "dos": 281,
26
+ "y": 91,
27
+ "ð": 175,
28
+ "W": 57,
29
+ "n": 80,
30
+ "9": 27,
31
+ "ï": 174,
32
+ "î": 173,
33
+ "ij": 242,
34
+ "Ħ": 229,
35
+ "/": 17,
36
+ "~": 96,
37
+ "Ą": 195,
38
+ "¶": 117,
39
+ "3": 21,
40
+ "cinco": 324,
41
+ "veintitré": 355,
42
+ "Ļ": 250,
43
+ "4": 22,
44
+ "Č": 203,
45
+ "Ý": 156,
46
+ "ė": 214,
47
  "D": 38,
48
+ "at": 334,
49
+ "¥": 101,
 
 
 
 
 
50
  "?": 33,
51
+ "Ĵ": 243,
52
+ "tr": 266,
53
+ "]": 63,
54
+ ">": 32,
 
 
55
  "catorce": 367,
56
+ "©": 105,
57
+ "seis": 282,
58
+ "c": 69,
59
+ "À": 127,
60
+ "ę": 216,
61
+ "à": 159,
62
+ "o": 81,
63
+ "ķ": 246,
64
  "Ð": 143,
65
+ "cuarenta": 309,
66
+ "m": 79,
67
+ "č": 204,
68
+ "İ": 239,
69
+ "I": 43,
70
+ "§": 103,
71
+ "vecientos": 301,
72
+ "[": 61,
73
+ "Ù": 152,
74
+ "i": 75,
75
+ "w": 89,
76
+ "enta": 265,
77
+ "û": 186,
78
+ "ocientos": 277,
79
+ "´": 115,
80
+ "sesenta": 298,
81
+ "³s": 342,
82
+ "ď": 206,
83
+ "nce": 338,
84
+ "N": 48,
85
  "ä": 163,
86
+ "æ": 165,
87
+ "Ī": 233,
88
+ "+": 13,
89
+ "siete": 322,
90
+ "die": 328,
91
+ "è": 167,
92
+ "a": 67,
93
+ "r": 84,
94
+ "Ĥ": 227,
95
+ "¯": 110,
96
+ "×": 150,
97
+ "ą": 196,
98
+ "ě": 218,
99
+ "Q": 51,
100
+ "P": 50,
101
+ "</s>": 1,
102
+ "J": 44,
103
+ "Ā": 191,
104
+ "ĭ": 236,
105
  "dieciséis": 360,
106
+ "¢": 98,
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  "oc": 269,
108
+ "=": 31,
109
+ "qu": 288,
110
+ "L": 46,
111
+ "ì": 171,
112
+ "R": 52,
113
+ "Ĩ": 231,
114
+ "veinticuatro": 353,
115
+ "ú": 185,
116
+ "nueve": 326,
117
+ "¿": 126,
118
+ "renta": 292,
119
+ "²": 113,
120
+ "2": 20,
121
+ "Ĺ": 248,
122
+ "H": 42,
123
+ "il": 284,
124
+ "B": 36,
125
  "setenta": 299,
126
+ "ÿ": 190,
127
+ "Ã": 130,
128
+ "Ñ": 144,
129
+ "s": 85,
130
+ "Ě": 217,
131
+ "once": 340,
132
+ "ā": 192,
133
+ "Ø": 151,
134
+ "·": 118,
135
+ "ie": 286,
136
  "Õ": 148,
137
+ "treinta": 318,
138
+ "setecientos": 317,
139
+ "Ĭ": 235,
140
+ "veint": 302,
141
+ "uno": 323,
142
+ "k": 77,
143
+ "V": 56,
144
+ "ı": 240,
 
 
 
 
145
  "A": 35,
146
+ "co": 321,
147
+ "Đ": 207,
148
+ "ĥ": 228,
149
+ "ľ": 253,
150
+ "trece": 344,
151
+ "se": 267,
152
+ "doce": 364,
153
+ "dÃ": 336,
154
+ "@": 34,
155
+ "Û": 154,
156
+ "U": 55,
157
  "f": 72,
158
+ "ī": 234,
159
+ "ochocientos": 305,
160
+ "ientos": 291,
161
+ ",": 14,
162
+ "t": 86,
163
+ "6": 24,
164
+ "K": 45,
165
+ "¤": 100,
166
+ "ª": 106,
167
+ "Ú": 153,
168
+ "»": 122,
169
+ "ĺ": 249,
170
+ "Ŀ": 254,
171
+ "Ę": 215,
172
+ "e": 71,
173
+ "±": 112,
174
  "ueve": 295,
175
+ "*": 12,
176
+ "á": 160,
177
+ "E": 39,
178
+ "<pad>": 2,
179
+ "5": 23,
180
+ "mil": 285,
181
+ "ĵ": 244,
182
+ "Â": 129,
183
+ "1": 19,
184
+ "p": 82,
185
+ "¦": 102,
186
+ "venta": 296,
187
+ "int": 276,
188
+ "Ă": 193,
189
+ "Ć": 197,
190
+ "ē": 210,
191
+ "veintitrés": 366,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  "Ė": 213,
193
+ "te": 278,
194
+ "¼": 123,
195
+ "£": 99,
196
+ "å": 164,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  "seiscientos": 313,
198
+ "cator": 363,
199
+ "ĉ": 200,
200
+ "veintinueve": 349,
201
+ "ý": 188,
202
+ "ħ": 230,
203
+ "ce": 329,
204
+ "dieci": 331,
205
+ "ocho": 303,
206
+ "`": 66,
207
+ "diecinueve": 357,
208
+ "ciento": 320,
209
  "diecisiete": 359,
210
+ "h": 74,
211
+ "cero": 371,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  "Ķ": 245,
213
+ "ochenta": 304,
214
+ "novecientos": 307,
215
+ "%": 7,
216
+ "is": 275,
217
+ "Ğ": 221,
218
+ "veintiocho": 350,
219
+ "µ": 116,
220
+ "¹": 120,
221
+ "¾": 125,
222
+ "trescientos": 314,
223
+ "â": 161,
224
+ "¡": 97,
225
+ "Ĕ": 211,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  "ve": 270,
227
+ "noventa": 306,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  "veinte": 348,
229
+ "Ď": 205,
230
+ "Á": 128,
231
+ "ĝ": 220,
232
+ "'": 9,
233
+ "veintiséis": 362,
 
 
 
 
 
 
 
 
 
 
234
  "T": 54,
235
+ "quinientos": 316,
236
+ "tre": 273,
237
+ "þ": 189,
238
+ "veinticinco": 352,
239
+ "!": 3,
240
+ "j": 76,
241
+ "veintis": 332,
242
+ "cincuenta": 310,
243
+ "Ģ": 225,
244
+ "C": 37,
245
+ "or": 339,
246
+ "Ê": 137,
247
+ "}": 95,
248
+ "tenta": 294,
249
+ "u": 87,
250
+ "ć": 198,
251
+ "quince": 347,
252
+ "(": 10,
253
+ "ġ": 224,
254
  "÷": 182,
255
+ "7": 25,
256
+ "z": 92,
257
+ "&": 8,
258
+ "ë": 170,
259
+ "veintidÃ": 354,
 
 
 
 
260
  "¬": 108,
261
+ "v": 88,
262
+ "Ç": 134,
 
 
 
 
 
263
  "cuatr": 280,
 
 
 
 
 
 
 
 
 
 
 
264
  "°": 111,
265
+ "nt": 259,
266
+ "cuenta": 300,
267
+ "^": 64,
268
+ ")": 11,
269
+ "S": 53,
270
+ "quin": 289,
271
+ "cuatrocientos": 311,
272
+ "cientos": 264,
273
  "Å": 132,
274
+ "tres": 283,
275
+ "Y": 59,
276
+ "ent": 260,
277
+ "treint": 308,
 
 
 
 
 
 
 
 
 
 
 
278
  "sete": 297,
279
+ "Ē": 209,
280
+ "º": 121,
281
+ "cin": 279,
282
+ "õ": 180,
283
+ "G": 41,
284
+ "doc": 337,
285
+ "Ľ": 252,
286
+ "l": 78,
287
+ "cu": 268,
288
+ "-": 15,
289
+ "đ": 208,
290
+ "ß": 158,
291
+ "dieciocho": 358,
292
+ "en": 368,
293
+ "Z": 60,
294
+ "O": 49,
295
+ "Þ": 157,
296
+ "_": 65,
297
  "no": 272,
298
+ "Ó": 146,
299
+ "iete": 315,
300
+ "Ë": 138,
301
+ "Ġ": 223,
302
+ "veintisiete": 361,
303
+ "ļ": 251,
304
+ "eve": 290,
305
+ "doscientos": 312,
306
+ "b": 68,
307
+ ";": 29,
308
+ "Ò": 145,
309
+ "«": 107,
310
+ "ó": 178,
311
+ "ü": 187,
312
+ "cuatro": 325,
313
+ "ro": 370,
314
+ "8": 26,
315
+ "¸": 119,
316
+ "Ö": 149,
317
+ "in": 287,
318
+ "Í": 140,
319
+ "¨": 104,
320
+ "ù": 184,
321
+ "q": 83,
322
  "Ï": 142,
 
 
 
 
 
323
  "ģ": 226,
324
+ "į": 238,
325
+ "ŀ": 255,
326
+ "d": 70,
327
+ "ö": 181,
328
+ ".": 16,
329
+ "os": 262,
330
  "veintidós": 365,
331
+ "È": 135,
332
+ "ĩ": 232,
333
+ "ñ": 176,
334
+ "|": 94,
335
+ "Æ": 133,
336
+ "{": 93,
337
+ "Į": 237,
 
 
 
 
 
 
338
  "<s>": 0,
339
+ "½": 124,
340
+ "séis": 341,
341
+ "<": 30,
342
+ "Î": 141,
343
+ "tré": 343,
344
+ "senta": 293,
345
+ "ğ": 222,
346
+ "ĸ": 247,
347
+ "É": 136,
348
+ "\"": 4,
349
+ "#": 5,
350
+ "inueve": 346,
351
+ "0": 18,
352
+ "Ċ": 201,
353
+ "cua": 274,
354
+ "M": 47,
355
+ "Ä": 131,
356
+ "och": 271,
357
+ "ø": 183,
358
+ "ci": 261,
359
+ "Ĝ": 219,
360
+ "g": 73,
361
+ "veinti": 327,
362
+ "éis": 333,
363
+ "IJ": 241,
364
+ "Ĉ": 199,
365
+ "ò": 177,
366
+ "ento": 319,
367
+ "Ô": 147,
368
+ "ċ": 202,
369
+ "ă": 194,
370
+ "$": 6,
371
+ "veintiuno": 351,
372
+ "®": 109,
373
+ "cinueve": 345
374
  }