gsaltintas committed on
Commit
87eb7cb
·
verified ·
1 Parent(s): a07183d

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. README.md +49 -0
  2. merges.txt +91 -0
  3. special_tokens_map.json +11 -0
  4. tokenizer.json +578 -0
  5. tokenizer_config.json +74 -0
  6. vocab.json +109 -0
README.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language:
4
+ - und # ISO 639-3 code or "und" if not identifiable
5
+ tags:
6
+ - tokenizer
7
+ - bpe
8
+ - flexitok
9
+ - fineweb2
10
+ ---
11
+
12
+ # Byte-Level BPE Tokenizer: numeric (vocab size 107)
13
+
14
+ A **Byte-Level BPE** tokenizer trained on **numeric** data from Fineweb-2-HQ.
15
+
16
+ ## Training Details
17
+
18
+ | Parameter | Value |
19
+ |-----------|-------|
20
+ | Algorithm | Byte-Level BPE |
21
+ | Language | `numeric` |
22
+ | Target Vocab Size | 107 |
23
+ | Final Vocab Size | 107 |
24
+ | Pre-tokenizer | byte_level |
25
+ | Number handling | ltr_2digit |
26
+ | Contraction handling | False |
27
+ | Normalizer | NONE |
28
+ | Special Tokens | `<s>`, `</s>`, `<pad>`, `<unk>` |
29
+ | Training Shards | 1 |
30
+
31
+ ## Usage
32
+
33
+ ```python
34
+ from transformers import AutoTokenizer
35
+
36
+ tokenizer = AutoTokenizer.from_pretrained("<namespace>/<repo-name>")  # replace with this repository's Hub ID
37
+ tokens = tokenizer.encode("123500119 mod 67")
38
+ ```
39
+
40
+ ## Files
41
+
42
+ - `tokenizer.json` — Full HuggingFace tokenizer
43
+ - `vocab.json` — Vocabulary mapping
44
+ - `merges.txt` — BPE merge rules
45
+
46
+ ## Sample Encoding
47
+ | Text | Tokens | Token IDs |
48
+ |------|--------|-----------|
49
+ | `123500119 mod 67` | `12, 35, 0, 0, 11, 9, ␣, mod, ␣, 67` (`␣` = single space token) | `19, 42, 7, 7, 18, 16, 6, 4, 6, 74` |
merges.txt ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #version: 0.2
2
+ ['1', '0']
3
+ ['1', '1']
4
+ ['1', '2']
5
+ ['1', '3']
6
+ ['1', '4']
7
+ ['1', '5']
8
+ ['1', '6']
9
+ ['1', '7']
10
+ ['1', '8']
11
+ ['1', '9']
12
+ ['2', '0']
13
+ ['2', '1']
14
+ ['2', '2']
15
+ ['2', '3']
16
+ ['2', '4']
17
+ ['2', '5']
18
+ ['2', '6']
19
+ ['2', '7']
20
+ ['2', '8']
21
+ ['2', '9']
22
+ ['3', '0']
23
+ ['3', '1']
24
+ ['3', '2']
25
+ ['3', '3']
26
+ ['3', '4']
27
+ ['3', '5']
28
+ ['3', '6']
29
+ ['3', '7']
30
+ ['3', '8']
31
+ ['3', '9']
32
+ ['4', '0']
33
+ ['4', '1']
34
+ ['4', '2']
35
+ ['4', '3']
36
+ ['4', '4']
37
+ ['4', '5']
38
+ ['4', '6']
39
+ ['4', '7']
40
+ ['4', '8']
41
+ ['4', '9']
42
+ ['5', '0']
43
+ ['5', '1']
44
+ ['5', '2']
45
+ ['5', '3']
46
+ ['5', '4']
47
+ ['5', '5']
48
+ ['5', '6']
49
+ ['5', '7']
50
+ ['5', '8']
51
+ ['5', '9']
52
+ ['6', '0']
53
+ ['6', '1']
54
+ ['6', '2']
55
+ ['6', '3']
56
+ ['6', '4']
57
+ ['6', '5']
58
+ ['6', '6']
59
+ ['6', '7']
60
+ ['6', '8']
61
+ ['6', '9']
62
+ ['7', '0']
63
+ ['7', '1']
64
+ ['7', '2']
65
+ ['7', '3']
66
+ ['7', '4']
67
+ ['7', '5']
68
+ ['7', '6']
69
+ ['7', '7']
70
+ ['7', '8']
71
+ ['7', '9']
72
+ ['8', '0']
73
+ ['8', '1']
74
+ ['8', '2']
75
+ ['8', '3']
76
+ ['8', '4']
77
+ ['8', '5']
78
+ ['8', '6']
79
+ ['8', '7']
80
+ ['8', '8']
81
+ ['8', '9']
82
+ ['9', '0']
83
+ ['9', '1']
84
+ ['9', '2']
85
+ ['9', '3']
86
+ ['9', '4']
87
+ ['9', '5']
88
+ ['9', '6']
89
+ ['9', '7']
90
+ ['9', '8']
91
+ ['9', '9']
special_tokens_map.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "mod",
4
+ "=",
5
+ " "
6
+ ],
7
+ "bos_token": "<s>",
8
+ "eos_token": "</s>",
9
+ "pad_token": "<pad>",
10
+ "unk_token": "<unk>"
11
+ }
tokenizer.json ADDED
@@ -0,0 +1,578 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<unk>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<s>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "</s>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "<pad>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "mod",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ {
52
+ "id": 5,
53
+ "content": "=",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 6,
62
+ "content": " ",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": false,
67
+ "special": true
68
+ }
69
+ ],
70
+ "normalizer": null,
71
+ "pre_tokenizer": {
72
+ "type": "Sequence",
73
+ "pretokenizers": [
74
+ {
75
+ "type": "Split",
76
+ "pattern": {
77
+ "Regex": "\\p{N}{1,2}"
78
+ },
79
+ "behavior": "Isolated",
80
+ "invert": false
81
+ },
82
+ {
83
+ "type": "ByteLevel",
84
+ "add_prefix_space": false,
85
+ "trim_offsets": true,
86
+ "use_regex": false
87
+ }
88
+ ]
89
+ },
90
+ "post_processor": null,
91
+ "decoder": {
92
+ "type": "ByteLevel",
93
+ "add_prefix_space": true,
94
+ "trim_offsets": true,
95
+ "use_regex": true
96
+ },
97
+ "model": {
98
+ "type": "BPE",
99
+ "dropout": null,
100
+ "unk_token": "<unk>",
101
+ "continuing_subword_prefix": null,
102
+ "end_of_word_suffix": null,
103
+ "fuse_unk": false,
104
+ "byte_fallback": false,
105
+ "ignore_merges": false,
106
+ "vocab": {
107
+ "<unk>": 0,
108
+ "<s>": 1,
109
+ "</s>": 2,
110
+ "<pad>": 3,
111
+ "mod": 4,
112
+ "=": 5,
113
+ " ": 6,
114
+ "0": 7,
115
+ "1": 8,
116
+ "2": 9,
117
+ "3": 10,
118
+ "4": 11,
119
+ "5": 12,
120
+ "6": 13,
121
+ "7": 14,
122
+ "8": 15,
123
+ "9": 16,
124
+ "10": 17,
125
+ "11": 18,
126
+ "12": 19,
127
+ "13": 20,
128
+ "14": 21,
129
+ "15": 22,
130
+ "16": 23,
131
+ "17": 24,
132
+ "18": 25,
133
+ "19": 26,
134
+ "20": 27,
135
+ "21": 28,
136
+ "22": 29,
137
+ "23": 30,
138
+ "24": 31,
139
+ "25": 32,
140
+ "26": 33,
141
+ "27": 34,
142
+ "28": 35,
143
+ "29": 36,
144
+ "30": 37,
145
+ "31": 38,
146
+ "32": 39,
147
+ "33": 40,
148
+ "34": 41,
149
+ "35": 42,
150
+ "36": 43,
151
+ "37": 44,
152
+ "38": 45,
153
+ "39": 46,
154
+ "40": 47,
155
+ "41": 48,
156
+ "42": 49,
157
+ "43": 50,
158
+ "44": 51,
159
+ "45": 52,
160
+ "46": 53,
161
+ "47": 54,
162
+ "48": 55,
163
+ "49": 56,
164
+ "50": 57,
165
+ "51": 58,
166
+ "52": 59,
167
+ "53": 60,
168
+ "54": 61,
169
+ "55": 62,
170
+ "56": 63,
171
+ "57": 64,
172
+ "58": 65,
173
+ "59": 66,
174
+ "60": 67,
175
+ "61": 68,
176
+ "62": 69,
177
+ "63": 70,
178
+ "64": 71,
179
+ "65": 72,
180
+ "66": 73,
181
+ "67": 74,
182
+ "68": 75,
183
+ "69": 76,
184
+ "70": 77,
185
+ "71": 78,
186
+ "72": 79,
187
+ "73": 80,
188
+ "74": 81,
189
+ "75": 82,
190
+ "76": 83,
191
+ "77": 84,
192
+ "78": 85,
193
+ "79": 86,
194
+ "80": 87,
195
+ "81": 88,
196
+ "82": 89,
197
+ "83": 90,
198
+ "84": 91,
199
+ "85": 92,
200
+ "86": 93,
201
+ "87": 94,
202
+ "88": 95,
203
+ "89": 96,
204
+ "90": 97,
205
+ "91": 98,
206
+ "92": 99,
207
+ "93": 100,
208
+ "94": 101,
209
+ "95": 102,
210
+ "96": 103,
211
+ "97": 104,
212
+ "98": 105,
213
+ "99": 106
214
+ },
215
+ "merges": [
216
+ [
217
+ "1",
218
+ "0"
219
+ ],
220
+ [
221
+ "1",
222
+ "1"
223
+ ],
224
+ [
225
+ "1",
226
+ "2"
227
+ ],
228
+ [
229
+ "1",
230
+ "3"
231
+ ],
232
+ [
233
+ "1",
234
+ "4"
235
+ ],
236
+ [
237
+ "1",
238
+ "5"
239
+ ],
240
+ [
241
+ "1",
242
+ "6"
243
+ ],
244
+ [
245
+ "1",
246
+ "7"
247
+ ],
248
+ [
249
+ "1",
250
+ "8"
251
+ ],
252
+ [
253
+ "1",
254
+ "9"
255
+ ],
256
+ [
257
+ "2",
258
+ "0"
259
+ ],
260
+ [
261
+ "2",
262
+ "1"
263
+ ],
264
+ [
265
+ "2",
266
+ "2"
267
+ ],
268
+ [
269
+ "2",
270
+ "3"
271
+ ],
272
+ [
273
+ "2",
274
+ "4"
275
+ ],
276
+ [
277
+ "2",
278
+ "5"
279
+ ],
280
+ [
281
+ "2",
282
+ "6"
283
+ ],
284
+ [
285
+ "2",
286
+ "7"
287
+ ],
288
+ [
289
+ "2",
290
+ "8"
291
+ ],
292
+ [
293
+ "2",
294
+ "9"
295
+ ],
296
+ [
297
+ "3",
298
+ "0"
299
+ ],
300
+ [
301
+ "3",
302
+ "1"
303
+ ],
304
+ [
305
+ "3",
306
+ "2"
307
+ ],
308
+ [
309
+ "3",
310
+ "3"
311
+ ],
312
+ [
313
+ "3",
314
+ "4"
315
+ ],
316
+ [
317
+ "3",
318
+ "5"
319
+ ],
320
+ [
321
+ "3",
322
+ "6"
323
+ ],
324
+ [
325
+ "3",
326
+ "7"
327
+ ],
328
+ [
329
+ "3",
330
+ "8"
331
+ ],
332
+ [
333
+ "3",
334
+ "9"
335
+ ],
336
+ [
337
+ "4",
338
+ "0"
339
+ ],
340
+ [
341
+ "4",
342
+ "1"
343
+ ],
344
+ [
345
+ "4",
346
+ "2"
347
+ ],
348
+ [
349
+ "4",
350
+ "3"
351
+ ],
352
+ [
353
+ "4",
354
+ "4"
355
+ ],
356
+ [
357
+ "4",
358
+ "5"
359
+ ],
360
+ [
361
+ "4",
362
+ "6"
363
+ ],
364
+ [
365
+ "4",
366
+ "7"
367
+ ],
368
+ [
369
+ "4",
370
+ "8"
371
+ ],
372
+ [
373
+ "4",
374
+ "9"
375
+ ],
376
+ [
377
+ "5",
378
+ "0"
379
+ ],
380
+ [
381
+ "5",
382
+ "1"
383
+ ],
384
+ [
385
+ "5",
386
+ "2"
387
+ ],
388
+ [
389
+ "5",
390
+ "3"
391
+ ],
392
+ [
393
+ "5",
394
+ "4"
395
+ ],
396
+ [
397
+ "5",
398
+ "5"
399
+ ],
400
+ [
401
+ "5",
402
+ "6"
403
+ ],
404
+ [
405
+ "5",
406
+ "7"
407
+ ],
408
+ [
409
+ "5",
410
+ "8"
411
+ ],
412
+ [
413
+ "5",
414
+ "9"
415
+ ],
416
+ [
417
+ "6",
418
+ "0"
419
+ ],
420
+ [
421
+ "6",
422
+ "1"
423
+ ],
424
+ [
425
+ "6",
426
+ "2"
427
+ ],
428
+ [
429
+ "6",
430
+ "3"
431
+ ],
432
+ [
433
+ "6",
434
+ "4"
435
+ ],
436
+ [
437
+ "6",
438
+ "5"
439
+ ],
440
+ [
441
+ "6",
442
+ "6"
443
+ ],
444
+ [
445
+ "6",
446
+ "7"
447
+ ],
448
+ [
449
+ "6",
450
+ "8"
451
+ ],
452
+ [
453
+ "6",
454
+ "9"
455
+ ],
456
+ [
457
+ "7",
458
+ "0"
459
+ ],
460
+ [
461
+ "7",
462
+ "1"
463
+ ],
464
+ [
465
+ "7",
466
+ "2"
467
+ ],
468
+ [
469
+ "7",
470
+ "3"
471
+ ],
472
+ [
473
+ "7",
474
+ "4"
475
+ ],
476
+ [
477
+ "7",
478
+ "5"
479
+ ],
480
+ [
481
+ "7",
482
+ "6"
483
+ ],
484
+ [
485
+ "7",
486
+ "7"
487
+ ],
488
+ [
489
+ "7",
490
+ "8"
491
+ ],
492
+ [
493
+ "7",
494
+ "9"
495
+ ],
496
+ [
497
+ "8",
498
+ "0"
499
+ ],
500
+ [
501
+ "8",
502
+ "1"
503
+ ],
504
+ [
505
+ "8",
506
+ "2"
507
+ ],
508
+ [
509
+ "8",
510
+ "3"
511
+ ],
512
+ [
513
+ "8",
514
+ "4"
515
+ ],
516
+ [
517
+ "8",
518
+ "5"
519
+ ],
520
+ [
521
+ "8",
522
+ "6"
523
+ ],
524
+ [
525
+ "8",
526
+ "7"
527
+ ],
528
+ [
529
+ "8",
530
+ "8"
531
+ ],
532
+ [
533
+ "8",
534
+ "9"
535
+ ],
536
+ [
537
+ "9",
538
+ "0"
539
+ ],
540
+ [
541
+ "9",
542
+ "1"
543
+ ],
544
+ [
545
+ "9",
546
+ "2"
547
+ ],
548
+ [
549
+ "9",
550
+ "3"
551
+ ],
552
+ [
553
+ "9",
554
+ "4"
555
+ ],
556
+ [
557
+ "9",
558
+ "5"
559
+ ],
560
+ [
561
+ "9",
562
+ "6"
563
+ ],
564
+ [
565
+ "9",
566
+ "7"
567
+ ],
568
+ [
569
+ "9",
570
+ "8"
571
+ ],
572
+ [
573
+ "9",
574
+ "9"
575
+ ]
576
+ ]
577
+ }
578
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<pad>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "mod",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "=",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "6": {
52
+ "content": " ",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ }
59
+ },
60
+ "additional_special_tokens": [
61
+ "mod",
62
+ "=",
63
+ " "
64
+ ],
65
+ "bos_token": "<s>",
66
+ "clean_up_tokenization_spaces": false,
67
+ "eos_token": "</s>",
68
+ "extra_special_tokens": {},
69
+ "model_max_length": 1000000000000000019884624838656,
70
+ "pad_token": "<pad>",
71
+ "tokenizer_class": "PreTrainedTokenizerFast",
72
+ "unk_token": "<unk>",
73
+ "number_handling": "ltr_2digit"
74
+ }
vocab.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "33": 40,
3
+ "29": 36,
4
+ "42": 49,
5
+ "91": 98,
6
+ "<unk>": 0,
7
+ "10": 17,
8
+ "89": 96,
9
+ "17": 24,
10
+ "43": 50,
11
+ "97": 104,
12
+ "50": 57,
13
+ "74": 81,
14
+ "60": 67,
15
+ "65": 72,
16
+ "mod": 4,
17
+ "4": 11,
18
+ "47": 54,
19
+ "92": 99,
20
+ "66": 73,
21
+ "14": 21,
22
+ " ": 6,
23
+ "61": 68,
24
+ "22": 29,
25
+ "7": 14,
26
+ "30": 37,
27
+ "72": 79,
28
+ "69": 76,
29
+ "31": 38,
30
+ "52": 59,
31
+ "95": 102,
32
+ "64": 71,
33
+ "80": 87,
34
+ "58": 65,
35
+ "99": 106,
36
+ "98": 105,
37
+ "6": 13,
38
+ "93": 100,
39
+ "78": 85,
40
+ "20": 27,
41
+ "</s>": 2,
42
+ "32": 39,
43
+ "5": 12,
44
+ "23": 30,
45
+ "38": 45,
46
+ "76": 83,
47
+ "24": 31,
48
+ "44": 51,
49
+ "55": 62,
50
+ "25": 32,
51
+ "2": 9,
52
+ "62": 69,
53
+ "12": 19,
54
+ "35": 42,
55
+ "28": 35,
56
+ "54": 61,
57
+ "39": 46,
58
+ "49": 56,
59
+ "15": 22,
60
+ "94": 101,
61
+ "84": 91,
62
+ "45": 52,
63
+ "71": 78,
64
+ "57": 64,
65
+ "86": 93,
66
+ "<s>": 1,
67
+ "46": 53,
68
+ "73": 80,
69
+ "88": 95,
70
+ "19": 26,
71
+ "81": 88,
72
+ "37": 44,
73
+ "68": 75,
74
+ "40": 47,
75
+ "90": 97,
76
+ "59": 66,
77
+ "83": 90,
78
+ "79": 86,
79
+ "<pad>": 3,
80
+ "13": 20,
81
+ "96": 103,
82
+ "70": 77,
83
+ "53": 60,
84
+ "82": 89,
85
+ "34": 41,
86
+ "3": 10,
87
+ "41": 48,
88
+ "21": 28,
89
+ "48": 55,
90
+ "85": 92,
91
+ "51": 58,
92
+ "16": 23,
93
+ "9": 16,
94
+ "63": 70,
95
+ "1": 8,
96
+ "26": 33,
97
+ "75": 82,
98
+ "67": 74,
99
+ "87": 94,
100
+ "77": 84,
101
+ "11": 18,
102
+ "18": 25,
103
+ "8": 15,
104
+ "56": 63,
105
+ "27": 34,
106
+ "36": 43,
107
+ "0": 7,
108
+ "=": 5
109
+ }