codebyzeb commited on
Commit
2796ec0
·
verified ·
1 Parent(s): 7a0464f

Delete fw57M_Entropy_threshold_600

Browse files
fw57M_Entropy_threshold_600/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57M_Entropy_threshold_600/stats.csv DELETED
@@ -1,114 +0,0 @@
1
- num_moves,vocab_size,unique_segments,threshold
2
- 0,515,1,5.133760083708694e-08
3
- 1000,516,20,9.797513484954834e-07
4
- 2000,516,25,1.4308184290712234e-06
5
- 3000,516,29,1.8228590761282248e-06
6
- 4000,517,32,2.1988930711813737e-06
7
- 5000,517,36,2.524690671634744e-06
8
- 6000,517,42,2.8358606414258247e-06
9
- 7000,517,46,3.12660563395184e-06
10
- 8000,517,49,3.4393651731079444e-06
11
- 9000,517,51,3.721004304679809e-06
12
- 10000,517,53,4.0115287447406445e-06
13
- 11000,517,57,4.295654434827156e-06
14
- 12000,517,59,4.567701580526773e-06
15
- 13000,518,65,4.852824531553779e-06
16
- 14000,518,69,5.1093265938106924e-06
17
- 15000,518,80,5.373336534830742e-06
18
- 16000,518,82,5.648853402817622e-06
19
- 17000,518,86,5.91430944041349e-06
20
- 18000,518,92,6.171561381052015e-06
21
- 19000,519,95,6.423524609999731e-06
22
- 20000,520,97,6.6993416112381965e-06
23
- 21000,520,100,6.9611223807442e-06
24
- 22000,520,107,7.221975920401746e-06
25
- 23000,520,113,7.476828159269644e-06
26
- 24000,520,119,7.740423825453036e-06
27
- 25000,520,124,8.002068170753773e-06
28
- 26000,520,127,8.264540156233124e-06
29
- 27000,520,131,8.528781108907424e-06
30
- 28000,520,133,8.775015885476023e-06
31
- 29000,521,136,9.042313649842981e-06
32
- 30000,522,139,9.290799425798468e-06
33
- 31000,522,145,9.529902854410466e-06
34
- 32000,522,145,9.800556654226966e-06
35
- 33000,523,152,1.0072719305753708e-05
36
- 34000,523,158,1.034065553540131e-05
37
- 35000,523,162,1.0616102372296154e-05
38
- 36000,524,168,1.089130910258973e-05
39
- 37000,525,171,1.1162328519276343e-05
40
- 38000,527,174,1.1432365681685042e-05
41
- 39000,527,175,1.1706822078849655e-05
42
- 40000,527,179,1.1987401194346603e-05
43
- 41000,529,183,1.2248384336999152e-05
44
- 42000,531,189,1.2501177479862235e-05
45
- 43000,532,194,1.278531999560073e-05
46
- 44000,532,197,1.3077251423965208e-05
47
- 45000,533,201,1.3361139281187207e-05
48
- 46000,534,205,1.3648355889017694e-05
49
- 47000,536,208,1.3923853657615837e-05
50
- 48000,536,212,1.4211380403139628e-05
51
- 49000,537,215,1.4509694665321149e-05
52
- 50000,538,216,1.4816629118286073e-05
53
- 51000,538,220,1.517553573648911e-05
54
- 52000,538,222,1.558921576361172e-05
55
- 53000,539,226,1.612767664482817e-05
56
- 54000,540,230,1.6818024960230105e-05
57
- 55000,541,235,1.7545489754411392e-05
58
- 56000,541,237,1.8080252630170435e-05
59
- 57000,541,240,1.8548524167272262e-05
60
- 58000,545,243,1.8917002307716757e-05
61
- 59000,546,245,1.9251374396844767e-05
62
- 60000,546,248,1.960298504855018e-05
63
- 61000,547,250,1.9936958778998815e-05
64
- 62000,547,256,2.026425499934703e-05
65
- 63000,549,262,2.0577797840815037e-05
66
- 64000,549,265,2.0897972717648372e-05
67
- 65000,551,269,2.1224368538241833e-05
68
- 66000,555,272,2.153844434360508e-05
69
- 67000,556,277,2.1878036932321265e-05
70
- 68000,558,280,2.219563612015918e-05
71
- 69000,559,286,2.251324440294411e-05
72
- 70000,560,287,2.2819762307335623e-05
73
- 71000,561,292,2.312927790626418e-05
74
- 72000,562,297,2.3458516807295382e-05
75
- 73000,564,304,2.379106626904104e-05
76
- 74000,565,307,2.4128063159878366e-05
77
- 75000,566,309,2.4470087737427093e-05
78
- 76000,567,315,2.4811053663142957e-05
79
- 77000,568,318,2.5139925128314644e-05
80
- 78000,568,328,2.5478777388343588e-05
81
- 79000,569,332,2.5811543309828267e-05
82
- 80000,572,336,2.6150722987949848e-05
83
- 81000,575,341,2.649636189744342e-05
84
- 82000,576,343,2.684458922885824e-05
85
- 83000,578,345,2.7200831027585082e-05
86
- 84000,578,350,2.755924651864916e-05
87
- 85000,578,354,2.7887024771189317e-05
88
- 86000,578,359,2.8231646865606308e-05
89
- 87000,578,361,2.8580387152032927e-05
90
- 88000,580,364,2.8935428417753428e-05
91
- 89000,580,370,2.9296967113623396e-05
92
- 90000,581,375,2.9660697691724636e-05
93
- 91000,582,378,3.002801167895086e-05
94
- 92000,582,382,3.039792864001356e-05
95
- 93000,582,387,3.074665801250376e-05
96
- 94000,582,390,3.110373654635623e-05
97
- 95000,583,395,3.150007250951603e-05
98
- 96000,584,397,3.189584822393954e-05
99
- 97000,585,400,3.224884130759165e-05
100
- 98000,588,403,3.264015685999766e-05
101
- 99000,590,409,3.302649565739557e-05
102
- 100000,591,412,3.339936301927082e-05
103
- 101000,592,420,3.377728717168793e-05
104
- 102000,593,424,3.417667176108807e-05
105
- 103000,593,429,3.4572040021885186e-05
106
- 104000,593,439,3.49707443092484e-05
107
- 105000,593,447,3.538808960001916e-05
108
- 106000,594,451,3.5759072488872334e-05
109
- 107000,594,452,3.614377783378586e-05
110
- 108000,595,453,3.6534518585540354e-05
111
- 109000,596,456,3.69427289115265e-05
112
- 110000,597,462,3.7310252082534134e-05
113
- 111000,598,464,3.769687100430019e-05
114
- 112000,600,471,3.812055001617409e-05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_threshold_600/tokenizer.json DELETED
@@ -1,665 +0,0 @@
1
- {
2
- "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
5
- "added_tokens": [
6
- {
7
- "id": 0,
8
- "content": "<|padding|>",
9
- "single_word": false,
10
- "lstrip": false,
11
- "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
- },
15
- {
16
- "id": 1,
17
- "content": "<|endoftext|>",
18
- "single_word": false,
19
- "lstrip": false,
20
- "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
- },
24
- {
25
- "id": 514,
26
- "content": "<|unk|>",
27
- "single_word": false,
28
- "lstrip": false,
29
- "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
- }
33
- ],
34
- "normalizer": {
35
- "type": "Sequence",
36
- "normalizers": [
37
- {
38
- "type": "NFD"
39
- }
40
- ]
41
- },
42
- "pre_tokenizer": {
43
- "type": "WhitespaceSplit"
44
- },
45
- "post_processor": {
46
- "type": "ByteLevel",
47
- "add_prefix_space": true,
48
- "trim_offsets": true,
49
- "use_regex": true
50
- },
51
- "decoder": {
52
- "type": "ByteLevel",
53
- "add_prefix_space": true,
54
- "trim_offsets": true,
55
- "use_regex": true
56
- },
57
- "model": {
58
- "type": "WordPiece",
59
- "unk_token": "<|unk|>",
60
- "continuing_subword_prefix": "##",
61
- "max_input_chars_per_word": 100,
62
- "vocab": {
63
- "<|padding|>": 0,
64
- "<|endoftext|>": 1,
65
- "!": 2,
66
- "\"": 3,
67
- "#": 4,
68
- "$": 5,
69
- "%": 6,
70
- "&": 7,
71
- "'": 8,
72
- "(": 9,
73
- ")": 10,
74
- "*": 11,
75
- "+": 12,
76
- ",": 13,
77
- "-": 14,
78
- ".": 15,
79
- "/": 16,
80
- "0": 17,
81
- "1": 18,
82
- "2": 19,
83
- "3": 20,
84
- "4": 21,
85
- "5": 22,
86
- "6": 23,
87
- "7": 24,
88
- "8": 25,
89
- "9": 26,
90
- ":": 27,
91
- ";": 28,
92
- "<": 29,
93
- "=": 30,
94
- ">": 31,
95
- "?": 32,
96
- "@": 33,
97
- "A": 34,
98
- "B": 35,
99
- "C": 36,
100
- "D": 37,
101
- "E": 38,
102
- "F": 39,
103
- "G": 40,
104
- "H": 41,
105
- "I": 42,
106
- "J": 43,
107
- "K": 44,
108
- "L": 45,
109
- "M": 46,
110
- "N": 47,
111
- "O": 48,
112
- "P": 49,
113
- "Q": 50,
114
- "R": 51,
115
- "S": 52,
116
- "T": 53,
117
- "U": 54,
118
- "V": 55,
119
- "W": 56,
120
- "X": 57,
121
- "Y": 58,
122
- "Z": 59,
123
- "[": 60,
124
- "\\": 61,
125
- "]": 62,
126
- "^": 63,
127
- "_": 64,
128
- "`": 65,
129
- "a": 66,
130
- "b": 67,
131
- "c": 68,
132
- "d": 69,
133
- "e": 70,
134
- "f": 71,
135
- "g": 72,
136
- "h": 73,
137
- "i": 74,
138
- "j": 75,
139
- "k": 76,
140
- "l": 77,
141
- "m": 78,
142
- "n": 79,
143
- "o": 80,
144
- "p": 81,
145
- "q": 82,
146
- "r": 83,
147
- "s": 84,
148
- "t": 85,
149
- "u": 86,
150
- "v": 87,
151
- "w": 88,
152
- "x": 89,
153
- "y": 90,
154
- "z": 91,
155
- "{": 92,
156
- "|": 93,
157
- "}": 94,
158
- "~": 95,
159
- "¡": 96,
160
- "¢": 97,
161
- "£": 98,
162
- "¤": 99,
163
- "¥": 100,
164
- "¦": 101,
165
- "§": 102,
166
- "¨": 103,
167
- "©": 104,
168
- "ª": 105,
169
- "«": 106,
170
- "¬": 107,
171
- "®": 108,
172
- "¯": 109,
173
- "°": 110,
174
- "±": 111,
175
- "²": 112,
176
- "³": 113,
177
- "´": 114,
178
- "µ": 115,
179
- "¶": 116,
180
- "·": 117,
181
- "¸": 118,
182
- "¹": 119,
183
- "º": 120,
184
- "»": 121,
185
- "¼": 122,
186
- "½": 123,
187
- "¾": 124,
188
- "¿": 125,
189
- "À": 126,
190
- "Á": 127,
191
- "Â": 128,
192
- "Ã": 129,
193
- "Ä": 130,
194
- "Å": 131,
195
- "Æ": 132,
196
- "Ç": 133,
197
- "È": 134,
198
- "É": 135,
199
- "Ê": 136,
200
- "Ë": 137,
201
- "Ì": 138,
202
- "Í": 139,
203
- "Î": 140,
204
- "Ï": 141,
205
- "Ð": 142,
206
- "Ñ": 143,
207
- "Ò": 144,
208
- "Ó": 145,
209
- "Ô": 146,
210
- "Õ": 147,
211
- "Ö": 148,
212
- "×": 149,
213
- "Ø": 150,
214
- "Ù": 151,
215
- "Ú": 152,
216
- "Û": 153,
217
- "Ü": 154,
218
- "Ý": 155,
219
- "Þ": 156,
220
- "ß": 157,
221
- "à": 158,
222
- "á": 159,
223
- "â": 160,
224
- "ã": 161,
225
- "ä": 162,
226
- "å": 163,
227
- "æ": 164,
228
- "ç": 165,
229
- "è": 166,
230
- "é": 167,
231
- "ê": 168,
232
- "ë": 169,
233
- "ì": 170,
234
- "í": 171,
235
- "î": 172,
236
- "ï": 173,
237
- "ð": 174,
238
- "ñ": 175,
239
- "ò": 176,
240
- "ó": 177,
241
- "ô": 178,
242
- "õ": 179,
243
- "ö": 180,
244
- "÷": 181,
245
- "ø": 182,
246
- "ù": 183,
247
- "ú": 184,
248
- "û": 185,
249
- "ü": 186,
250
- "ý": 187,
251
- "þ": 188,
252
- "ÿ": 189,
253
- "Ā": 190,
254
- "ā": 191,
255
- "Ă": 192,
256
- "ă": 193,
257
- "Ą": 194,
258
- "ą": 195,
259
- "Ć": 196,
260
- "ć": 197,
261
- "Ĉ": 198,
262
- "ĉ": 199,
263
- "Ċ": 200,
264
- "ċ": 201,
265
- "Č": 202,
266
- "č": 203,
267
- "Ď": 204,
268
- "ď": 205,
269
- "Đ": 206,
270
- "đ": 207,
271
- "Ē": 208,
272
- "ē": 209,
273
- "Ĕ": 210,
274
- "ĕ": 211,
275
- "Ė": 212,
276
- "ė": 213,
277
- "Ę": 214,
278
- "ę": 215,
279
- "Ě": 216,
280
- "ě": 217,
281
- "Ĝ": 218,
282
- "ĝ": 219,
283
- "Ğ": 220,
284
- "ğ": 221,
285
- "Ġ": 222,
286
- "ġ": 223,
287
- "Ģ": 224,
288
- "ģ": 225,
289
- "Ĥ": 226,
290
- "ĥ": 227,
291
- "Ħ": 228,
292
- "ħ": 229,
293
- "Ĩ": 230,
294
- "ĩ": 231,
295
- "Ī": 232,
296
- "ī": 233,
297
- "Ĭ": 234,
298
- "ĭ": 235,
299
- "Į": 236,
300
- "į": 237,
301
- "İ": 238,
302
- "ı": 239,
303
- "IJ": 240,
304
- "ij": 241,
305
- "Ĵ": 242,
306
- "ĵ": 243,
307
- "Ķ": 244,
308
- "ķ": 245,
309
- "ĸ": 246,
310
- "Ĺ": 247,
311
- "ĺ": 248,
312
- "Ļ": 249,
313
- "ļ": 250,
314
- "Ľ": 251,
315
- "ľ": 252,
316
- "Ŀ": 253,
317
- "ŀ": 254,
318
- "Ł": 255,
319
- "ł": 256,
320
- "Ń": 257,
321
- "##A": 258,
322
- "##-": 259,
323
- "##¤": 260,
324
- "##ı": 261,
325
- "##ù": 262,
326
- "##Ł": 263,
327
- "##u": 264,
328
- "##V": 265,
329
- "##Ī": 266,
330
- "##ĩ": 267,
331
- "##Ā": 268,
332
- "##ij": 269,
333
- "##ĸ": 270,
334
- "##·": 271,
335
- "##æ": 272,
336
- "##ĉ": 273,
337
- "##j": 274,
338
- "##è": 275,
339
- "##¦": 276,
340
- "##þ": 277,
341
- "##!": 278,
342
- "##~": 279,
343
- "##h": 280,
344
- "##Č": 281,
345
- "##ŀ": 282,
346
- "##}": 283,
347
- "##)": 284,
348
- "##¨": 285,
349
- "##[": 286,
350
- "##¢": 287,
351
- "##3": 288,
352
- "##<": 289,
353
- "##c": 290,
354
- "##Ã": 291,
355
- "##B": 292,
356
- "##Ø": 293,
357
- "##ĝ": 294,
358
- "##Ğ": 295,
359
- "##Ė": 296,
360
- "##e": 297,
361
- "##E": 298,
362
- "##Ĕ": 299,
363
- "##ģ": 300,
364
- "##á": 301,
365
- "##w": 302,
366
- "##ø": 303,
367
- "##«": 304,
368
- "##_": 305,
369
- "##đ": 306,
370
- "##Ě": 307,
371
- "##å": 308,
372
- "##³": 309,
373
- "##Ê": 310,
374
- "##%": 311,
375
- "##Ĩ": 312,
376
- "##°": 313,
377
- "##õ": 314,
378
- "##5": 315,
379
- "##p": 316,
380
- "##.": 317,
381
- "##¿": 318,
382
- "##ě": 319,
383
- "##ó": 320,
384
- "##IJ": 321,
385
- "##®": 322,
386
- "##ą": 323,
387
- "##9": 324,
388
- "##â": 325,
389
- "##ë": 326,
390
- "##ġ": 327,
391
- "##¡": 328,
392
- "##ì": 329,
393
- "##Ġ": 330,
394
- "##,": 331,
395
- "##º": 332,
396
- "##ÿ": 333,
397
- "##Þ": 334,
398
- "##ä": 335,
399
- "##S": 336,
400
- "##ĕ": 337,
401
- "##ĭ": 338,
402
- "##\\": 339,
403
- "##D": 340,
404
- "##¯": 341,
405
- "##ċ": 342,
406
- "##¸": 343,
407
- "##Ä": 344,
408
- "##$": 345,
409
- "##Ë": 346,
410
- "##î": 347,
411
- "##Į": 348,
412
- "##İ": 349,
413
- "##ľ": 350,
414
- "##Ö": 351,
415
- "##X": 352,
416
- "##>": 353,
417
- "##Ú": 354,
418
- "##ė": 355,
419
- "##M": 356,
420
- "##ħ": 357,
421
- "##J": 358,
422
- "##Í": 359,
423
- "##÷": 360,
424
- "##é": 361,
425
- "##Ď": 362,
426
- "##^": 363,
427
- "##¥": 364,
428
- "##µ": 365,
429
- "##ò": 366,
430
- "##;": 367,
431
- "##Ü": 368,
432
- "##1": 369,
433
- "##ü": 370,
434
- "##ĥ": 371,
435
- "##¹": 372,
436
- "##(": 373,
437
- "##Á": 374,
438
- "##¼": 375,
439
- "##Ì": 376,
440
- "##ö": 377,
441
- "##/": 378,
442
- "##»": 379,
443
- "##ķ": 380,
444
- "##©": 381,
445
- "##i": 382,
446
- "###": 383,
447
- "##ĺ": 384,
448
- "##ć": 385,
449
- "##Ñ": 386,
450
- "##Ĝ": 387,
451
- "##ð": 388,
452
- "##±": 389,
453
- "##È": 390,
454
- "##ý": 391,
455
- "##Å": 392,
456
- "##§": 393,
457
- "##Đ": 394,
458
- "##Ó": 395,
459
- "##m": 396,
460
- "##`": 397,
461
- "##F": 398,
462
- "##ï": 399,
463
- "##Y": 400,
464
- "##Ħ": 401,
465
- "##À": 402,
466
- "##?": 403,
467
- "##û": 404,
468
- "##+": 405,
469
- "##Æ": 406,
470
- "##į": 407,
471
- "##Ĵ": 408,
472
- "##í": 409,
473
- "##Ń": 410,
474
- "##Ă": 411,
475
- "##¶": 412,
476
- "##ī": 413,
477
- "##l": 414,
478
- "##Ð": 415,
479
- "##L": 416,
480
- "##Ĉ": 417,
481
- "##£": 418,
482
- "##ê": 419,
483
- "##o": 420,
484
- "##@": 421,
485
- "##Ŀ": 422,
486
- "##4": 423,
487
- "##¾": 424,
488
- "##Ċ": 425,
489
- "##ď": 426,
490
- "##O": 427,
491
- "##É": 428,
492
- "##U": 429,
493
- "##ã": 430,
494
- "##s": 431,
495
- "##Õ": 432,
496
- "##½": 433,
497
- "##ç": 434,
498
- "##{": 435,
499
- "##Ę": 436,
500
- "##Ç": 437,
501
- "##'": 438,
502
- "##Ļ": 439,
503
- "##=": 440,
504
- "##Z": 441,
505
- "##ă": 442,
506
- "##N": 443,
507
- "##8": 444,
508
- "##*": 445,
509
- "##´": 446,
510
- "##Ē": 447,
511
- "##ę": 448,
512
- "##v": 449,
513
- "##6": 450,
514
- "##&": 451,
515
- "##Ą": 452,
516
- "##H": 453,
517
- "##Ù": 454,
518
- "##z": 455,
519
- "##Ý": 456,
520
- "##f": 457,
521
- "##0": 458,
522
- "##Q": 459,
523
- "##Ć": 460,
524
- "##Û": 461,
525
- "##t": 462,
526
- "##y": 463,
527
- "##Ĺ": 464,
528
- "##Ï": 465,
529
- "##Ģ": 466,
530
- "##ļ": 467,
531
- "##d": 468,
532
- "##x": 469,
533
- "##k": 470,
534
- "##n": 471,
535
- "##2": 472,
536
- "##q": 473,
537
- "##|": 474,
538
- "##ú": 475,
539
- "##Ķ": 476,
540
- "##T": 477,
541
- "##ā": 478,
542
- "##ñ": 479,
543
- "##à": 480,
544
- "##ğ": 481,
545
- "##g": 482,
546
- "##¬": 483,
547
- "##Â": 484,
548
- "##Ĭ": 485,
549
- "##ł": 486,
550
- "##Ĥ": 487,
551
- "##a": 488,
552
- "##Ô": 489,
553
- "##Î": 490,
554
- "##K": 491,
555
- "##Ò": 492,
556
- "##b": 493,
557
- "##r": 494,
558
- "##ª": 495,
559
- "##ē": 496,
560
- "##\"": 497,
561
- "##ĵ": 498,
562
- "##R": 499,
563
- "##P": 500,
564
- "##ß": 501,
565
- "##Ľ": 502,
566
- "##ô": 503,
567
- "##]": 504,
568
- "##×": 505,
569
- "##7": 506,
570
- "##:": 507,
571
- "##²": 508,
572
- "##W": 509,
573
- "##č": 510,
574
- "##C": 511,
575
- "##G": 512,
576
- "##I": 513,
577
- "<|unk|>": 514,
578
- "##in": 515,
579
- "##�": 516,
580
- "##he": 517,
581
- "##en": 518,
582
- "##io": 519,
583
- "##me": 520,
584
- "##th": 521,
585
- "##pl": 522,
586
- "##es": 523,
587
- "##te": 524,
588
- "##the": 525,
589
- "##ie": 526,
590
- "##be": 527,
591
- "##ug": 528,
592
- "##ou": 529,
593
- "##ve": 530,
594
- "##men": 531,
595
- "##us": 532,
596
- "##ti": 533,
597
- "##an": 534,
598
- "##it": 535,
599
- "##ul": 536,
600
- "##ec": 537,
601
- "##de": 538,
602
- "�": 539,
603
- "##ar": 540,
604
- "##le": 541,
605
- "##ea": 542,
606
- "##ig": 543,
607
- "##er": 544,
608
- "##ag": 545,
609
- "##su": 546,
610
- "##as": 547,
611
- "##lo": 548,
612
- "##tu": 549,
613
- "##ev": 550,
614
- "##rc": 551,
615
- "##tio": 552,
616
- "##un": 553,
617
- "##nc": 554,
618
- "##opl": 555,
619
- "##ra": 556,
620
- "##hi": 557,
621
- "##el": 558,
622
- "##ce": 559,
623
- "##on": 560,
624
- "##ai": 561,
625
- "##au": 562,
626
- "##st": 563,
627
- "##ge": 564,
628
- "##ta": 565,
629
- "##im": 566,
630
- "##ne": 567,
631
- "##ca": 568,
632
- "##ur": 569,
633
- "##op": 570,
634
- "##il": 571,
635
- "##re": 572,
636
- "##mi": 573,
637
- "##is": 574,
638
- "##gh": 575,
639
- "##at": 576,
640
- "##ci": 577,
641
- "##wa": 578,
642
- "##to": 579,
643
- "##la": 580,
644
- "##id": 581,
645
- "##qu": 582,
646
- "##ad": 583,
647
- "##jec": 584,
648
- "##ic": 585,
649
- "##ia": 586,
650
- "##fu": 587,
651
- "##or": 588,
652
- "##ei": 589,
653
- "##na": 590,
654
- "##we": 591,
655
- "##lt": 592,
656
- "##cu": 593,
657
- "##no": 594,
658
- "##al": 595,
659
- "##ab": 596,
660
- "##iv": 597,
661
- "##ha": 598,
662
- "##ste": 599
663
- }
664
- }
665
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_threshold_600/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "514": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_threshold_600/vocab.json DELETED
@@ -1 +0,0 @@
1
- {"A": 34, "-": 14, "\u00a4": 99, "\u0131": 239, "\u00f9": 183, "\u0141": 255, "u": 86, "V": 55, "\u012a": 232, "\u0129": 231, "\u0100": 190, "\u0133": 241, "\u0138": 246, "\u00b7": 117, "\u00e6": 164, "\u0109": 199, "j": 75, "\u00e8": 166, "\u00a6": 101, "\u00fe": 188, "!": 2, "~": 95, "h": 73, "\u010c": 202, "\u0140": 254, "}": 94, ")": 10, "\u00a8": 103, "[": 60, "\u00a2": 97, "3": 20, "<": 29, "c": 68, "\u00c3": 129, "B": 35, "\u00d8": 150, "\u011d": 219, "\u011e": 220, "\u0116": 212, "e": 70, "E": 38, "\u0114": 210, "\u0123": 225, "\u00e1": 159, "w": 88, "\u00f8": 182, "\u00ab": 106, "_": 64, "\u0111": 207, "\u011a": 216, "\u00e5": 163, "\u00b3": 113, "\u00ca": 136, "%": 6, "\u0128": 230, "\u00b0": 110, "\u00f5": 179, "5": 22, "p": 81, ".": 15, "\u00bf": 125, "\u011b": 217, "\u00f3": 177, "\u0132": 240, "\u00ae": 108, "\u0105": 195, "9": 26, "\u00e2": 160, "\u00eb": 169, "\u0121": 223, "\u00a1": 96, "\u00ec": 170, "\u0120": 222, ",": 13, "\u00ba": 120, "\u00ff": 189, "\u00de": 156, "\u00e4": 162, "S": 52, "\u0115": 211, "\u012d": 235, "\\": 61, "D": 37, "\u00af": 109, "\u010b": 201, "\u00b8": 118, "\u00c4": 130, "$": 5, "\u00cb": 137, "\u00ee": 172, "\u012e": 236, "\u0130": 238, "\u013e": 252, "\u00d6": 148, "X": 57, ">": 31, "\u00da": 152, "\u0117": 213, "M": 46, "\u0127": 229, "J": 43, "\u00cd": 139, "\u00f7": 181, "\u00e9": 167, "\u010e": 204, "^": 63, "\u00a5": 100, "\u00b5": 115, "\u00f2": 176, ";": 28, "\u00dc": 154, "1": 18, "\u00fc": 186, "\u0125": 227, "\u00b9": 119, "(": 9, "\u00c1": 127, "\u00bc": 122, "\u00cc": 138, "\u00f6": 180, "/": 16, "\u00bb": 121, "\u0137": 245, "\u00a9": 104, "i": 74, "#": 4, "\u013a": 248, "\u0107": 197, "\u00d1": 143, "\u011c": 218, "\u00f0": 174, "<|endoftext|>": 1, "\u00b1": 111, "\u00c8": 134, "\u00fd": 187, "\u00c5": 131, "\u00a7": 102, "\u0110": 206, "\u00d3": 145, "m": 78, "`": 65, "F": 39, "\u00ef": 173, "Y": 58, "\u0126": 228, "\u00c0": 126, "?": 32, "\u00fb": 185, "+": 12, "\u00c6": 132, "\u012f": 237, "\u0134": 242, "\u00ed": 171, "\u0143": 257, "\u0102": 192, "\u00b6": 116, "\u012b": 233, "l": 77, "\u00d0": 142, "L": 45, "\u0108": 198, "\u00a3": 98, "\u00ea": 168, "o": 80, "@": 33, "\u013f": 253, "4": 21, "\u00be": 124, "\u010a": 200, "\u010f": 205, "O": 48, "\u00c9": 135, "U": 54, "\u00e3": 161, "s": 84, "\u00d5": 147, "\u00bd": 123, "\u00e7": 165, "{": 92, "\u0118": 214, "\u00c7": 133, "'": 8, "\u013b": 249, "=": 30, "Z": 59, "\u0103": 193, "N": 47, "8": 25, "*": 11, "\u00b4": 114, "\u0112": 208, "\u0119": 215, "v": 87, "6": 23, "&": 7, "\u0104": 194, "H": 41, "\u00d9": 151, "z": 91, "\u00dd": 155, "f": 71, "0": 17, "Q": 50, "\u0106": 196, "\u00db": 153, "t": 85, "y": 90, "\u0139": 247, "\u00cf": 141, "\u0122": 224, "\u013c": 250, "d": 69, "x": 89, "k": 76, "n": 79, "2": 19, "q": 82, "|": 93, "\u00fa": 184, "\u0136": 244, "T": 53, "\u0101": 191, "\u00f1": 175, "\u00e0": 158, "\u011f": 221, "g": 72, "\u00ac": 107, "\u00c2": 128, "\u012c": 234, "\u0142": 256, "\u0124": 226, "a": 66, "\u00d4": 146, "\u00ce": 140, "K": 44, "\u00d2": 144, "b": 67, "r": 83, "\u00aa": 105, "\u0113": 209, "\"": 3, "\u0135": 243, "R": 51, "P": 49, "\u00df": 157, "\u013d": 251, "\u00f4": 178, "]": 62, "\u00d7": 149, "7": 24, ":": 27, "\u00b2": 112, "W": 56, "\u010d": 203, "C": 36, "G": 40, "I": 42, "<|padding|>": 0, "##A": 258, "##-": 259, "##\u00a4": 260, "##\u0131": 261, "##\u00f9": 262, "##\u0141": 263, "##u": 264, "##V": 265, "##\u012a": 266, "##\u0129": 267, "##\u0100": 268, "##\u0133": 269, "##\u0138": 270, "##\u00b7": 271, "##\u00e6": 272, "##\u0109": 273, "##j": 274, "##\u00e8": 275, "##\u00a6": 276, "##\u00fe": 277, "##!": 278, "##~": 279, "##h": 280, "##\u010c": 281, "##\u0140": 282, "##}": 283, "##)": 284, "##\u00a8": 285, "##[": 286, "##\u00a2": 287, "##3": 288, "##<": 289, "##c": 290, "##\u00c3": 291, "##B": 292, "##\u00d8": 293, "##\u011d": 294, "##\u011e": 295, "##\u0116": 296, "##e": 297, "##E": 298, "##\u0114": 299, "##\u0123": 300, "##\u00e1": 301, "##w": 302, "##\u00f8": 303, "##\u00ab": 304, "##_": 305, "##\u0111": 306, "##\u011a": 307, "##\u00e5": 308, "##\u00b3": 309, "##\u00ca": 310, "##%": 311, "##\u0128": 312, "##\u00b0": 313, "##\u00f5": 314, "##5": 315, "##p": 316, "##.": 317, "##\u00bf": 318, "##\u011b": 319, "##\u00f3": 320, "##\u0132": 321, "##\u00ae": 322, "##\u0105": 323, "##9": 324, "##\u00e2": 325, "##\u00eb": 326, "##\u0121": 327, "##\u00a1": 328, "##\u00ec": 329, "##\u0120": 330, "##,": 331, "##\u00ba": 332, "##\u00ff": 333, "##\u00de": 334, "##\u00e4": 335, "##S": 336, "##\u0115": 337, "##\u012d": 338, "##\\": 339, "##D": 340, "##\u00af": 341, "##\u010b": 342, "##\u00b8": 343, "##\u00c4": 344, "##$": 345, "##\u00cb": 346, "##\u00ee": 347, "##\u012e": 348, "##\u0130": 349, "##\u013e": 350, "##\u00d6": 351, "##X": 352, "##>": 353, "##\u00da": 354, "##\u0117": 355, "##M": 356, "##\u0127": 357, "##J": 358, "##\u00cd": 359, "##\u00f7": 360, "##\u00e9": 361, "##\u010e": 362, "##^": 363, "##\u00a5": 364, "##\u00b5": 365, "##\u00f2": 366, "##;": 367, "##\u00dc": 368, "##1": 369, "##\u00fc": 370, "##\u0125": 371, "##\u00b9": 372, "##(": 373, "##\u00c1": 374, "##\u00bc": 375, "##\u00cc": 376, "##\u00f6": 377, "##/": 378, "##\u00bb": 379, "##\u0137": 380, "##\u00a9": 381, "##i": 382, "###": 383, "##\u013a": 384, "##\u0107": 385, "##\u00d1": 386, "##\u011c": 387, "##\u00f0": 388, "##\u00b1": 389, "##\u00c8": 390, "##\u00fd": 391, "##\u00c5": 392, "##\u00a7": 393, "##\u0110": 394, "##\u00d3": 395, "##m": 396, "##`": 397, "##F": 398, "##\u00ef": 399, "##Y": 400, "##\u0126": 401, "##\u00c0": 402, "##?": 403, "##\u00fb": 404, "##+": 405, "##\u00c6": 406, "##\u012f": 407, "##\u0134": 408, "##\u00ed": 409, "##\u0143": 410, "##\u0102": 411, "##\u00b6": 412, "##\u012b": 413, "##l": 414, "##\u00d0": 415, "##L": 416, "##\u0108": 417, "##\u00a3": 418, "##\u00ea": 419, "##o": 420, "##@": 421, "##\u013f": 422, "##4": 423, "##\u00be": 424, "##\u010a": 425, "##\u010f": 426, "##O": 427, "##\u00c9": 428, "##U": 429, "##\u00e3": 430, "##s": 431, "##\u00d5": 432, "##\u00bd": 433, "##\u00e7": 434, "##{": 435, "##\u0118": 436, "##\u00c7": 437, "##'": 438, "##\u013b": 439, "##=": 440, "##Z": 441, "##\u0103": 442, "##N": 443, "##8": 444, "##*": 445, "##\u00b4": 446, "##\u0112": 447, "##\u0119": 448, "##v": 449, "##6": 450, "##&": 451, "##\u0104": 452, "##H": 453, "##\u00d9": 454, "##z": 455, "##\u00dd": 456, "##f": 457, "##0": 458, "##Q": 459, "##\u0106": 460, "##\u00db": 461, "##t": 462, "##y": 463, "##\u0139": 464, "##\u00cf": 465, "##\u0122": 466, "##\u013c": 467, "##d": 468, "##x": 469, "##k": 470, "##n": 471, "##2": 472, "##q": 473, "##|": 474, "##\u00fa": 475, "##\u0136": 476, "##T": 477, "##\u0101": 478, "##\u00f1": 479, "##\u00e0": 480, "##\u011f": 481, "##g": 482, "##\u00ac": 483, "##\u00c2": 484, "##\u012c": 485, "##\u0142": 486, "##\u0124": 487, "##a": 488, "##\u00d4": 489, "##\u00ce": 490, "##K": 491, "##\u00d2": 492, "##b": 493, "##r": 494, "##\u00aa": 495, "##\u0113": 496, "##\"": 497, "##\u0135": 498, "##R": 499, "##P": 500, "##\u00df": 501, "##\u013d": 502, "##\u00f4": 503, "##]": 504, "##\u00d7": 505, "##7": 506, "##:": 507, "##\u00b2": 508, "##W": 509, "##\u010d": 510, "##C": 511, "##G": 512, "##I": 513, "<|unk|>": 514, "##in": 515, "##\ufffd": 516, "##he": 517, "##en": 518, "##io": 519, "##me": 520, "##th": 521, "##pl": 522, "##es": 523, "##te": 524, "##the": 525, "##ie": 526, "##be": 527, "##ug": 528, "##ou": 529, "##ve": 530, "##men": 531, "##us": 532, "##ti": 533, "##an": 534, "##it": 535, "##ul": 536, "##ec": 537, "##de": 538, "\ufffd": 539, "##ar": 540, "##le": 541, "##ea": 542, "##ig": 543, "##er": 544, "##ag": 545, "##su": 546, "##as": 547, "##lo": 548, "##tu": 549, "##ev": 550, "##rc": 551, "##tio": 552, "##un": 553, "##nc": 554, "##opl": 555, "##ra": 556, "##hi": 557, "##el": 558, "##ce": 559, "##on": 560, "##ai": 561, "##au": 562, "##st": 563, "##ge": 564, "##ta": 565, "##im": 566, "##ne": 567, "##ca": 568, "##ur": 569, "##op": 570, "##il": 571, "##re": 572, "##mi": 573, "##is": 574, "##gh": 575, "##at": 576, "##ci": 577, "##wa": 578, "##to": 579, "##la": 580, "##id": 581, "##qu": 582, "##ad": 583, "##jec": 584, "##ic": 585, "##ia": 586, "##fu": 587, "##or": 588, "##ei": 589, "##na": 590, "##we": 591, "##lt": 592, "##cu": 593, "##no": 594, "##al": 595, "##ab": 596, "##iv": 597, "##ha": 598, "##ste": 599}