codebyzeb committed on
Commit
7a0464f
·
verified ·
1 Parent(s): d6aebef

Upload folder using huggingface_hub

Browse files
fw57M_Entropy_threshold_600/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<|endoftext|>",
3
+ "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|padding|>",
5
+ "unk_token": "<|unk|>"
6
+ }
fw57M_Entropy_threshold_600/stats.csv ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ num_moves,vocab_size,unique_segments,threshold
2
+ 0,515,1,5.133760083708694e-08
3
+ 1000,516,20,9.797513484954834e-07
4
+ 2000,516,25,1.4308184290712234e-06
5
+ 3000,516,29,1.8228590761282248e-06
6
+ 4000,517,32,2.1988930711813737e-06
7
+ 5000,517,36,2.524690671634744e-06
8
+ 6000,517,42,2.8358606414258247e-06
9
+ 7000,517,46,3.12660563395184e-06
10
+ 8000,517,49,3.4393651731079444e-06
11
+ 9000,517,51,3.721004304679809e-06
12
+ 10000,517,53,4.0115287447406445e-06
13
+ 11000,517,57,4.295654434827156e-06
14
+ 12000,517,59,4.567701580526773e-06
15
+ 13000,518,65,4.852824531553779e-06
16
+ 14000,518,69,5.1093265938106924e-06
17
+ 15000,518,80,5.373336534830742e-06
18
+ 16000,518,82,5.648853402817622e-06
19
+ 17000,518,86,5.91430944041349e-06
20
+ 18000,518,92,6.171561381052015e-06
21
+ 19000,519,95,6.423524609999731e-06
22
+ 20000,520,97,6.6993416112381965e-06
23
+ 21000,520,100,6.9611223807442e-06
24
+ 22000,520,107,7.221975920401746e-06
25
+ 23000,520,113,7.476828159269644e-06
26
+ 24000,520,119,7.740423825453036e-06
27
+ 25000,520,124,8.002068170753773e-06
28
+ 26000,520,127,8.264540156233124e-06
29
+ 27000,520,131,8.528781108907424e-06
30
+ 28000,520,133,8.775015885476023e-06
31
+ 29000,521,136,9.042313649842981e-06
32
+ 30000,522,139,9.290799425798468e-06
33
+ 31000,522,145,9.529902854410466e-06
34
+ 32000,522,145,9.800556654226966e-06
35
+ 33000,523,152,1.0072719305753708e-05
36
+ 34000,523,158,1.034065553540131e-05
37
+ 35000,523,162,1.0616102372296154e-05
38
+ 36000,524,168,1.089130910258973e-05
39
+ 37000,525,171,1.1162328519276343e-05
40
+ 38000,527,174,1.1432365681685042e-05
41
+ 39000,527,175,1.1706822078849655e-05
42
+ 40000,527,179,1.1987401194346603e-05
43
+ 41000,529,183,1.2248384336999152e-05
44
+ 42000,531,189,1.2501177479862235e-05
45
+ 43000,532,194,1.278531999560073e-05
46
+ 44000,532,197,1.3077251423965208e-05
47
+ 45000,533,201,1.3361139281187207e-05
48
+ 46000,534,205,1.3648355889017694e-05
49
+ 47000,536,208,1.3923853657615837e-05
50
+ 48000,536,212,1.4211380403139628e-05
51
+ 49000,537,215,1.4509694665321149e-05
52
+ 50000,538,216,1.4816629118286073e-05
53
+ 51000,538,220,1.517553573648911e-05
54
+ 52000,538,222,1.558921576361172e-05
55
+ 53000,539,226,1.612767664482817e-05
56
+ 54000,540,230,1.6818024960230105e-05
57
+ 55000,541,235,1.7545489754411392e-05
58
+ 56000,541,237,1.8080252630170435e-05
59
+ 57000,541,240,1.8548524167272262e-05
60
+ 58000,545,243,1.8917002307716757e-05
61
+ 59000,546,245,1.9251374396844767e-05
62
+ 60000,546,248,1.960298504855018e-05
63
+ 61000,547,250,1.9936958778998815e-05
64
+ 62000,547,256,2.026425499934703e-05
65
+ 63000,549,262,2.0577797840815037e-05
66
+ 64000,549,265,2.0897972717648372e-05
67
+ 65000,551,269,2.1224368538241833e-05
68
+ 66000,555,272,2.153844434360508e-05
69
+ 67000,556,277,2.1878036932321265e-05
70
+ 68000,558,280,2.219563612015918e-05
71
+ 69000,559,286,2.251324440294411e-05
72
+ 70000,560,287,2.2819762307335623e-05
73
+ 71000,561,292,2.312927790626418e-05
74
+ 72000,562,297,2.3458516807295382e-05
75
+ 73000,564,304,2.379106626904104e-05
76
+ 74000,565,307,2.4128063159878366e-05
77
+ 75000,566,309,2.4470087737427093e-05
78
+ 76000,567,315,2.4811053663142957e-05
79
+ 77000,568,318,2.5139925128314644e-05
80
+ 78000,568,328,2.5478777388343588e-05
81
+ 79000,569,332,2.5811543309828267e-05
82
+ 80000,572,336,2.6150722987949848e-05
83
+ 81000,575,341,2.649636189744342e-05
84
+ 82000,576,343,2.684458922885824e-05
85
+ 83000,578,345,2.7200831027585082e-05
86
+ 84000,578,350,2.755924651864916e-05
87
+ 85000,578,354,2.7887024771189317e-05
88
+ 86000,578,359,2.8231646865606308e-05
89
+ 87000,578,361,2.8580387152032927e-05
90
+ 88000,580,364,2.8935428417753428e-05
91
+ 89000,580,370,2.9296967113623396e-05
92
+ 90000,581,375,2.9660697691724636e-05
93
+ 91000,582,378,3.002801167895086e-05
94
+ 92000,582,382,3.039792864001356e-05
95
+ 93000,582,387,3.074665801250376e-05
96
+ 94000,582,390,3.110373654635623e-05
97
+ 95000,583,395,3.150007250951603e-05
98
+ 96000,584,397,3.189584822393954e-05
99
+ 97000,585,400,3.224884130759165e-05
100
+ 98000,588,403,3.264015685999766e-05
101
+ 99000,590,409,3.302649565739557e-05
102
+ 100000,591,412,3.339936301927082e-05
103
+ 101000,592,420,3.377728717168793e-05
104
+ 102000,593,424,3.417667176108807e-05
105
+ 103000,593,429,3.4572040021885186e-05
106
+ 104000,593,439,3.49707443092484e-05
107
+ 105000,593,447,3.538808960001916e-05
108
+ 106000,594,451,3.5759072488872334e-05
109
+ 107000,594,452,3.614377783378586e-05
110
+ 108000,595,453,3.6534518585540354e-05
111
+ 109000,596,456,3.69427289115265e-05
112
+ 110000,597,462,3.7310252082534134e-05
113
+ 111000,598,464,3.769687100430019e-05
114
+ 112000,600,471,3.812055001617409e-05
fw57M_Entropy_threshold_600/tokenizer.json ADDED
@@ -0,0 +1,665 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<|padding|>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<|endoftext|>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 514,
26
+ "content": "<|unk|>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ }
33
+ ],
34
+ "normalizer": {
35
+ "type": "Sequence",
36
+ "normalizers": [
37
+ {
38
+ "type": "NFD"
39
+ }
40
+ ]
41
+ },
42
+ "pre_tokenizer": {
43
+ "type": "WhitespaceSplit"
44
+ },
45
+ "post_processor": {
46
+ "type": "ByteLevel",
47
+ "add_prefix_space": true,
48
+ "trim_offsets": true,
49
+ "use_regex": true
50
+ },
51
+ "decoder": {
52
+ "type": "ByteLevel",
53
+ "add_prefix_space": true,
54
+ "trim_offsets": true,
55
+ "use_regex": true
56
+ },
57
+ "model": {
58
+ "type": "WordPiece",
59
+ "unk_token": "<|unk|>",
60
+ "continuing_subword_prefix": "##",
61
+ "max_input_chars_per_word": 100,
62
+ "vocab": {
63
+ "<|padding|>": 0,
64
+ "<|endoftext|>": 1,
65
+ "!": 2,
66
+ "\"": 3,
67
+ "#": 4,
68
+ "$": 5,
69
+ "%": 6,
70
+ "&": 7,
71
+ "'": 8,
72
+ "(": 9,
73
+ ")": 10,
74
+ "*": 11,
75
+ "+": 12,
76
+ ",": 13,
77
+ "-": 14,
78
+ ".": 15,
79
+ "/": 16,
80
+ "0": 17,
81
+ "1": 18,
82
+ "2": 19,
83
+ "3": 20,
84
+ "4": 21,
85
+ "5": 22,
86
+ "6": 23,
87
+ "7": 24,
88
+ "8": 25,
89
+ "9": 26,
90
+ ":": 27,
91
+ ";": 28,
92
+ "<": 29,
93
+ "=": 30,
94
+ ">": 31,
95
+ "?": 32,
96
+ "@": 33,
97
+ "A": 34,
98
+ "B": 35,
99
+ "C": 36,
100
+ "D": 37,
101
+ "E": 38,
102
+ "F": 39,
103
+ "G": 40,
104
+ "H": 41,
105
+ "I": 42,
106
+ "J": 43,
107
+ "K": 44,
108
+ "L": 45,
109
+ "M": 46,
110
+ "N": 47,
111
+ "O": 48,
112
+ "P": 49,
113
+ "Q": 50,
114
+ "R": 51,
115
+ "S": 52,
116
+ "T": 53,
117
+ "U": 54,
118
+ "V": 55,
119
+ "W": 56,
120
+ "X": 57,
121
+ "Y": 58,
122
+ "Z": 59,
123
+ "[": 60,
124
+ "\\": 61,
125
+ "]": 62,
126
+ "^": 63,
127
+ "_": 64,
128
+ "`": 65,
129
+ "a": 66,
130
+ "b": 67,
131
+ "c": 68,
132
+ "d": 69,
133
+ "e": 70,
134
+ "f": 71,
135
+ "g": 72,
136
+ "h": 73,
137
+ "i": 74,
138
+ "j": 75,
139
+ "k": 76,
140
+ "l": 77,
141
+ "m": 78,
142
+ "n": 79,
143
+ "o": 80,
144
+ "p": 81,
145
+ "q": 82,
146
+ "r": 83,
147
+ "s": 84,
148
+ "t": 85,
149
+ "u": 86,
150
+ "v": 87,
151
+ "w": 88,
152
+ "x": 89,
153
+ "y": 90,
154
+ "z": 91,
155
+ "{": 92,
156
+ "|": 93,
157
+ "}": 94,
158
+ "~": 95,
159
+ "¡": 96,
160
+ "¢": 97,
161
+ "£": 98,
162
+ "¤": 99,
163
+ "¥": 100,
164
+ "¦": 101,
165
+ "§": 102,
166
+ "¨": 103,
167
+ "©": 104,
168
+ "ª": 105,
169
+ "«": 106,
170
+ "¬": 107,
171
+ "®": 108,
172
+ "¯": 109,
173
+ "°": 110,
174
+ "±": 111,
175
+ "²": 112,
176
+ "³": 113,
177
+ "´": 114,
178
+ "µ": 115,
179
+ "¶": 116,
180
+ "·": 117,
181
+ "¸": 118,
182
+ "¹": 119,
183
+ "º": 120,
184
+ "»": 121,
185
+ "¼": 122,
186
+ "½": 123,
187
+ "¾": 124,
188
+ "¿": 125,
189
+ "À": 126,
190
+ "Á": 127,
191
+ "Â": 128,
192
+ "Ã": 129,
193
+ "Ä": 130,
194
+ "Å": 131,
195
+ "Æ": 132,
196
+ "Ç": 133,
197
+ "È": 134,
198
+ "É": 135,
199
+ "Ê": 136,
200
+ "Ë": 137,
201
+ "Ì": 138,
202
+ "Í": 139,
203
+ "Î": 140,
204
+ "Ï": 141,
205
+ "Ð": 142,
206
+ "Ñ": 143,
207
+ "Ò": 144,
208
+ "Ó": 145,
209
+ "Ô": 146,
210
+ "Õ": 147,
211
+ "Ö": 148,
212
+ "×": 149,
213
+ "Ø": 150,
214
+ "Ù": 151,
215
+ "Ú": 152,
216
+ "Û": 153,
217
+ "Ü": 154,
218
+ "Ý": 155,
219
+ "Þ": 156,
220
+ "ß": 157,
221
+ "à": 158,
222
+ "á": 159,
223
+ "â": 160,
224
+ "ã": 161,
225
+ "ä": 162,
226
+ "å": 163,
227
+ "æ": 164,
228
+ "ç": 165,
229
+ "è": 166,
230
+ "é": 167,
231
+ "ê": 168,
232
+ "ë": 169,
233
+ "ì": 170,
234
+ "í": 171,
235
+ "î": 172,
236
+ "ï": 173,
237
+ "ð": 174,
238
+ "ñ": 175,
239
+ "ò": 176,
240
+ "ó": 177,
241
+ "ô": 178,
242
+ "õ": 179,
243
+ "ö": 180,
244
+ "÷": 181,
245
+ "ø": 182,
246
+ "ù": 183,
247
+ "ú": 184,
248
+ "û": 185,
249
+ "ü": 186,
250
+ "ý": 187,
251
+ "þ": 188,
252
+ "ÿ": 189,
253
+ "Ā": 190,
254
+ "ā": 191,
255
+ "Ă": 192,
256
+ "ă": 193,
257
+ "Ą": 194,
258
+ "ą": 195,
259
+ "Ć": 196,
260
+ "ć": 197,
261
+ "Ĉ": 198,
262
+ "ĉ": 199,
263
+ "Ċ": 200,
264
+ "ċ": 201,
265
+ "Č": 202,
266
+ "č": 203,
267
+ "Ď": 204,
268
+ "ď": 205,
269
+ "Đ": 206,
270
+ "đ": 207,
271
+ "Ē": 208,
272
+ "ē": 209,
273
+ "Ĕ": 210,
274
+ "ĕ": 211,
275
+ "Ė": 212,
276
+ "ė": 213,
277
+ "Ę": 214,
278
+ "ę": 215,
279
+ "Ě": 216,
280
+ "ě": 217,
281
+ "Ĝ": 218,
282
+ "ĝ": 219,
283
+ "Ğ": 220,
284
+ "ğ": 221,
285
+ "Ġ": 222,
286
+ "ġ": 223,
287
+ "Ģ": 224,
288
+ "ģ": 225,
289
+ "Ĥ": 226,
290
+ "ĥ": 227,
291
+ "Ħ": 228,
292
+ "ħ": 229,
293
+ "Ĩ": 230,
294
+ "ĩ": 231,
295
+ "Ī": 232,
296
+ "ī": 233,
297
+ "Ĭ": 234,
298
+ "ĭ": 235,
299
+ "Į": 236,
300
+ "į": 237,
301
+ "İ": 238,
302
+ "ı": 239,
303
+ "Ĳ": 240,
304
+ "ĳ": 241,
305
+ "Ĵ": 242,
306
+ "ĵ": 243,
307
+ "Ķ": 244,
308
+ "ķ": 245,
309
+ "ĸ": 246,
310
+ "Ĺ": 247,
311
+ "ĺ": 248,
312
+ "Ļ": 249,
313
+ "ļ": 250,
314
+ "Ľ": 251,
315
+ "ľ": 252,
316
+ "Ŀ": 253,
317
+ "ŀ": 254,
318
+ "Ł": 255,
319
+ "ł": 256,
320
+ "Ń": 257,
321
+ "##A": 258,
322
+ "##-": 259,
323
+ "##¤": 260,
324
+ "##ı": 261,
325
+ "##ù": 262,
326
+ "##Ł": 263,
327
+ "##u": 264,
328
+ "##V": 265,
329
+ "##Ī": 266,
330
+ "##ĩ": 267,
331
+ "##Ā": 268,
332
+ "##ĳ": 269,
333
+ "##ĸ": 270,
334
+ "##·": 271,
335
+ "##æ": 272,
336
+ "##ĉ": 273,
337
+ "##j": 274,
338
+ "##è": 275,
339
+ "##¦": 276,
340
+ "##þ": 277,
341
+ "##!": 278,
342
+ "##~": 279,
343
+ "##h": 280,
344
+ "##Č": 281,
345
+ "##ŀ": 282,
346
+ "##}": 283,
347
+ "##)": 284,
348
+ "##¨": 285,
349
+ "##[": 286,
350
+ "##¢": 287,
351
+ "##3": 288,
352
+ "##<": 289,
353
+ "##c": 290,
354
+ "##Ã": 291,
355
+ "##B": 292,
356
+ "##Ø": 293,
357
+ "##ĝ": 294,
358
+ "##Ğ": 295,
359
+ "##Ė": 296,
360
+ "##e": 297,
361
+ "##E": 298,
362
+ "##Ĕ": 299,
363
+ "##ģ": 300,
364
+ "##á": 301,
365
+ "##w": 302,
366
+ "##ø": 303,
367
+ "##«": 304,
368
+ "##_": 305,
369
+ "##đ": 306,
370
+ "##Ě": 307,
371
+ "##å": 308,
372
+ "##³": 309,
373
+ "##Ê": 310,
374
+ "##%": 311,
375
+ "##Ĩ": 312,
376
+ "##°": 313,
377
+ "##õ": 314,
378
+ "##5": 315,
379
+ "##p": 316,
380
+ "##.": 317,
381
+ "##¿": 318,
382
+ "##ě": 319,
383
+ "##ó": 320,
384
+ "##Ĳ": 321,
385
+ "##®": 322,
386
+ "##ą": 323,
387
+ "##9": 324,
388
+ "##â": 325,
389
+ "##ë": 326,
390
+ "##ġ": 327,
391
+ "##¡": 328,
392
+ "##ì": 329,
393
+ "##Ġ": 330,
394
+ "##,": 331,
395
+ "##º": 332,
396
+ "##ÿ": 333,
397
+ "##Þ": 334,
398
+ "##ä": 335,
399
+ "##S": 336,
400
+ "##ĕ": 337,
401
+ "##ĭ": 338,
402
+ "##\\": 339,
403
+ "##D": 340,
404
+ "##¯": 341,
405
+ "##ċ": 342,
406
+ "##¸": 343,
407
+ "##Ä": 344,
408
+ "##$": 345,
409
+ "##Ë": 346,
410
+ "##î": 347,
411
+ "##Į": 348,
412
+ "##İ": 349,
413
+ "##ľ": 350,
414
+ "##Ö": 351,
415
+ "##X": 352,
416
+ "##>": 353,
417
+ "##Ú": 354,
418
+ "##ė": 355,
419
+ "##M": 356,
420
+ "##ħ": 357,
421
+ "##J": 358,
422
+ "##Í": 359,
423
+ "##÷": 360,
424
+ "##é": 361,
425
+ "##Ď": 362,
426
+ "##^": 363,
427
+ "##¥": 364,
428
+ "##µ": 365,
429
+ "##ò": 366,
430
+ "##;": 367,
431
+ "##Ü": 368,
432
+ "##1": 369,
433
+ "##ü": 370,
434
+ "##ĥ": 371,
435
+ "##¹": 372,
436
+ "##(": 373,
437
+ "##Á": 374,
438
+ "##¼": 375,
439
+ "##Ì": 376,
440
+ "##ö": 377,
441
+ "##/": 378,
442
+ "##»": 379,
443
+ "##ķ": 380,
444
+ "##©": 381,
445
+ "##i": 382,
446
+ "###": 383,
447
+ "##ĺ": 384,
448
+ "##ć": 385,
449
+ "##Ñ": 386,
450
+ "##Ĝ": 387,
451
+ "##ð": 388,
452
+ "##±": 389,
453
+ "##È": 390,
454
+ "##ý": 391,
455
+ "##Å": 392,
456
+ "##§": 393,
457
+ "##Đ": 394,
458
+ "##Ó": 395,
459
+ "##m": 396,
460
+ "##`": 397,
461
+ "##F": 398,
462
+ "##ï": 399,
463
+ "##Y": 400,
464
+ "##Ħ": 401,
465
+ "##À": 402,
466
+ "##?": 403,
467
+ "##û": 404,
468
+ "##+": 405,
469
+ "##Æ": 406,
470
+ "##į": 407,
471
+ "##Ĵ": 408,
472
+ "##í": 409,
473
+ "##Ń": 410,
474
+ "##Ă": 411,
475
+ "##¶": 412,
476
+ "##ī": 413,
477
+ "##l": 414,
478
+ "##Ð": 415,
479
+ "##L": 416,
480
+ "##Ĉ": 417,
481
+ "##£": 418,
482
+ "##ê": 419,
483
+ "##o": 420,
484
+ "##@": 421,
485
+ "##Ŀ": 422,
486
+ "##4": 423,
487
+ "##¾": 424,
488
+ "##Ċ": 425,
489
+ "##ď": 426,
490
+ "##O": 427,
491
+ "##É": 428,
492
+ "##U": 429,
493
+ "##ã": 430,
494
+ "##s": 431,
495
+ "##Õ": 432,
496
+ "##½": 433,
497
+ "##ç": 434,
498
+ "##{": 435,
499
+ "##Ę": 436,
500
+ "##Ç": 437,
501
+ "##'": 438,
502
+ "##Ļ": 439,
503
+ "##=": 440,
504
+ "##Z": 441,
505
+ "##ă": 442,
506
+ "##N": 443,
507
+ "##8": 444,
508
+ "##*": 445,
509
+ "##´": 446,
510
+ "##Ē": 447,
511
+ "##ę": 448,
512
+ "##v": 449,
513
+ "##6": 450,
514
+ "##&": 451,
515
+ "##Ą": 452,
516
+ "##H": 453,
517
+ "##Ù": 454,
518
+ "##z": 455,
519
+ "##Ý": 456,
520
+ "##f": 457,
521
+ "##0": 458,
522
+ "##Q": 459,
523
+ "##Ć": 460,
524
+ "##Û": 461,
525
+ "##t": 462,
526
+ "##y": 463,
527
+ "##Ĺ": 464,
528
+ "##Ï": 465,
529
+ "##Ģ": 466,
530
+ "##ļ": 467,
531
+ "##d": 468,
532
+ "##x": 469,
533
+ "##k": 470,
534
+ "##n": 471,
535
+ "##2": 472,
536
+ "##q": 473,
537
+ "##|": 474,
538
+ "##ú": 475,
539
+ "##Ķ": 476,
540
+ "##T": 477,
541
+ "##ā": 478,
542
+ "##ñ": 479,
543
+ "##à": 480,
544
+ "##ğ": 481,
545
+ "##g": 482,
546
+ "##¬": 483,
547
+ "##Â": 484,
548
+ "##Ĭ": 485,
549
+ "##ł": 486,
550
+ "##Ĥ": 487,
551
+ "##a": 488,
552
+ "##Ô": 489,
553
+ "##Î": 490,
554
+ "##K": 491,
555
+ "##Ò": 492,
556
+ "##b": 493,
557
+ "##r": 494,
558
+ "##ª": 495,
559
+ "##ē": 496,
560
+ "##\"": 497,
561
+ "##ĵ": 498,
562
+ "##R": 499,
563
+ "##P": 500,
564
+ "##ß": 501,
565
+ "##Ľ": 502,
566
+ "##ô": 503,
567
+ "##]": 504,
568
+ "##×": 505,
569
+ "##7": 506,
570
+ "##:": 507,
571
+ "##²": 508,
572
+ "##W": 509,
573
+ "##č": 510,
574
+ "##C": 511,
575
+ "##G": 512,
576
+ "##I": 513,
577
+ "<|unk|>": 514,
578
+ "##in": 515,
579
+ "##�": 516,
580
+ "##he": 517,
581
+ "##en": 518,
582
+ "##io": 519,
583
+ "##me": 520,
584
+ "##th": 521,
585
+ "##pl": 522,
586
+ "##es": 523,
587
+ "##te": 524,
588
+ "##the": 525,
589
+ "##ie": 526,
590
+ "##be": 527,
591
+ "##ug": 528,
592
+ "##ou": 529,
593
+ "##ve": 530,
594
+ "##men": 531,
595
+ "##us": 532,
596
+ "##ti": 533,
597
+ "##an": 534,
598
+ "##it": 535,
599
+ "##ul": 536,
600
+ "##ec": 537,
601
+ "##de": 538,
602
+ "�": 539,
603
+ "##ar": 540,
604
+ "##le": 541,
605
+ "##ea": 542,
606
+ "##ig": 543,
607
+ "##er": 544,
608
+ "##ag": 545,
609
+ "##su": 546,
610
+ "##as": 547,
611
+ "##lo": 548,
612
+ "##tu": 549,
613
+ "##ev": 550,
614
+ "##rc": 551,
615
+ "##tio": 552,
616
+ "##un": 553,
617
+ "##nc": 554,
618
+ "##opl": 555,
619
+ "##ra": 556,
620
+ "##hi": 557,
621
+ "##el": 558,
622
+ "##ce": 559,
623
+ "##on": 560,
624
+ "##ai": 561,
625
+ "##au": 562,
626
+ "##st": 563,
627
+ "##ge": 564,
628
+ "##ta": 565,
629
+ "##im": 566,
630
+ "##ne": 567,
631
+ "##ca": 568,
632
+ "##ur": 569,
633
+ "##op": 570,
634
+ "##il": 571,
635
+ "##re": 572,
636
+ "##mi": 573,
637
+ "##is": 574,
638
+ "##gh": 575,
639
+ "##at": 576,
640
+ "##ci": 577,
641
+ "##wa": 578,
642
+ "##to": 579,
643
+ "##la": 580,
644
+ "##id": 581,
645
+ "##qu": 582,
646
+ "##ad": 583,
647
+ "##jec": 584,
648
+ "##ic": 585,
649
+ "##ia": 586,
650
+ "##fu": 587,
651
+ "##or": 588,
652
+ "##ei": 589,
653
+ "##na": 590,
654
+ "##we": 591,
655
+ "##lt": 592,
656
+ "##cu": 593,
657
+ "##no": 594,
658
+ "##al": 595,
659
+ "##ab": 596,
660
+ "##iv": 597,
661
+ "##ha": 598,
662
+ "##ste": 599
663
+ }
664
+ }
665
+ }
fw57M_Entropy_threshold_600/tokenizer_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<|padding|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<|endoftext|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "514": {
21
+ "content": "<|unk|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "bos_token": "<|endoftext|>",
30
+ "clean_up_tokenization_spaces": false,
31
+ "eos_token": "<|endoftext|>",
32
+ "extra_special_tokens": {},
33
+ "model_max_length": 1000000000000000019884624838656,
34
+ "pad_token": "<|padding|>",
35
+ "tokenizer_class": "PreTrainedTokenizer",
36
+ "unk_token": "<|unk|>"
37
+ }
fw57M_Entropy_threshold_600/vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"A": 34, "-": 14, "\u00a4": 99, "\u0131": 239, "\u00f9": 183, "\u0141": 255, "u": 86, "V": 55, "\u012a": 232, "\u0129": 231, "\u0100": 190, "\u0133": 241, "\u0138": 246, "\u00b7": 117, "\u00e6": 164, "\u0109": 199, "j": 75, "\u00e8": 166, "\u00a6": 101, "\u00fe": 188, "!": 2, "~": 95, "h": 73, "\u010c": 202, "\u0140": 254, "}": 94, ")": 10, "\u00a8": 103, "[": 60, "\u00a2": 97, "3": 20, "<": 29, "c": 68, "\u00c3": 129, "B": 35, "\u00d8": 150, "\u011d": 219, "\u011e": 220, "\u0116": 212, "e": 70, "E": 38, "\u0114": 210, "\u0123": 225, "\u00e1": 159, "w": 88, "\u00f8": 182, "\u00ab": 106, "_": 64, "\u0111": 207, "\u011a": 216, "\u00e5": 163, "\u00b3": 113, "\u00ca": 136, "%": 6, "\u0128": 230, "\u00b0": 110, "\u00f5": 179, "5": 22, "p": 81, ".": 15, "\u00bf": 125, "\u011b": 217, "\u00f3": 177, "\u0132": 240, "\u00ae": 108, "\u0105": 195, "9": 26, "\u00e2": 160, "\u00eb": 169, "\u0121": 223, "\u00a1": 96, "\u00ec": 170, "\u0120": 222, ",": 13, "\u00ba": 120, "\u00ff": 189, "\u00de": 156, "\u00e4": 162, "S": 52, "\u0115": 211, "\u012d": 235, "\\": 61, "D": 37, "\u00af": 109, "\u010b": 201, "\u00b8": 118, "\u00c4": 130, "$": 5, "\u00cb": 137, "\u00ee": 172, "\u012e": 236, "\u0130": 238, "\u013e": 252, "\u00d6": 148, "X": 57, ">": 31, "\u00da": 152, "\u0117": 213, "M": 46, "\u0127": 229, "J": 43, "\u00cd": 139, "\u00f7": 181, "\u00e9": 167, "\u010e": 204, "^": 63, "\u00a5": 100, "\u00b5": 115, "\u00f2": 176, ";": 28, "\u00dc": 154, "1": 18, "\u00fc": 186, "\u0125": 227, "\u00b9": 119, "(": 9, "\u00c1": 127, "\u00bc": 122, "\u00cc": 138, "\u00f6": 180, "/": 16, "\u00bb": 121, "\u0137": 245, "\u00a9": 104, "i": 74, "#": 4, "\u013a": 248, "\u0107": 197, "\u00d1": 143, "\u011c": 218, "\u00f0": 174, "<|endoftext|>": 1, "\u00b1": 111, "\u00c8": 134, "\u00fd": 187, "\u00c5": 131, "\u00a7": 102, "\u0110": 206, "\u00d3": 145, "m": 78, "`": 65, "F": 39, "\u00ef": 173, "Y": 58, "\u0126": 228, "\u00c0": 126, "?": 32, "\u00fb": 185, "+": 12, "\u00c6": 132, "\u012f": 237, "\u0134": 
242, "\u00ed": 171, "\u0143": 257, "\u0102": 192, "\u00b6": 116, "\u012b": 233, "l": 77, "\u00d0": 142, "L": 45, "\u0108": 198, "\u00a3": 98, "\u00ea": 168, "o": 80, "@": 33, "\u013f": 253, "4": 21, "\u00be": 124, "\u010a": 200, "\u010f": 205, "O": 48, "\u00c9": 135, "U": 54, "\u00e3": 161, "s": 84, "\u00d5": 147, "\u00bd": 123, "\u00e7": 165, "{": 92, "\u0118": 214, "\u00c7": 133, "'": 8, "\u013b": 249, "=": 30, "Z": 59, "\u0103": 193, "N": 47, "8": 25, "*": 11, "\u00b4": 114, "\u0112": 208, "\u0119": 215, "v": 87, "6": 23, "&": 7, "\u0104": 194, "H": 41, "\u00d9": 151, "z": 91, "\u00dd": 155, "f": 71, "0": 17, "Q": 50, "\u0106": 196, "\u00db": 153, "t": 85, "y": 90, "\u0139": 247, "\u00cf": 141, "\u0122": 224, "\u013c": 250, "d": 69, "x": 89, "k": 76, "n": 79, "2": 19, "q": 82, "|": 93, "\u00fa": 184, "\u0136": 244, "T": 53, "\u0101": 191, "\u00f1": 175, "\u00e0": 158, "\u011f": 221, "g": 72, "\u00ac": 107, "\u00c2": 128, "\u012c": 234, "\u0142": 256, "\u0124": 226, "a": 66, "\u00d4": 146, "\u00ce": 140, "K": 44, "\u00d2": 144, "b": 67, "r": 83, "\u00aa": 105, "\u0113": 209, "\"": 3, "\u0135": 243, "R": 51, "P": 49, "\u00df": 157, "\u013d": 251, "\u00f4": 178, "]": 62, "\u00d7": 149, "7": 24, ":": 27, "\u00b2": 112, "W": 56, "\u010d": 203, "C": 36, "G": 40, "I": 42, "<|padding|>": 0, "##A": 258, "##-": 259, "##\u00a4": 260, "##\u0131": 261, "##\u00f9": 262, "##\u0141": 263, "##u": 264, "##V": 265, "##\u012a": 266, "##\u0129": 267, "##\u0100": 268, "##\u0133": 269, "##\u0138": 270, "##\u00b7": 271, "##\u00e6": 272, "##\u0109": 273, "##j": 274, "##\u00e8": 275, "##\u00a6": 276, "##\u00fe": 277, "##!": 278, "##~": 279, "##h": 280, "##\u010c": 281, "##\u0140": 282, "##}": 283, "##)": 284, "##\u00a8": 285, "##[": 286, "##\u00a2": 287, "##3": 288, "##<": 289, "##c": 290, "##\u00c3": 291, "##B": 292, "##\u00d8": 293, "##\u011d": 294, "##\u011e": 295, "##\u0116": 296, "##e": 297, "##E": 298, "##\u0114": 299, "##\u0123": 300, "##\u00e1": 301, "##w": 302, "##\u00f8": 303, 
"##\u00ab": 304, "##_": 305, "##\u0111": 306, "##\u011a": 307, "##\u00e5": 308, "##\u00b3": 309, "##\u00ca": 310, "##%": 311, "##\u0128": 312, "##\u00b0": 313, "##\u00f5": 314, "##5": 315, "##p": 316, "##.": 317, "##\u00bf": 318, "##\u011b": 319, "##\u00f3": 320, "##\u0132": 321, "##\u00ae": 322, "##\u0105": 323, "##9": 324, "##\u00e2": 325, "##\u00eb": 326, "##\u0121": 327, "##\u00a1": 328, "##\u00ec": 329, "##\u0120": 330, "##,": 331, "##\u00ba": 332, "##\u00ff": 333, "##\u00de": 334, "##\u00e4": 335, "##S": 336, "##\u0115": 337, "##\u012d": 338, "##\\": 339, "##D": 340, "##\u00af": 341, "##\u010b": 342, "##\u00b8": 343, "##\u00c4": 344, "##$": 345, "##\u00cb": 346, "##\u00ee": 347, "##\u012e": 348, "##\u0130": 349, "##\u013e": 350, "##\u00d6": 351, "##X": 352, "##>": 353, "##\u00da": 354, "##\u0117": 355, "##M": 356, "##\u0127": 357, "##J": 358, "##\u00cd": 359, "##\u00f7": 360, "##\u00e9": 361, "##\u010e": 362, "##^": 363, "##\u00a5": 364, "##\u00b5": 365, "##\u00f2": 366, "##;": 367, "##\u00dc": 368, "##1": 369, "##\u00fc": 370, "##\u0125": 371, "##\u00b9": 372, "##(": 373, "##\u00c1": 374, "##\u00bc": 375, "##\u00cc": 376, "##\u00f6": 377, "##/": 378, "##\u00bb": 379, "##\u0137": 380, "##\u00a9": 381, "##i": 382, "###": 383, "##\u013a": 384, "##\u0107": 385, "##\u00d1": 386, "##\u011c": 387, "##\u00f0": 388, "##\u00b1": 389, "##\u00c8": 390, "##\u00fd": 391, "##\u00c5": 392, "##\u00a7": 393, "##\u0110": 394, "##\u00d3": 395, "##m": 396, "##`": 397, "##F": 398, "##\u00ef": 399, "##Y": 400, "##\u0126": 401, "##\u00c0": 402, "##?": 403, "##\u00fb": 404, "##+": 405, "##\u00c6": 406, "##\u012f": 407, "##\u0134": 408, "##\u00ed": 409, "##\u0143": 410, "##\u0102": 411, "##\u00b6": 412, "##\u012b": 413, "##l": 414, "##\u00d0": 415, "##L": 416, "##\u0108": 417, "##\u00a3": 418, "##\u00ea": 419, "##o": 420, "##@": 421, "##\u013f": 422, "##4": 423, "##\u00be": 424, "##\u010a": 425, "##\u010f": 426, "##O": 427, "##\u00c9": 428, "##U": 429, "##\u00e3": 430, "##s": 431, 
"##\u00d5": 432, "##\u00bd": 433, "##\u00e7": 434, "##{": 435, "##\u0118": 436, "##\u00c7": 437, "##'": 438, "##\u013b": 439, "##=": 440, "##Z": 441, "##\u0103": 442, "##N": 443, "##8": 444, "##*": 445, "##\u00b4": 446, "##\u0112": 447, "##\u0119": 448, "##v": 449, "##6": 450, "##&": 451, "##\u0104": 452, "##H": 453, "##\u00d9": 454, "##z": 455, "##\u00dd": 456, "##f": 457, "##0": 458, "##Q": 459, "##\u0106": 460, "##\u00db": 461, "##t": 462, "##y": 463, "##\u0139": 464, "##\u00cf": 465, "##\u0122": 466, "##\u013c": 467, "##d": 468, "##x": 469, "##k": 470, "##n": 471, "##2": 472, "##q": 473, "##|": 474, "##\u00fa": 475, "##\u0136": 476, "##T": 477, "##\u0101": 478, "##\u00f1": 479, "##\u00e0": 480, "##\u011f": 481, "##g": 482, "##\u00ac": 483, "##\u00c2": 484, "##\u012c": 485, "##\u0142": 486, "##\u0124": 487, "##a": 488, "##\u00d4": 489, "##\u00ce": 490, "##K": 491, "##\u00d2": 492, "##b": 493, "##r": 494, "##\u00aa": 495, "##\u0113": 496, "##\"": 497, "##\u0135": 498, "##R": 499, "##P": 500, "##\u00df": 501, "##\u013d": 502, "##\u00f4": 503, "##]": 504, "##\u00d7": 505, "##7": 506, "##:": 507, "##\u00b2": 508, "##W": 509, "##\u010d": 510, "##C": 511, "##G": 512, "##I": 513, "<|unk|>": 514, "##in": 515, "##\ufffd": 516, "##he": 517, "##en": 518, "##io": 519, "##me": 520, "##th": 521, "##pl": 522, "##es": 523, "##te": 524, "##the": 525, "##ie": 526, "##be": 527, "##ug": 528, "##ou": 529, "##ve": 530, "##men": 531, "##us": 532, "##ti": 533, "##an": 534, "##it": 535, "##ul": 536, "##ec": 537, "##de": 538, "\ufffd": 539, "##ar": 540, "##le": 541, "##ea": 542, "##ig": 543, "##er": 544, "##ag": 545, "##su": 546, "##as": 547, "##lo": 548, "##tu": 549, "##ev": 550, "##rc": 551, "##tio": 552, "##un": 553, "##nc": 554, "##opl": 555, "##ra": 556, "##hi": 557, "##el": 558, "##ce": 559, "##on": 560, "##ai": 561, "##au": 562, "##st": 563, "##ge": 564, "##ta": 565, "##im": 566, "##ne": 567, "##ca": 568, "##ur": 569, "##op": 570, "##il": 571, "##re": 572, "##mi": 573, "##is": 
574, "##gh": 575, "##at": 576, "##ci": 577, "##wa": 578, "##to": 579, "##la": 580, "##id": 581, "##qu": 582, "##ad": 583, "##jec": 584, "##ic": 585, "##ia": 586, "##fu": 587, "##or": 588, "##ei": 589, "##na": 590, "##we": 591, "##lt": 592, "##cu": 593, "##no": 594, "##al": 595, "##ab": 596, "##iv": 597, "##ha": 598, "##ste": 599}