Zeb committed on
Commit
43d278f
·
1 Parent(s): b3a3239

Remove old tokenizers

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. fw57Mmulti_Entropy_threshold_128000/special_tokens_map.json +0 -6
  2. fw57Mmulti_Entropy_threshold_128000/stats.csv +0 -0
  3. fw57Mmulti_Entropy_threshold_128000/tokenizer.json +0 -0
  4. fw57Mmulti_Entropy_threshold_128000/tokenizer_config.json +0 -37
  5. fw57Mmulti_Entropy_threshold_128000/vocab.json +0 -0
  6. fw57Mmulti_Entropy_threshold_16000/special_tokens_map.json +0 -6
  7. fw57Mmulti_Entropy_threshold_16000/stats.csv +0 -0
  8. fw57Mmulti_Entropy_threshold_16000/tokenizer.json +0 -0
  9. fw57Mmulti_Entropy_threshold_16000/tokenizer_config.json +0 -37
  10. fw57Mmulti_Entropy_threshold_16000/vocab.json +0 -0
  11. fw57Mmulti_Entropy_threshold_256000/special_tokens_map.json +0 -6
  12. fw57Mmulti_Entropy_threshold_256000/stats.csv +0 -0
  13. fw57Mmulti_Entropy_threshold_256000/tokenizer.json +0 -1046
  14. fw57Mmulti_Entropy_threshold_256000/tokenizer_config.json +0 -37
  15. fw57Mmulti_Entropy_threshold_256000/vocab.json +0 -1
  16. fw57Mmulti_Entropy_threshold_32000/special_tokens_map.json +0 -6
  17. fw57Mmulti_Entropy_threshold_32000/stats.csv +0 -0
  18. fw57Mmulti_Entropy_threshold_32000/tokenizer.json +0 -0
  19. fw57Mmulti_Entropy_threshold_32000/tokenizer_config.json +0 -37
  20. fw57Mmulti_Entropy_threshold_32000/vocab.json +0 -0
  21. fw57Mmulti_Entropy_threshold_64000/special_tokens_map.json +0 -6
  22. fw57Mmulti_Entropy_threshold_64000/stats.csv +0 -0
  23. fw57Mmulti_Entropy_threshold_64000/tokenizer.json +0 -0
  24. fw57Mmulti_Entropy_threshold_64000/tokenizer_config.json +0 -37
  25. fw57Mmulti_Entropy_threshold_64000/vocab.json +0 -0
  26. fw57Mmulti_Entropy_threshold_8064/special_tokens_map.json +0 -6
  27. fw57Mmulti_Entropy_threshold_8064/stats.csv +0 -0
  28. fw57Mmulti_Entropy_threshold_8064/tokenizer.json +0 -0
  29. fw57Mmulti_Entropy_threshold_8064/tokenizer_config.json +0 -37
  30. fw57Mmulti_Entropy_threshold_8064/vocab.json +0 -0
  31. fw57Mmulti_Surprisal_threshold_128000/special_tokens_map.json +0 -6
  32. fw57Mmulti_Surprisal_threshold_128000/stats.csv +0 -0
  33. fw57Mmulti_Surprisal_threshold_128000/tokenizer.json +0 -0
  34. fw57Mmulti_Surprisal_threshold_128000/tokenizer_config.json +0 -37
  35. fw57Mmulti_Surprisal_threshold_128000/vocab.json +0 -0
  36. fw57Mmulti_Surprisal_threshold_16000/special_tokens_map.json +0 -6
  37. fw57Mmulti_Surprisal_threshold_16000/stats.csv +0 -0
  38. fw57Mmulti_Surprisal_threshold_16000/tokenizer.json +0 -0
  39. fw57Mmulti_Surprisal_threshold_16000/tokenizer_config.json +0 -37
  40. fw57Mmulti_Surprisal_threshold_16000/vocab.json +0 -0
  41. fw57Mmulti_Surprisal_threshold_256000/special_tokens_map.json +0 -6
  42. fw57Mmulti_Surprisal_threshold_256000/stats.csv +0 -0
  43. fw57Mmulti_Surprisal_threshold_256000/tokenizer.json +0 -1390
  44. fw57Mmulti_Surprisal_threshold_256000/tokenizer_config.json +0 -37
  45. fw57Mmulti_Surprisal_threshold_256000/vocab.json +0 -1
  46. fw57Mmulti_Surprisal_threshold_32000/special_tokens_map.json +0 -6
  47. fw57Mmulti_Surprisal_threshold_32000/stats.csv +0 -0
  48. fw57Mmulti_Surprisal_threshold_32000/tokenizer.json +0 -0
  49. fw57Mmulti_Surprisal_threshold_32000/tokenizer_config.json +0 -37
  50. fw57Mmulti_Surprisal_threshold_32000/vocab.json +0 -0
fw57Mmulti_Entropy_threshold_128000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_128000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_128000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_128000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "698": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_128000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_16000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_16000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_16000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_16000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "698": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_16000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_256000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_256000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_256000/tokenizer.json DELETED
@@ -1,1046 +0,0 @@
1
- {
2
- "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
5
- "added_tokens": [
6
- {
7
- "id": 0,
8
- "content": "<|padding|>",
9
- "single_word": false,
10
- "lstrip": false,
11
- "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
- },
15
- {
16
- "id": 1,
17
- "content": "<|endoftext|>",
18
- "single_word": false,
19
- "lstrip": false,
20
- "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
- },
24
- {
25
- "id": 698,
26
- "content": "<|unk|>",
27
- "single_word": false,
28
- "lstrip": false,
29
- "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
- }
33
- ],
34
- "normalizer": null,
35
- "pre_tokenizer": {
36
- "type": "ByteLevel",
37
- "add_prefix_space": true,
38
- "trim_offsets": true,
39
- "use_regex": true
40
- },
41
- "post_processor": {
42
- "type": "ByteLevel",
43
- "add_prefix_space": true,
44
- "trim_offsets": true,
45
- "use_regex": true
46
- },
47
- "decoder": {
48
- "type": "ByteLevel",
49
- "add_prefix_space": true,
50
- "trim_offsets": true,
51
- "use_regex": true
52
- },
53
- "model": {
54
- "type": "WordPiece",
55
- "unk_token": "<|unk|>",
56
- "continuing_subword_prefix": "##",
57
- "max_input_chars_per_word": 100,
58
- "vocab": {
59
- "<|padding|>": 0,
60
- "<|endoftext|>": 1,
61
- "Ġd": 2,
62
- "d": 3,
63
- "##d": 4,
64
- "Ġĩ": 5,
65
- "ĩ": 6,
66
- "##ĩ": 7,
67
- "Ġº": 8,
68
- "º": 9,
69
- "##º": 10,
70
- "5": 11,
71
- "##5": 12,
72
- "Ġć": 13,
73
- "ć": 14,
74
- "##ć": 15,
75
- "?": 16,
76
- "##?": 17,
77
- "ĠĮ": 18,
78
- "Į": 19,
79
- "##Į": 20,
80
- "ĠV": 21,
81
- "V": 22,
82
- "##V": 23,
83
- ",": 24,
84
- "##,": 25,
85
- "ĠĐ": 26,
86
- "Đ": 27,
87
- "##Đ": 28,
88
- "ĠU": 29,
89
- "U": 30,
90
- "##U": 31,
91
- "Ġä": 32,
92
- "ä": 33,
93
- "##ä": 34,
94
- "`": 35,
95
- "##`": 36,
96
- "ĠÜ": 37,
97
- "Ü": 38,
98
- "##Ü": 39,
99
- "Ġk": 40,
100
- "k": 41,
101
- "##k": 42,
102
- "Ġâ": 43,
103
- "â": 44,
104
- "##â": 45,
105
- "Ġb": 46,
106
- "b": 47,
107
- "##b": 48,
108
- "@": 49,
109
- "##@": 50,
110
- "Ġæ": 51,
111
- "æ": 52,
112
- "##æ": 53,
113
- "¾": 54,
114
- "##¾": 55,
115
- "Ġþ": 56,
116
- "þ": 57,
117
- "##þ": 58,
118
- "Ġj": 59,
119
- "j": 60,
120
- "##j": 61,
121
- "Ġf": 62,
122
- "f": 63,
123
- "##f": 64,
124
- "Ġú": 65,
125
- "ú": 66,
126
- "##ú": 67,
127
- "1": 68,
128
- "##1": 69,
129
- "Ġª": 70,
130
- "ª": 71,
131
- "##ª": 72,
132
- "Ġm": 73,
133
- "m": 74,
134
- "##m": 75,
135
- "$": 76,
136
- "##$": 77,
137
- "¦": 78,
138
- "##¦": 79,
139
- "2": 80,
140
- "##2": 81,
141
- "ĠIJ": 82,
142
- "IJ": 83,
143
- "##IJ": 84,
144
- "¯": 85,
145
- "##¯": 86,
146
- "ĠD": 87,
147
- "D": 88,
148
- "##D": 89,
149
- "Ġÿ": 90,
150
- "ÿ": 91,
151
- "##ÿ": 92,
152
- "ĠĄ": 93,
153
- "Ą": 94,
154
- "##Ą": 95,
155
- "}": 96,
156
- "##}": 97,
157
- "ĠĻ": 98,
158
- "Ļ": 99,
159
- "##Ļ": 100,
160
- ".": 101,
161
- "##.": 102,
162
- "©": 103,
163
- "##©": 104,
164
- "ĠY": 105,
165
- "Y": 106,
166
- "##Y": 107,
167
- "ĠĖ": 108,
168
- "Ė": 109,
169
- "##Ė": 110,
170
- "Ġŀ": 111,
171
- "ŀ": 112,
172
- "##ŀ": 113,
173
- "Ġę": 114,
174
- "ę": 115,
175
- "##ę": 116,
176
- "0": 117,
177
- "##0": 118,
178
- "Ġò": 119,
179
- "ò": 120,
180
- "##ò": 121,
181
- "Ġđ": 122,
182
- "đ": 123,
183
- "##đ": 124,
184
- "ĠÐ": 125,
185
- "Ð": 126,
186
- "##Ð": 127,
187
- "ĠØ": 128,
188
- "Ø": 129,
189
- "##Ø": 130,
190
- "~": 131,
191
- "##~": 132,
192
- "Ġó": 133,
193
- "ó": 134,
194
- "##ó": 135,
195
- "Ġē": 136,
196
- "ē": 137,
197
- "##ē": 138,
198
- "ĠĹ": 139,
199
- "Ĺ": 140,
200
- "##Ĺ": 141,
201
- "®": 142,
202
- "##®": 143,
203
- "ĠÛ": 144,
204
- "Û": 145,
205
- "##Û": 146,
206
- "Ġċ": 147,
207
- "ċ": 148,
208
- "##ċ": 149,
209
- "Ġð": 150,
210
- "ð": 151,
211
- "##ð": 152,
212
- "Ġij": 153,
213
- "ij": 154,
214
- "##ij": 155,
215
- "Ġs": 156,
216
- "s": 157,
217
- "##s": 158,
218
- "Ġq": 159,
219
- "q": 160,
220
- "##q": 161,
221
- "«": 162,
222
- "##«": 163,
223
- "ĠR": 164,
224
- "R": 165,
225
- "##R": 166,
226
- "ĠM": 167,
227
- "M": 168,
228
- "##M": 169,
229
- "²": 170,
230
- "##²": 171,
231
- "°": 172,
232
- "##°": 173,
233
- "Ġc": 174,
234
- "c": 175,
235
- "##c": 176,
236
- "£": 177,
237
- "##£": 178,
238
- "¥": 179,
239
- "##¥": 180,
240
- "ĠC": 181,
241
- "C": 182,
242
- "##C": 183,
243
- "ĠT": 184,
244
- "T": 185,
245
- "##T": 186,
246
- "ĠÞ": 187,
247
- "Þ": 188,
248
- "##Þ": 189,
249
- "Ġn": 190,
250
- "n": 191,
251
- "##n": 192,
252
- "ĠĂ": 193,
253
- "Ă": 194,
254
- "##Ă": 195,
255
- "Ġķ": 196,
256
- "ķ": 197,
257
- "##ķ": 198,
258
- "¿": 199,
259
- "##¿": 200,
260
- "|": 201,
261
- "##|": 202,
262
- "ĠZ": 203,
263
- "Z": 204,
264
- "##Z": 205,
265
- "Ġü": 206,
266
- "ü": 207,
267
- "##ü": 208,
268
- "ĠĨ": 209,
269
- "Ĩ": 210,
270
- "##Ĩ": 211,
271
- "Ġģ": 212,
272
- "ģ": 213,
273
- "##ģ": 214,
274
- "ĠÌ": 215,
275
- "Ì": 216,
276
- "##Ì": 217,
277
- "½": 218,
278
- "##½": 219,
279
- "»": 220,
280
- "##»": 221,
281
- "¸": 222,
282
- "##¸": 223,
283
- "ĠG": 224,
284
- "G": 225,
285
- "##G": 226,
286
- "ĠÒ": 227,
287
- "Ò": 228,
288
- "##Ò": 229,
289
- "×": 230,
290
- "##×": 231,
291
- "Ġă": 232,
292
- "ă": 233,
293
- "##ă": 234,
294
- "ĠĪ": 235,
295
- "Ī": 236,
296
- "##Ī": 237,
297
- "Ġî": 238,
298
- "î": 239,
299
- "##î": 240,
300
- "Ġg": 241,
301
- "g": 242,
302
- "##g": 243,
303
- "¤": 244,
304
- "##¤": 245,
305
- "ĠÇ": 246,
306
- "Ç": 247,
307
- "##Ç": 248,
308
- "4": 249,
309
- "##4": 250,
310
- "´": 251,
311
- "##´": 252,
312
- "§": 253,
313
- "##§": 254,
314
- "ĠÈ": 255,
315
- "È": 256,
316
- "##È": 257,
317
- "Ġè": 258,
318
- "è": 259,
319
- "##è": 260,
320
- "Ġt": 261,
321
- "t": 262,
322
- "##t": 263,
323
- "Ġā": 264,
324
- "ā": 265,
325
- "##ā": 266,
326
- "{": 267,
327
- "##{": 268,
328
- "ĠA": 269,
329
- "A": 270,
330
- "##A": 271,
331
- "ĠĢ": 272,
332
- "Ģ": 273,
333
- "##Ģ": 274,
334
- "Ġļ": 275,
335
- "ļ": 276,
336
- "##ļ": 277,
337
- "#": 278,
338
- "###": 279,
339
- "ĠI": 280,
340
- "I": 281,
341
- "##I": 282,
342
- "ĠÕ": 283,
343
- "Õ": 284,
344
- "##Õ": 285,
345
- "Ġá": 286,
346
- "á": 287,
347
- "##á": 288,
348
- "±": 289,
349
- "##±": 290,
350
- "Ġğ": 291,
351
- "ğ": 292,
352
- "##ğ": 293,
353
- "ĠÉ": 294,
354
- "É": 295,
355
- "##É": 296,
356
- "ĠĆ": 297,
357
- "Ć": 298,
358
- "##Ć": 299,
359
- "ĠĴ": 300,
360
- "Ĵ": 301,
361
- "##Ĵ": 302,
362
- "Ġù": 303,
363
- "ù": 304,
364
- "##ù": 305,
365
- "]": 306,
366
- "##]": 307,
367
- "Ġû": 308,
368
- "û": 309,
369
- "##û": 310,
370
- "ĠÓ": 311,
371
- "Ó": 312,
372
- "##Ó": 313,
373
- "ĠĜ": 314,
374
- "Ĝ": 315,
375
- "##Ĝ": 316,
376
- "Ġy": 317,
377
- "y": 318,
378
- "##y": 319,
379
- "³": 320,
380
- "##³": 321,
381
- "[": 322,
382
- "##[": 323,
383
- "Ġħ": 324,
384
- "ħ": 325,
385
- "##ħ": 326,
386
- "Ġĸ": 327,
387
- "ĸ": 328,
388
- "##ĸ": 329,
389
- "ĠŁ": 330,
390
- "Ł": 331,
391
- "##Ł": 332,
392
- "Ġį": 333,
393
- "į": 334,
394
- "##į": 335,
395
- "_": 336,
396
- "##_": 337,
397
- ":": 338,
398
- "##:": 339,
399
- "-": 340,
400
- "##-": 341,
401
- "ĠW": 342,
402
- "W": 343,
403
- "##W": 344,
404
- "ĠÁ": 345,
405
- "Á": 346,
406
- "##Á": 347,
407
- "Ġã": 348,
408
- "ã": 349,
409
- "##ã": 350,
410
- "ĠÆ": 351,
411
- "Æ": 352,
412
- "##Æ": 353,
413
- "ĠĚ": 354,
414
- "Ě": 355,
415
- "##Ě": 356,
416
- "Ġĥ": 357,
417
- "ĥ": 358,
418
- "##ĥ": 359,
419
- "¨": 360,
420
- "##¨": 361,
421
- "ĠL": 362,
422
- "L": 363,
423
- "##L": 364,
424
- "Ġô": 365,
425
- "ô": 366,
426
- "##ô": 367,
427
- "Ġĵ": 368,
428
- "ĵ": 369,
429
- "##ĵ": 370,
430
- "ĠĦ": 371,
431
- "Ħ": 372,
432
- "##Ħ": 373,
433
- "ĠÀ": 374,
434
- "À": 375,
435
- "##À": 376,
436
- "ĠÎ": 377,
437
- "Î": 378,
438
- "##Î": 379,
439
- "ĠĀ": 380,
440
- "Ā": 381,
441
- "##Ā": 382,
442
- "ĠĘ": 383,
443
- "Ę": 384,
444
- "##Ę": 385,
445
- "9": 386,
446
- "##9": 387,
447
- "ĠF": 388,
448
- "F": 389,
449
- "##F": 390,
450
- "ĠĊ": 391,
451
- "Ċ": 392,
452
- "##Ċ": 393,
453
- "ĠÄ": 394,
454
- "Ä": 395,
455
- "##Ä": 396,
456
- "ĠE": 397,
457
- "E": 398,
458
- "##E": 399,
459
- "¬": 400,
460
- "##¬": 401,
461
- "'": 402,
462
- "##'": 403,
463
- ">": 404,
464
- "##>": 405,
465
- "ĠX": 406,
466
- "X": 407,
467
- "##X": 408,
468
- "Ġı": 409,
469
- "ı": 410,
470
- "##ı": 411,
471
- "Ġď": 412,
472
- "ď": 413,
473
- "##ď": 414,
474
- "ĠB": 415,
475
- "B": 416,
476
- "##B": 417,
477
- "Ġą": 418,
478
- "ą": 419,
479
- "##ą": 420,
480
- "ĠÃ": 421,
481
- "Ã": 422,
482
- "##Ã": 423,
483
- "Ġo": 424,
484
- "o": 425,
485
- "##o": 426,
486
- "¹": 427,
487
- "##¹": 428,
488
- "Ġa": 429,
489
- "a": 430,
490
- "##a": 431,
491
- "Ġr": 432,
492
- "r": 433,
493
- "##r": 434,
494
- "8": 435,
495
- "##8": 436,
496
- ")": 437,
497
- "##)": 438,
498
- ";": 439,
499
- "##;": 440,
500
- "ĠQ": 441,
501
- "Q": 442,
502
- "##Q": 443,
503
- "Ġx": 444,
504
- "x": 445,
505
- "##x": 446,
506
- "·": 447,
507
- "##·": 448,
508
- "ĠÏ": 449,
509
- "Ï": 450,
510
- "##Ï": 451,
511
- "Ġë": 452,
512
- "ë": 453,
513
- "##ë": 454,
514
- "ĠĿ": 455,
515
- "Ŀ": 456,
516
- "##Ŀ": 457,
517
- "Ġö": 458,
518
- "ö": 459,
519
- "##ö": 460,
520
- "ĠĔ": 461,
521
- "Ĕ": 462,
522
- "##Ĕ": 463,
523
- "Ġç": 464,
524
- "ç": 465,
525
- "##ç": 466,
526
- "Ġu": 467,
527
- "u": 468,
528
- "##u": 469,
529
- "Ġľ": 470,
530
- "ľ": 471,
531
- "##ľ": 472,
532
- "Ġé": 473,
533
- "é": 474,
534
- "##é": 475,
535
- "Ġà": 476,
536
- "à": 477,
537
- "##à": 478,
538
- "Ġñ": 479,
539
- "ñ": 480,
540
- "##ñ": 481,
541
- "3": 482,
542
- "##3": 483,
543
- "Ġĉ": 484,
544
- "ĉ": 485,
545
- "##ĉ": 486,
546
- "Ġł": 487,
547
- "ł": 488,
548
- "##ł": 489,
549
- "Ġz": 490,
550
- "z": 491,
551
- "##z": 492,
552
- "Ġí": 493,
553
- "í": 494,
554
- "##í": 495,
555
- "Ġw": 496,
556
- "w": 497,
557
- "##w": 498,
558
- "ĠĶ": 499,
559
- "Ķ": 500,
560
- "##Ķ": 501,
561
- "ĠŃ": 502,
562
- "Ń": 503,
563
- "##Ń": 504,
564
- "ĠÚ": 505,
565
- "Ú": 506,
566
- "##Ú": 507,
567
- "+": 508,
568
- "##+": 509,
569
- "ĠÊ": 510,
570
- "Ê": 511,
571
- "##Ê": 512,
572
- "Ġĕ": 513,
573
- "ĕ": 514,
574
- "##ĕ": 515,
575
- "¼": 516,
576
- "##¼": 517,
577
- "ĠĎ": 518,
578
- "Ď": 519,
579
- "##Ď": 520,
580
- "Ġė": 521,
581
- "ė": 522,
582
- "##ė": 523,
583
- "ĠÅ": 524,
584
- "Å": 525,
585
- "##Å": 526,
586
- "Ġß": 527,
587
- "ß": 528,
588
- "##ß": 529,
589
- "ĠJ": 530,
590
- "J": 531,
591
- "##J": 532,
592
- "Ġµ": 533,
593
- "µ": 534,
594
- "##µ": 535,
595
- "ĠĬ": 536,
596
- "Ĭ": 537,
597
- "##Ĭ": 538,
598
- "Ġý": 539,
599
- "ý": 540,
600
- "##ý": 541,
601
- "=": 542,
602
- "##=": 543,
603
- "ĠÝ": 544,
604
- "Ý": 545,
605
- "##Ý": 546,
606
- "Ġě": 547,
607
- "ě": 548,
608
- "##ě": 549,
609
- "Ġč": 550,
610
- "č": 551,
611
- "##č": 552,
612
- "ĠK": 553,
613
- "K": 554,
614
- "##K": 555,
615
- "ĠO": 556,
616
- "O": 557,
617
- "##O": 558,
618
- "ĠÑ": 559,
619
- "Ñ": 560,
620
- "##Ñ": 561,
621
- "^": 562,
622
- "##^": 563,
623
- "*": 564,
624
- "##*": 565,
625
- "ĠĒ": 566,
626
- "Ē": 567,
627
- "##Ē": 568,
628
- "!": 569,
629
- "##!": 570,
630
- "Ġĭ": 571,
631
- "ĭ": 572,
632
- "##ĭ": 573,
633
- "ĠS": 574,
634
- "S": 575,
635
- "##S": 576,
636
- "Ġİ": 577,
637
- "İ": 578,
638
- "##İ": 579,
639
- "Ġø": 580,
640
- "ø": 581,
641
- "##ø": 582,
642
- "ĠÍ": 583,
643
- "Í": 584,
644
- "##Í": 585,
645
- "ĠH": 586,
646
- "H": 587,
647
- "##H": 588,
648
- "Ġì": 589,
649
- "ì": 590,
650
- "##ì": 591,
651
- "Ġe": 592,
652
- "e": 593,
653
- "##e": 594,
654
- "¶": 595,
655
- "##¶": 596,
656
- "(": 597,
657
- "##(": 598,
658
- "Ġõ": 599,
659
- "õ": 600,
660
- "##õ": 601,
661
- "ĠĈ": 602,
662
- "Ĉ": 603,
663
- "##Ĉ": 604,
664
- "Ġī": 605,
665
- "ī": 606,
666
- "##ī": 607,
667
- "Ġġ": 608,
668
- "ġ": 609,
669
- "##ġ": 610,
670
- "/": 611,
671
- "##/": 612,
672
- "ĠÂ": 613,
673
- "Â": 614,
674
- "##Â": 615,
675
- "Ġv": 616,
676
- "v": 617,
677
- "##v": 618,
678
- "%": 619,
679
- "##%": 620,
680
- "ĠÙ": 621,
681
- "Ù": 622,
682
- "##Ù": 623,
683
- "&": 624,
684
- "##&": 625,
685
- "ĠË": 626,
686
- "Ë": 627,
687
- "##Ë": 628,
688
- "Ġh": 629,
689
- "h": 630,
690
- "##h": 631,
691
- "Ġĝ": 632,
692
- "ĝ": 633,
693
- "##ĝ": 634,
694
- "ĠÔ": 635,
695
- "Ô": 636,
696
- "##Ô": 637,
697
- "Ġï": 638,
698
- "ï": 639,
699
- "##ï": 640,
700
- "ĠP": 641,
701
- "P": 642,
702
- "##P": 643,
703
- "ĠĤ": 644,
704
- "Ĥ": 645,
705
- "##Ĥ": 646,
706
- "Ġp": 647,
707
- "p": 648,
708
- "##p": 649,
709
- "Ġ": 650,
710
- "##Ġ": 651,
711
- "\\": 652,
712
- "##\\": 653,
713
- "ĠÖ": 654,
714
- "Ö": 655,
715
- "##Ö": 656,
716
- "<": 657,
717
- "##<": 658,
718
- "¢": 659,
719
- "##¢": 660,
720
- "Ġå": 661,
721
- "å": 662,
722
- "##å": 663,
723
- "ĠČ": 664,
724
- "Č": 665,
725
- "##Č": 666,
726
- "Ġê": 667,
727
- "ê": 668,
728
- "##ê": 669,
729
- "ĠN": 670,
730
- "N": 671,
731
- "##N": 672,
732
- "7": 673,
733
- "##7": 674,
734
- "ĠĞ": 675,
735
- "Ğ": 676,
736
- "##Ğ": 677,
737
- "Ġĺ": 678,
738
- "ĺ": 679,
739
- "##ĺ": 680,
740
- "\"": 681,
741
- "##\"": 682,
742
- "¡": 683,
743
- "##¡": 684,
744
- "Ġl": 685,
745
- "l": 686,
746
- "##l": 687,
747
- "ĠĽ": 688,
748
- "Ľ": 689,
749
- "##Ľ": 690,
750
- "Ġi": 691,
751
- "i": 692,
752
- "##i": 693,
753
- "6": 694,
754
- "##6": 695,
755
- "÷": 696,
756
- "##÷": 697,
757
- "<|unk|>": 698,
758
- "##ng": 699,
759
- "##pi": 700,
760
- "##ó": 701,
761
- "##ħ": 702,
762
- "##in": 703,
763
- "##é": 704,
764
- "##ga": 705,
765
- "##ÃŃ": 706,
766
- "##ku": 707,
767
- "##an": 708,
768
- "##ah": 709,
769
- "##as": 710,
770
- "##ºŃ": 711,
771
- "##bo": 712,
772
- "##ka": 713,
773
- "##iÃ": 714,
774
- "##og": 715,
775
- "##ag": 716,
776
- "##sa": 717,
777
- "##ik": 718,
778
- "##de": 719,
779
- "##ad": 720,
780
- "##asa": 721,
781
- "##mi": 722,
782
- "##al": 723,
783
- "##³w": 724,
784
- "##nt": 725,
785
- "##ÙĦ": 726,
786
- "##ibo": 727,
787
- "##ro": 728,
788
- "##ak": 729,
789
- "##os": 730,
790
- "##id": 731,
791
- "##ĢĻ": 732,
792
- "##§h": 733,
793
- "##ĊĊ": 734,
794
- "##ig": 735,
795
- "##00": 736,
796
- ":/": 737,
797
- "##ir": 738,
798
- "##ib": 739,
799
- "##op": 740,
800
- "##§Ã": 741,
801
- "##ab": 742,
802
- "##la": 743,
803
- "##ul": 744,
804
- "##ż": 745,
805
- "##es": 746,
806
- "##lag": 747,
807
- "##na": 748,
808
- "##da": 749,
809
- "##ina": 750,
810
- "##ta": 751,
811
- "##ac": 752,
812
- "##ar": 753,
813
- "##ep": 754,
814
- "##Ãł": 755,
815
- "##tro": 756,
816
- "##en": 757,
817
- "##tz": 758,
818
- "##§a": 759,
819
- "##©s": 760,
820
- "##á": 761,
821
- "##pan": 762,
822
- "##em": 763,
823
- "##¹´": 764,
824
- "##it": 765,
825
- "##ä": 766,
826
- "##lo": 767,
827
- "##ala": 768,
828
- "##ba": 769,
829
- "##te": 770,
830
- "##//": 771,
831
- "##aka": 772,
832
- "##ri": 773,
833
- "##hi": 774,
834
- "##at": 775,
835
- "##ti": 776,
836
- "##¼j": 777,
837
- "##ö": 778,
838
- "##ra": 779,
839
- "##ang": 780,
840
- "##ten": 781,
841
- "##ahi": 782,
842
- "##ko": 783,
843
- "##pa": 784,
844
- "##oz": 785,
845
- "##un": 786,
846
- "##ı": 787,
847
- "##lu": 788,
848
- "##sar": 789,
849
- "##er": 790,
850
- "##ngan": 791,
851
- "##zten": 792,
852
- "##im": 793,
853
- "##re": 794,
854
- "##mu": 795,
855
- "##to": 796,
856
- "##eh": 797,
857
- "##tr": 798,
858
- "##aba": 799,
859
- "##ĦØ": 800,
860
- "##zt": 801,
861
- "##gs": 802,
862
- "##ts": 803,
863
- "##gsa": 804,
864
- "##rr": 805,
865
- "##him": 806,
866
- "##den": 807,
867
- "##ngang": 808,
868
- "##za": 809,
869
- "##Ä¡": 810,
870
- "##ao": 811,
871
- "##asad": 812,
872
- "##alag": 813,
873
- "##rt": 814,
874
- "##dp": 815,
875
- "##ire": 816,
876
- "##ek": 817,
877
- "##ió": 818,
878
- "##az": 819,
879
- "##et": 820,
880
- "##á»": 821,
881
- "##np": 822,
882
- "##err": 823,
883
- "##ki": 824,
884
- "##ni": 825,
885
- "##il": 826,
886
- "##ho": 827,
887
- "##sad": 828,
888
- "##gan": 829,
889
- "##on": 830,
890
- "##bi": 831,
891
- "##kar": 832,
892
- "##ĪÙ": 833,
893
- "##nd": 834,
894
- "##³n": 835,
895
- "##¶m": 836,
896
- "##ahin": 837,
897
- "##aci": 838,
898
- "##ue": 839,
899
- "##or": 840,
900
- "##rri": 841,
901
- "##ny": 842,
902
- "##od": 843,
903
- "##aga": 844,
904
- "##¶z": 845,
905
- "##ert": 846,
906
- "##ou": 847,
907
- "##is": 848,
908
- "##dl": 849,
909
- "an": 850,
910
- "##bab": 851,
911
- "##ci": 852,
912
- "##Ñı": 853,
913
- "##¡s": 854,
914
- "##iy": 855,
915
- "##ren": 856,
916
- "##±Å": 857,
917
- "##gw": 858,
918
- "##jo": 859,
919
- "##©g": 860,
920
- "##erri": 861,
921
- "##sÃ": 862,
922
- "Ġda": 863,
923
- "##nai": 864,
924
- "##oa": 865,
925
- "##big": 866,
926
- "##ÙĦØ": 867,
927
- "##kÃ": 868,
928
- "##ÅŁ": 869,
929
- "##ch": 870,
930
- "##ros": 871,
931
- "##do": 872,
932
- "##el": 873,
933
- "20": 874,
934
- "##ĊĊĊĊ": 875,
935
- "##eka": 876,
936
- "##ĭi": 877,
937
- "##..": 878,
938
- "##iren": 879,
939
- "##tu": 880,
940
- "##̬": 881,
941
- "##ags": 882,
942
- "##dla": 883,
943
- "##aw": 884,
944
- "##tod": 885,
945
- "##iz": 886,
946
- "##uk": 887,
947
- "##¡r": 888,
948
- "##li": 889,
949
- "##kk": 890,
950
- "##ijĺ": 891,
951
- "Ġsa": 892,
952
- "##¤n": 893,
953
- "##agat": 894,
954
- "##hin": 895,
955
- "##br": 896,
956
- "##ila": 897,
957
- "##rts": 898,
958
- "##ako": 899,
959
- "##ur": 900,
960
- "##le": 901,
961
- "##ai": 902,
962
- "##ªu": 903,
963
- "##Ã¥": 904,
964
- "##¡g": 905,
965
- "##ay": 906,
966
- "##ke": 907,
967
- "##adp": 908,
968
- "##bag": 909,
969
- "##¼jo": 910,
970
- "##ia": 911,
971
- "##ale": 912,
972
- "##ºŃt": 913,
973
- "##ha": 914,
974
- "##me": 915,
975
- "##alo": 916,
976
- "##ug": 917,
977
- "##po": 918,
978
- "##dto": 919,
979
- "##au": 920,
980
- "##wa": 921,
981
- "##ie": 922,
982
- "##bot": 923,
983
- "##ma": 924,
984
- "##inai": 925,
985
- "##gang": 926,
986
- "##º©": 927,
987
- "##gi": 928,
988
- "ĊĊ": 929,
989
- "##epa": 930,
990
- "##gat": 931,
991
- "##ong": 932,
992
- "##tso": 933,
993
- "##erts": 934,
994
- "##rÃ": 935,
995
- "##ez": 936,
996
- "##êu": 937,
997
- "##yo": 938,
998
- "##»į": 939,
999
- "##um": 940,
1000
- "##ss": 941,
1001
- "##Ńpo": 942,
1002
- "##am": 943,
1003
- "##ŁÄ": 944,
1004
- "##ae": 945,
1005
- "##oga": 946,
1006
- "##kö": 947,
1007
- "##mm": 948,
1008
- "##ero": 949,
1009
- "##ll": 950,
1010
- "##mb": 951,
1011
- "##us": 952,
1012
- "##tt": 953,
1013
- "##ب": 954,
1014
- "##tÃ": 955,
1015
- "ha": 956,
1016
- "##ua": 957,
1017
- "##xx": 958,
1018
- "Ġkina": 959,
1019
- "##alags": 960,
1020
- "##mÃ": 961,
1021
- "##zj": 962,
1022
- "##eg": 963,
1023
- "##si": 964,
1024
- "##abo": 965,
1025
- "##ui": 966,
1026
- "##oÃ": 967,
1027
- "##ali": 968,
1028
- "##ate": 969,
1029
- "##ég": 970,
1030
- "##eme": 971,
1031
- "##¿t": 972,
1032
- "##ihan": 973,
1033
- "##iha": 974,
1034
- "##sh": 975,
1035
- "##nda": 976,
1036
- "##ing": 977,
1037
- "##sy": 978,
1038
- "##±z": 979,
1039
- "##nh": 980,
1040
- "##ebi": 981,
1041
- "##hÃ": 982,
1042
- "##akar": 983,
1043
- "##gt": 984
1044
- }
1045
- }
1046
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_256000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "698": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_256000/vocab.json DELETED
@@ -1 +0,0 @@
1
- {"<|padding|>": 0, "<|endoftext|>": 1, "\u0120d": 2, "d": 3, "##d": 4, "\u0120\u0129": 5, "\u0129": 6, "##\u0129": 7, "\u0120\u00ba": 8, "\u00ba": 9, "##\u00ba": 10, "5": 11, "##5": 12, "\u0120\u0107": 13, "\u0107": 14, "##\u0107": 15, "?": 16, "##?": 17, "\u0120\u012e": 18, "\u012e": 19, "##\u012e": 20, "\u0120V": 21, "V": 22, "##V": 23, ",": 24, "##,": 25, "\u0120\u0110": 26, "\u0110": 27, "##\u0110": 28, "\u0120U": 29, "U": 30, "##U": 31, "\u0120\u00e4": 32, "\u00e4": 33, "##\u00e4": 34, "`": 35, "##`": 36, "\u0120\u00dc": 37, "\u00dc": 38, "##\u00dc": 39, "\u0120k": 40, "k": 41, "##k": 42, "\u0120\u00e2": 43, "\u00e2": 44, "##\u00e2": 45, "\u0120b": 46, "b": 47, "##b": 48, "@": 49, "##@": 50, "\u0120\u00e6": 51, "\u00e6": 52, "##\u00e6": 53, "\u00be": 54, "##\u00be": 55, "\u0120\u00fe": 56, "\u00fe": 57, "##\u00fe": 58, "\u0120j": 59, "j": 60, "##j": 61, "\u0120f": 62, "f": 63, "##f": 64, "\u0120\u00fa": 65, "\u00fa": 66, "##\u00fa": 67, "1": 68, "##1": 69, "\u0120\u00aa": 70, "\u00aa": 71, "##\u00aa": 72, "\u0120m": 73, "m": 74, "##m": 75, "$": 76, "##$": 77, "\u00a6": 78, "##\u00a6": 79, "2": 80, "##2": 81, "\u0120\u0132": 82, "\u0132": 83, "##\u0132": 84, "\u00af": 85, "##\u00af": 86, "\u0120D": 87, "D": 88, "##D": 89, "\u0120\u00ff": 90, "\u00ff": 91, "##\u00ff": 92, "\u0120\u0104": 93, "\u0104": 94, "##\u0104": 95, "}": 96, "##}": 97, "\u0120\u013b": 98, "\u013b": 99, "##\u013b": 100, ".": 101, "##.": 102, "\u00a9": 103, "##\u00a9": 104, "\u0120Y": 105, "Y": 106, "##Y": 107, "\u0120\u0116": 108, "\u0116": 109, "##\u0116": 110, "\u0120\u0140": 111, "\u0140": 112, "##\u0140": 113, "\u0120\u0119": 114, "\u0119": 115, "##\u0119": 116, "0": 117, "##0": 118, "\u0120\u00f2": 119, "\u00f2": 120, "##\u00f2": 121, "\u0120\u0111": 122, "\u0111": 123, "##\u0111": 124, "\u0120\u00d0": 125, "\u00d0": 126, "##\u00d0": 127, "\u0120\u00d8": 128, "\u00d8": 129, "##\u00d8": 130, "~": 131, "##~": 132, "\u0120\u00f3": 133, "\u00f3": 134, "##\u00f3": 135, "\u0120\u0113": 136, 
"\u0113": 137, "##\u0113": 138, "\u0120\u0139": 139, "\u0139": 140, "##\u0139": 141, "\u00ae": 142, "##\u00ae": 143, "\u0120\u00db": 144, "\u00db": 145, "##\u00db": 146, "\u0120\u010b": 147, "\u010b": 148, "##\u010b": 149, "\u0120\u00f0": 150, "\u00f0": 151, "##\u00f0": 152, "\u0120\u0133": 153, "\u0133": 154, "##\u0133": 155, "\u0120s": 156, "s": 157, "##s": 158, "\u0120q": 159, "q": 160, "##q": 161, "\u00ab": 162, "##\u00ab": 163, "\u0120R": 164, "R": 165, "##R": 166, "\u0120M": 167, "M": 168, "##M": 169, "\u00b2": 170, "##\u00b2": 171, "\u00b0": 172, "##\u00b0": 173, "\u0120c": 174, "c": 175, "##c": 176, "\u00a3": 177, "##\u00a3": 178, "\u00a5": 179, "##\u00a5": 180, "\u0120C": 181, "C": 182, "##C": 183, "\u0120T": 184, "T": 185, "##T": 186, "\u0120\u00de": 187, "\u00de": 188, "##\u00de": 189, "\u0120n": 190, "n": 191, "##n": 192, "\u0120\u0102": 193, "\u0102": 194, "##\u0102": 195, "\u0120\u0137": 196, "\u0137": 197, "##\u0137": 198, "\u00bf": 199, "##\u00bf": 200, "|": 201, "##|": 202, "\u0120Z": 203, "Z": 204, "##Z": 205, "\u0120\u00fc": 206, "\u00fc": 207, "##\u00fc": 208, "\u0120\u0128": 209, "\u0128": 210, "##\u0128": 211, "\u0120\u0123": 212, "\u0123": 213, "##\u0123": 214, "\u0120\u00cc": 215, "\u00cc": 216, "##\u00cc": 217, "\u00bd": 218, "##\u00bd": 219, "\u00bb": 220, "##\u00bb": 221, "\u00b8": 222, "##\u00b8": 223, "\u0120G": 224, "G": 225, "##G": 226, "\u0120\u00d2": 227, "\u00d2": 228, "##\u00d2": 229, "\u00d7": 230, "##\u00d7": 231, "\u0120\u0103": 232, "\u0103": 233, "##\u0103": 234, "\u0120\u012a": 235, "\u012a": 236, "##\u012a": 237, "\u0120\u00ee": 238, "\u00ee": 239, "##\u00ee": 240, "\u0120g": 241, "g": 242, "##g": 243, "\u00a4": 244, "##\u00a4": 245, "\u0120\u00c7": 246, "\u00c7": 247, "##\u00c7": 248, "4": 249, "##4": 250, "\u00b4": 251, "##\u00b4": 252, "\u00a7": 253, "##\u00a7": 254, "\u0120\u00c8": 255, "\u00c8": 256, "##\u00c8": 257, "\u0120\u00e8": 258, "\u00e8": 259, "##\u00e8": 260, "\u0120t": 261, "t": 262, "##t": 263, 
"\u0120\u0101": 264, "\u0101": 265, "##\u0101": 266, "{": 267, "##{": 268, "\u0120A": 269, "A": 270, "##A": 271, "\u0120\u0122": 272, "\u0122": 273, "##\u0122": 274, "\u0120\u013c": 275, "\u013c": 276, "##\u013c": 277, "#": 278, "###": 279, "\u0120I": 280, "I": 281, "##I": 282, "\u0120\u00d5": 283, "\u00d5": 284, "##\u00d5": 285, "\u0120\u00e1": 286, "\u00e1": 287, "##\u00e1": 288, "\u00b1": 289, "##\u00b1": 290, "\u0120\u011f": 291, "\u011f": 292, "##\u011f": 293, "\u0120\u00c9": 294, "\u00c9": 295, "##\u00c9": 296, "\u0120\u0106": 297, "\u0106": 298, "##\u0106": 299, "\u0120\u0134": 300, "\u0134": 301, "##\u0134": 302, "\u0120\u00f9": 303, "\u00f9": 304, "##\u00f9": 305, "]": 306, "##]": 307, "\u0120\u00fb": 308, "\u00fb": 309, "##\u00fb": 310, "\u0120\u00d3": 311, "\u00d3": 312, "##\u00d3": 313, "\u0120\u011c": 314, "\u011c": 315, "##\u011c": 316, "\u0120y": 317, "y": 318, "##y": 319, "\u00b3": 320, "##\u00b3": 321, "[": 322, "##[": 323, "\u0120\u0127": 324, "\u0127": 325, "##\u0127": 326, "\u0120\u0138": 327, "\u0138": 328, "##\u0138": 329, "\u0120\u0141": 330, "\u0141": 331, "##\u0141": 332, "\u0120\u012f": 333, "\u012f": 334, "##\u012f": 335, "_": 336, "##_": 337, ":": 338, "##:": 339, "-": 340, "##-": 341, "\u0120W": 342, "W": 343, "##W": 344, "\u0120\u00c1": 345, "\u00c1": 346, "##\u00c1": 347, "\u0120\u00e3": 348, "\u00e3": 349, "##\u00e3": 350, "\u0120\u00c6": 351, "\u00c6": 352, "##\u00c6": 353, "\u0120\u011a": 354, "\u011a": 355, "##\u011a": 356, "\u0120\u0125": 357, "\u0125": 358, "##\u0125": 359, "\u00a8": 360, "##\u00a8": 361, "\u0120L": 362, "L": 363, "##L": 364, "\u0120\u00f4": 365, "\u00f4": 366, "##\u00f4": 367, "\u0120\u0135": 368, "\u0135": 369, "##\u0135": 370, "\u0120\u0126": 371, "\u0126": 372, "##\u0126": 373, "\u0120\u00c0": 374, "\u00c0": 375, "##\u00c0": 376, "\u0120\u00ce": 377, "\u00ce": 378, "##\u00ce": 379, "\u0120\u0100": 380, "\u0100": 381, "##\u0100": 382, "\u0120\u0118": 383, "\u0118": 384, "##\u0118": 385, "9": 386, "##9": 387, 
"\u0120F": 388, "F": 389, "##F": 390, "\u0120\u010a": 391, "\u010a": 392, "##\u010a": 393, "\u0120\u00c4": 394, "\u00c4": 395, "##\u00c4": 396, "\u0120E": 397, "E": 398, "##E": 399, "\u00ac": 400, "##\u00ac": 401, "'": 402, "##'": 403, ">": 404, "##>": 405, "\u0120X": 406, "X": 407, "##X": 408, "\u0120\u0131": 409, "\u0131": 410, "##\u0131": 411, "\u0120\u010f": 412, "\u010f": 413, "##\u010f": 414, "\u0120B": 415, "B": 416, "##B": 417, "\u0120\u0105": 418, "\u0105": 419, "##\u0105": 420, "\u0120\u00c3": 421, "\u00c3": 422, "##\u00c3": 423, "\u0120o": 424, "o": 425, "##o": 426, "\u00b9": 427, "##\u00b9": 428, "\u0120a": 429, "a": 430, "##a": 431, "\u0120r": 432, "r": 433, "##r": 434, "8": 435, "##8": 436, ")": 437, "##)": 438, ";": 439, "##;": 440, "\u0120Q": 441, "Q": 442, "##Q": 443, "\u0120x": 444, "x": 445, "##x": 446, "\u00b7": 447, "##\u00b7": 448, "\u0120\u00cf": 449, "\u00cf": 450, "##\u00cf": 451, "\u0120\u00eb": 452, "\u00eb": 453, "##\u00eb": 454, "\u0120\u013f": 455, "\u013f": 456, "##\u013f": 457, "\u0120\u00f6": 458, "\u00f6": 459, "##\u00f6": 460, "\u0120\u0114": 461, "\u0114": 462, "##\u0114": 463, "\u0120\u00e7": 464, "\u00e7": 465, "##\u00e7": 466, "\u0120u": 467, "u": 468, "##u": 469, "\u0120\u013e": 470, "\u013e": 471, "##\u013e": 472, "\u0120\u00e9": 473, "\u00e9": 474, "##\u00e9": 475, "\u0120\u00e0": 476, "\u00e0": 477, "##\u00e0": 478, "\u0120\u00f1": 479, "\u00f1": 480, "##\u00f1": 481, "3": 482, "##3": 483, "\u0120\u0109": 484, "\u0109": 485, "##\u0109": 486, "\u0120\u0142": 487, "\u0142": 488, "##\u0142": 489, "\u0120z": 490, "z": 491, "##z": 492, "\u0120\u00ed": 493, "\u00ed": 494, "##\u00ed": 495, "\u0120w": 496, "w": 497, "##w": 498, "\u0120\u0136": 499, "\u0136": 500, "##\u0136": 501, "\u0120\u0143": 502, "\u0143": 503, "##\u0143": 504, "\u0120\u00da": 505, "\u00da": 506, "##\u00da": 507, "+": 508, "##+": 509, "\u0120\u00ca": 510, "\u00ca": 511, "##\u00ca": 512, "\u0120\u0115": 513, "\u0115": 514, "##\u0115": 515, "\u00bc": 516, 
"##\u00bc": 517, "\u0120\u010e": 518, "\u010e": 519, "##\u010e": 520, "\u0120\u0117": 521, "\u0117": 522, "##\u0117": 523, "\u0120\u00c5": 524, "\u00c5": 525, "##\u00c5": 526, "\u0120\u00df": 527, "\u00df": 528, "##\u00df": 529, "\u0120J": 530, "J": 531, "##J": 532, "\u0120\u00b5": 533, "\u00b5": 534, "##\u00b5": 535, "\u0120\u012c": 536, "\u012c": 537, "##\u012c": 538, "\u0120\u00fd": 539, "\u00fd": 540, "##\u00fd": 541, "=": 542, "##=": 543, "\u0120\u00dd": 544, "\u00dd": 545, "##\u00dd": 546, "\u0120\u011b": 547, "\u011b": 548, "##\u011b": 549, "\u0120\u010d": 550, "\u010d": 551, "##\u010d": 552, "\u0120K": 553, "K": 554, "##K": 555, "\u0120O": 556, "O": 557, "##O": 558, "\u0120\u00d1": 559, "\u00d1": 560, "##\u00d1": 561, "^": 562, "##^": 563, "*": 564, "##*": 565, "\u0120\u0112": 566, "\u0112": 567, "##\u0112": 568, "!": 569, "##!": 570, "\u0120\u012d": 571, "\u012d": 572, "##\u012d": 573, "\u0120S": 574, "S": 575, "##S": 576, "\u0120\u0130": 577, "\u0130": 578, "##\u0130": 579, "\u0120\u00f8": 580, "\u00f8": 581, "##\u00f8": 582, "\u0120\u00cd": 583, "\u00cd": 584, "##\u00cd": 585, "\u0120H": 586, "H": 587, "##H": 588, "\u0120\u00ec": 589, "\u00ec": 590, "##\u00ec": 591, "\u0120e": 592, "e": 593, "##e": 594, "\u00b6": 595, "##\u00b6": 596, "(": 597, "##(": 598, "\u0120\u00f5": 599, "\u00f5": 600, "##\u00f5": 601, "\u0120\u0108": 602, "\u0108": 603, "##\u0108": 604, "\u0120\u012b": 605, "\u012b": 606, "##\u012b": 607, "\u0120\u0121": 608, "\u0121": 609, "##\u0121": 610, "/": 611, "##/": 612, "\u0120\u00c2": 613, "\u00c2": 614, "##\u00c2": 615, "\u0120v": 616, "v": 617, "##v": 618, "%": 619, "##%": 620, "\u0120\u00d9": 621, "\u00d9": 622, "##\u00d9": 623, "&": 624, "##&": 625, "\u0120\u00cb": 626, "\u00cb": 627, "##\u00cb": 628, "\u0120h": 629, "h": 630, "##h": 631, "\u0120\u011d": 632, "\u011d": 633, "##\u011d": 634, "\u0120\u00d4": 635, "\u00d4": 636, "##\u00d4": 637, "\u0120\u00ef": 638, "\u00ef": 639, "##\u00ef": 640, "\u0120P": 641, "P": 642, "##P": 643, 
"\u0120\u0124": 644, "\u0124": 645, "##\u0124": 646, "\u0120p": 647, "p": 648, "##p": 649, "\u0120": 650, "##\u0120": 651, "\\": 652, "##\\": 653, "\u0120\u00d6": 654, "\u00d6": 655, "##\u00d6": 656, "<": 657, "##<": 658, "\u00a2": 659, "##\u00a2": 660, "\u0120\u00e5": 661, "\u00e5": 662, "##\u00e5": 663, "\u0120\u010c": 664, "\u010c": 665, "##\u010c": 666, "\u0120\u00ea": 667, "\u00ea": 668, "##\u00ea": 669, "\u0120N": 670, "N": 671, "##N": 672, "7": 673, "##7": 674, "\u0120\u011e": 675, "\u011e": 676, "##\u011e": 677, "\u0120\u013a": 678, "\u013a": 679, "##\u013a": 680, "\"": 681, "##\"": 682, "\u00a1": 683, "##\u00a1": 684, "\u0120l": 685, "l": 686, "##l": 687, "\u0120\u013d": 688, "\u013d": 689, "##\u013d": 690, "\u0120i": 691, "i": 692, "##i": 693, "6": 694, "##6": 695, "\u00f7": 696, "##\u00f7": 697, "<|unk|>": 698, "##ng": 699, "##pi": 700, "##\u00c3\u00b3": 701, "##\u00c4\u00a7": 702, "##in": 703, "##\u00c3\u00a9": 704, "##ga": 705, "##\u00c3\u0143": 706, "##ku": 707, "##an": 708, "##ah": 709, "##as": 710, "##\u00ba\u0143": 711, "##bo": 712, "##ka": 713, "##i\u00c3": 714, "##og": 715, "##ag": 716, "##sa": 717, "##ik": 718, "##de": 719, "##ad": 720, "##asa": 721, "##mi": 722, "##al": 723, "##\u00b3w": 724, "##nt": 725, "##\u00d9\u0126": 726, "##ibo": 727, "##ro": 728, "##ak": 729, "##os": 730, "##id": 731, "##\u0122\u013b": 732, "##\u00a7h": 733, "##\u010a\u010a": 734, "##ig": 735, "##00": 736, ":/": 737, "##ir": 738, "##ib": 739, "##op": 740, "##\u00a7\u00c3": 741, "##ab": 742, "##la": 743, "##ul": 744, "##\u00c5\u00bc": 745, "##es": 746, "##lag": 747, "##na": 748, "##da": 749, "##ina": 750, "##ta": 751, "##ac": 752, "##ar": 753, "##ep": 754, "##\u00c3\u0142": 755, "##tro": 756, "##en": 757, "##tz": 758, "##\u00a7a": 759, "##\u00a9s": 760, "##\u00c3\u00a1": 761, "##pan": 762, "##em": 763, "##\u00b9\u00b4": 764, "##it": 765, "##\u00c3\u00a4": 766, "##lo": 767, "##ala": 768, "##ba": 769, "##te": 770, "##//": 771, "##aka": 772, "##ri": 773, "##hi": 774, 
"##at": 775, "##ti": 776, "##\u00bcj": 777, "##\u00c3\u00b6": 778, "##ra": 779, "##ang": 780, "##ten": 781, "##ahi": 782, "##ko": 783, "##pa": 784, "##oz": 785, "##un": 786, "##\u00c4\u00b1": 787, "##lu": 788, "##sar": 789, "##er": 790, "##ngan": 791, "##zten": 792, "##im": 793, "##re": 794, "##mu": 795, "##to": 796, "##eh": 797, "##tr": 798, "##aba": 799, "##\u0126\u00d8": 800, "##zt": 801, "##gs": 802, "##ts": 803, "##gsa": 804, "##rr": 805, "##him": 806, "##den": 807, "##ngang": 808, "##za": 809, "##\u00c4\u00a1": 810, "##ao": 811, "##asad": 812, "##alag": 813, "##rt": 814, "##dp": 815, "##ire": 816, "##ek": 817, "##i\u00c3\u00b3": 818, "##az": 819, "##et": 820, "##\u00e1\u00bb": 821, "##np": 822, "##err": 823, "##ki": 824, "##ni": 825, "##il": 826, "##ho": 827, "##sad": 828, "##gan": 829, "##on": 830, "##bi": 831, "##kar": 832, "##\u012a\u00d9": 833, "##nd": 834, "##\u00b3n": 835, "##\u00b6m": 836, "##ahin": 837, "##aci": 838, "##ue": 839, "##or": 840, "##rri": 841, "##ny": 842, "##od": 843, "##aga": 844, "##\u00b6z": 845, "##ert": 846, "##ou": 847, "##is": 848, "##dl": 849, "an": 850, "##bab": 851, "##ci": 852, "##\u00d1\u0131": 853, "##\u00a1s": 854, "##iy": 855, "##ren": 856, "##\u00b1\u00c5": 857, "##gw": 858, "##jo": 859, "##\u00a9g": 860, "##erri": 861, "##s\u00c3": 862, "\u0120da": 863, "##nai": 864, "##oa": 865, "##big": 866, "##\u00d9\u0126\u00d8": 867, "##k\u00c3": 868, "##\u00c5\u0141": 869, "##ch": 870, "##ros": 871, "##do": 872, "##el": 873, "20": 874, "##\u010a\u010a\u010a\u010a": 875, "##eka": 876, "##\u012di": 877, "##..": 878, "##iren": 879, "##tu": 880, "##\u0122\u00ac": 881, "##ags": 882, "##dla": 883, "##aw": 884, "##tod": 885, "##iz": 886, "##uk": 887, "##\u00a1r": 888, "##li": 889, "##kk": 890, "##\u0133\u013a": 891, "\u0120sa": 892, "##\u00a4n": 893, "##agat": 894, "##hin": 895, "##br": 896, "##ila": 897, "##rts": 898, "##ako": 899, "##ur": 900, "##le": 901, "##ai": 902, "##\u00aau": 903, "##\u00c3\u00a5": 904, "##\u00a1g": 905, "##ay": 
906, "##ke": 907, "##adp": 908, "##bag": 909, "##\u00bcjo": 910, "##ia": 911, "##ale": 912, "##\u00ba\u0143t": 913, "##ha": 914, "##me": 915, "##alo": 916, "##ug": 917, "##po": 918, "##dto": 919, "##au": 920, "##wa": 921, "##ie": 922, "##bot": 923, "##ma": 924, "##inai": 925, "##gang": 926, "##\u00ba\u00a9": 927, "##gi": 928, "\u010a\u010a": 929, "##epa": 930, "##gat": 931, "##ong": 932, "##tso": 933, "##erts": 934, "##r\u00c3": 935, "##ez": 936, "##\u00c3\u00aau": 937, "##yo": 938, "##\u00bb\u012f": 939, "##um": 940, "##ss": 941, "##\u0143po": 942, "##am": 943, "##\u0141\u00c4": 944, "##ae": 945, "##oga": 946, "##k\u00c3\u00b6": 947, "##mm": 948, "##ero": 949, "##ll": 950, "##mb": 951, "##us": 952, "##tt": 953, "##\u00d8\u00a8": 954, "##t\u00c3": 955, "ha": 956, "##ua": 957, "##xx": 958, "\u0120kina": 959, "##alags": 960, "##m\u00c3": 961, "##zj": 962, "##eg": 963, "##si": 964, "##abo": 965, "##ui": 966, "##o\u00c3": 967, "##ali": 968, "##ate": 969, "##\u00c3\u00a9g": 970, "##eme": 971, "##\u00bft": 972, "##ihan": 973, "##iha": 974, "##sh": 975, "##nda": 976, "##ing": 977, "##sy": 978, "##\u00b1z": 979, "##nh": 980, "##ebi": 981, "##h\u00c3": 982, "##akar": 983, "##gt": 984}
 
 
fw57Mmulti_Entropy_threshold_32000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_32000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_32000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_32000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "698": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_32000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_64000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_64000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_64000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_64000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "698": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_64000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_8064/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_8064/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_8064/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Entropy_threshold_8064/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "698": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Entropy_threshold_8064/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Surprisal_threshold_128000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57Mmulti_Surprisal_threshold_128000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Surprisal_threshold_128000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Surprisal_threshold_128000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "698": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Surprisal_threshold_128000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Surprisal_threshold_16000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57Mmulti_Surprisal_threshold_16000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Surprisal_threshold_16000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Surprisal_threshold_16000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "698": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Surprisal_threshold_16000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Surprisal_threshold_256000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57Mmulti_Surprisal_threshold_256000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Surprisal_threshold_256000/tokenizer.json DELETED
@@ -1,1390 +0,0 @@
1
- {
2
- "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
5
- "added_tokens": [
6
- {
7
- "id": 0,
8
- "content": "<|padding|>",
9
- "single_word": false,
10
- "lstrip": false,
11
- "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
- },
15
- {
16
- "id": 1,
17
- "content": "<|endoftext|>",
18
- "single_word": false,
19
- "lstrip": false,
20
- "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
- },
24
- {
25
- "id": 698,
26
- "content": "<|unk|>",
27
- "single_word": false,
28
- "lstrip": false,
29
- "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
- }
33
- ],
34
- "normalizer": null,
35
- "pre_tokenizer": {
36
- "type": "ByteLevel",
37
- "add_prefix_space": true,
38
- "trim_offsets": true,
39
- "use_regex": true
40
- },
41
- "post_processor": {
42
- "type": "ByteLevel",
43
- "add_prefix_space": true,
44
- "trim_offsets": true,
45
- "use_regex": true
46
- },
47
- "decoder": {
48
- "type": "ByteLevel",
49
- "add_prefix_space": true,
50
- "trim_offsets": true,
51
- "use_regex": true
52
- },
53
- "model": {
54
- "type": "WordPiece",
55
- "unk_token": "<|unk|>",
56
- "continuing_subword_prefix": "##",
57
- "max_input_chars_per_word": 100,
58
- "vocab": {
59
- "<|padding|>": 0,
60
- "<|endoftext|>": 1,
61
- "Ġõ": 2,
62
- "õ": 3,
63
- "##õ": 4,
64
- "Ġö": 5,
65
- "ö": 6,
66
- "##ö": 7,
67
- "Ġª": 8,
68
- "ª": 9,
69
- "##ª": 10,
70
- "ĠĽ": 11,
71
- "Ľ": 12,
72
- "##Ľ": 13,
73
- "ĠĐ": 14,
74
- "Đ": 15,
75
- "##Đ": 16,
76
- "`": 17,
77
- "##`": 18,
78
- ".": 19,
79
- "##.": 20,
80
- "ĠO": 21,
81
- "O": 22,
82
- "##O": 23,
83
- "®": 24,
84
- "##®": 25,
85
- "½": 26,
86
- "##½": 27,
87
- "ĠF": 28,
88
- "F": 29,
89
- "##F": 30,
90
- "Ġá": 31,
91
- "á": 32,
92
- "##á": 33,
93
- "Ġø": 34,
94
- "ø": 35,
95
- "##ø": 36,
96
- "ĠT": 37,
97
- "T": 38,
98
- "##T": 39,
99
- "\"": 40,
100
- "##\"": 41,
101
- "9": 42,
102
- "##9": 43,
103
- "¥": 44,
104
- "##¥": 45,
105
- "¸": 46,
106
- "##¸": 47,
107
- "_": 48,
108
- "##_": 49,
109
- "ĠB": 50,
110
- "B": 51,
111
- "##B": 52,
112
- "Ġĺ": 53,
113
- "ĺ": 54,
114
- "##ĺ": 55,
115
- "ĠÈ": 56,
116
- "È": 57,
117
- "##È": 58,
118
- "ĠÄ": 59,
119
- "Ä": 60,
120
- "##Ä": 61,
121
- "Ġķ": 62,
122
- "ķ": 63,
123
- "##ķ": 64,
124
- "¤": 65,
125
- "##¤": 66,
126
- "Ġo": 67,
127
- "o": 68,
128
- "##o": 69,
129
- "ĠĮ": 70,
130
- "Į": 71,
131
- "##Į": 72,
132
- "Ġð": 73,
133
- "ð": 74,
134
- "##ð": 75,
135
- "\\": 76,
136
- "##\\": 77,
137
- "ĠĿ": 78,
138
- "Ŀ": 79,
139
- "##Ŀ": 80,
140
- "^": 81,
141
- "##^": 82,
142
- "ĠĊ": 83,
143
- "Ċ": 84,
144
- "##Ċ": 85,
145
- "Ġj": 86,
146
- "j": 87,
147
- "##j": 88,
148
- "Ġn": 89,
149
- "n": 90,
150
- "##n": 91,
151
- "Ġ": 92,
152
- "##Ġ": 93,
153
- "Ġþ": 94,
154
- "þ": 95,
155
- "##þ": 96,
156
- "ĠD": 97,
157
- "D": 98,
158
- "##D": 99,
159
- "ĠĔ": 100,
160
- "Ĕ": 101,
161
- "##Ĕ": 102,
162
- "ĠL": 103,
163
- "L": 104,
164
- "##L": 105,
165
- "Ġå": 106,
166
- "å": 107,
167
- "##å": 108,
168
- "ĠJ": 109,
169
- "J": 110,
170
- "##J": 111,
171
- "Ġe": 112,
172
- "e": 113,
173
- "##e": 114,
174
- "Ġu": 115,
175
- "u": 116,
176
- "##u": 117,
177
- "ĠĄ": 118,
178
- "Ą": 119,
179
- "##Ą": 120,
180
- "Ġı": 121,
181
- "ı": 122,
182
- "##ı": 123,
183
- "ĠĴ": 124,
184
- "Ĵ": 125,
185
- "##Ĵ": 126,
186
- "*": 127,
187
- "##*": 128,
188
- "5": 129,
189
- "##5": 130,
190
- "ĠĖ": 131,
191
- "Ė": 132,
192
- "##Ė": 133,
193
- "ĠĎ": 134,
194
- "Ď": 135,
195
- "##Ď": 136,
196
- "ĠĢ": 137,
197
- "Ģ": 138,
198
- "##Ģ": 139,
199
- "ĠY": 140,
200
- "Y": 141,
201
- "##Y": 142,
202
- "ĠÜ": 143,
203
- "Ü": 144,
204
- "##Ü": 145,
205
- ";": 146,
206
- "##;": 147,
207
- "¿": 148,
208
- "##¿": 149,
209
- "~": 150,
210
- "##~": 151,
211
- "2": 152,
212
- "##2": 153,
213
- "´": 154,
214
- "##´": 155,
215
- "¦": 156,
216
- "##¦": 157,
217
- "ĠÀ": 158,
218
- "À": 159,
219
- "##À": 160,
220
- "Ġé": 161,
221
- "é": 162,
222
- "##é": 163,
223
- "Ġħ": 164,
224
- "ħ": 165,
225
- "##ħ": 166,
226
- "¢": 167,
227
- "##¢": 168,
228
- "ĠĚ": 169,
229
- "Ě": 170,
230
- "##Ě": 171,
231
- "Ġã": 172,
232
- "ã": 173,
233
- "##ã": 174,
234
- "Ġę": 175,
235
- "ę": 176,
236
- "##ę": 177,
237
- "(": 178,
238
- "##(": 179,
239
- "¬": 180,
240
- "##¬": 181,
241
- "«": 182,
242
- "##«": 183,
243
- "6": 184,
244
- "##6": 185,
245
- "ĠE": 186,
246
- "E": 187,
247
- "##E": 188,
248
- "£": 189,
249
- "##£": 190,
250
- "Ġg": 191,
251
- "g": 192,
252
- "##g": 193,
253
- "¹": 194,
254
- "##¹": 195,
255
- "Ġą": 196,
256
- "ą": 197,
257
- "##ą": 198,
258
- "8": 199,
259
- "##8": 200,
260
- "-": 201,
261
- "##-": 202,
262
- "¨": 203,
263
- "##¨": 204,
264
- "&": 205,
265
- "##&": 206,
266
- "Ġī": 207,
267
- "ī": 208,
268
- "##ī": 209,
269
- "Ġă": 210,
270
- "ă": 211,
271
- "##ă": 212,
272
- "ĠÊ": 213,
273
- "Ê": 214,
274
- "##Ê": 215,
275
- "ĠQ": 216,
276
- "Q": 217,
277
- "##Q": 218,
278
- "ĠĀ": 219,
279
- "Ā": 220,
280
- "##Ā": 221,
281
- "ĠIJ": 222,
282
- "IJ": 223,
283
- "##IJ": 224,
284
- "ĠÆ": 225,
285
- "Æ": 226,
286
- "##Æ": 227,
287
- "ĠV": 228,
288
- "V": 229,
289
- "##V": 230,
290
- "ĠÉ": 231,
291
- "É": 232,
292
- "##É": 233,
293
- "§": 234,
294
- "##§": 235,
295
- "Ġĝ": 236,
296
- "ĝ": 237,
297
- "##ĝ": 238,
298
- "ĠŃ": 239,
299
- "Ń": 240,
300
- "##Ń": 241,
301
- "ĠÚ": 242,
302
- "Ú": 243,
303
- "##Ú": 244,
304
- "Ġĵ": 245,
305
- "ĵ": 246,
306
- "##ĵ": 247,
307
- "Ġµ": 248,
308
- "µ": 249,
309
- "##µ": 250,
310
- "¶": 251,
311
- "##¶": 252,
312
- "ĠP": 253,
313
- "P": 254,
314
- "##P": 255,
315
- "Ġij": 256,
316
- "ij": 257,
317
- "##ij": 258,
318
- "²": 259,
319
- "##²": 260,
320
- "ĠÎ": 261,
321
- "Î": 262,
322
- "##Î": 263,
323
- "Ġâ": 264,
324
- "â": 265,
325
- "##â": 266,
326
- "ĠS": 267,
327
- "S": 268,
328
- "##S": 269,
329
- "ĠÇ": 270,
330
- "Ç": 271,
331
- "##Ç": 272,
332
- "ĠČ": 273,
333
- "Č": 274,
334
- "##Č": 275,
335
- "ĠI": 276,
336
- "I": 277,
337
- "##I": 278,
338
- "ĠH": 279,
339
- "H": 280,
340
- "##H": 281,
341
- "|": 282,
342
- "##|": 283,
343
- "ĠÝ": 284,
344
- "Ý": 285,
345
- "##Ý": 286,
346
- "Ġė": 287,
347
- "ė": 288,
348
- "##ė": 289,
349
- "×": 290,
350
- "##×": 291,
351
- "Ġx": 292,
352
- "x": 293,
353
- "##x": 294,
354
- "Ġā": 295,
355
- "ā": 296,
356
- "##ā": 297,
357
- "ĠĶ": 298,
358
- "Ķ": 299,
359
- "##Ķ": 300,
360
- "ĠĞ": 301,
361
- "Ğ": 302,
362
- "##Ğ": 303,
363
- "'": 304,
364
- "##'": 305,
365
- "Ġĩ": 306,
366
- "ĩ": 307,
367
- "##ĩ": 308,
368
- "Ġĥ": 309,
369
- "ĥ": 310,
370
- "##ĥ": 311,
371
- "Ġq": 312,
372
- "q": 313,
373
- "##q": 314,
374
- "@": 315,
375
- "##@": 316,
376
- "Ġp": 317,
377
- "p": 318,
378
- "##p": 319,
379
- "Ġw": 320,
380
- "w": 321,
381
- "##w": 322,
382
- "!": 323,
383
- "##!": 324,
384
- "#": 325,
385
- "###": 326,
386
- "Ġf": 327,
387
- "f": 328,
388
- "##f": 329,
389
- "¾": 330,
390
- "##¾": 331,
391
- "ĠÒ": 332,
392
- "Ò": 333,
393
- "##Ò": 334,
394
- "ĠÙ": 335,
395
- "Ù": 336,
396
- "##Ù": 337,
397
- "Ġç": 338,
398
- "ç": 339,
399
- "##ç": 340,
400
- ">": 341,
401
- "##>": 342,
402
- "ĠÁ": 343,
403
- "Á": 344,
404
- "##Á": 345,
405
- "Ġí": 346,
406
- "í": 347,
407
- "##í": 348,
408
- "0": 349,
409
- "##0": 350,
410
- ")": 351,
411
- "##)": 352,
412
- "Ġú": 353,
413
- "ú": 354,
414
- "##ú": 355,
415
- "]": 356,
416
- "##]": 357,
417
- "Ġñ": 358,
418
- "ñ": 359,
419
- "##ñ": 360,
420
- "Ġĭ": 361,
421
- "ĭ": 362,
422
- "##ĭ": 363,
423
- "ĠĹ": 364,
424
- "Ĺ": 365,
425
- "##Ĺ": 366,
426
- "ĠĈ": 367,
427
- "Ĉ": 368,
428
- "##Ĉ": 369,
429
- "ĠÖ": 370,
430
- "Ö": 371,
431
- "##Ö": 372,
432
- "Ġô": 373,
433
- "ô": 374,
434
- "##ô": 375,
435
- "ĠĘ": 376,
436
- "Ę": 377,
437
- "##Ę": 378,
438
- "ĠĨ": 379,
439
- "Ĩ": 380,
440
- "##Ĩ": 381,
441
- "ĠK": 382,
442
- "K": 383,
443
- "##K": 384,
444
- "Ġh": 385,
445
- "h": 386,
446
- "##h": 387,
447
- "Ġi": 388,
448
- "i": 389,
449
- "##i": 390,
450
- "Ġº": 391,
451
- "º": 392,
452
- "##º": 393,
453
- "Ġē": 394,
454
- "ē": 395,
455
- "##ē": 396,
456
- "Ġæ": 397,
457
- "æ": 398,
458
- "##æ": 399,
459
- "Ġt": 400,
460
- "t": 401,
461
- "##t": 402,
462
- "Ġî": 403,
463
- "î": 404,
464
- "##î": 405,
465
- "1": 406,
466
- "##1": 407,
467
- "Ġy": 408,
468
- "y": 409,
469
- "##y": 410,
470
- "ĠÍ": 411,
471
- "Í": 412,
472
- "##Í": 413,
473
- "ĠÂ": 414,
474
- "Â": 415,
475
- "##Â": 416,
476
- "Ġċ": 417,
477
- "ċ": 418,
478
- "##ċ": 419,
479
- "ĠØ": 420,
480
- "Ø": 421,
481
- "##Ø": 422,
482
- "ĠÌ": 423,
483
- "Ì": 424,
484
- "##Ì": 425,
485
- "ĠĻ": 426,
486
- "Ļ": 427,
487
- "##Ļ": 428,
488
- ":": 429,
489
- "##:": 430,
490
- "ĠM": 431,
491
- "M": 432,
492
- "##M": 433,
493
- "Ġa": 434,
494
- "a": 435,
495
- "##a": 436,
496
- "ĠG": 437,
497
- "G": 438,
498
- "##G": 439,
499
- "÷": 440,
500
- "##÷": 441,
501
- "Ġľ": 442,
502
- "ľ": 443,
503
- "##ľ": 444,
504
- "Ġk": 445,
505
- "k": 446,
506
- "##k": 447,
507
- "ĠW": 448,
508
- "W": 449,
509
- "##W": 450,
510
- "ĠÛ": 451,
511
- "Û": 452,
512
- "##Û": 453,
513
- "ĠA": 454,
514
- "A": 455,
515
- "##A": 456,
516
- "Ġİ": 457,
517
- "İ": 458,
518
- "##İ": 459,
519
- "ĠÞ": 460,
520
- "Þ": 461,
521
- "##Þ": 462,
522
- "Ġà": 463,
523
- "à": 464,
524
- "##à": 465,
525
- "·": 466,
526
- "##·": 467,
527
- "Ġÿ": 468,
528
- "ÿ": 469,
529
- "##ÿ": 470,
530
- "[": 471,
531
- "##[": 472,
532
- "=": 473,
533
- "##=": 474,
534
- "ĠÑ": 475,
535
- "Ñ": 476,
536
- "##Ñ": 477,
537
- "Ġč": 478,
538
- "č": 479,
539
- "##č": 480,
540
- "ĠX": 481,
541
- "X": 482,
542
- "##X": 483,
543
- "ĠË": 484,
544
- "Ë": 485,
545
- "##Ë": 486,
546
- "ĠZ": 487,
547
- "Z": 488,
548
- "##Z": 489,
549
- "ĠĂ": 490,
550
- "Ă": 491,
551
- "##Ă": 492,
552
- "ĠĬ": 493,
553
- "Ĭ": 494,
554
- "##Ĭ": 495,
555
- "Ġď": 496,
556
- "ď": 497,
557
- "##ď": 498,
558
- "Ġý": 499,
559
- "ý": 500,
560
- "##ý": 501,
561
- "+": 502,
562
- "##+": 503,
563
- "ĠU": 504,
564
- "U": 505,
565
- "##U": 506,
566
- "Ġz": 507,
567
- "z": 508,
568
- "##z": 509,
569
- "Ġü": 510,
570
- "ü": 511,
571
- "##ü": 512,
572
- "Ġģ": 513,
573
- "ģ": 514,
574
- "##ģ": 515,
575
- "»": 516,
576
- "##»": 517,
577
- "ĠÐ": 518,
578
- "Ð": 519,
579
- "##Ð": 520,
580
- "Ġġ": 521,
581
- "ġ": 522,
582
- "##ġ": 523,
583
- "Ġë": 524,
584
- "ë": 525,
585
- "##ë": 526,
586
- "ĠN": 527,
587
- "N": 528,
588
- "##N": 529,
589
- "ĠR": 530,
590
- "R": 531,
591
- "##R": 532,
592
- "Ġđ": 533,
593
- "đ": 534,
594
- "##đ": 535,
595
- "Ġê": 536,
596
- "ê": 537,
597
- "##ê": 538,
598
- "Ġò": 539,
599
- "ò": 540,
600
- "##ò": 541,
601
- "Ġû": 542,
602
- "û": 543,
603
- "##û": 544,
604
- "{": 545,
605
- "##{": 546,
606
- "Ġm": 547,
607
- "m": 548,
608
- "##m": 549,
609
- "©": 550,
610
- "##©": 551,
611
- "Ġĸ": 552,
612
- "ĸ": 553,
613
- "##ĸ": 554,
614
- "ĠĤ": 555,
615
- "Ĥ": 556,
616
- "##Ĥ": 557,
617
- "Ġä": 558,
618
- "ä": 559,
619
- "##ä": 560,
620
- "ĠÓ": 561,
621
- "Ó": 562,
622
- "##Ó": 563,
623
- "Ġù": 564,
624
- "ù": 565,
625
- "##ù": 566,
626
- "$": 567,
627
- "##$": 568,
628
- "Ġě": 569,
629
- "ě": 570,
630
- "##ě": 571,
631
- "¯": 572,
632
- "##¯": 573,
633
- "Ġs": 574,
634
- "s": 575,
635
- "##s": 576,
636
- "ĠÅ": 577,
637
- "Å": 578,
638
- "##Å": 579,
639
- "Ġv": 580,
640
- "v": 581,
641
- "##v": 582,
642
- "°": 583,
643
- "##°": 584,
644
- ",": 585,
645
- "##,": 586,
646
- "Ġó": 587,
647
- "ó": 588,
648
- "##ó": 589,
649
- "Ġŀ": 590,
650
- "ŀ": 591,
651
- "##ŀ": 592,
652
- "Ġć": 593,
653
- "ć": 594,
654
- "##ć": 595,
655
- "Ġł": 596,
656
- "ł": 597,
657
- "##ł": 598,
658
- "Ġß": 599,
659
- "ß": 600,
660
- "##ß": 601,
661
- "4": 602,
662
- "##4": 603,
663
- "ĠÔ": 604,
664
- "Ô": 605,
665
- "##Ô": 606,
666
- "Ġį": 607,
667
- "į": 608,
668
- "##į": 609,
669
- "ĠĜ": 610,
670
- "Ĝ": 611,
671
- "##Ĝ": 612,
672
- "%": 613,
673
- "##%": 614,
674
- "ĠÏ": 615,
675
- "Ï": 616,
676
- "##Ï": 617,
677
- "}": 618,
678
- "##}": 619,
679
- "Ġì": 620,
680
- "ì": 621,
681
- "##ì": 622,
682
- "Ġļ": 623,
683
- "ļ": 624,
684
- "##ļ": 625,
685
- "ĠĒ": 626,
686
- "Ē": 627,
687
- "##Ē": 628,
688
- "¡": 629,
689
- "##¡": 630,
690
- "ĠÃ": 631,
691
- "Ã": 632,
692
- "##Ã": 633,
693
- "ĠÕ": 634,
694
- "Õ": 635,
695
- "##Õ": 636,
696
- "Ġĕ": 637,
697
- "ĕ": 638,
698
- "##ĕ": 639,
699
- "ĠĦ": 640,
700
- "Ħ": 641,
701
- "##Ħ": 642,
702
- "ĠĪ": 643,
703
- "Ī": 644,
704
- "##Ī": 645,
705
- "Ġr": 646,
706
- "r": 647,
707
- "##r": 648,
708
- "ĠĆ": 649,
709
- "Ć": 650,
710
- "##Ć": 651,
711
- "Ġğ": 652,
712
- "ğ": 653,
713
- "##ğ": 654,
714
- "Ġc": 655,
715
- "c": 656,
716
- "##c": 657,
717
- "ĠC": 658,
718
- "C": 659,
719
- "##C": 660,
720
- "<": 661,
721
- "##<": 662,
722
- "¼": 663,
723
- "##¼": 664,
724
- "Ġĉ": 665,
725
- "ĉ": 666,
726
- "##ĉ": 667,
727
- "Ġl": 668,
728
- "l": 669,
729
- "##l": 670,
730
- "3": 671,
731
- "##3": 672,
732
- "?": 673,
733
- "##?": 674,
734
- "±": 675,
735
- "##±": 676,
736
- "Ġï": 677,
737
- "ï": 678,
738
- "##ï": 679,
739
- "³": 680,
740
- "##³": 681,
741
- "Ġb": 682,
742
- "b": 683,
743
- "##b": 684,
744
- "Ġd": 685,
745
- "d": 686,
746
- "##d": 687,
747
- "Ġè": 688,
748
- "è": 689,
749
- "##è": 690,
750
- "ĠŁ": 691,
751
- "Ł": 692,
752
- "##Ł": 693,
753
- "7": 694,
754
- "##7": 695,
755
- "/": 696,
756
- "##/": 697,
757
- "<|unk|>": 698,
758
- "##ä": 699,
759
- "##äÃ": 700,
760
- "##it": 701,
761
- "##an": 702,
762
- "##as": 703,
763
- "##asa": 704,
764
- "##ge": 705,
765
- "##de": 706,
766
- "##bi": 707,
767
- "##ga": 708,
768
- "##ta": 709,
769
- "##ang": 710,
770
- "##pa": 711,
771
- "##le": 712,
772
- "##ng": 713,
773
- "##na": 714,
774
- "##ina": 715,
775
- ":/": 716,
776
- "##en": 717,
777
- "##ra": 718,
778
- "##den": 719,
779
- "##ro": 720,
780
- "##on": 721,
781
- "##bo": 722,
782
- "##ibo": 723,
783
- "##ci": 724,
784
- "##ul": 725,
785
- "##ºŃ": 726,
786
- "##mu": 727,
787
- "##in": 728,
788
- "##ma": 729,
789
- "##ah": 730,
790
- "##ar": 731,
791
- "##ai": 732,
792
- "##nai": 733,
793
- "##aci": 734,
794
- "##sa": 735,
795
- "##inai": 736,
796
- "##ba": 737,
797
- "##uo": 738,
798
- "##bab": 739,
799
- "##ir": 740,
800
- "##ire": 741,
801
- "##baba": 742,
802
- "##¶Ã": 743,
803
- "##iz": 744,
804
- "##ni": 745,
805
- "##ri": 746,
806
- "##rri": 747,
807
- "##erri": 748,
808
- "##gan": 749,
809
- "##nt": 750,
810
- "##te": 751,
811
- "##ab": 752,
812
- "##gs": 753,
813
- "##ags": 754,
814
- "##zt": 755,
815
- "##zte": 756,
816
- "##zten": 757,
817
- "##la": 758,
818
- "##aba": 759,
819
- "##tr": 760,
820
- "##tro": 761,
821
- "##eh": 762,
822
- "##ahi": 763,
823
- "##ahin": 764,
824
- "##ep": 765,
825
- "##ion": 766,
826
- "##00": 767,
827
- "##pi": 768,
828
- "##re": 769,
829
- "##ö": 770,
830
- "##ue": 771,
831
- "##ak": 772,
832
- "##..": 773,
833
- "##ia": 774,
834
- "##ssa": 775,
835
- "##at": 776,
836
- "##ob": 777,
837
- "##ag": 778,
838
- "##aga": 779,
839
- "##so": 780,
840
- "##dl": 781,
841
- "##rt": 782,
842
- "##tt": 783,
843
- "##¼l": 784,
844
- "##Ńt": 785,
845
- "##ºŃt": 786,
846
- "##od": 787,
847
- "##um": 788,
848
- "##gang": 789,
849
- "##ngang": 790,
850
- "##lo": 791,
851
- "##bre": 792,
852
- "##tj": 793,
853
- "##abi": 794,
854
- "##ik": 795,
855
- "##ros": 796,
856
- "##bit": 797,
857
- "##ie": 798,
858
- "##ien": 799,
859
- "##ap": 800,
860
- "##og": 801,
861
- "##¡l": 802,
862
- "##ek": 803,
863
- "##eka": 804,
864
- "##etro": 805,
865
- "##ay": 806,
866
- "##baga": 807,
867
- "##abaga": 808,
868
- "##au": 809,
869
- "##hi": 810,
870
- "##him": 811,
871
- "##hin": 812,
872
- "##zi": 813,
873
- "##ÃŃ": 814,
874
- "##to": 815,
875
- "##ekak": 816,
876
- "##ot": 817,
877
- "##bot": 818,
878
- "##ibot": 819,
879
- "##abo": 820,
880
- "##¤l": 821,
881
- "##et": 822,
882
- "##ad": 823,
883
- "##er": 824,
884
- "##ti": 825,
885
- "##ao": 826,
886
- "##op": 827,
887
- "##om": 828,
888
- "##ha": 829,
889
- "##aha": 830,
890
- "##err": 831,
891
- "##ran": 832,
892
- "##sod": 833,
893
- "##ae": 834,
894
- "##sar": 835,
895
- "##im": 836,
896
- "##ahim": 837,
897
- "##ig": 838,
898
- "##ib": 839,
899
- "##da": 840,
900
- "##áº": 841,
901
- "##si": 842,
902
- "##asi": 843,
903
- "##ya": 844,
904
- "##al": 845,
905
- "##mi": 846,
906
- "##ala": 847,
907
- "##ur": 848,
908
- "##iy": 849,
909
- "##os": 850,
910
- "##fi": 851,
911
- "##abag": 852,
912
- "##ch": 853,
913
- "##Ãł": 854,
914
- "##ko": 855,
915
- "##Ãłi": 856,
916
- "##ali": 857,
917
- "##alib": 858,
918
- "##alibo": 859,
919
- "##tan": 860,
920
- "##li": 861,
921
- "##wa": 862,
922
- "##mb": 863,
923
- "##gh": 864,
924
- "##pt": 865,
925
- "##pti": 866,
926
- "Ġna": 867,
927
- "##libo": 868,
928
- "##nay": 869,
929
- "##asod": 870,
930
- "##ero": 871,
931
- "An": 872,
932
- "Ang": 873,
933
- "##pu": 874,
934
- "##himu": 875,
935
- "##dt": 876,
936
- "##dto": 877,
937
- "##oh": 878,
938
- "##¤t": 879,
939
- "##etr": 880,
940
- "##ı": 881,
941
- "##ata": 882,
942
- "##io": 883,
943
- "##»į": 884,
944
- "##es": 885,
945
- "##ila": 886,
946
- "##oga": 887,
947
- "Ġki": 888,
948
- "Ġkin": 889,
949
- "Ġkina": 890,
950
- "##¤Ã": 891,
951
- "##uj": 892,
952
- "##is": 893,
953
- "à¤": 894,
954
- "##nga": 895,
955
- "##ngan": 896,
956
- "##imu": 897,
957
- "##uk": 898,
958
- "##uki": 899,
959
- "##ukir": 900,
960
- "##ukira": 901,
961
- "##ukiran": 902,
962
- "##inab": 903,
963
- "##rr": 904,
964
- "Ġla": 905,
965
- "Ġlal": 906,
966
- "##wig": 907,
967
- "##ehi": 908,
968
- "##tz": 909,
969
- "##han": 910,
970
- "##ren": 911,
971
- "##iren": 912,
972
- "##res": 913,
973
- "##ntz": 914,
974
- "Ġlala": 915,
975
- "##»Ľ": 916,
976
- "##no": 917,
977
- "##©s": 918,
978
- "##mo": 919,
979
- "##omo": 920,
980
- "##lags": 921,
981
- "##kti": 922,
982
- "##akti": 923,
983
- "##libot": 924,
984
- "##alibot": 925,
985
- "##ÙĬ": 926,
986
- "##ts": 927,
987
- "##rts": 928,
988
- "##erts": 929,
989
- "Ġda": 930,
990
- "an": 931,
991
- "##ong": 932,
992
- "##asar": 933,
993
- "##pan": 934,
994
- "##ten": 935,
995
- "##yo": 936,
996
- "##oa": 937,
997
- "##tod": 938,
998
- "##itz": 939,
999
- "##il": 940,
1000
- "##uden": 941,
1001
- "##ebi": 942,
1002
- "##ahimu": 943,
1003
- "##izi": 944,
1004
- "##bizi": 945,
1005
- "##or": 946,
1006
- "Ġsa": 947,
1007
- "##ana": 948,
1008
- "##em": 949,
1009
- "##me": 950,
1010
- "##ka": 951,
1011
- "##aka": 952,
1012
- "##id": 953,
1013
- "##idl": 954,
1014
- "##idla": 955,
1015
- "##lag": 956,
1016
- "Ġka": 957,
1017
- "Ġkas": 958,
1018
- "Ġkasa": 959,
1019
- "Ġkasar": 960,
1020
- "##gw": 961,
1021
- "##do": 962,
1022
- "##ing": 963,
1023
- "##ua": 964,
1024
- "##ŁÄ": 965,
1025
- "##oÃł": 966,
1026
- "##akar": 967,
1027
- "##ume": 968,
1028
- "##hong": 969,
1029
- "##lu": 970,
1030
- "##alu": 971,
1031
- "Ġan": 972,
1032
- "Ġang": 973,
1033
- "##eme": 974,
1034
- "##ug": 975,
1035
- "##uz": 976,
1036
- "##am": 977,
1037
- "##kop": 978,
1038
- "##wi": 979,
1039
- "##än": 980,
1040
- "##arr": 981,
1041
- "##ÄĽ": 982,
1042
- "##gat": 983,
1043
- "##met": 984,
1044
- "##lib": 985,
1045
- "##tu": 986,
1046
- "##gt": 987,
1047
- "##gto": 988,
1048
- "##gtod": 989,
1049
- "##§Ã": 990,
1050
- "##//": 991,
1051
- "##kan": 992,
1052
- "##nd": 993,
1053
- "##and": 994,
1054
- "##dad": 995,
1055
- "##ou": 996,
1056
- "##oun": 997,
1057
- "##Łu": 998,
1058
- "##oÃ": 999,
1059
- "##ghi": 1000,
1060
- "##eek": 1001,
1061
- "°": 1002,
1062
- "##ud": 1003,
1063
- "##ude": 1004,
1064
- "##aa": 1005,
1065
- "##aan": 1006,
1066
- "##tang": 1007,
1067
- "##un": 1008,
1068
- "##nit": 1009,
1069
- "##st": 1010,
1070
- "##sta": 1011,
1071
- "##ol": 1012,
1072
- "##ez": 1013,
1073
- "Ġes": 1014,
1074
- "Ġest": 1015,
1075
- "##ø": 1016,
1076
- "##ku": 1017,
1077
- "##nda": 1018,
1078
- "##za": 1019,
1079
- "##zar": 1020,
1080
- "##np": 1021,
1081
- "##ilo": 1022,
1082
- "##dla": 1023,
1083
- "Ġzi": 1024,
1084
- "Ġzir": 1025,
1085
- "Ġzire": 1026,
1086
- "Ġziren": 1027,
1087
- "##pr": 1028,
1088
- "##npr": 1029,
1089
- "##ac": 1030,
1090
- "##ns": 1031,
1091
- "##dpa": 1032,
1092
- "##adpa": 1033,
1093
- "##sadpa": 1034,
1094
- "##asadpa": 1035,
1095
- "##eta": 1036,
1096
- "##eg": 1037,
1097
- "##ita": 1038,
1098
- "##nita": 1039,
1099
- "##ung": 1040,
1100
- "##¡n": 1041,
1101
- "##¸j": 1042,
1102
- "##ntro": 1043,
1103
- "##son": 1044,
1104
- "##¡nÃ": 1045,
1105
- "##¡nÃŃ": 1046,
1106
- "##Ľn": 1047,
1107
- "##po": 1048,
1108
- "##pon": 1049,
1109
- "##pong": 1050,
1110
- "##bag": 1051,
1111
- "##ho": 1052,
1112
- "##hon": 1053,
1113
- "##uzte": 1054,
1114
- "##alags": 1055,
1115
- "##³w": 1056,
1116
- "##aw": 1057,
1117
- "##mÃ": 1058,
1118
- "##mó": 1059,
1119
- "##ub": 1060,
1120
- "##euden": 1061,
1121
- "##ado": 1062,
1122
- "##iÄ": 1063,
1123
- "##ne": 1064,
1124
- "##nes": 1065,
1125
- "##ora": 1066,
1126
- "##ini": 1067,
1127
- "Ġng": 1068,
1128
- "Ġnga": 1069,
1129
- "##tzi": 1070,
1130
- "##ó": 1071,
1131
- "##kar": 1072,
1132
- "##akan": 1073,
1133
- "##lakan": 1074,
1134
- "##áºŃ": 1075,
1135
- "##sy": 1076,
1136
- "##se": 1077,
1137
- "##eae": 1078,
1138
- "##ceae": 1079,
1139
- "##¡d": 1080,
1140
- "##agat": 1081,
1141
- "Ġdag": 1082,
1142
- "##ea": 1083,
1143
- "##tros": 1084,
1144
- "##adp": 1085,
1145
- "://": 1086,
1146
- "##ÅĻ": 1087,
1147
- "##ve": 1088,
1148
- "##ver": 1089,
1149
- "##nito": 1090,
1150
- "##lak": 1091,
1151
- "##alag": 1092,
1152
- "##alagsa": 1093,
1153
- "##baw": 1094,
1154
- "##ow": 1095,
1155
- "##uzten": 1096,
1156
- "##oka": 1097,
1157
- "##lus": 1098,
1158
- "##ĢĻ": 1099,
1159
- "##abit": 1100,
1160
- "##metr": 1101,
1161
- "##metro": 1102,
1162
- "##ona": 1103,
1163
- "##uba": 1104,
1164
- "##¿t": 1105,
1165
- "Ġgi": 1106,
1166
- "##ki": 1107,
1167
- "Ġta": 1108,
1168
- "##br": 1109,
1169
- "##awi": 1110,
1170
- "##eb": 1111,
1171
- "##ih": 1112,
1172
- "##biy": 1113,
1173
- "##¹´": 1114,
1174
- "##agw": 1115,
1175
- "##bita": 1116,
1176
- "##abita": 1117,
1177
- "habita": 1118,
1178
- "##ert": 1119,
1179
- "##sad": 1120,
1180
- "##asad": 1121,
1181
- "##nÃ": 1122,
1182
- "##ito": 1123,
1183
- "##ss": 1124,
1184
- "##ÅŁ": 1125,
1185
- "##áºŃt": 1126,
1186
- "Ġnah": 1127,
1187
- "Ġnahi": 1128,
1188
- "Ġnahim": 1129,
1189
- "##±ÅŁ": 1130,
1190
- "##ura": 1131,
1191
- "##uran": 1132,
1192
- "##kt": 1133,
1193
- "##tza": 1134,
1194
- "##ld": 1135,
1195
- "Ġkm": 1136,
1196
- "##itu": 1137,
1197
- "##asadp": 1138,
1198
- "##ê": 1139,
1199
- "##ú": 1140,
1200
- "##kir": 1141,
1201
- "ĠAn": 1142,
1202
- "ĠAng": 1143,
1203
- "##mak": 1144,
1204
- "##á": 1145,
1205
- "##ág": 1146,
1206
- "##sadp": 1147,
1207
- "##sadpan": 1148,
1208
- "##enda": 1149,
1209
- "##gi": 1150,
1210
- "##Äĭ": 1151,
1211
- "##co": 1152,
1212
- "Ġph": 1153,
1213
- "##rg": 1154,
1214
- "##kira": 1155,
1215
- "##kiran": 1156,
1216
- "##kang": 1157,
1217
- "##akang": 1158,
1218
- "##lakang": 1159,
1219
- "##dlakang": 1160,
1220
- "##idlakang": 1161,
1221
- "##iÃ": 1162,
1222
- "##ió": 1163,
1223
- "##ako": 1164,
1224
- "##we": 1165,
1225
- "##ysa": 1166,
1226
- "##rop": 1167,
1227
- "##ika": 1168,
1228
- "##az": 1169,
1229
- "##jo": 1170,
1230
- "##zz": 1171,
1231
- "##ula": 1172,
1232
- "##¥d": 1173,
1233
- "##¼u": 1174,
1234
- "##be": 1175,
1235
- "##ħ": 1176,
1236
- "##tso": 1177,
1237
- "##tson": 1178,
1238
- "##iet": 1179,
1239
- "##§h": 1180,
1240
- "##ami": 1181,
1241
- "##§a": 1182,
1242
- "##ee": 1183,
1243
- "##Ã¥d": 1184,
1244
- "##ijl": 1185,
1245
- "##§o": 1186,
1246
- "##nah": 1187,
1247
- "##±m": 1188,
1248
- "##ħa": 1189,
1249
- "##abaw": 1190,
1250
- "##ihan": 1191,
1251
- "##utang": 1192,
1252
- "##mutang": 1193,
1253
- "##AT": 1194,
1254
- "Ġpu": 1195,
1255
- "Ġze": 1196,
1256
- "##us": 1197,
1257
- "Ġni": 1198,
1258
- "##¼i": 1199,
1259
- "##bl": 1200,
1260
- "##bli": 1201,
1261
- "##ikaz": 1202,
1262
- "##ut": 1203,
1263
- "##uen": 1204,
1264
- "##ate": 1205,
1265
- "##¡g": 1206,
1266
- "##rÄ": 1207,
1267
- "##ent": 1208,
1268
- "##©g": 1209,
1269
- "##zj": 1210,
1270
- "##Ńm": 1211,
1271
- "##cia": 1212,
1272
- "##oz": 1213,
1273
- "##à¤": 1214,
1274
- "##rÃ": 1215,
1275
- "##Äĭe": 1216,
1276
- "##¤Ĥ": 1217,
1277
- "##el": 1218,
1278
- "##ĭi": 1219,
1279
- "##ru": 1220,
1280
- "##las": 1221,
1281
- "##ulas": 1222,
1282
- "##iran": 1223,
1283
- "##idlak": 1224,
1284
- "##bagat": 1225,
1285
- "##abagat": 1226,
1286
- "##iyo": 1227,
1287
- "##ye": 1228,
1288
- "##pres": 1229,
1289
- "##npres": 1230,
1290
- "##DE": 1231,
1291
- "##tuzte": 1232,
1292
- "##kai": 1233,
1293
- "##ubi": 1234,
1294
- "##ubig": 1235,
1295
- "##§r": 1236,
1296
- "##akt": 1237,
1297
- "##dlak": 1238,
1298
- "##gk": 1239,
1299
- "##ll": 1240,
1300
- "##ż": 1241,
1301
- "##ħh": 1242,
1302
- "##akop": 1243,
1303
- "##big": 1244,
1304
- "##iez": 1245,
1305
- "##³l": 1246,
1306
- "##ahab": 1247,
1307
- "##ahabo": 1248,
1308
- "##ahabog": 1249,
1309
- "##naha": 1250,
1310
- "Ġkw": 1251,
1311
- "Ġkwa": 1252,
1312
- "##Å«": 1253,
1313
- "##é": 1254,
1314
- "##¡rÃ": 1255,
1315
- "##árÃ": 1256,
1316
- "##idlaka": 1257,
1317
- "##idlakan": 1258,
1318
- "Ġpa": 1259,
1319
- "##habo": 1260,
1320
- "##yon": 1261,
1321
- "##go": 1262,
1322
- "##ngo": 1263,
1323
- "##ghim": 1264,
1324
- "##aghim": 1265,
1325
- "##Äģ": 1266,
1326
- "##biz": 1267,
1327
- "##ira": 1268,
1328
- "##oÄ": 1269,
1329
- "##º¿t": 1270,
1330
- "Ġkinai": 1271,
1331
- "##inabu": 1272,
1332
- "##nty": 1273,
1333
- "##pre": 1274,
1334
- "##¡r": 1275,
1335
- "##ár": 1276,
1336
- "##¤k": 1277,
1337
- "##äk": 1278,
1338
- "##laka": 1279,
1339
- "##ler": 1280,
1340
- "##lerr": 1281,
1341
- "##lerri": 1282,
1342
- "##·¯": 1283,
1343
- "##bog": 1284,
1344
- "##inau": 1285,
1345
- "##»ĵ": 1286,
1346
- "##hp": 1287,
1347
- "##hÃ": 1288,
1348
- "Ġre": 1289,
1349
- "##igan": 1290,
1350
- "##kaz": 1291,
1351
- "##gsa": 1292,
1352
- "##zon": 1293,
1353
- "##aso": 1294,
1354
- "##gwa": 1295,
1355
- "##Ńj": 1296,
1356
- "Ġul": 1297,
1357
- "##syo": 1298,
1358
- "##agsa": 1299,
1359
- "##tor": 1300,
1360
- "##gtor": 1301,
1361
- "##ĦØ": 1302,
1362
- "##ÙĦØ": 1303,
1363
- "Âł": 1304,
1364
- "##hiy": 1305,
1365
- "##ima": 1306,
1366
- "##³n": 1307,
1367
- "##dp": 1308,
1368
- "##Łi": 1309,
1369
- "##RE": 1310,
1370
- "##rang": 1311,
1371
- "##á»": 1312,
1372
- "##eu": 1313,
1373
- "##eud": 1314,
1374
- "##eude": 1315,
1375
- "##lagsa": 1316,
1376
- "##tuz": 1317,
1377
- "##tuzt": 1318,
1378
- "##tuzten": 1319,
1379
- "##¼j": 1320,
1380
- "##¼jo": 1321,
1381
- "##zo": 1322,
1382
- "##jon": 1323,
1383
- "##lig": 1324,
1384
- "##azz": 1325,
1385
- "##bu": 1326,
1386
- "##abu": 1327,
1387
- "##nÄ": 1328
1388
- }
1389
- }
1390
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Surprisal_threshold_256000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "698": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Surprisal_threshold_256000/vocab.json DELETED
@@ -1 +0,0 @@
1
- {"<|padding|>": 0, "<|endoftext|>": 1, "\u0120\u00f5": 2, "\u00f5": 3, "##\u00f5": 4, "\u0120\u00f6": 5, "\u00f6": 6, "##\u00f6": 7, "\u0120\u00aa": 8, "\u00aa": 9, "##\u00aa": 10, "\u0120\u013d": 11, "\u013d": 12, "##\u013d": 13, "\u0120\u0110": 14, "\u0110": 15, "##\u0110": 16, "`": 17, "##`": 18, ".": 19, "##.": 20, "\u0120O": 21, "O": 22, "##O": 23, "\u00ae": 24, "##\u00ae": 25, "\u00bd": 26, "##\u00bd": 27, "\u0120F": 28, "F": 29, "##F": 30, "\u0120\u00e1": 31, "\u00e1": 32, "##\u00e1": 33, "\u0120\u00f8": 34, "\u00f8": 35, "##\u00f8": 36, "\u0120T": 37, "T": 38, "##T": 39, "\"": 40, "##\"": 41, "9": 42, "##9": 43, "\u00a5": 44, "##\u00a5": 45, "\u00b8": 46, "##\u00b8": 47, "_": 48, "##_": 49, "\u0120B": 50, "B": 51, "##B": 52, "\u0120\u013a": 53, "\u013a": 54, "##\u013a": 55, "\u0120\u00c8": 56, "\u00c8": 57, "##\u00c8": 58, "\u0120\u00c4": 59, "\u00c4": 60, "##\u00c4": 61, "\u0120\u0137": 62, "\u0137": 63, "##\u0137": 64, "\u00a4": 65, "##\u00a4": 66, "\u0120o": 67, "o": 68, "##o": 69, "\u0120\u012e": 70, "\u012e": 71, "##\u012e": 72, "\u0120\u00f0": 73, "\u00f0": 74, "##\u00f0": 75, "\\": 76, "##\\": 77, "\u0120\u013f": 78, "\u013f": 79, "##\u013f": 80, "^": 81, "##^": 82, "\u0120\u010a": 83, "\u010a": 84, "##\u010a": 85, "\u0120j": 86, "j": 87, "##j": 88, "\u0120n": 89, "n": 90, "##n": 91, "\u0120": 92, "##\u0120": 93, "\u0120\u00fe": 94, "\u00fe": 95, "##\u00fe": 96, "\u0120D": 97, "D": 98, "##D": 99, "\u0120\u0114": 100, "\u0114": 101, "##\u0114": 102, "\u0120L": 103, "L": 104, "##L": 105, "\u0120\u00e5": 106, "\u00e5": 107, "##\u00e5": 108, "\u0120J": 109, "J": 110, "##J": 111, "\u0120e": 112, "e": 113, "##e": 114, "\u0120u": 115, "u": 116, "##u": 117, "\u0120\u0104": 118, "\u0104": 119, "##\u0104": 120, "\u0120\u0131": 121, "\u0131": 122, "##\u0131": 123, "\u0120\u0134": 124, "\u0134": 125, "##\u0134": 126, "*": 127, "##*": 128, "5": 129, "##5": 130, "\u0120\u0116": 131, "\u0116": 132, "##\u0116": 133, "\u0120\u010e": 134, "\u010e": 135, "##\u010e": 
136, "\u0120\u0122": 137, "\u0122": 138, "##\u0122": 139, "\u0120Y": 140, "Y": 141, "##Y": 142, "\u0120\u00dc": 143, "\u00dc": 144, "##\u00dc": 145, ";": 146, "##;": 147, "\u00bf": 148, "##\u00bf": 149, "~": 150, "##~": 151, "2": 152, "##2": 153, "\u00b4": 154, "##\u00b4": 155, "\u00a6": 156, "##\u00a6": 157, "\u0120\u00c0": 158, "\u00c0": 159, "##\u00c0": 160, "\u0120\u00e9": 161, "\u00e9": 162, "##\u00e9": 163, "\u0120\u0127": 164, "\u0127": 165, "##\u0127": 166, "\u00a2": 167, "##\u00a2": 168, "\u0120\u011a": 169, "\u011a": 170, "##\u011a": 171, "\u0120\u00e3": 172, "\u00e3": 173, "##\u00e3": 174, "\u0120\u0119": 175, "\u0119": 176, "##\u0119": 177, "(": 178, "##(": 179, "\u00ac": 180, "##\u00ac": 181, "\u00ab": 182, "##\u00ab": 183, "6": 184, "##6": 185, "\u0120E": 186, "E": 187, "##E": 188, "\u00a3": 189, "##\u00a3": 190, "\u0120g": 191, "g": 192, "##g": 193, "\u00b9": 194, "##\u00b9": 195, "\u0120\u0105": 196, "\u0105": 197, "##\u0105": 198, "8": 199, "##8": 200, "-": 201, "##-": 202, "\u00a8": 203, "##\u00a8": 204, "&": 205, "##&": 206, "\u0120\u012b": 207, "\u012b": 208, "##\u012b": 209, "\u0120\u0103": 210, "\u0103": 211, "##\u0103": 212, "\u0120\u00ca": 213, "\u00ca": 214, "##\u00ca": 215, "\u0120Q": 216, "Q": 217, "##Q": 218, "\u0120\u0100": 219, "\u0100": 220, "##\u0100": 221, "\u0120\u0132": 222, "\u0132": 223, "##\u0132": 224, "\u0120\u00c6": 225, "\u00c6": 226, "##\u00c6": 227, "\u0120V": 228, "V": 229, "##V": 230, "\u0120\u00c9": 231, "\u00c9": 232, "##\u00c9": 233, "\u00a7": 234, "##\u00a7": 235, "\u0120\u011d": 236, "\u011d": 237, "##\u011d": 238, "\u0120\u0143": 239, "\u0143": 240, "##\u0143": 241, "\u0120\u00da": 242, "\u00da": 243, "##\u00da": 244, "\u0120\u0135": 245, "\u0135": 246, "##\u0135": 247, "\u0120\u00b5": 248, "\u00b5": 249, "##\u00b5": 250, "\u00b6": 251, "##\u00b6": 252, "\u0120P": 253, "P": 254, "##P": 255, "\u0120\u0133": 256, "\u0133": 257, "##\u0133": 258, "\u00b2": 259, "##\u00b2": 260, "\u0120\u00ce": 261, "\u00ce": 262, 
"##\u00ce": 263, "\u0120\u00e2": 264, "\u00e2": 265, "##\u00e2": 266, "\u0120S": 267, "S": 268, "##S": 269, "\u0120\u00c7": 270, "\u00c7": 271, "##\u00c7": 272, "\u0120\u010c": 273, "\u010c": 274, "##\u010c": 275, "\u0120I": 276, "I": 277, "##I": 278, "\u0120H": 279, "H": 280, "##H": 281, "|": 282, "##|": 283, "\u0120\u00dd": 284, "\u00dd": 285, "##\u00dd": 286, "\u0120\u0117": 287, "\u0117": 288, "##\u0117": 289, "\u00d7": 290, "##\u00d7": 291, "\u0120x": 292, "x": 293, "##x": 294, "\u0120\u0101": 295, "\u0101": 296, "##\u0101": 297, "\u0120\u0136": 298, "\u0136": 299, "##\u0136": 300, "\u0120\u011e": 301, "\u011e": 302, "##\u011e": 303, "'": 304, "##'": 305, "\u0120\u0129": 306, "\u0129": 307, "##\u0129": 308, "\u0120\u0125": 309, "\u0125": 310, "##\u0125": 311, "\u0120q": 312, "q": 313, "##q": 314, "@": 315, "##@": 316, "\u0120p": 317, "p": 318, "##p": 319, "\u0120w": 320, "w": 321, "##w": 322, "!": 323, "##!": 324, "#": 325, "###": 326, "\u0120f": 327, "f": 328, "##f": 329, "\u00be": 330, "##\u00be": 331, "\u0120\u00d2": 332, "\u00d2": 333, "##\u00d2": 334, "\u0120\u00d9": 335, "\u00d9": 336, "##\u00d9": 337, "\u0120\u00e7": 338, "\u00e7": 339, "##\u00e7": 340, ">": 341, "##>": 342, "\u0120\u00c1": 343, "\u00c1": 344, "##\u00c1": 345, "\u0120\u00ed": 346, "\u00ed": 347, "##\u00ed": 348, "0": 349, "##0": 350, ")": 351, "##)": 352, "\u0120\u00fa": 353, "\u00fa": 354, "##\u00fa": 355, "]": 356, "##]": 357, "\u0120\u00f1": 358, "\u00f1": 359, "##\u00f1": 360, "\u0120\u012d": 361, "\u012d": 362, "##\u012d": 363, "\u0120\u0139": 364, "\u0139": 365, "##\u0139": 366, "\u0120\u0108": 367, "\u0108": 368, "##\u0108": 369, "\u0120\u00d6": 370, "\u00d6": 371, "##\u00d6": 372, "\u0120\u00f4": 373, "\u00f4": 374, "##\u00f4": 375, "\u0120\u0118": 376, "\u0118": 377, "##\u0118": 378, "\u0120\u0128": 379, "\u0128": 380, "##\u0128": 381, "\u0120K": 382, "K": 383, "##K": 384, "\u0120h": 385, "h": 386, "##h": 387, "\u0120i": 388, "i": 389, "##i": 390, "\u0120\u00ba": 391, "\u00ba": 
392, "##\u00ba": 393, "\u0120\u0113": 394, "\u0113": 395, "##\u0113": 396, "\u0120\u00e6": 397, "\u00e6": 398, "##\u00e6": 399, "\u0120t": 400, "t": 401, "##t": 402, "\u0120\u00ee": 403, "\u00ee": 404, "##\u00ee": 405, "1": 406, "##1": 407, "\u0120y": 408, "y": 409, "##y": 410, "\u0120\u00cd": 411, "\u00cd": 412, "##\u00cd": 413, "\u0120\u00c2": 414, "\u00c2": 415, "##\u00c2": 416, "\u0120\u010b": 417, "\u010b": 418, "##\u010b": 419, "\u0120\u00d8": 420, "\u00d8": 421, "##\u00d8": 422, "\u0120\u00cc": 423, "\u00cc": 424, "##\u00cc": 425, "\u0120\u013b": 426, "\u013b": 427, "##\u013b": 428, ":": 429, "##:": 430, "\u0120M": 431, "M": 432, "##M": 433, "\u0120a": 434, "a": 435, "##a": 436, "\u0120G": 437, "G": 438, "##G": 439, "\u00f7": 440, "##\u00f7": 441, "\u0120\u013e": 442, "\u013e": 443, "##\u013e": 444, "\u0120k": 445, "k": 446, "##k": 447, "\u0120W": 448, "W": 449, "##W": 450, "\u0120\u00db": 451, "\u00db": 452, "##\u00db": 453, "\u0120A": 454, "A": 455, "##A": 456, "\u0120\u0130": 457, "\u0130": 458, "##\u0130": 459, "\u0120\u00de": 460, "\u00de": 461, "##\u00de": 462, "\u0120\u00e0": 463, "\u00e0": 464, "##\u00e0": 465, "\u00b7": 466, "##\u00b7": 467, "\u0120\u00ff": 468, "\u00ff": 469, "##\u00ff": 470, "[": 471, "##[": 472, "=": 473, "##=": 474, "\u0120\u00d1": 475, "\u00d1": 476, "##\u00d1": 477, "\u0120\u010d": 478, "\u010d": 479, "##\u010d": 480, "\u0120X": 481, "X": 482, "##X": 483, "\u0120\u00cb": 484, "\u00cb": 485, "##\u00cb": 486, "\u0120Z": 487, "Z": 488, "##Z": 489, "\u0120\u0102": 490, "\u0102": 491, "##\u0102": 492, "\u0120\u012c": 493, "\u012c": 494, "##\u012c": 495, "\u0120\u010f": 496, "\u010f": 497, "##\u010f": 498, "\u0120\u00fd": 499, "\u00fd": 500, "##\u00fd": 501, "+": 502, "##+": 503, "\u0120U": 504, "U": 505, "##U": 506, "\u0120z": 507, "z": 508, "##z": 509, "\u0120\u00fc": 510, "\u00fc": 511, "##\u00fc": 512, "\u0120\u0123": 513, "\u0123": 514, "##\u0123": 515, "\u00bb": 516, "##\u00bb": 517, "\u0120\u00d0": 518, "\u00d0": 519, 
"##\u00d0": 520, "\u0120\u0121": 521, "\u0121": 522, "##\u0121": 523, "\u0120\u00eb": 524, "\u00eb": 525, "##\u00eb": 526, "\u0120N": 527, "N": 528, "##N": 529, "\u0120R": 530, "R": 531, "##R": 532, "\u0120\u0111": 533, "\u0111": 534, "##\u0111": 535, "\u0120\u00ea": 536, "\u00ea": 537, "##\u00ea": 538, "\u0120\u00f2": 539, "\u00f2": 540, "##\u00f2": 541, "\u0120\u00fb": 542, "\u00fb": 543, "##\u00fb": 544, "{": 545, "##{": 546, "\u0120m": 547, "m": 548, "##m": 549, "\u00a9": 550, "##\u00a9": 551, "\u0120\u0138": 552, "\u0138": 553, "##\u0138": 554, "\u0120\u0124": 555, "\u0124": 556, "##\u0124": 557, "\u0120\u00e4": 558, "\u00e4": 559, "##\u00e4": 560, "\u0120\u00d3": 561, "\u00d3": 562, "##\u00d3": 563, "\u0120\u00f9": 564, "\u00f9": 565, "##\u00f9": 566, "$": 567, "##$": 568, "\u0120\u011b": 569, "\u011b": 570, "##\u011b": 571, "\u00af": 572, "##\u00af": 573, "\u0120s": 574, "s": 575, "##s": 576, "\u0120\u00c5": 577, "\u00c5": 578, "##\u00c5": 579, "\u0120v": 580, "v": 581, "##v": 582, "\u00b0": 583, "##\u00b0": 584, ",": 585, "##,": 586, "\u0120\u00f3": 587, "\u00f3": 588, "##\u00f3": 589, "\u0120\u0140": 590, "\u0140": 591, "##\u0140": 592, "\u0120\u0107": 593, "\u0107": 594, "##\u0107": 595, "\u0120\u0142": 596, "\u0142": 597, "##\u0142": 598, "\u0120\u00df": 599, "\u00df": 600, "##\u00df": 601, "4": 602, "##4": 603, "\u0120\u00d4": 604, "\u00d4": 605, "##\u00d4": 606, "\u0120\u012f": 607, "\u012f": 608, "##\u012f": 609, "\u0120\u011c": 610, "\u011c": 611, "##\u011c": 612, "%": 613, "##%": 614, "\u0120\u00cf": 615, "\u00cf": 616, "##\u00cf": 617, "}": 618, "##}": 619, "\u0120\u00ec": 620, "\u00ec": 621, "##\u00ec": 622, "\u0120\u013c": 623, "\u013c": 624, "##\u013c": 625, "\u0120\u0112": 626, "\u0112": 627, "##\u0112": 628, "\u00a1": 629, "##\u00a1": 630, "\u0120\u00c3": 631, "\u00c3": 632, "##\u00c3": 633, "\u0120\u00d5": 634, "\u00d5": 635, "##\u00d5": 636, "\u0120\u0115": 637, "\u0115": 638, "##\u0115": 639, "\u0120\u0126": 640, "\u0126": 641, "##\u0126": 
642, "\u0120\u012a": 643, "\u012a": 644, "##\u012a": 645, "\u0120r": 646, "r": 647, "##r": 648, "\u0120\u0106": 649, "\u0106": 650, "##\u0106": 651, "\u0120\u011f": 652, "\u011f": 653, "##\u011f": 654, "\u0120c": 655, "c": 656, "##c": 657, "\u0120C": 658, "C": 659, "##C": 660, "<": 661, "##<": 662, "\u00bc": 663, "##\u00bc": 664, "\u0120\u0109": 665, "\u0109": 666, "##\u0109": 667, "\u0120l": 668, "l": 669, "##l": 670, "3": 671, "##3": 672, "?": 673, "##?": 674, "\u00b1": 675, "##\u00b1": 676, "\u0120\u00ef": 677, "\u00ef": 678, "##\u00ef": 679, "\u00b3": 680, "##\u00b3": 681, "\u0120b": 682, "b": 683, "##b": 684, "\u0120d": 685, "d": 686, "##d": 687, "\u0120\u00e8": 688, "\u00e8": 689, "##\u00e8": 690, "\u0120\u0141": 691, "\u0141": 692, "##\u0141": 693, "7": 694, "##7": 695, "/": 696, "##/": 697, "<|unk|>": 698, "##\u00c3\u00a4": 699, "##\u00c3\u00a4\u00c3": 700, "##it": 701, "##an": 702, "##as": 703, "##asa": 704, "##ge": 705, "##de": 706, "##bi": 707, "##ga": 708, "##ta": 709, "##ang": 710, "##pa": 711, "##le": 712, "##ng": 713, "##na": 714, "##ina": 715, ":/": 716, "##en": 717, "##ra": 718, "##den": 719, "##ro": 720, "##on": 721, "##bo": 722, "##ibo": 723, "##ci": 724, "##ul": 725, "##\u00ba\u0143": 726, "##mu": 727, "##in": 728, "##ma": 729, "##ah": 730, "##ar": 731, "##ai": 732, "##nai": 733, "##aci": 734, "##sa": 735, "##inai": 736, "##ba": 737, "##uo": 738, "##bab": 739, "##ir": 740, "##ire": 741, "##baba": 742, "##\u00b6\u00c3": 743, "##iz": 744, "##ni": 745, "##ri": 746, "##rri": 747, "##erri": 748, "##gan": 749, "##nt": 750, "##te": 751, "##ab": 752, "##gs": 753, "##ags": 754, "##zt": 755, "##zte": 756, "##zten": 757, "##la": 758, "##aba": 759, "##tr": 760, "##tro": 761, "##eh": 762, "##ahi": 763, "##ahin": 764, "##ep": 765, "##ion": 766, "##00": 767, "##pi": 768, "##re": 769, "##\u00c3\u00b6": 770, "##ue": 771, "##ak": 772, "##..": 773, "##ia": 774, "##ssa": 775, "##at": 776, "##ob": 777, "##ag": 778, "##aga": 779, "##so": 780, "##dl": 781, "##rt": 
782, "##tt": 783, "##\u00bcl": 784, "##\u0143t": 785, "##\u00ba\u0143t": 786, "##od": 787, "##um": 788, "##gang": 789, "##ngang": 790, "##lo": 791, "##bre": 792, "##tj": 793, "##abi": 794, "##ik": 795, "##ros": 796, "##bit": 797, "##ie": 798, "##ien": 799, "##ap": 800, "##og": 801, "##\u00a1l": 802, "##ek": 803, "##eka": 804, "##etro": 805, "##ay": 806, "##baga": 807, "##abaga": 808, "##au": 809, "##hi": 810, "##him": 811, "##hin": 812, "##zi": 813, "##\u00c3\u0143": 814, "##to": 815, "##ekak": 816, "##ot": 817, "##bot": 818, "##ibot": 819, "##abo": 820, "##\u00a4l": 821, "##et": 822, "##ad": 823, "##er": 824, "##ti": 825, "##ao": 826, "##op": 827, "##om": 828, "##ha": 829, "##aha": 830, "##err": 831, "##ran": 832, "##sod": 833, "##ae": 834, "##sar": 835, "##im": 836, "##ahim": 837, "##ig": 838, "##ib": 839, "##da": 840, "##\u00e1\u00ba": 841, "##si": 842, "##asi": 843, "##ya": 844, "##al": 845, "##mi": 846, "##ala": 847, "##ur": 848, "##iy": 849, "##os": 850, "##fi": 851, "##abag": 852, "##ch": 853, "##\u00c3\u0142": 854, "##ko": 855, "##\u00c3\u0142i": 856, "##ali": 857, "##alib": 858, "##alibo": 859, "##tan": 860, "##li": 861, "##wa": 862, "##mb": 863, "##gh": 864, "##pt": 865, "##pti": 866, "\u0120na": 867, "##libo": 868, "##nay": 869, "##asod": 870, "##ero": 871, "An": 872, "Ang": 873, "##pu": 874, "##himu": 875, "##dt": 876, "##dto": 877, "##oh": 878, "##\u00a4t": 879, "##etr": 880, "##\u00c4\u00b1": 881, "##ata": 882, "##io": 883, "##\u00bb\u012f": 884, "##es": 885, "##ila": 886, "##oga": 887, "\u0120ki": 888, "\u0120kin": 889, "\u0120kina": 890, "##\u00a4\u00c3": 891, "##uj": 892, "##is": 893, "\u00e0\u00a4": 894, "##nga": 895, "##ngan": 896, "##imu": 897, "##uk": 898, "##uki": 899, "##ukir": 900, "##ukira": 901, "##ukiran": 902, "##inab": 903, "##rr": 904, "\u0120la": 905, "\u0120lal": 906, "##wig": 907, "##ehi": 908, "##tz": 909, "##han": 910, "##ren": 911, "##iren": 912, "##res": 913, "##ntz": 914, "\u0120lala": 915, "##\u00bb\u013d": 916, "##no": 917, 
"##\u00a9s": 918, "##mo": 919, "##omo": 920, "##lags": 921, "##kti": 922, "##akti": 923, "##libot": 924, "##alibot": 925, "##\u00d9\u012c": 926, "##ts": 927, "##rts": 928, "##erts": 929, "\u0120da": 930, "an": 931, "##ong": 932, "##asar": 933, "##pan": 934, "##ten": 935, "##yo": 936, "##oa": 937, "##tod": 938, "##itz": 939, "##il": 940, "##uden": 941, "##ebi": 942, "##ahimu": 943, "##izi": 944, "##bizi": 945, "##or": 946, "\u0120sa": 947, "##ana": 948, "##em": 949, "##me": 950, "##ka": 951, "##aka": 952, "##id": 953, "##idl": 954, "##idla": 955, "##lag": 956, "\u0120ka": 957, "\u0120kas": 958, "\u0120kasa": 959, "\u0120kasar": 960, "##gw": 961, "##do": 962, "##ing": 963, "##ua": 964, "##\u0141\u00c4": 965, "##o\u00c3\u0142": 966, "##akar": 967, "##ume": 968, "##hong": 969, "##lu": 970, "##alu": 971, "\u0120an": 972, "\u0120ang": 973, "##eme": 974, "##ug": 975, "##uz": 976, "##am": 977, "##kop": 978, "##wi": 979, "##\u00c3\u00a4n": 980, "##arr": 981, "##\u00c4\u013d": 982, "##gat": 983, "##met": 984, "##lib": 985, "##tu": 986, "##gt": 987, "##gto": 988, "##gtod": 989, "##\u00a7\u00c3": 990, "##//": 991, "##kan": 992, "##nd": 993, "##and": 994, "##dad": 995, "##ou": 996, "##oun": 997, "##\u0141u": 998, "##o\u00c3": 999, "##ghi": 1000, "##eek": 1001, "\u00c2\u00b0": 1002, "##ud": 1003, "##ude": 1004, "##aa": 1005, "##aan": 1006, "##tang": 1007, "##un": 1008, "##nit": 1009, "##st": 1010, "##sta": 1011, "##ol": 1012, "##ez": 1013, "\u0120es": 1014, "\u0120est": 1015, "##\u00c3\u00b8": 1016, "##ku": 1017, "##nda": 1018, "##za": 1019, "##zar": 1020, "##np": 1021, "##ilo": 1022, "##dla": 1023, "\u0120zi": 1024, "\u0120zir": 1025, "\u0120zire": 1026, "\u0120ziren": 1027, "##pr": 1028, "##npr": 1029, "##ac": 1030, "##ns": 1031, "##dpa": 1032, "##adpa": 1033, "##sadpa": 1034, "##asadpa": 1035, "##eta": 1036, "##eg": 1037, "##ita": 1038, "##nita": 1039, "##ung": 1040, "##\u00a1n": 1041, "##\u00b8j": 1042, "##ntro": 1043, "##son": 1044, "##\u00a1n\u00c3": 1045, 
"##\u00a1n\u00c3\u0143": 1046, "##\u013dn": 1047, "##po": 1048, "##pon": 1049, "##pong": 1050, "##bag": 1051, "##ho": 1052, "##hon": 1053, "##uzte": 1054, "##alags": 1055, "##\u00b3w": 1056, "##aw": 1057, "##m\u00c3": 1058, "##m\u00c3\u00b3": 1059, "##ub": 1060, "##euden": 1061, "##ado": 1062, "##i\u00c4": 1063, "##ne": 1064, "##nes": 1065, "##ora": 1066, "##ini": 1067, "\u0120ng": 1068, "\u0120nga": 1069, "##tzi": 1070, "##\u00c3\u00b3": 1071, "##kar": 1072, "##akan": 1073, "##lakan": 1074, "##\u00e1\u00ba\u0143": 1075, "##sy": 1076, "##se": 1077, "##eae": 1078, "##ceae": 1079, "##\u00a1d": 1080, "##agat": 1081, "\u0120dag": 1082, "##ea": 1083, "##tros": 1084, "##adp": 1085, "://": 1086, "##\u00c5\u013b": 1087, "##ve": 1088, "##ver": 1089, "##nito": 1090, "##lak": 1091, "##alag": 1092, "##alagsa": 1093, "##baw": 1094, "##ow": 1095, "##uzten": 1096, "##oka": 1097, "##lus": 1098, "##\u0122\u013b": 1099, "##abit": 1100, "##metr": 1101, "##metro": 1102, "##ona": 1103, "##uba": 1104, "##\u00bft": 1105, "\u0120gi": 1106, "##ki": 1107, "\u0120ta": 1108, "##br": 1109, "##awi": 1110, "##eb": 1111, "##ih": 1112, "##biy": 1113, "##\u00b9\u00b4": 1114, "##agw": 1115, "##bita": 1116, "##abita": 1117, "habita": 1118, "##ert": 1119, "##sad": 1120, "##asad": 1121, "##n\u00c3": 1122, "##ito": 1123, "##ss": 1124, "##\u00c5\u0141": 1125, "##\u00e1\u00ba\u0143t": 1126, "\u0120nah": 1127, "\u0120nahi": 1128, "\u0120nahim": 1129, "##\u00b1\u00c5\u0141": 1130, "##ura": 1131, "##uran": 1132, "##kt": 1133, "##tza": 1134, "##ld": 1135, "\u0120km": 1136, "##itu": 1137, "##asadp": 1138, "##\u00c3\u00aa": 1139, "##\u00c3\u00ba": 1140, "##kir": 1141, "\u0120An": 1142, "\u0120Ang": 1143, "##mak": 1144, "##\u00c3\u00a1": 1145, "##\u00c3\u00a1g": 1146, "##sadp": 1147, "##sadpan": 1148, "##enda": 1149, "##gi": 1150, "##\u00c4\u012d": 1151, "##co": 1152, "\u0120ph": 1153, "##rg": 1154, "##kira": 1155, "##kiran": 1156, "##kang": 1157, "##akang": 1158, "##lakang": 1159, "##dlakang": 1160, 
"##idlakang": 1161, "##i\u00c3": 1162, "##i\u00c3\u00b3": 1163, "##ako": 1164, "##we": 1165, "##ysa": 1166, "##rop": 1167, "##ika": 1168, "##az": 1169, "##jo": 1170, "##zz": 1171, "##ula": 1172, "##\u00a5d": 1173, "##\u00bcu": 1174, "##be": 1175, "##\u00c4\u00a7": 1176, "##tso": 1177, "##tson": 1178, "##iet": 1179, "##\u00a7h": 1180, "##ami": 1181, "##\u00a7a": 1182, "##ee": 1183, "##\u00c3\u00a5d": 1184, "##\u0133l": 1185, "##\u00a7o": 1186, "##nah": 1187, "##\u00b1m": 1188, "##\u00c4\u00a7a": 1189, "##abaw": 1190, "##ihan": 1191, "##utang": 1192, "##mutang": 1193, "##AT": 1194, "\u0120pu": 1195, "\u0120ze": 1196, "##us": 1197, "\u0120ni": 1198, "##\u00bci": 1199, "##bl": 1200, "##bli": 1201, "##ikaz": 1202, "##ut": 1203, "##uen": 1204, "##ate": 1205, "##\u00a1g": 1206, "##r\u00c4": 1207, "##ent": 1208, "##\u00a9g": 1209, "##zj": 1210, "##\u0143m": 1211, "##cia": 1212, "##oz": 1213, "##\u00e0\u00a4": 1214, "##r\u00c3": 1215, "##\u00c4\u012de": 1216, "##\u00a4\u0124": 1217, "##el": 1218, "##\u012di": 1219, "##ru": 1220, "##las": 1221, "##ulas": 1222, "##iran": 1223, "##idlak": 1224, "##bagat": 1225, "##abagat": 1226, "##iyo": 1227, "##ye": 1228, "##pres": 1229, "##npres": 1230, "##DE": 1231, "##tuzte": 1232, "##kai": 1233, "##ubi": 1234, "##ubig": 1235, "##\u00a7r": 1236, "##akt": 1237, "##dlak": 1238, "##gk": 1239, "##ll": 1240, "##\u00c5\u00bc": 1241, "##\u00c4\u00a7h": 1242, "##akop": 1243, "##big": 1244, "##iez": 1245, "##\u00b3l": 1246, "##ahab": 1247, "##ahabo": 1248, "##ahabog": 1249, "##naha": 1250, "\u0120kw": 1251, "\u0120kwa": 1252, "##\u00c5\u00ab": 1253, "##\u00c3\u00a9": 1254, "##\u00a1r\u00c3": 1255, "##\u00c3\u00a1r\u00c3": 1256, "##idlaka": 1257, "##idlakan": 1258, "\u0120pa": 1259, "##habo": 1260, "##yon": 1261, "##go": 1262, "##ngo": 1263, "##ghim": 1264, "##aghim": 1265, "##\u00c4\u0123": 1266, "##biz": 1267, "##ira": 1268, "##o\u00c4": 1269, "##\u00ba\u00bft": 1270, "\u0120kinai": 1271, "##inabu": 1272, "##nty": 1273, "##pre": 1274, 
"##\u00a1r": 1275, "##\u00c3\u00a1r": 1276, "##\u00a4k": 1277, "##\u00c3\u00a4k": 1278, "##laka": 1279, "##ler": 1280, "##lerr": 1281, "##lerri": 1282, "##\u00b7\u00af": 1283, "##bog": 1284, "##inau": 1285, "##\u00bb\u0135": 1286, "##hp": 1287, "##h\u00c3": 1288, "\u0120re": 1289, "##igan": 1290, "##kaz": 1291, "##gsa": 1292, "##zon": 1293, "##aso": 1294, "##gwa": 1295, "##\u0143j": 1296, "\u0120ul": 1297, "##syo": 1298, "##agsa": 1299, "##tor": 1300, "##gtor": 1301, "##\u0126\u00d8": 1302, "##\u00d9\u0126\u00d8": 1303, "\u00c2\u0142": 1304, "##hiy": 1305, "##ima": 1306, "##\u00b3n": 1307, "##dp": 1308, "##\u0141i": 1309, "##RE": 1310, "##rang": 1311, "##\u00e1\u00bb": 1312, "##eu": 1313, "##eud": 1314, "##eude": 1315, "##lagsa": 1316, "##tuz": 1317, "##tuzt": 1318, "##tuzten": 1319, "##\u00bcj": 1320, "##\u00bcjo": 1321, "##zo": 1322, "##jon": 1323, "##lig": 1324, "##azz": 1325, "##bu": 1326, "##abu": 1327, "##n\u00c4": 1328}
 
 
fw57Mmulti_Surprisal_threshold_32000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57Mmulti_Surprisal_threshold_32000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Surprisal_threshold_32000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57Mmulti_Surprisal_threshold_32000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "698": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57Mmulti_Surprisal_threshold_32000/vocab.json DELETED
The diff for this file is too large to render. See raw diff