Zeb committed on
Commit
64b3309
·
1 Parent(s): 38928ba

Remove normalizers

Browse files
.DS_Store DELETED
Binary file (12.3 kB)
 
frequencymulti_128000/tokenizer.json CHANGED
@@ -22,14 +22,7 @@
22
  "special": true
23
  }
24
  ],
25
- "normalizer": {
26
- "type": "Sequence",
27
- "normalizers": [
28
- {
29
- "type": "NFD"
30
- }
31
- ]
32
- },
33
  "pre_tokenizer": {
34
  "type": "ByteLevel",
35
  "add_prefix_space": true,
 
22
  "special": true
23
  }
24
  ],
25
+ "normalizer": null,
 
 
 
 
 
 
 
26
  "pre_tokenizer": {
27
  "type": "ByteLevel",
28
  "add_prefix_space": true,
frequencymulti_16000/tokenizer.json CHANGED
@@ -22,14 +22,7 @@
22
  "special": true
23
  }
24
  ],
25
- "normalizer": {
26
- "type": "Sequence",
27
- "normalizers": [
28
- {
29
- "type": "NFD"
30
- }
31
- ]
32
- },
33
  "pre_tokenizer": {
34
  "type": "ByteLevel",
35
  "add_prefix_space": true,
 
22
  "special": true
23
  }
24
  ],
25
+ "normalizer": null,
 
 
 
 
 
 
 
26
  "pre_tokenizer": {
27
  "type": "ByteLevel",
28
  "add_prefix_space": true,
frequencymulti_256000/tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed37c183a6fdf658a259f23b64ca57aab65a1b96dbcb8343b06e108fe6fbec55
3
- size 20274248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c1ac462fae29c81b13d1ee24e1eedfe812d9c0d374ff3c61ba636cebaf52fd7
3
+ size 20274158
frequencymulti_32000/tokenizer.json CHANGED
@@ -22,14 +22,7 @@
22
  "special": true
23
  }
24
  ],
25
- "normalizer": {
26
- "type": "Sequence",
27
- "normalizers": [
28
- {
29
- "type": "NFD"
30
- }
31
- ]
32
- },
33
  "pre_tokenizer": {
34
  "type": "ByteLevel",
35
  "add_prefix_space": true,
 
22
  "special": true
23
  }
24
  ],
25
+ "normalizer": null,
 
 
 
 
 
 
 
26
  "pre_tokenizer": {
27
  "type": "ByteLevel",
28
  "add_prefix_space": true,
frequencymulti_64000/tokenizer.json CHANGED
@@ -22,14 +22,7 @@
22
  "special": true
23
  }
24
  ],
25
- "normalizer": {
26
- "type": "Sequence",
27
- "normalizers": [
28
- {
29
- "type": "NFD"
30
- }
31
- ]
32
- },
33
  "pre_tokenizer": {
34
  "type": "ByteLevel",
35
  "add_prefix_space": true,
 
22
  "special": true
23
  }
24
  ],
25
+ "normalizer": null,
 
 
 
 
 
 
 
26
  "pre_tokenizer": {
27
  "type": "ByteLevel",
28
  "add_prefix_space": true,
frequencymulti_8064/tokenizer.json CHANGED
@@ -22,14 +22,7 @@
22
  "special": true
23
  }
24
  ],
25
- "normalizer": {
26
- "type": "Sequence",
27
- "normalizers": [
28
- {
29
- "type": "NFD"
30
- }
31
- ]
32
- },
33
  "pre_tokenizer": {
34
  "type": "ByteLevel",
35
  "add_prefix_space": true,
 
22
  "special": true
23
  }
24
  ],
25
+ "normalizer": null,
 
 
 
 
 
 
 
26
  "pre_tokenizer": {
27
  "type": "ByteLevel",
28
  "add_prefix_space": true,
fw57Mmulti_Entropy_thresholdB_16000/tokenizer.json CHANGED
@@ -31,14 +31,7 @@
31
  "special": true
32
  }
33
  ],
34
- "normalizer": {
35
- "type": "Sequence",
36
- "normalizers": [
37
- {
38
- "type": "NFD"
39
- }
40
- ]
41
- },
42
  "pre_tokenizer": {
43
  "type": "ByteLevel",
44
  "add_prefix_space": true,
 
31
  "special": true
32
  }
33
  ],
34
+ "normalizer": null,
 
 
 
 
 
 
 
35
  "pre_tokenizer": {
36
  "type": "ByteLevel",
37
  "add_prefix_space": true,
fw57Mmulti_Entropy_thresholdB_32000/tokenizer.json CHANGED
@@ -31,14 +31,7 @@
31
  "special": true
32
  }
33
  ],
34
- "normalizer": {
35
- "type": "Sequence",
36
- "normalizers": [
37
- {
38
- "type": "NFD"
39
- }
40
- ]
41
- },
42
  "pre_tokenizer": {
43
  "type": "ByteLevel",
44
  "add_prefix_space": true,
 
31
  "special": true
32
  }
33
  ],
34
+ "normalizer": null,
 
 
 
 
 
 
 
35
  "pre_tokenizer": {
36
  "type": "ByteLevel",
37
  "add_prefix_space": true,
fw57Mmulti_Entropy_thresholdB_64000/tokenizer.json CHANGED
@@ -31,14 +31,7 @@
31
  "special": true
32
  }
33
  ],
34
- "normalizer": {
35
- "type": "Sequence",
36
- "normalizers": [
37
- {
38
- "type": "NFD"
39
- }
40
- ]
41
- },
42
  "pre_tokenizer": {
43
  "type": "ByteLevel",
44
  "add_prefix_space": true,
 
31
  "special": true
32
  }
33
  ],
34
+ "normalizer": null,
 
 
 
 
 
 
 
35
  "pre_tokenizer": {
36
  "type": "ByteLevel",
37
  "add_prefix_space": true,
fw57Mmulti_Entropy_thresholdM_16000/tokenizer.json CHANGED
@@ -22,14 +22,7 @@
22
  "special": true
23
  }
24
  ],
25
- "normalizer": {
26
- "type": "Sequence",
27
- "normalizers": [
28
- {
29
- "type": "NFD"
30
- }
31
- ]
32
- },
33
  "pre_tokenizer": {
34
  "type": "ByteLevel",
35
  "add_prefix_space": true,
 
22
  "special": true
23
  }
24
  ],
25
+ "normalizer": null,
 
 
 
 
 
 
 
26
  "pre_tokenizer": {
27
  "type": "ByteLevel",
28
  "add_prefix_space": true,
fw57Mmulti_Entropy_thresholdM_32000/tokenizer.json CHANGED
@@ -22,14 +22,7 @@
22
  "special": true
23
  }
24
  ],
25
- "normalizer": {
26
- "type": "Sequence",
27
- "normalizers": [
28
- {
29
- "type": "NFD"
30
- }
31
- ]
32
- },
33
  "pre_tokenizer": {
34
  "type": "ByteLevel",
35
  "add_prefix_space": true,
 
22
  "special": true
23
  }
24
  ],
25
+ "normalizer": null,
 
 
 
 
 
 
 
26
  "pre_tokenizer": {
27
  "type": "ByteLevel",
28
  "add_prefix_space": true,
fw57Mmulti_Entropy_thresholdM_64000/tokenizer.json CHANGED
@@ -22,14 +22,7 @@
22
  "special": true
23
  }
24
  ],
25
- "normalizer": {
26
- "type": "Sequence",
27
- "normalizers": [
28
- {
29
- "type": "NFD"
30
- }
31
- ]
32
- },
33
  "pre_tokenizer": {
34
  "type": "ByteLevel",
35
  "add_prefix_space": true,
 
22
  "special": true
23
  }
24
  ],
25
+ "normalizer": null,
 
 
 
 
 
 
 
26
  "pre_tokenizer": {
27
  "type": "ByteLevel",
28
  "add_prefix_space": true,
fw57Mmulti_Entropy_thresholdM_8064/tokenizer.json CHANGED
@@ -22,14 +22,7 @@
22
  "special": true
23
  }
24
  ],
25
- "normalizer": {
26
- "type": "Sequence",
27
- "normalizers": [
28
- {
29
- "type": "NFD"
30
- }
31
- ]
32
- },
33
  "pre_tokenizer": {
34
  "type": "ByteLevel",
35
  "add_prefix_space": true,
 
22
  "special": true
23
  }
24
  ],
25
+ "normalizer": null,
 
 
 
 
 
 
 
26
  "pre_tokenizer": {
27
  "type": "ByteLevel",
28
  "add_prefix_space": true,
fw57Mmulti_Surprisal_thresholdB_16000/tokenizer.json CHANGED
@@ -31,14 +31,7 @@
31
  "special": true
32
  }
33
  ],
34
- "normalizer": {
35
- "type": "Sequence",
36
- "normalizers": [
37
- {
38
- "type": "NFD"
39
- }
40
- ]
41
- },
42
  "pre_tokenizer": {
43
  "type": "ByteLevel",
44
  "add_prefix_space": true,
 
31
  "special": true
32
  }
33
  ],
34
+ "normalizer": null,
 
 
 
 
 
 
 
35
  "pre_tokenizer": {
36
  "type": "ByteLevel",
37
  "add_prefix_space": true,
fw57Mmulti_Surprisal_thresholdB_32000/tokenizer.json CHANGED
@@ -31,14 +31,7 @@
31
  "special": true
32
  }
33
  ],
34
- "normalizer": {
35
- "type": "Sequence",
36
- "normalizers": [
37
- {
38
- "type": "NFD"
39
- }
40
- ]
41
- },
42
  "pre_tokenizer": {
43
  "type": "ByteLevel",
44
  "add_prefix_space": true,
 
31
  "special": true
32
  }
33
  ],
34
+ "normalizer": null,
 
 
 
 
 
 
 
35
  "pre_tokenizer": {
36
  "type": "ByteLevel",
37
  "add_prefix_space": true,
fw57Mmulti_Surprisal_thresholdB_64000/tokenizer.json CHANGED
@@ -31,14 +31,7 @@
31
  "special": true
32
  }
33
  ],
34
- "normalizer": {
35
- "type": "Sequence",
36
- "normalizers": [
37
- {
38
- "type": "NFD"
39
- }
40
- ]
41
- },
42
  "pre_tokenizer": {
43
  "type": "ByteLevel",
44
  "add_prefix_space": true,
 
31
  "special": true
32
  }
33
  ],
34
+ "normalizer": null,
 
 
 
 
 
 
 
35
  "pre_tokenizer": {
36
  "type": "ByteLevel",
37
  "add_prefix_space": true,