Zeb committed on
Commit
26f9c53
·
1 Parent(s): 85c3781

Remove faulty tokenizers

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .DS_Store +0 -0
  2. fw57M_Entropy_threshold_128000/special_tokens_map.json +0 -6
  3. fw57M_Entropy_threshold_128000/stats.csv +0 -3
  4. fw57M_Entropy_threshold_128000/tokenizer.json +0 -0
  5. fw57M_Entropy_threshold_128000/tokenizer_config.json +0 -37
  6. fw57M_Entropy_threshold_128000/vocab.json +0 -0
  7. fw57M_Entropy_threshold_16000/special_tokens_map.json +0 -6
  8. fw57M_Entropy_threshold_16000/stats.csv +0 -0
  9. fw57M_Entropy_threshold_16000/tokenizer.json +0 -0
  10. fw57M_Entropy_threshold_16000/tokenizer_config.json +0 -37
  11. fw57M_Entropy_threshold_16000/vocab.json +0 -0
  12. fw57M_Entropy_threshold_32000/special_tokens_map.json +0 -6
  13. fw57M_Entropy_threshold_32000/stats.csv +0 -0
  14. fw57M_Entropy_threshold_32000/tokenizer.json +0 -0
  15. fw57M_Entropy_threshold_32000/tokenizer_config.json +0 -37
  16. fw57M_Entropy_threshold_32000/vocab.json +0 -0
  17. fw57M_Entropy_threshold_64000/special_tokens_map.json +0 -6
  18. fw57M_Entropy_threshold_64000/stats.csv +0 -0
  19. fw57M_Entropy_threshold_64000/tokenizer.json +0 -0
  20. fw57M_Entropy_threshold_64000/tokenizer_config.json +0 -37
  21. fw57M_Entropy_threshold_64000/vocab.json +0 -0
  22. fw57M_Entropy_threshold_8064/special_tokens_map.json +0 -6
  23. fw57M_Entropy_threshold_8064/stats.csv +0 -0
  24. fw57M_Entropy_threshold_8064/tokenizer.json +0 -0
  25. fw57M_Entropy_threshold_8064/tokenizer_config.json +0 -37
  26. fw57M_Entropy_threshold_8064/vocab.json +0 -0
  27. fw57M_Surprisal_thresholdB_64000/special_tokens_map.json +0 -6
  28. fw57M_Surprisal_thresholdB_64000/stats.csv +0 -0
  29. fw57M_Surprisal_thresholdB_64000/tokenizer.json +0 -0
  30. fw57M_Surprisal_thresholdB_64000/tokenizer_config.json +0 -37
  31. fw57M_Surprisal_thresholdB_64000/vocab.json +0 -0
  32. fw57M_Surprisal_threshold_16000/special_tokens_map.json +0 -6
  33. fw57M_Surprisal_threshold_16000/stats.csv +0 -0
  34. fw57M_Surprisal_threshold_16000/tokenizer.json +0 -0
  35. fw57M_Surprisal_threshold_16000/tokenizer_config.json +0 -37
  36. fw57M_Surprisal_threshold_16000/vocab.json +0 -0
  37. fw57M_Surprisal_threshold_32000/special_tokens_map.json +0 -6
  38. fw57M_Surprisal_threshold_32000/stats.csv +0 -0
  39. fw57M_Surprisal_threshold_32000/tokenizer.json +0 -0
  40. fw57M_Surprisal_threshold_32000/tokenizer_config.json +0 -37
  41. fw57M_Surprisal_threshold_32000/vocab.json +0 -0
  42. fw57M_Surprisal_threshold_64000/special_tokens_map.json +0 -6
  43. fw57M_Surprisal_threshold_64000/stats.csv +0 -0
  44. fw57M_Surprisal_threshold_64000/tokenizer.json +0 -0
  45. fw57M_Surprisal_threshold_64000/tokenizer_config.json +0 -37
  46. fw57M_Surprisal_threshold_64000/vocab.json +0 -0
  47. fw57M_Surprisal_threshold_8064/special_tokens_map.json +0 -6
  48. fw57M_Surprisal_threshold_8064/stats.csv +0 -0
  49. fw57M_Surprisal_threshold_8064/tokenizer.json +0 -0
  50. fw57M_Surprisal_threshold_8064/tokenizer_config.json +0 -37
.DS_Store ADDED
Binary file (12.3 kB). View file
 
fw57M_Entropy_threshold_128000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57M_Entropy_threshold_128000/stats.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:909a8b46dab49cf0b1f25c10ace1ee321b566b41a9f8f5c03deac33e06433309
3
- size 11253040
 
 
 
 
fw57M_Entropy_threshold_128000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_128000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "514": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_threshold_128000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_16000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57M_Entropy_threshold_16000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_16000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_16000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "514": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_threshold_16000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_32000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57M_Entropy_threshold_32000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_32000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_32000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "514": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_threshold_32000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_64000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57M_Entropy_threshold_64000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_64000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_64000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "514": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_threshold_64000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_8064/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57M_Entropy_threshold_8064/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_8064/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Entropy_threshold_8064/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "514": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Entropy_threshold_8064/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_thresholdB_64000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57M_Surprisal_thresholdB_64000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_thresholdB_64000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_thresholdB_64000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "514": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Surprisal_thresholdB_64000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_16000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57M_Surprisal_threshold_16000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_16000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_16000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "514": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Surprisal_threshold_16000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_32000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57M_Surprisal_threshold_32000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_32000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_32000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "514": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Surprisal_threshold_32000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_64000/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57M_Surprisal_threshold_64000/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_64000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_64000/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "514": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fw57M_Surprisal_threshold_64000/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_8064/special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|padding|>",
5
- "unk_token": "<|unk|>"
6
- }
 
 
 
 
 
 
 
fw57M_Surprisal_threshold_8064/stats.csv DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_8064/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
fw57M_Surprisal_threshold_8064/tokenizer_config.json DELETED
@@ -1,37 +0,0 @@
1
- {
2
- "add_prefix_space": true,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|padding|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "514": {
21
- "content": "<|unk|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "bos_token": "<|endoftext|>",
30
- "clean_up_tokenization_spaces": false,
31
- "eos_token": "<|endoftext|>",
32
- "extra_special_tokens": {},
33
- "model_max_length": 1000000000000000019884624838656,
34
- "pad_token": "<|padding|>",
35
- "tokenizer_class": "PreTrainedTokenizer",
36
- "unk_token": "<|unk|>"
37
- }