Pradnya27 committed on
Commit
80468d8
·
verified ·
1 Parent(s): d6586df

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +25 -5
  2. tokenizer_config.json +2 -45
tokenizer.json CHANGED
@@ -18,7 +18,7 @@
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": true,
22
  "special": true
23
  },
24
  {
@@ -57,10 +57,30 @@
57
  "use_regex": true
58
  },
59
  "post_processor": {
60
- "type": "ByteLevel",
61
- "add_prefix_space": true,
62
- "trim_offsets": false,
63
- "use_regex": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  },
65
  "decoder": {
66
  "type": "ByteLevel",
 
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
+ "normalized": false,
22
  "special": true
23
  },
24
  {
 
57
  "use_regex": true
58
  },
59
  "post_processor": {
60
+ "type": "TemplateProcessing",
61
+ "single": [
62
+ {
63
+ "Sequence": {
64
+ "id": "A",
65
+ "type_id": 0
66
+ }
67
+ }
68
+ ],
69
+ "pair": [
70
+ {
71
+ "Sequence": {
72
+ "id": "A",
73
+ "type_id": 0
74
+ }
75
+ },
76
+ {
77
+ "Sequence": {
78
+ "id": "B",
79
+ "type_id": 1
80
+ }
81
+ }
82
+ ],
83
+ "special_tokens": {}
84
  },
85
  "decoder": {
86
  "type": "ByteLevel",
tokenizer_config.json CHANGED
@@ -1,54 +1,11 @@
1
  {
2
- "add_bos_token": false,
3
  "add_prefix_space": false,
4
- "added_tokens_decoder": {
5
- "0": {
6
- "content": "<s>",
7
- "lstrip": false,
8
- "normalized": true,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "1": {
14
- "content": "<pad>",
15
- "lstrip": false,
16
- "normalized": true,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "2": {
22
- "content": "</s>",
23
- "lstrip": false,
24
- "normalized": true,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "50000": {
30
- "content": "<EOL>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "50001": {
38
- "content": "<|UNKNOWN|>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- }
45
- },
46
  "bos_token": "<s>",
47
- "clean_up_tokenization_spaces": false,
48
  "eos_token": "</s>",
49
  "errors": "replace",
50
- "extra_special_tokens": {},
51
  "full_tokenizer_file": null,
 
52
  "model_max_length": 1000000000000000019884624838656,
53
  "pad_token": "</s>",
54
  "sep_token": "<EOL>",
 
1
  {
 
2
  "add_prefix_space": false,
3
+ "backend": "tokenizers",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "bos_token": "<s>",
 
5
  "eos_token": "</s>",
6
  "errors": "replace",
 
7
  "full_tokenizer_file": null,
8
+ "is_local": false,
9
  "model_max_length": 1000000000000000019884624838656,
10
  "pad_token": "</s>",
11
  "sep_token": "<EOL>",