alejandrovil committed on
Commit
746ba88
·
verified ·
1 Parent(s): f00e660

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +9 -27
  2. tokenizer_config.json +9 -1
tokenizer.json CHANGED
@@ -29,6 +29,15 @@
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
@@ -62,12 +71,6 @@
62
  "id": "A",
63
  "type_id": 0
64
  }
65
- },
66
- {
67
- "SpecialToken": {
68
- "id": "</s>",
69
- "type_id": 0
70
- }
71
  }
72
  ],
73
  "pair": [
@@ -83,12 +86,6 @@
83
  "type_id": 0
84
  }
85
  },
86
- {
87
- "SpecialToken": {
88
- "id": "</s>",
89
- "type_id": 0
90
- }
91
- },
92
  {
93
  "SpecialToken": {
94
  "id": "<s>",
@@ -100,24 +97,9 @@
100
  "id": "B",
101
  "type_id": 1
102
  }
103
- },
104
- {
105
- "SpecialToken": {
106
- "id": "</s>",
107
- "type_id": 1
108
- }
109
  }
110
  ],
111
  "special_tokens": {
112
- "</s>": {
113
- "id": "</s>",
114
- "ids": [
115
- 2
116
- ],
117
- "tokens": [
118
- "</s>"
119
- ]
120
- },
121
  "<s>": {
122
  "id": "<s>",
123
  "ids": [
 
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
32
+ },
33
+ {
34
+ "id": 32000,
35
+ "content": "[PAD]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
  }
42
  ],
43
  "normalizer": {
 
71
  "id": "A",
72
  "type_id": 0
73
  }
 
 
 
 
 
 
74
  }
75
  ],
76
  "pair": [
 
86
  "type_id": 0
87
  }
88
  },
 
 
 
 
 
 
89
  {
90
  "SpecialToken": {
91
  "id": "<s>",
 
97
  "id": "B",
98
  "type_id": 1
99
  }
 
 
 
 
 
 
100
  }
101
  ],
102
  "special_tokens": {
 
 
 
 
 
 
 
 
 
103
  "<s>": {
104
  "id": "<s>",
105
  "ids": [
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": true,
3
- "add_eos_token": true,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -25,6 +25,14 @@
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
 
 
 
 
 
 
 
 
28
  }
29
  },
30
  "additional_special_tokens": [],
 
1
  {
2
  "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
28
+ },
29
+ "32000": {
30
+ "content": "[PAD]",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
  }
37
  },
38
  "additional_special_tokens": [],