nosuchjihyun committed on
Commit
14978c4
·
verified ·
1 Parent(s): f839f6a

Upload tokenizer

Browse files
Files changed (4) hide show
  1. merges.txt +0 -1
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +25 -24
  4. vocab.json +0 -0
merges.txt CHANGED
@@ -32482,4 +32482,3 @@ ason ic
32482
  ĠíļĮ ìĤ¬ë¥¼
32483
  ple ted
32484
  cent ered
32485
- ĠCl ose
 
32482
  ĠíļĮ ìĤ¬ë¥¼
32483
  ple ted
32484
  cent ered
 
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
  "0": {
@@ -50,7 +51,7 @@
50
  "special": true
51
  },
52
  "6": {
53
- "content": "<general><semantic_similarity>",
54
  "lstrip": false,
55
  "normalized": false,
56
  "rstrip": false,
@@ -58,7 +59,7 @@
58
  "special": true
59
  },
60
  "7": {
61
- "content": "<classification>",
62
  "lstrip": false,
63
  "normalized": false,
64
  "rstrip": false,
@@ -66,7 +67,7 @@
66
  "special": true
67
  },
68
  "8": {
69
- "content": "<clustering>",
70
  "lstrip": false,
71
  "normalized": false,
72
  "rstrip": false,
@@ -74,7 +75,7 @@
74
  "special": true
75
  },
76
  "9": {
77
- "content": "<retrieval_query>",
78
  "lstrip": false,
79
  "normalized": false,
80
  "rstrip": false,
@@ -82,7 +83,7 @@
82
  "special": true
83
  },
84
  "10": {
85
- "content": "<retrieval_document>",
86
  "lstrip": false,
87
  "normalized": false,
88
  "rstrip": false,
@@ -90,7 +91,7 @@
90
  "special": true
91
  },
92
  "11": {
93
- "content": "<code_retrieval_query>",
94
  "lstrip": false,
95
  "normalized": false,
96
  "rstrip": false,
@@ -98,7 +99,7 @@
98
  "special": true
99
  },
100
  "12": {
101
- "content": "<reserved0>",
102
  "lstrip": false,
103
  "normalized": false,
104
  "rstrip": false,
@@ -106,7 +107,7 @@
106
  "special": true
107
  },
108
  "13": {
109
- "content": "<reserved1>",
110
  "lstrip": false,
111
  "normalized": false,
112
  "rstrip": false,
@@ -114,7 +115,7 @@
114
  "special": true
115
  },
116
  "14": {
117
- "content": "<reserved2>",
118
  "lstrip": false,
119
  "normalized": false,
120
  "rstrip": false,
@@ -122,7 +123,7 @@
122
  "special": true
123
  },
124
  "15": {
125
- "content": "<reserved3>",
126
  "lstrip": false,
127
  "normalized": false,
128
  "rstrip": false,
@@ -130,7 +131,7 @@
130
  "special": true
131
  },
132
  "16": {
133
- "content": "<reserved4>",
134
  "lstrip": false,
135
  "normalized": false,
136
  "rstrip": false,
@@ -138,7 +139,7 @@
138
  "special": true
139
  },
140
  "17": {
141
- "content": "<reserved5>",
142
  "lstrip": false,
143
  "normalized": false,
144
  "rstrip": false,
@@ -146,7 +147,7 @@
146
  "special": true
147
  },
148
  "18": {
149
- "content": "<reserved6>",
150
  "lstrip": false,
151
  "normalized": false,
152
  "rstrip": false,
@@ -154,7 +155,7 @@
154
  "special": true
155
  },
156
  "19": {
157
- "content": "<reserved7>",
158
  "lstrip": false,
159
  "normalized": false,
160
  "rstrip": false,
@@ -162,7 +163,7 @@
162
  "special": true
163
  },
164
  "20": {
165
- "content": "<reserved8>",
166
  "lstrip": false,
167
  "normalized": false,
168
  "rstrip": false,
@@ -170,7 +171,7 @@
170
  "special": true
171
  },
172
  "21": {
173
- "content": "<reserved9>",
174
  "lstrip": false,
175
  "normalized": false,
176
  "rstrip": false,
@@ -178,7 +179,7 @@
178
  "special": true
179
  },
180
  "22": {
181
- "content": "<reserved10>",
182
  "lstrip": false,
183
  "normalized": false,
184
  "rstrip": false,
@@ -186,7 +187,7 @@
186
  "special": true
187
  },
188
  "23": {
189
- "content": "<reserved11>",
190
  "lstrip": false,
191
  "normalized": false,
192
  "rstrip": false,
@@ -194,7 +195,7 @@
194
  "special": true
195
  },
196
  "24": {
197
- "content": "<reserved12>",
198
  "lstrip": false,
199
  "normalized": false,
200
  "rstrip": false,
@@ -202,7 +203,7 @@
202
  "special": true
203
  },
204
  "25": {
205
- "content": "<reserved13>",
206
  "lstrip": false,
207
  "normalized": false,
208
  "rstrip": false,
@@ -210,7 +211,7 @@
210
  "special": true
211
  },
212
  "26": {
213
- "content": "<reserved14>",
214
  "lstrip": false,
215
  "normalized": false,
216
  "rstrip": false,
@@ -218,15 +219,15 @@
218
  "special": true
219
  },
220
  "27": {
221
- "content": "<reserved15>",
222
  "lstrip": false,
223
  "normalized": false,
224
  "rstrip": false,
225
  "single_word": false,
226
  "special": true
227
  },
228
- "32768": {
229
- "content": "<|endoftext|>",
230
  "lstrip": false,
231
  "normalized": false,
232
  "rstrip": false,
 
1
  {
2
+ "add_bos_token": true,
3
  "add_prefix_space": false,
4
  "added_tokens_decoder": {
5
  "0": {
 
51
  "special": true
52
  },
53
  "6": {
54
+ "content": "<general>",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
 
59
  "special": true
60
  },
61
  "7": {
62
+ "content": "<semantic_similarity>",
63
  "lstrip": false,
64
  "normalized": false,
65
  "rstrip": false,
 
67
  "special": true
68
  },
69
  "8": {
70
+ "content": "<classification>",
71
  "lstrip": false,
72
  "normalized": false,
73
  "rstrip": false,
 
75
  "special": true
76
  },
77
  "9": {
78
+ "content": "<clustering>",
79
  "lstrip": false,
80
  "normalized": false,
81
  "rstrip": false,
 
83
  "special": true
84
  },
85
  "10": {
86
+ "content": "<retrieval_query>",
87
  "lstrip": false,
88
  "normalized": false,
89
  "rstrip": false,
 
91
  "special": true
92
  },
93
  "11": {
94
+ "content": "<retrieval_document>",
95
  "lstrip": false,
96
  "normalized": false,
97
  "rstrip": false,
 
99
  "special": true
100
  },
101
  "12": {
102
+ "content": "<code_retrieval_query>",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
 
107
  "special": true
108
  },
109
  "13": {
110
+ "content": "<reserved0>",
111
  "lstrip": false,
112
  "normalized": false,
113
  "rstrip": false,
 
115
  "special": true
116
  },
117
  "14": {
118
+ "content": "<reserved1>",
119
  "lstrip": false,
120
  "normalized": false,
121
  "rstrip": false,
 
123
  "special": true
124
  },
125
  "15": {
126
+ "content": "<reserved2>",
127
  "lstrip": false,
128
  "normalized": false,
129
  "rstrip": false,
 
131
  "special": true
132
  },
133
  "16": {
134
+ "content": "<reserved3>",
135
  "lstrip": false,
136
  "normalized": false,
137
  "rstrip": false,
 
139
  "special": true
140
  },
141
  "17": {
142
+ "content": "<reserved4>",
143
  "lstrip": false,
144
  "normalized": false,
145
  "rstrip": false,
 
147
  "special": true
148
  },
149
  "18": {
150
+ "content": "<reserved5>",
151
  "lstrip": false,
152
  "normalized": false,
153
  "rstrip": false,
 
155
  "special": true
156
  },
157
  "19": {
158
+ "content": "<reserved6>",
159
  "lstrip": false,
160
  "normalized": false,
161
  "rstrip": false,
 
163
  "special": true
164
  },
165
  "20": {
166
+ "content": "<reserved7>",
167
  "lstrip": false,
168
  "normalized": false,
169
  "rstrip": false,
 
171
  "special": true
172
  },
173
  "21": {
174
+ "content": "<reserved8>",
175
  "lstrip": false,
176
  "normalized": false,
177
  "rstrip": false,
 
179
  "special": true
180
  },
181
  "22": {
182
+ "content": "<reserved9>",
183
  "lstrip": false,
184
  "normalized": false,
185
  "rstrip": false,
 
187
  "special": true
188
  },
189
  "23": {
190
+ "content": "<reserved10>",
191
  "lstrip": false,
192
  "normalized": false,
193
  "rstrip": false,
 
195
  "special": true
196
  },
197
  "24": {
198
+ "content": "<reserved11>",
199
  "lstrip": false,
200
  "normalized": false,
201
  "rstrip": false,
 
203
  "special": true
204
  },
205
  "25": {
206
+ "content": "<reserved12>",
207
  "lstrip": false,
208
  "normalized": false,
209
  "rstrip": false,
 
211
  "special": true
212
  },
213
  "26": {
214
+ "content": "<reserved13>",
215
  "lstrip": false,
216
  "normalized": false,
217
  "rstrip": false,
 
219
  "special": true
220
  },
221
  "27": {
222
+ "content": "<reserved14>",
223
  "lstrip": false,
224
  "normalized": false,
225
  "rstrip": false,
226
  "single_word": false,
227
  "special": true
228
  },
229
+ "28": {
230
+ "content": "<reserved15>",
231
  "lstrip": false,
232
  "normalized": false,
233
  "rstrip": false,
vocab.json CHANGED
The diff for this file is too large to render. See raw diff