Mauricio-100 committed on
Commit
7710b82
·
verified ·
1 Parent(s): 4d32cbf

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +0 -0
  2. tokenizer_config.json +16 -88
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -73,7 +73,7 @@
73
  "special": true
74
  },
75
  "9": {
76
- "content": "[FR]",
77
  "lstrip": false,
78
  "normalized": false,
79
  "rstrip": false,
@@ -81,7 +81,7 @@
81
  "special": true
82
  },
83
  "10": {
84
- "content": "[EN]",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
@@ -89,7 +89,7 @@
89
  "special": true
90
  },
91
  "11": {
92
- "content": "[ES]",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
@@ -97,7 +97,7 @@
97
  "special": true
98
  },
99
  "12": {
100
- "content": "[DE]",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
@@ -105,7 +105,7 @@
105
  "special": true
106
  },
107
  "13": {
108
- "content": "[ZH]",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  "special": true
114
  },
115
  "14": {
116
- "content": "[JA]",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
@@ -121,7 +121,7 @@
121
  "special": true
122
  },
123
  "15": {
124
- "content": "[KO]",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
@@ -129,7 +129,7 @@
129
  "special": true
130
  },
131
  "16": {
132
- "content": "[AR]",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
@@ -137,7 +137,7 @@
137
  "special": true
138
  },
139
  "17": {
140
- "content": "[CODE]",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
@@ -145,7 +145,7 @@
145
  "special": true
146
  },
147
  "18": {
148
- "content": "[PYTHON]",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
@@ -153,7 +153,7 @@
153
  "special": true
154
  },
155
  "19": {
156
- "content": "[SQL]",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
@@ -161,7 +161,7 @@
161
  "special": true
162
  },
163
  "20": {
164
- "content": "[HTML]",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
@@ -169,7 +169,7 @@
169
  "special": true
170
  },
171
  "21": {
172
- "content": "[CSS]",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
@@ -177,7 +177,7 @@
177
  "special": true
178
  },
179
  "22": {
180
- "content": "[JS]",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
@@ -185,7 +185,7 @@
185
  "special": true
186
  },
187
  "23": {
188
- "content": "[MATH]",
189
  "lstrip": false,
190
  "normalized": false,
191
  "rstrip": false,
@@ -193,79 +193,7 @@
193
  "special": true
194
  },
195
  "24": {
196
- "content": "[SCIENCE]",
197
- "lstrip": false,
198
- "normalized": false,
199
- "rstrip": false,
200
- "single_word": false,
201
- "special": true
202
- },
203
- "25": {
204
- "content": "[TECH]",
205
- "lstrip": false,
206
- "normalized": false,
207
- "rstrip": false,
208
- "single_word": false,
209
- "special": true
210
- },
211
- "26": {
212
- "content": "[MED]",
213
- "lstrip": false,
214
- "normalized": false,
215
- "rstrip": false,
216
- "single_word": false,
217
- "special": true
218
- },
219
- "27": {
220
- "content": "[LAW]",
221
- "lstrip": false,
222
- "normalized": false,
223
- "rstrip": false,
224
- "single_word": false,
225
- "special": true
226
- },
227
- "28": {
228
- "content": "[FINANCE]",
229
- "lstrip": false,
230
- "normalized": false,
231
- "rstrip": false,
232
- "single_word": false,
233
- "special": true
234
- },
235
- "29": {
236
- "content": "<num>",
237
- "lstrip": false,
238
- "normalized": false,
239
- "rstrip": false,
240
- "single_word": false,
241
- "special": true
242
- },
243
- "30": {
244
- "content": "<url>",
245
- "lstrip": false,
246
- "normalized": false,
247
- "rstrip": false,
248
- "single_word": false,
249
- "special": true
250
- },
251
- "31": {
252
- "content": "<email>",
253
- "lstrip": false,
254
- "normalized": false,
255
- "rstrip": false,
256
- "single_word": false,
257
- "special": true
258
- },
259
- "32": {
260
- "content": "<date>",
261
- "lstrip": false,
262
- "normalized": false,
263
- "rstrip": false,
264
- "single_word": false,
265
- "special": true
266
- },
267
- "33": {
268
- "content": "<time>",
269
  "lstrip": false,
270
  "normalized": false,
271
  "rstrip": false,
 
73
  "special": true
74
  },
75
  "9": {
76
+ "content": "[TITLE]",
77
  "lstrip": false,
78
  "normalized": false,
79
  "rstrip": false,
 
81
  "special": true
82
  },
83
  "10": {
84
+ "content": "[SECTION]",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
 
89
  "special": true
90
  },
91
  "11": {
92
+ "content": "[PARAGRAPH]",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
 
97
  "special": true
98
  },
99
  "12": {
100
+ "content": "[LIST]",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
 
105
  "special": true
106
  },
107
  "13": {
108
+ "content": "[QUOTE]",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
 
113
  "special": true
114
  },
115
  "14": {
116
+ "content": "[FR]",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
 
121
  "special": true
122
  },
123
  "15": {
124
+ "content": "[EN]",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
 
129
  "special": true
130
  },
131
  "16": {
132
+ "content": "[ES]",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
 
137
  "special": true
138
  },
139
  "17": {
140
+ "content": "[MATH]",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
 
145
  "special": true
146
  },
147
  "18": {
148
+ "content": "[SCIENCE]",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
 
153
  "special": true
154
  },
155
  "19": {
156
+ "content": "[HISTORY]",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
 
161
  "special": true
162
  },
163
  "20": {
164
+ "content": "[CITATION]",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
 
169
  "special": true
170
  },
171
  "21": {
172
+ "content": "[REFERENCE]",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
 
177
  "special": true
178
  },
179
  "22": {
180
+ "content": "[DATE]",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
 
185
  "special": true
186
  },
187
  "23": {
188
+ "content": "[LOCATION]",
189
  "lstrip": false,
190
  "normalized": false,
191
  "rstrip": false,
 
193
  "special": true
194
  },
195
  "24": {
196
+ "content": "[PERSON]",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  "lstrip": false,
198
  "normalized": false,
199
  "rstrip": false,