Franso committed on
Commit
e2ba2d3
·
verified ·
1 Parent(s): cef4c92

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +64 -100
tokenizer.json CHANGED
@@ -139,62 +139,62 @@
139
  "<s>": 1,
140
  "</s>": 2,
141
  "#": 3,
142
- "(": 4,
143
- ")": 5,
144
- "+": 6,
145
- "-": 7,
146
- "/": 8,
147
- "1": 9,
148
- "2": 10,
149
- "3": 11,
150
- "4": 12,
151
- "5": 13,
152
- "=": 14,
153
- "@": 15,
154
- "B": 16,
155
- "C": 17,
156
- "F": 18,
157
- "H": 19,
158
- "I": 20,
159
- "N": 21,
160
- "O": 22,
161
- "S": 23,
162
- "[": 24,
163
- "\\": 25,
164
- "]": 26,
165
- "c": 27,
166
- "l": 28,
167
- "n": 29,
168
- "o": 30,
169
- "r": 31,
170
- "s": 32,
171
- "▁": 33,
172
- "cc": 34,
173
- "CC": 35,
174
- "O)": 36,
175
- "c1": 37,
176
- "=O)": 38,
177
- "(=O)": 39,
178
- "(C": 40,
179
- "c2": 41,
180
- "H]": 42,
181
- "C@": 43,
182
- "[C@": 44,
183
- "c1cc": 45,
184
- "c(": 46,
185
- "▁C": 47,
186
- "C(=O)": 48,
187
- "(C)": 49,
188
- "2)": 50,
189
- "[C@H]": 51,
190
- "[C@@": 52,
191
- "c3": 53,
192
- "c2cc": 54,
193
- "[C@@H]": 55,
194
- ")cc": 56,
195
- "NC(=O)": 57,
196
- "CCC": 58,
197
- "(=O)N": 59
198
  },
199
  "merges": [
200
  [
@@ -234,12 +234,12 @@
234
  "]"
235
  ],
236
  [
237
- "C",
238
- "@"
239
  ],
240
  [
241
- "[",
242
- "C@"
243
  ],
244
  [
245
  "c1",
@@ -249,57 +249,21 @@
249
  "c",
250
  "("
251
  ],
252
- [
253
- "▁",
254
- "C"
255
- ],
256
- [
257
- "C",
258
- "(=O)"
259
- ],
260
  [
261
  "(C",
262
  ")"
263
  ],
264
  [
265
- "2",
266
- ")"
267
  ],
268
  [
269
- "[C@",
270
- "H]"
271
  ],
272
  [
273
  "[C@",
274
  "@"
275
- ],
276
- [
277
- "c",
278
- "3"
279
- ],
280
- [
281
- "c2",
282
- "cc"
283
- ],
284
- [
285
- "[C@@",
286
- "H]"
287
- ],
288
- [
289
- ")",
290
- "cc"
291
- ],
292
- [
293
- "N",
294
- "C(=O)"
295
- ],
296
- [
297
- "CC",
298
- "C"
299
- ],
300
- [
301
- "(=O)",
302
- "N"
303
  ]
304
  ]
305
  }
 
139
  "<s>": 1,
140
  "</s>": 2,
141
  "#": 3,
142
+ "%": 4,
143
+ "(": 5,
144
+ ")": 6,
145
+ "+": 7,
146
+ "-": 8,
147
+ "/": 9,
148
+ "0": 10,
149
+ "1": 11,
150
+ "2": 12,
151
+ "3": 13,
152
+ "4": 14,
153
+ "5": 15,
154
+ "6": 16,
155
+ "7": 17,
156
+ "8": 18,
157
+ "9": 19,
158
+ "=": 20,
159
+ "@": 21,
160
+ "B": 22,
161
+ "C": 23,
162
+ "F": 24,
163
+ "H": 25,
164
+ "I": 26,
165
+ "N": 27,
166
+ "O": 28,
167
+ "P": 29,
168
+ "S": 30,
169
+ "[": 31,
170
+ "\\": 32,
171
+ "]": 33,
172
+ "c": 34,
173
+ "i": 35,
174
+ "l": 36,
175
+ "n": 37,
176
+ "o": 38,
177
+ "p": 39,
178
+ "r": 40,
179
+ "s": 41,
180
+ "▁": 42,
181
+ "cc": 43,
182
+ "CC": 44,
183
+ "O)": 45,
184
+ "c1": 46,
185
+ "=O)": 47,
186
+ "(=O)": 48,
187
+ "(C": 49,
188
+ "c2": 50,
189
+ "H]": 51,
190
+ "[C": 52,
191
+ "[C@": 53,
192
+ "c1cc": 54,
193
+ "c(": 55,
194
+ "(C)": 56,
195
+ "C(=O)": 57,
196
+ "▁C": 58,
197
+ "[C@@": 59
198
  },
199
  "merges": [
200
  [
 
234
  "]"
235
  ],
236
  [
237
+ "[",
238
+ "C"
239
  ],
240
  [
241
+ "[C",
242
+ "@"
243
  ],
244
  [
245
  "c1",
 
249
  "c",
250
  "("
251
  ],
 
 
 
 
 
 
 
 
252
  [
253
  "(C",
254
  ")"
255
  ],
256
  [
257
+ "C",
258
+ "(=O)"
259
  ],
260
  [
261
+ "▁",
262
+ "C"
263
  ],
264
  [
265
  "[C@",
266
  "@"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  ]
268
  ]
269
  }