VicenteAlex commited on
Commit
f2973dd
·
verified ·
1 Parent(s): 062c9e8

Upload tokenizers

Browse files
aa/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
aa/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
aa/tokenizer_config.json ADDED
@@ -0,0 +1,938 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "6": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "7": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "8": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "9": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "10": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "11": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "12": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "13": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "14": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "15": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "16": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "17": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "18": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "19": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "20": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "21": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "22": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "23": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "24": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "25": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "26": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "27": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "28": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "29": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "30": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "31": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "33": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "34": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "35": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "36": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "37": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "38": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "39": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "40": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "41": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "42": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "43": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "44": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "45": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "46": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "47": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "48": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "49": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "50": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "51": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "52": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "53": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "54": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "55": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "56": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "57": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "58": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "59": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "60": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "61": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "62": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "63": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "64": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "65": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "66": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "67": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "68": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "69": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "70": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "71": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "72": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "73": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "74": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "75": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "76": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "77": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "78": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "79": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "80": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "81": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "82": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "83": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "84": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "85": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "86": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "87": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "88": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "89": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "90": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "91": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "92": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "93": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "94": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "95": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "96": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "97": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "98": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "99": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "100": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "101": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "102": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": false,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 1000000000000000019884624838656,
934
+ "pad_token": "<pad>",
935
+ "sp_model_kwargs": {},
936
+ "tokenizer_class": "T5Tokenizer",
937
+ "unk_token": "<unk>"
938
+ }
smiles/merges.txt ADDED
@@ -0,0 +1,1238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #version: 0.2
2
+ [ n
3
+ [n H
4
+ [nH ]
5
+ B r
6
+ [ N
7
+ [N +
8
+ [N+ ]
9
+ [ O
10
+ [O -
11
+ [O- ]
12
+ C l
13
+ [ N
14
+ [N H
15
+ [NH 3
16
+ [NH3 +
17
+ [NH3+ ]
18
+ [ N
19
+ [N H
20
+ [NH +
21
+ [NH+ ]
22
+ [ N
23
+ [N -
24
+ [N- ]
25
+ [ n
26
+ [n +
27
+ [n+ ]
28
+ [ S
29
+ [S i
30
+ [Si ]
31
+ [ N
32
+ [N H
33
+ [NH 2
34
+ [NH2 +
35
+ [NH2+ ]
36
+ [ S
37
+ [S e
38
+ [Se ]
39
+ [ n
40
+ [n H
41
+ [nH +
42
+ [nH+ ]
43
+ [ C
44
+ [C +
45
+ [C+ ]
46
+ [ c
47
+ [c -
48
+ [c- ]
49
+ [ O
50
+ [O H
51
+ [OH +
52
+ [OH+ ]
53
+ [ O
54
+ [O +
55
+ [O+ ]
56
+ [ n
57
+ [n -
58
+ [n- ]
59
+ [ o
60
+ [o +
61
+ [o+ ]
62
+ [ C
63
+ [C -
64
+ [C- ]
65
+ [ P
66
+ [P H
67
+ [PH ]
68
+ [ S
69
+ [S +
70
+ [S+ ]
71
+ [ N
72
+ [N H
73
+ [NH -
74
+ [NH- ]
75
+ [ s
76
+ [s e
77
+ [se ]
78
+ [ C
79
+ [C H
80
+ [CH -
81
+ [CH- ]
82
+ [ P
83
+ [P +
84
+ [P+ ]
85
+ [ S
86
+ [S b
87
+ [Sb ]
88
+ [ F
89
+ [F -
90
+ [F- ]
91
+ [ C
92
+ [C l
93
+ [Cl -
94
+ [Cl- ]
95
+ [ B
96
+ [B r
97
+ [Br -
98
+ [Br- ]
99
+ [ I
100
+ [I -
101
+ [I- ]
102
+ [ S
103
+ [S H
104
+ [SH -
105
+ [SH- ]
106
+ [ B
107
+ [B -
108
+ [B- ]
109
+ [ A
110
+ [A s
111
+ [As ]
112
+ [ C
113
+ [C H
114
+ [CH +
115
+ [CH+ ]
116
+ [ s
117
+ [s +
118
+ [s+ ]
119
+ [ R
120
+ [R u
121
+ [Ru ]
122
+ [ N
123
+ [N a
124
+ [Na +
125
+ [Na+ ]
126
+ [ C
127
+ [C H
128
+ [CH 2
129
+ [CH2 -
130
+ [CH2- ]
131
+ [ c
132
+ [c H
133
+ [cH -
134
+ [cH- ]
135
+ [ c
136
+ [c +
137
+ [c+ ]
138
+ % 1
139
+ %1 0
140
+ % 1
141
+ %1 1
142
+ [ G
143
+ [G d
144
+ [Gd -
145
+ [Gd- 4
146
+ [Gd-4 ]
147
+ [ G
148
+ [G d
149
+ [Gd -
150
+ [Gd- 5
151
+ [Gd-5 ]
152
+ [ H
153
+ [H g
154
+ [Hg ]
155
+ [ Z
156
+ [Z n
157
+ [Zn +
158
+ [Zn+ 2
159
+ [Zn+2 ]
160
+ [ S
161
+ [S -
162
+ [S- ]
163
+ [ S
164
+ [S i
165
+ [Si H
166
+ [SiH 3
167
+ [SiH3 ]
168
+ [ B
169
+ [B i
170
+ [Bi H
171
+ [BiH 3
172
+ [BiH3 ]
173
+ [ S
174
+ [S e
175
+ [Se H
176
+ [SeH ]
177
+ [ N
178
+ [N a
179
+ [Na ]
180
+ % 1
181
+ %1 2
182
+ [ O
183
+ [O ]
184
+ [ S
185
+ [S i
186
+ [Si H
187
+ [SiH 2
188
+ [SiH2 ]
189
+ [ P
190
+ [P t
191
+ [Pt ]
192
+ [ F
193
+ [F e
194
+ [Fe ]
195
+ [ R
196
+ [R e
197
+ [Re +
198
+ [Re+ ]
199
+ < -
200
+ [ R
201
+ [R e
202
+ [Re ]
203
+ [ S
204
+ [S H
205
+ [SH ]
206
+ [ L
207
+ [L i
208
+ [Li ]
209
+ [ V
210
+ [V ]
211
+ [ O
212
+ [O H
213
+ [OH 2
214
+ [OH2 +
215
+ [OH2+ ]
216
+ [ S
217
+ [S n
218
+ [Sn ]
219
+ [ N
220
+ [N i
221
+ [Ni ]
222
+ [ S
223
+ [S H
224
+ [SH +
225
+ [SH+ ]
226
+ [ K
227
+ [K ]
228
+ [ A
229
+ [A u
230
+ [Au ]
231
+ [ C
232
+ [C o
233
+ [Co ]
234
+ [ C
235
+ [C u
236
+ [Cu ]
237
+ [ T
238
+ [T e
239
+ [Te ]
240
+ [ O
241
+ [O s
242
+ [Os ]
243
+ [ R
244
+ [R u
245
+ [Ru -
246
+ [Ru- ]
247
+ [ C
248
+ [C l
249
+ [Cl +
250
+ [Cl+ 3
251
+ [Cl+3 ]
252
+ [ R
253
+ [R e
254
+ [Re -
255
+ [Re- ]
256
+ [ Z
257
+ [Z n
258
+ [Zn +
259
+ [Zn+ ]
260
+ [ S
261
+ [S i
262
+ [Si H
263
+ [SiH ]
264
+ [ H
265
+ [H ]
266
+ [ O
267
+ [O H
268
+ [OH -
269
+ [OH- ]
270
+ [ C
271
+ [C a
272
+ [Ca ]
273
+ [ C
274
+ [C H
275
+ [CH 2
276
+ [CH2 ]
277
+ [ P
278
+ [P d
279
+ [Pd ]
280
+ [ C
281
+ [C H
282
+ [CH 2
283
+ [CH2 +
284
+ [CH2+ ]
285
+ [ C
286
+ [C ]
287
+ [ C
288
+ [C H
289
+ [CH ]
290
+ [ S
291
+ [S ]
292
+ [ N
293
+ [N H
294
+ [NH ]
295
+ [ c
296
+ [c ]
297
+ [ Z
298
+ [Z n
299
+ [Zn ]
300
+ [ F
301
+ [F e
302
+ [Fe +
303
+ [Fe+ 2
304
+ [Fe+2 ]
305
+ [ I
306
+ [I ]
307
+ [ A
308
+ [A r
309
+ [Ar ]
310
+ [ W
311
+ [W ]
312
+ [ P
313
+ [P ]
314
+ [ B
315
+ [B H
316
+ [BH 3
317
+ [BH3 -
318
+ [BH3- ]
319
+ [ G
320
+ [G d
321
+ [Gd ]
322
+ [ G
323
+ [G a
324
+ [Ga ]
325
+ [ L
326
+ [L u
327
+ [Lu ]
328
+ [ B
329
+ [B i
330
+ [Bi +
331
+ [Bi+ 3
332
+ [Bi+3 ]
333
+ [ N
334
+ [N +
335
+ [N+ 2
336
+ [N+2 ]
337
+ [ B
338
+ [B H
339
+ [BH 2
340
+ [BH2 -
341
+ [BH2- ]
342
+ [ K
343
+ [K +
344
+ [K+ ]
345
+ [ M
346
+ [M n
347
+ [Mn ]
348
+ [ B
349
+ [B H
350
+ [BH -
351
+ [BH- ]
352
+ [ C
353
+ [C a
354
+ [Ca +
355
+ [Ca+ 2
356
+ [Ca+2 ]
357
+ [ P
358
+ [P H
359
+ [PH +
360
+ [PH+ ]
361
+ % 1
362
+ %1 3
363
+ % 1
364
+ %1 4
365
+ % 1
366
+ %1 5
367
+ % 1
368
+ %1 6
369
+ % 1
370
+ %1 7
371
+ % 1
372
+ %1 8
373
+ % 1
374
+ %1 9
375
+ % 2
376
+ %2 0
377
+ % 2
378
+ %2 1
379
+ % 2
380
+ %2 2
381
+ % 2
382
+ %2 3
383
+ % 2
384
+ %2 4
385
+ % 2
386
+ %2 5
387
+ [ A
388
+ [A u
389
+ [Au -
390
+ [Au- ]
391
+ [ N
392
+ [N b
393
+ [Nb -
394
+ [Nb- 2
395
+ [Nb-2 ]
396
+ [ P
397
+ [P t
398
+ [Pt -
399
+ [Pt- 2
400
+ [Pt-2 ]
401
+ [ F
402
+ [F e
403
+ [Fe -
404
+ [Fe- 3
405
+ [Fe-3 ]
406
+ [ A
407
+ [A l
408
+ [Al -
409
+ [Al- 3
410
+ [Al-3 ]
411
+ [ C
412
+ [C u
413
+ [Cu -
414
+ [Cu- ]
415
+ [ A
416
+ [A g
417
+ [Ag -
418
+ [Ag- ]
419
+ [ A
420
+ [A s
421
+ [As -
422
+ [As- ]
423
+ [ P
424
+ [P d
425
+ [Pd -
426
+ [Pd- 2
427
+ [Pd-2 ]
428
+ [ P
429
+ [P -
430
+ [P- ]
431
+ [ L
432
+ [L i
433
+ [Li +
434
+ [Li+ ]
435
+ [ S
436
+ [S e
437
+ [Se -
438
+ [Se- ]
439
+ [ N
440
+ [N H
441
+ [NH 4
442
+ [NH4 +
443
+ [NH4+ ]
444
+ [ t
445
+ [t e
446
+ [te ]
447
+ [ T
448
+ [T c
449
+ [Tc ]
450
+ [ N
451
+ [N i
452
+ [Ni +
453
+ [Ni+ 2
454
+ [Ni+2 ]
455
+ [ S
456
+ [S r
457
+ [Sr +
458
+ [Sr+ 2
459
+ [Sr+2 ]
460
+ [ I
461
+ [I +
462
+ [I+ 3
463
+ [I+3 ]
464
+ [ B
465
+ [B r
466
+ [Br +
467
+ [Br+ 2
468
+ [Br+2 ]
469
+ [ I
470
+ [I +
471
+ [I+ 2
472
+ [I+2 ]
473
+ [ C
474
+ [C l
475
+ [Cl +
476
+ [Cl+ 2
477
+ [Cl+2 ]
478
+ [ F
479
+ [F e
480
+ [Fe -
481
+ [Fe- 2
482
+ [Fe-2 ]
483
+ [ I
484
+ [I +
485
+ [I+ ]
486
+ [ S
487
+ [S e
488
+ [Se +
489
+ [Se+ ]
490
+ [ M
491
+ [M o
492
+ [Mo ]
493
+ [ c
494
+ [c H
495
+ [cH +
496
+ [cH+ ]
497
+ [ t
498
+ [t e
499
+ [te +
500
+ [te+ ]
501
+ [ s
502
+ [s e
503
+ [se +
504
+ [se+ ]
505
+ [ C
506
+ [C a
507
+ [Ca H
508
+ [CaH 2
509
+ [CaH2 ]
510
+ [ A
511
+ [A g
512
+ [Ag +
513
+ [Ag+ ]
514
+ [ M
515
+ [M n
516
+ [Mn +
517
+ [Mn+ 2
518
+ [Mn+2 ]
519
+ [ M
520
+ [M g
521
+ [Mg +
522
+ [Mg+ 2
523
+ [Mg+2 ]
524
+ [ C
525
+ [C o
526
+ [Co +
527
+ [Co+ 2
528
+ [Co+2 ]
529
+ [ C
530
+ [C u
531
+ [Cu +
532
+ [Cu+ 2
533
+ [Cu+2 ]
534
+ [ C
535
+ [C u
536
+ [Cu +
537
+ [Cu+ ]
538
+ [ A
539
+ [A l
540
+ [Al ]
541
+ [ F
542
+ [F e
543
+ [Fe +
544
+ [Fe+ 3
545
+ [Fe+3 ]
546
+ [ H
547
+ [H g
548
+ [Hg +
549
+ [Hg+ 2
550
+ [Hg+2 ]
551
+ [ A
552
+ [A l
553
+ [Al -
554
+ [Al- ]
555
+ [ C
556
+ [C e
557
+ [Ce +
558
+ [Ce+ 3
559
+ [Ce+3 ]
560
+ [ N
561
+ [N ]
562
+ [ M
563
+ [M g
564
+ [Mg ]
565
+ [ C
566
+ [C d
567
+ [Cd +
568
+ [Cd+ 2
569
+ [Cd+2 ]
570
+ [ P
571
+ [P t
572
+ [Pt +
573
+ [Pt+ ]
574
+ [ B
575
+ [B e
576
+ [Be -
577
+ [Be- ]
578
+ [ X
579
+ [X e
580
+ [Xe ]
581
+ [ K
582
+ [K r
583
+ [Kr ]
584
+ [ T
585
+ [T a
586
+ [Ta ]
587
+ [ L
588
+ [L a
589
+ [La +
590
+ [La+ 3
591
+ [La+3 ]
592
+ [ B
593
+ [B a
594
+ [Ba +
595
+ [Ba+ 2
596
+ [Ba+2 ]
597
+ [ W
598
+ [W +
599
+ [W+ 4
600
+ [W+4 ]
601
+ [ N
602
+ [N i
603
+ [Ni +
604
+ [Ni+ ]
605
+ [ S
606
+ [S b
607
+ [Sb +
608
+ [Sb+ ]
609
+ [ S
610
+ [S b
611
+ [Sb +
612
+ [Sb+ 3
613
+ [Sb+3 ]
614
+ [ T
615
+ [T l
616
+ [Tl +
617
+ [Tl+ ]
618
+ [ C
619
+ [C s
620
+ [Cs +
621
+ [Cs+ ]
622
+ [ A
623
+ [A u
624
+ [Au +
625
+ [Au+ ]
626
+ [ Y
627
+ [Y b
628
+ [Yb +
629
+ [Yb+ 3
630
+ [Yb+3 ]
631
+ [ G
632
+ [G a
633
+ [Ga +
634
+ [Ga+ 3
635
+ [Ga+3 ]
636
+ [ C
637
+ [C r
638
+ [Cr +
639
+ [Cr+ 3
640
+ [Cr+3 ]
641
+ [ A
642
+ [A s
643
+ [As +
644
+ [As+ ]
645
+ [ M
646
+ [M n
647
+ [Mn +
648
+ [Mn+ 3
649
+ [Mn+3 ]
650
+ [ R
651
+ [R u
652
+ [Ru +
653
+ [Ru+ 2
654
+ [Ru+2 ]
655
+ [ I
656
+ [I r
657
+ [Ir +
658
+ [Ir+ 3
659
+ [Ir+3 ]
660
+ [ P
661
+ [P d
662
+ [Pd +
663
+ [Pd+ 2
664
+ [Pd+2 ]
665
+ [ B
666
+ [B e
667
+ [Be ]
668
+ [ B
669
+ [B e
670
+ [Be -
671
+ [Be- 2
672
+ [Be-2 ]
673
+ [ Y
674
+ [Y ]
675
+ [ M
676
+ [M o
677
+ [Mo +
678
+ [Mo+ 6
679
+ [Mo+6 ]
680
+ [ M
681
+ [M o
682
+ [Mo +
683
+ [Mo+ 4
684
+ [Mo+4 ]
685
+ [ H
686
+ [H f
687
+ [Hf ]
688
+ [ U
689
+ [U ]
690
+ [ M
691
+ [M g
692
+ [Mg -
693
+ [Mg- ]
694
+ [ S
695
+ [S e
696
+ [Se H
697
+ [SeH 2
698
+ [SeH2 ]
699
+ [ L
700
+ [L u
701
+ [Lu +
702
+ [Lu+ 3
703
+ [Lu+3 ]
704
+ [ N
705
+ [N i
706
+ [Ni +
707
+ [Ni+ 3
708
+ [Ni+3 ]
709
+ [ S
710
+ [S m
711
+ [Sm +
712
+ [Sm+ 3
713
+ [Sm+3 ]
714
+ [ P
715
+ [P b
716
+ [Pb +
717
+ [Pb+ 2
718
+ [Pb+2 ]
719
+ [ E
720
+ [E u
721
+ [Eu +
722
+ [Eu+ 2
723
+ [Eu+2 ]
724
+ [ P
725
+ [P t
726
+ [Pt +
727
+ [Pt+ 2
728
+ [Pt+2 ]
729
+ [ H
730
+ [H o
731
+ [Ho +
732
+ [Ho+ 3
733
+ [Ho+3 ]
734
+ [ M
735
+ [M g
736
+ [Mg -
737
+ [Mg- 2
738
+ [Mg-2 ]
739
+ [ Z
740
+ [Z r
741
+ [Zr ]
742
+ [ N
743
+ [N H
744
+ [NH 2
745
+ [NH2 ]
746
+ [ O
747
+ [O s
748
+ [Os +
749
+ [Os+ 4
750
+ [Os+4 ]
751
+ [ M
752
+ [M o
753
+ [Mo -
754
+ [Mo- ]
755
+ [ C
756
+ [C o
757
+ [Co +
758
+ [Co+ 3
759
+ [Co+3 ]
760
+ [ R
761
+ [R h
762
+ [Rh ]
763
+ [ T
764
+ [T b
765
+ [Tb +
766
+ [Tb+ 3
767
+ [Tb+3 ]
768
+ [ C
769
+ [C r
770
+ [Cr +
771
+ [Cr+ ]
772
+ [ R
773
+ [R h
774
+ [Rh +
775
+ [Rh+ ]
776
+ [ R
777
+ [R h
778
+ [Rh +
779
+ [Rh+ 3
780
+ [Rh+3 ]
781
+ [ R
782
+ [R b
783
+ [Rb +
784
+ [Rb+ ]
785
+ [ E
786
+ [E r
787
+ [Er +
788
+ [Er+ 3
789
+ [Er+3 ]
790
+ [ R
791
+ [R u
792
+ [Ru +
793
+ [Ru+ 3
794
+ [Ru+3 ]
795
+ [ A
796
+ [A u
797
+ [Au +
798
+ [Au+ 3
799
+ [Au+3 ]
800
+ [ H
801
+ [H +
802
+ [H+ ]
803
+ [ S
804
+ [S c
805
+ [Sc -
806
+ [Sc- ]
807
+ [ V
808
+ [V -
809
+ [V- ]
810
+ [ O
811
+ [O H
812
+ [OH +
813
+ [OH+ 2
814
+ [OH+2 ]
815
+ [ V
816
+ [V +
817
+ [V+ 3
818
+ [V+3 ]
819
+ [ A
820
+ [A g
821
+ [Ag ]
822
+ [ D
823
+ [D y
824
+ [Dy +
825
+ [Dy+ 2
826
+ [Dy+2 ]
827
+ [ E
828
+ [E u
829
+ [Eu +
830
+ [Eu+ 3
831
+ [Eu+3 ]
832
+ [ I
833
+ [I r
834
+ [Ir ]
835
+ [ B
836
+ [B e
837
+ [Be +
838
+ [Be+ 2
839
+ [Be+2 ]
840
+ [ A
841
+ [A s
842
+ [As H
843
+ [AsH 2
844
+ [AsH2 ]
845
+ [ H
846
+ [H -
847
+ [H- ]
848
+ [ C
849
+ [C r
850
+ [Cr ]
851
+ [ V
852
+ [V +
853
+ [V+ 2
854
+ [V+2 ]
855
+ [ S
856
+ [S H
857
+ [SH 2
858
+ [SH2 ]
859
+ [ T
860
+ [T i
861
+ [Ti +
862
+ [Ti+ 4
863
+ [Ti+4 ]
864
+ [ C
865
+ [C H
866
+ [CH 3
867
+ [CH3 ]
868
+ [ P
869
+ [P H
870
+ [PH 2
871
+ [PH2 ]
872
+ [ T
873
+ [T b
874
+ [Tb ]
875
+ [ C
876
+ [C +
877
+ [C+ 2
878
+ [C+2 ]
879
+ [ P
880
+ [P b
881
+ [Pb ]
882
+ [ P
883
+ [P r
884
+ [Pr +
885
+ [Pr+ 3
886
+ [Pr+3 ]
887
+ [ O
888
+ [O s
889
+ [Os +
890
+ [Os+ 3
891
+ [Os+3 ]
892
+ [ P
893
+ [P r
894
+ [Pr ]
895
+ [ C
896
+ [C u
897
+ [Cu -
898
+ [Cu- 2
899
+ [Cu-2 ]
900
+ [ O
901
+ [O +
902
+ [O+ 2
903
+ [O+2 ]
904
+ [ I
905
+ [I H
906
+ [IH ]
907
+ [ P
908
+ [P t
909
+ [Pt -
910
+ [Pt- ]
911
+ [ Y
912
+ [Y b
913
+ [Yb ]
914
+ [ I
915
+ [I r
916
+ [Ir +
917
+ [Ir+ 4
918
+ [Ir+4 ]
919
+ [ P
920
+ [P -
921
+ [P- 2
922
+ [P-2 ]
923
+ [ T
924
+ [T i
925
+ [Ti ]
926
+ [ N
927
+ [N d
928
+ [Nd +
929
+ [Nd+ 3
930
+ [Nd+3 ]
931
+ [ A
932
+ [A l
933
+ [Al +
934
+ [Al+ 3
935
+ [Al+3 ]
936
+ [ H
937
+ [H g
938
+ [Hg -
939
+ [Hg- 2
940
+ [Hg-2 ]
941
+ [ I
942
+ [I n
943
+ [In ]
944
+ [ F
945
+ [F e
946
+ [Fe +
947
+ [Fe+ ]
948
+ [ R
949
+ [R u
950
+ [Ru -
951
+ [Ru- 2
952
+ [Ru-2 ]
953
+ [ I
954
+ [I r
955
+ [Ir +
956
+ [Ir+ ]
957
+ [ R
958
+ [R h
959
+ [Rh +
960
+ [Rh+ 2
961
+ [Rh+2 ]
962
+ [ N
963
+ [N a
964
+ [Na H
965
+ [NaH ]
966
+ [ M
967
+ [M g
968
+ [Mg H
969
+ [MgH 2
970
+ [MgH2 ]
971
+ [ A
972
+ [A g
973
+ [Ag -
974
+ [Ag- 4
975
+ [Ag-4 ]
976
+ [ C
977
+ [C l
978
+ [Cl +
979
+ [Cl+ ]
980
+ [ Z
981
+ [Z n
982
+ [Zn -
983
+ [Zn- 2
984
+ [Zn-2 ]
985
+ [ H
986
+ [H H
987
+ [HH ]
988
+ [ H
989
+ [H g
990
+ [Hg +
991
+ [Hg+ ]
992
+ [ Y
993
+ [Y +
994
+ [Y+ 3
995
+ [Y+3 ]
996
+ [ P
997
+ [P b
998
+ [Pb +
999
+ [Pb+ ]
1000
+ [ G
1001
+ [G d
1002
+ [Gd +
1003
+ [Gd+ 3
1004
+ [Gd+3 ]
1005
+ [ P
1006
+ [P t
1007
+ [Pt +
1008
+ [Pt+ 4
1009
+ [Pt+4 ]
1010
+ c c
1011
+ O )
1012
+ C C
1013
+ C (
1014
+ = O)
1015
+ c 1
1016
+ c (
1017
+ c 2
1018
+ ( =O)
1019
+ C )
1020
+ C( =O)
1021
+ c1 cc
1022
+ c 3
1023
+ ( O)
1024
+ c n
1025
+ C 1
1026
+ c2 cc
1027
+ ) cc
1028
+ C O
1029
+ CC CC
1030
+ C( O)
1031
+ P (=O)
1032
+ C 2
1033
+ P(=O) (O)
1034
+ ( C)
1035
+ c1 n
1036
+ F )
1037
+ c1cc c(
1038
+ C(=O) N
1039
+ C N
1040
+ O =
1041
+ O CC
1042
+ c3 cc
1043
+ 2 )
1044
+ N )
1045
+ O C(
1046
+ [ n
1047
+ H ]
1048
+ c1cc cc
1049
+ [n H]
1050
+ O P(=O)(O)
1051
+ C l
1052
+ N C(=O)
1053
+ c 4
1054
+ 3 )
1055
+ c2cc cc
1056
+ )cc 1
1057
+ n 1
1058
+ + ]
1059
+ 2 ]
1060
+ CC N
1061
+ + 2]
1062
+ CC 1
1063
+ C( C)
1064
+ CO P(=O)(O)
1065
+ O) C(O)
1066
+ - ]
1067
+ c( =O)
1068
+ c1n cn
1069
+ C 3
1070
+ C =
1071
+ c2cc c(
1072
+ [ O
1073
+ [O -]
1074
+ CC (=O)
1075
+ n c(
1076
+ c1cccc c1
1077
+ c2cccc c2
1078
+ [ N
1079
+ S (=O)
1080
+ c3cc cc
1081
+ ( F)
1082
+ S(=O) (=O)
1083
+ n 2
1084
+ OC( COP(=O)(O)
1085
+ n cn
1086
+ c2 c(
1087
+ CCN (
1088
+ CC (C)
1089
+ C1 O
1090
+ c 5
1091
+ CC (
1092
+ cc (
1093
+ 4 )
1094
+ Cl )
1095
+ [O-] )
1096
+ c3cccc c3
1097
+ O= C(
1098
+ c1cc (
1099
+ cc 1
1100
+ OCC OCC
1101
+ OC(COP(=O)(O) OP(=O)(O)
1102
+ [ M
1103
+ c4 cc
1104
+ O C)
1105
+ O= C(O)
1106
+ c1ncn c2
1107
+ c3cc c(
1108
+ 2 C1
1109
+ N c1ncnc2
1110
+ C c1ccc(
1111
+ C(=O) O)
1112
+ C( F)
1113
+ CCCC CCCC
1114
+ C2 O)
1115
+ c2 )
1116
+ CC (O)
1117
+ c(=O) [nH]
1118
+ n (
1119
+ CC O
1120
+ c1ncn 2C1
1121
+ c( N)
1122
+ O)C(O) C1O
1123
+ CC(=O) N
1124
+ F )cc
1125
+ c( -
1126
+ C(F) (F)
1127
+ Nc1ncnc2 c1ncn2C1
1128
+ ) N
1129
+ c2 n
1130
+ CC CN
1131
+ c( C)
1132
+ C( C
1133
+ [N +]
1134
+ C( N)
1135
+ ( [O-])
1136
+ [M g
1137
+ O)C(O) C2O)
1138
+ CC 2
1139
+ c1cccc (
1140
+ C(=O) O
1141
+ [Mg +2]
1142
+ C 4
1143
+ = O
1144
+ c( N
1145
+ CO c1ccc(
1146
+ n +2]
1147
+ c2ccccc2 )
1148
+ [ C
1149
+ Cl )cc
1150
+ [N a
1151
+ [Na +]
1152
+ C c1cc
1153
+ N c1n
1154
+ B r
1155
+ 2 )cc1
1156
+ c2cc (
1157
+ CC OCC
1158
+ CN (
1159
+ C( =
1160
+ n 3
1161
+ c(N) ncn
1162
+ C S
1163
+ C c1
1164
+ c s
1165
+ c1 )
1166
+ O C
1167
+ N 2
1168
+ CC C(
1169
+ C c1n
1170
+ cc cc
1171
+ C2 OC(COP(=O)(O)OP(=O)(O)
1172
+ c(=O)[nH] 1
1173
+ c1 2
1174
+ Nc1ncnc2c1ncn2C1 OC(COP(=O)(O)OP(=O)(O)
1175
+ c2 nc(
1176
+ c3 )
1177
+ a +2]
1178
+ [C a+2]
1179
+ c( O)
1180
+ C(O) C(O)
1181
+ N =
1182
+ C #
1183
+ n 4
1184
+ (C) C)
1185
+ 1 )
1186
+ [ Z
1187
+ [Z n+2]
1188
+ 2) c1
1189
+ O=C( N
1190
+ = N
1191
+ c2 c1
1192
+ CN 1
1193
+ c4cc cc
1194
+ OP(=O)(O) OCC
1195
+ ( C
1196
+ n cc
1197
+ OCCOCC OCCOCC
1198
+ C( CC
1199
+ N 1
1200
+ [N+] (=O)
1201
+ c o
1202
+ CC =
1203
+ c3ccccc3 )
1204
+ C n1
1205
+ CC C2
1206
+ C(F)(F) F)
1207
+ c1ccccc1 )
1208
+ [ F
1209
+ [F e
1210
+ c3 c(
1211
+ cn c5
1212
+ CC 2)
1213
+ CC 3
1214
+ C2 )
1215
+ OC( n4
1216
+ c5 4)
1217
+ 3 OC(n4
1218
+ 3OC(n4 cnc5
1219
+ n c2
1220
+ O)C(O)C2O) c(=O)[nH]1
1221
+ OP(=O)(O) O)
1222
+ c [nH]
1223
+ [n +]
1224
+ c4cccc c4
1225
+ CO C(=O)
1226
+ Nc1n c2c(
1227
+ S(=O)(=O) N
1228
+ ncn 2
1229
+ CC C1
1230
+ cc 2
1231
+ cn c3
1232
+ c(N)ncn c54)
1233
+ 3OC(n4cnc5 c(N)ncnc54)
1234
+ c2cccc (
1235
+ n c3
1236
+ C(O) C3
1237
+ n 2)
1238
+ [Fe ]
smiles/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "mask_token": {
17
+ "content": "<mask>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "pad_token": {
24
+ "content": "<pad>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "<unk>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
smiles/tokenizer.json ADDED
@@ -0,0 +1,2254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 185,
8
+ "content": "<s>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 0,
17
+ "content": "<pad>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 1,
26
+ "content": "</s>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 2,
35
+ "content": "<unk>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 189,
44
+ "content": "<mask>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": null,
54
+ "post_processor": {
55
+ "type": "TemplateProcessing",
56
+ "single": [
57
+ {
58
+ "SpecialToken": {
59
+ "id": "<s>",
60
+ "type_id": 0
61
+ }
62
+ },
63
+ {
64
+ "Sequence": {
65
+ "id": "A",
66
+ "type_id": 0
67
+ }
68
+ },
69
+ {
70
+ "SpecialToken": {
71
+ "id": "</s>",
72
+ "type_id": 0
73
+ }
74
+ }
75
+ ],
76
+ "pair": [
77
+ {
78
+ "Sequence": {
79
+ "id": "A",
80
+ "type_id": 0
81
+ }
82
+ },
83
+ {
84
+ "Sequence": {
85
+ "id": "B",
86
+ "type_id": 1
87
+ }
88
+ }
89
+ ],
90
+ "special_tokens": {
91
+ "</s>": {
92
+ "id": "</s>",
93
+ "ids": [
94
+ 1
95
+ ],
96
+ "tokens": [
97
+ "</s>"
98
+ ]
99
+ },
100
+ "<s>": {
101
+ "id": "<s>",
102
+ "ids": [
103
+ 185
104
+ ],
105
+ "tokens": [
106
+ "<s>"
107
+ ]
108
+ }
109
+ }
110
+ },
111
+ "decoder": {
112
+ "type": "Replace",
113
+ "pattern": {
114
+ "String": ""
115
+ },
116
+ "content": ""
117
+ },
118
+ "model": {
119
+ "type": "BPE",
120
+ "dropout": null,
121
+ "unk_token": "<unk>",
122
+ "continuing_subword_prefix": null,
123
+ "end_of_word_suffix": null,
124
+ "fuse_unk": false,
125
+ "byte_fallback": false,
126
+ "ignore_merges": false,
127
+ "vocab": {
128
+ "[Cu-": 622,
129
+ "[Pr]": 1041,
130
+ "V": 230,
131
+ "E": 215,
132
+ "[Be-2]": 988,
133
+ "[Br": 526,
134
+ "[Re": 565,
135
+ "[Rh+3]": 1013,
136
+ "[Se-]": 934,
137
+ "3)": 300,
138
+ "[Tl+]": 976,
139
+ "d": 240,
140
+ "%1": 545,
141
+ "e": 241,
142
+ "c(-": 371,
143
+ "c1cccc": 294,
144
+ "c(N": 390,
145
+ "%22": 919,
146
+ "%21": 918,
147
+ "[Li]": 865,
148
+ "[Rh": 744,
149
+ "[Ce+3]": 961,
150
+ "[Kr": 678,
151
+ "</s>": 1,
152
+ "O=C(N": 435,
153
+ "[N-": 494,
154
+ "[Au-": 611,
155
+ "F)cc": 370,
156
+ "-": 196,
157
+ "[Lu]": 902,
158
+ "[Pb+2]": 999,
159
+ "[Nb-": 613,
160
+ "[Re+]": 861,
161
+ "[Ru+3]": 1016,
162
+ "[Pd]": 886,
163
+ "[Rh+2]": 1057,
164
+ "[AsH": 771,
165
+ "Cc1ccc(": 356,
166
+ "C(CC": 444,
167
+ "[Ba": 683,
168
+ "[Gd-5": 550,
169
+ "CC2": 384,
170
+ "[Mg-2]": 1003,
171
+ "[P-2": 790,
172
+ "[SH+": 574,
173
+ "[W+4": 687,
174
+ "[AsH2": 772,
175
+ "[Eu": 731,
176
+ "[Mo+4]": 991,
177
+ "H": 218,
178
+ "C(O)C3": 482,
179
+ "[C-]": 826,
180
+ "[BH-": 606,
181
+ "M": 222,
182
+ "[Fe-3]": 926,
183
+ "[Li": 568,
184
+ "[MgH": 802,
185
+ "[NaH]": 1058,
186
+ "[Nb-2": 614,
187
+ "[P+": 520,
188
+ "[Pt+4]": 1068,
189
+ "[o": 509,
190
+ "[S": 496,
191
+ "C(": 259,
192
+ "1)": 431,
193
+ "[Zn+": 554,
194
+ "[Ag+]": 952,
195
+ "[Cr": 701,
196
+ "[o+]": 825,
197
+ "<": 208,
198
+ "Cc1n": 415,
199
+ "[se+]": 950,
200
+ "[n-]": 824,
201
+ "N2": 413,
202
+ "CCCC": 275,
203
+ "U": 229,
204
+ "[Mn+2": 657,
205
+ "[Au]": 872,
206
+ "[Ar": 592,
207
+ "[Si]": 816,
208
+ "[c-": 504,
209
+ "[c-]": 821,
210
+ "X": 232,
211
+ "0": 198,
212
+ "CCN": 306,
213
+ "[Cr+3": 703,
214
+ "[N": 324,
215
+ "[Rb+": 751,
216
+ "=N": 436,
217
+ "OC(": 291,
218
+ "[Al-3": 621,
219
+ "[SiH]": 881,
220
+ "[c+]": 846,
221
+ "a": 237,
222
+ "[Ni+2]": 938,
223
+ "[Dy+": 766,
224
+ "[As": 534,
225
+ "ncn": 331,
226
+ "[H]": 882,
227
+ "O=C(": 343,
228
+ "[Ca": 587,
229
+ "c2": 263,
230
+ "CCO": 365,
231
+ "[Ir+4": 789,
232
+ "[Sb+3": 689,
233
+ "o": 249,
234
+ "R": 226,
235
+ "CCN(": 333,
236
+ "[Yb": 696,
237
+ "[I+2]": 942,
238
+ "[Nb": 612,
239
+ "C(F)(F)": 372,
240
+ "[H+]": 1018,
241
+ "[Zn-2": 806,
242
+ "[MgH2": 803,
243
+ "[N+2]": 904,
244
+ "[Ti]": 1049,
245
+ "[te+": 651,
246
+ "[NH4": 631,
247
+ "[te]": 936,
248
+ "c3c(": 456,
249
+ "9": 207,
250
+ "[Cu": 578,
251
+ "[Au-]": 923,
252
+ "[B": 525,
253
+ "[Ga+": 699,
254
+ "[Rb+]": 1014,
255
+ "[Mg-2": 738,
256
+ "[CH3]": 1034,
257
+ "CN1": 438,
258
+ "[Fe-3": 618,
259
+ "[Pd-2]": 931,
260
+ "<pad>": 0,
261
+ "[BH": 594,
262
+ "[Si": 497,
263
+ "c5": 336,
264
+ "[IH": 788,
265
+ "[Pt+4": 812,
266
+ "[Re]": 863,
267
+ "[BH2-]": 905,
268
+ "c3cc": 288,
269
+ "[PH2": 780,
270
+ "[Al": 619,
271
+ "[O+": 507,
272
+ "[Pd-": 626,
273
+ "C#": 428,
274
+ "[Tb+3": 747,
275
+ "OCCOCC": 346,
276
+ "[O]": 858,
277
+ "[SH+]": 870,
278
+ "O=C(O)": 351,
279
+ "[Hg": 552,
280
+ "[Dy+2": 767,
281
+ ")cc": 273,
282
+ "COC(=O)": 471,
283
+ "[Be-]": 966,
284
+ "[Br-": 527,
285
+ "[CH3": 779,
286
+ "[Ga]": 901,
287
+ "[Mg+2]": 387,
288
+ "[Mo+6": 715,
289
+ "[Rh+]": 1012,
290
+ "[OH2": 570,
291
+ "[Mg+2": 659,
292
+ "[SiH": 557,
293
+ "[Yb+3]": 979,
294
+ "[Mg-": 719,
295
+ "[Mo+4": 716,
296
+ "[se+": 652,
297
+ "[Eu+": 732,
298
+ "a+2]": 423,
299
+ "c(N)ncn": 407,
300
+ "c1": 261,
301
+ "Nc1nc2c(": 472,
302
+ "c1cccc(": 385,
303
+ "c2c(": 332,
304
+ "cc1": 345,
305
+ "CN": 285,
306
+ "[La+3": 682,
307
+ "[NH3+]": 813,
308
+ "co": 447,
309
+ "[As+]": 982,
310
+ "[SeH2]": 995,
311
+ "[F": 454,
312
+ "[L": 567,
313
+ "[G": 546,
314
+ "(C)C)": 430,
315
+ "[N-]": 815,
316
+ "[MgH2]": 1059,
317
+ "[C": 394,
318
+ ")": 193,
319
+ "[Cl+]": 1061,
320
+ "[NH2": 498,
321
+ "[Hg-2]": 1052,
322
+ "[Ni+2": 637,
323
+ "[OH": 505,
324
+ "[Ru-2]": 1055,
325
+ "[Tb": 745,
326
+ "[Ba+": 684,
327
+ "[Zn": 553,
328
+ "[Lu+": 721,
329
+ "[Ca+": 607,
330
+ "2C1": 354,
331
+ "c54)": 462,
332
+ "t": 253,
333
+ "[Cl+": 583,
334
+ "[Ga": 597,
335
+ "[Ru+2": 707,
336
+ "[Ru-2": 799,
337
+ "[Sb+": 688,
338
+ "CCOCC": 403,
339
+ "[Gd+": 810,
340
+ "c1ccccc1": 322,
341
+ "C2": 278,
342
+ "[Fe]": 484,
343
+ "[Bi": 559,
344
+ "[Ru-]": 877,
345
+ "[Cu+2": 663,
346
+ "[K+": 604,
347
+ "c2ccc(": 317,
348
+ "[Ru]": 843,
349
+ "OP(=O)(O)O)": 467,
350
+ "2)": 289,
351
+ "[Sr+2]": 939,
352
+ "OC(n4": 461,
353
+ "c(": 262,
354
+ "[nH]": 295,
355
+ "[Al-]": 960,
356
+ "[PH]": 827,
357
+ "[Ag-": 624,
358
+ "[Fe+": 590,
359
+ "[BiH": 560,
360
+ "cc": 256,
361
+ "[Kr]": 968,
362
+ "i": 245,
363
+ "CCCCCCCC": 359,
364
+ "b": 238,
365
+ "[Ti": 776,
366
+ "([O-])": 381,
367
+ "[As+": 704,
368
+ "%15": 912,
369
+ "Nc1n": 399,
370
+ "Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)(O)": 420,
371
+ "[CH2+": 589,
372
+ "Cc1cc": 398,
373
+ "[H-": 773,
374
+ "[Lu+3": 722,
375
+ "[Hg+": 665,
376
+ "[Mo+6]": 990,
377
+ "7": 205,
378
+ "O)C(O)C1O": 368,
379
+ "[As-]": 930,
380
+ "[OH2+": 571,
381
+ "[BH3-": 596,
382
+ "[Pd+2": 712,
383
+ "[Re+": 566,
384
+ "[S-]": 852,
385
+ "[SiH3]": 853,
386
+ "[W+4]": 972,
387
+ "c3cccc": 326,
388
+ "%16": 913,
389
+ "[Ru-": 582,
390
+ "4": 202,
391
+ "[I]": 895,
392
+ "CC2)": 458,
393
+ "[Ce": 667,
394
+ "[cH-": 543,
395
+ "[K+]": 906,
396
+ "[Rh+": 748,
397
+ "c1ccc(": 283,
398
+ "[C+2": 781,
399
+ "[Pb]": 1038,
400
+ "cccc": 416,
401
+ "C4": 388,
402
+ "[BH2-": 603,
403
+ "I": 219,
404
+ "[La+": 681,
405
+ "[P+]": 832,
406
+ "[SH": 530,
407
+ "nc3": 481,
408
+ "[Gd+3]": 1067,
409
+ "[Nd+3": 793,
410
+ "F": 216,
411
+ "[C-": 511,
412
+ "[D": 764,
413
+ "[Mn+2]": 953,
414
+ "O)C(O)C2O)": 383,
415
+ "A": 211,
416
+ "[HH]": 1063,
417
+ "[Ir+4]": 1047,
418
+ "[Pb": 727,
419
+ "]": 236,
420
+ "CCC2": 451,
421
+ "[Mo+": 714,
422
+ "<s>": 185,
423
+ "[As-": 625,
424
+ "[Fe+3]": 958,
425
+ "[Pr+": 783,
426
+ "C2OC(COP(=O)(O)OP(=O)(O)": 417,
427
+ "OC(COP(=O)(O)": 330,
428
+ "[NH-": 515,
429
+ "Cn1": 450,
430
+ "[AsH2]": 1028,
431
+ "CC(=O)": 320,
432
+ "2]": 305,
433
+ "[Mo": 649,
434
+ "[Sr+2": 640,
435
+ "K": 220,
436
+ "[Fe+2]": 894,
437
+ "[Ti+": 777,
438
+ "[Cr+3]": 981,
439
+ "[K]": 871,
440
+ "CC(=O)N": 369,
441
+ "[Bi+3]": 903,
442
+ "C(=O)O)": 357,
443
+ "[Ag]": 1023,
444
+ "[BH3": 595,
445
+ "[CH2-": 541,
446
+ "[CaH2]": 951,
447
+ "[Cd+": 671,
448
+ "[SH-]": 838,
449
+ "C(=": 405,
450
+ "[Pt-2": 616,
451
+ "[I+3": 642,
452
+ "[Na+]": 397,
453
+ "%2": 610,
454
+ "[Cu+": 662,
455
+ "[Gd+3": 811,
456
+ "S(=O)": 325,
457
+ "%10": 847,
458
+ "[V+3": 763,
459
+ "[s": 516,
460
+ "c2cccc(": 480,
461
+ "[Ag-4": 804,
462
+ "[Co]": 873,
463
+ "[Ho+3": 737,
464
+ "g": 243,
465
+ "[Cl-": 524,
466
+ "[BH2": 602,
467
+ "[Gd-4]": 849,
468
+ "[Li+]": 933,
469
+ "[NH3+": 492,
470
+ "[I+2": 645,
471
+ "[NH4+": 632,
472
+ "[Rh+2": 800,
473
+ "[SH2": 775,
474
+ "[Sr+": 639,
475
+ "[Br+": 643,
476
+ "[W+": 686,
477
+ "[Ca+2]": 424,
478
+ "[Zn+2": 555,
479
+ "[n-": 508,
480
+ "cnc3": 477,
481
+ "u": 254,
482
+ "[PH+]": 909,
483
+ "COP(=O)(O)": 310,
484
+ "[B-]": 839,
485
+ "[Ta": 679,
486
+ "[Al]": 957,
487
+ "[Rh]": 1009,
488
+ "O": 224,
489
+ "[Ca+2": 608,
490
+ "[Dy+2]": 1024,
491
+ "6": 204,
492
+ "[Pt": 564,
493
+ "[Re-": 585,
494
+ "[Nd": 791,
495
+ "C(N)": 380,
496
+ "[PH+": 609,
497
+ "[Pb+2": 729,
498
+ "[Yb+": 697,
499
+ "[Hf]": 992,
500
+ "[Yb+3": 698,
501
+ "c(N)": 367,
502
+ "c2cc(": 402,
503
+ "OCCOCCOCCOCC": 443,
504
+ "[C+]": 820,
505
+ "O)C(O)C2O)c(=O)[nH]1": 466,
506
+ "[Ta]": 969,
507
+ "C(C)": 309,
508
+ "[Se+": 648,
509
+ "c4cc": 349,
510
+ "%23": 920,
511
+ "<unk>": 2,
512
+ "[O+2]": 1043,
513
+ "Cl": 297,
514
+ "3OC(n4cnc5": 464,
515
+ "[Zn]": 893,
516
+ "Nc1ncnc2c1ncn2C1": 373,
517
+ "[I+3]": 940,
518
+ "[Bi+3": 600,
519
+ "[Co+2": 661,
520
+ "[SH-": 531,
521
+ "[Pd+": 711,
522
+ "[Gd]": 900,
523
+ "W": 231,
524
+ "[PH2]": 1035,
525
+ "[T": 579,
526
+ ")cc1": 302,
527
+ "[Ni+": 636,
528
+ "c(=O)[nH]1": 418,
529
+ "[Bi+": 599,
530
+ "[CH2-]": 844,
531
+ "c(=O)[nH]": 363,
532
+ "[Co+2]": 954,
533
+ "[BiH3]": 854,
534
+ "[CH]": 889,
535
+ "[Gd": 547,
536
+ "[Hg-": 796,
537
+ "[Xe": 677,
538
+ "OC(COP(=O)(O)OP(=O)(O)": 347,
539
+ "c2nc(": 421,
540
+ "[Ni]": 869,
541
+ "C(F)(F)F)": 452,
542
+ "[I+": 641,
543
+ "[O+]": 823,
544
+ "[OH+2]": 1021,
545
+ "3": 201,
546
+ "c1ncn2C1": 366,
547
+ "[Pr": 782,
548
+ "[O+2": 787,
549
+ "c[nH]": 468,
550
+ "[c]": 892,
551
+ "c4": 299,
552
+ "[S+]": 828,
553
+ "[s+]": 842,
554
+ "[Ir]": 1026,
555
+ "%18": 915,
556
+ "CN(": 404,
557
+ "[H-]": 1029,
558
+ "<mask>": 189,
559
+ "[Ga+3": 700,
560
+ "[Pd-2": 627,
561
+ "[Rb": 750,
562
+ "C(C": 378,
563
+ "[Er+3": 754,
564
+ "[OH-]": 883,
565
+ "(C)": 280,
566
+ "(O)": 269,
567
+ "[Hg+2": 666,
568
+ "[Re-]": 879,
569
+ "[S+": 514,
570
+ "c1cc(": 344,
571
+ "#version:": 485,
572
+ "[cH+]": 948,
573
+ "S(=O)(=O)": 328,
574
+ "[Pd+2]": 986,
575
+ "5": 203,
576
+ "Nc1ncnc2": 355,
577
+ "[Au+": 694,
578
+ "#": 190,
579
+ "[Tb+3]": 1010,
580
+ "[Pt]": 860,
581
+ "[Al+": 794,
582
+ "CC3": 459,
583
+ "CC=": 448,
584
+ "[Tc]": 937,
585
+ "C(=O)N": 284,
586
+ "-]": 312,
587
+ "[Co+": 660,
588
+ "[Cu-2": 786,
589
+ "[Mg-]": 994,
590
+ "[Sb": 521,
591
+ "[Ca]": 884,
592
+ "+2]": 307,
593
+ "[CH-": 519,
594
+ "[Cu+2]": 955,
595
+ "[Ag-]": 929,
596
+ "[Cl+3": 584,
597
+ "CCC(": 414,
598
+ "N=": 427,
599
+ "[Cd+2]": 964,
600
+ "[Co+3]": 1008,
601
+ "[Cu-]": 928,
602
+ "P(=O)": 277,
603
+ "COc1ccc(": 391,
604
+ "[F-": 522,
605
+ "[Ho": 735,
606
+ "[Ho+3]": 1002,
607
+ "%": 191,
608
+ "[OH-": 586,
609
+ "[Al+3": 795,
610
+ "[Os+": 740,
611
+ "[Br+2": 644,
612
+ "[Fe": 455,
613
+ "[Sn]": 868,
614
+ "S(=O)(=O)N": 473,
615
+ "[Ir+": 709,
616
+ "[V-": 760,
617
+ "Br": 400,
618
+ "+]": 304,
619
+ "[CH": 518,
620
+ "[Fe-2]": 944,
621
+ "[Cs+": 693,
622
+ "2)cc1": 401,
623
+ "[Pb+": 728,
624
+ "[Cl+2": 646,
625
+ "[Pb+]": 1066,
626
+ "[NH2]": 1005,
627
+ "=O": 389,
628
+ "O)C(O)": 311,
629
+ "Y": 233,
630
+ "[Co+3": 743,
631
+ "[Mg]": 963,
632
+ "[Na+": 539,
633
+ "[W]": 897,
634
+ "%24": 921,
635
+ "[Na": 396,
636
+ "[X": 676,
637
+ "[As]": 840,
638
+ "[Br-]": 836,
639
+ "[F-]": 834,
640
+ "[CH+": 535,
641
+ "OP(=O)(O)": 296,
642
+ "[Y+": 808,
643
+ "C1": 271,
644
+ "[NH": 490,
645
+ "[Yb]": 1046,
646
+ "[NH2+": 499,
647
+ "[n+": 495,
648
+ "[nH+]": 819,
649
+ "[s+": 536,
650
+ "2": 200,
651
+ "[U": 718,
652
+ "c(N)ncnc54)": 478,
653
+ "c3": 268,
654
+ "f": 242,
655
+ "[V]": 866,
656
+ "cc2": 476,
657
+ "OCC": 287,
658
+ "[Cd+2": 672,
659
+ "[I-": 529,
660
+ "[In]": 1053,
661
+ "[Al+3]": 1051,
662
+ "c1n": 281,
663
+ "CC1": 308,
664
+ "+": 195,
665
+ "C(O)": 276,
666
+ "c(O)": 425,
667
+ "Cl)": 340,
668
+ "[Co": 577,
669
+ "[C+2]": 1037,
670
+ "c12": 419,
671
+ "C(F)": 358,
672
+ "[Hg]": 851,
673
+ "4)": 339,
674
+ "[Sm+": 725,
675
+ "[Zn+]": 880,
676
+ "3OC(n4cnc5c(N)ncnc54)": 479,
677
+ "[Mn]": 907,
678
+ "[I-]": 837,
679
+ "[N+": 488,
680
+ "[Ni+3": 723,
681
+ "[E": 730,
682
+ "[I+]": 945,
683
+ "[O-": 489,
684
+ "[Ce+": 668,
685
+ "[Sn": 572,
686
+ "O)": 257,
687
+ "[Eu+2]": 1000,
688
+ "8": 206,
689
+ "[Tb+": 746,
690
+ "[Zr]": 1004,
691
+ "c2c1": 437,
692
+ "cs": 410,
693
+ "[OH+2": 761,
694
+ "C3": 315,
695
+ "c1cc": 267,
696
+ "h": 244,
697
+ "nc2": 465,
698
+ "ncc": 442,
699
+ "[Ir+]": 1056,
700
+ "c4cccc": 439,
701
+ "[Sb]": 833,
702
+ "[NH-]": 829,
703
+ "n(": 364,
704
+ "CCC1": 475,
705
+ "[Pt+": 673,
706
+ "[Be]": 987,
707
+ "[Eu+3": 768,
708
+ "[Mo-": 742,
709
+ "L": 221,
710
+ "[P]": 898,
711
+ "[PH": 513,
712
+ "n1": 303,
713
+ "[Mg+": 658,
714
+ "[Nb-2]": 924,
715
+ "[Au+]": 978,
716
+ "[Mn+": 656,
717
+ "c2ccccc2": 323,
718
+ "%13": 910,
719
+ "ncn2": 474,
720
+ "[O": 318,
721
+ "[Pr+3": 784,
722
+ "1": 199,
723
+ "CS": 408,
724
+ "[Ba+2": 685,
725
+ "[Be": 674,
726
+ "[Cl+3]": 878,
727
+ ".": 197,
728
+ "[Ir": 708,
729
+ "[NH2+]": 817,
730
+ "[Pr+3]": 1039,
731
+ "[C]": 888,
732
+ "[Pt-": 615,
733
+ "[NaH": 801,
734
+ "[Tc": 635,
735
+ "[Te]": 875,
736
+ "[V+3]": 1022,
737
+ "[Zr": 739,
738
+ "[se": 517,
739
+ "[Sc": 758,
740
+ "N)": 290,
741
+ "[Mg": 382,
742
+ "[Ag": 623,
743
+ "[Tb]": 1036,
744
+ "[nH+": 501,
745
+ "[o+": 510,
746
+ "c": 239,
747
+ "[Ni+]": 973,
748
+ "[V-]": 1020,
749
+ "[Lu+3]": 996,
750
+ "[Ag+": 655,
751
+ "(=O)": 264,
752
+ "[SH2]": 1032,
753
+ "c3)": 422,
754
+ "[Au": 576,
755
+ ">": 210,
756
+ "[Ru+": 706,
757
+ "[Se": 500,
758
+ "[te+]": 949,
759
+ "cc(": 338,
760
+ "[Os]": 876,
761
+ "[Ag-4]": 1060,
762
+ "[Sm": 724,
763
+ "[cH+": 650,
764
+ "[Er": 752,
765
+ "[t": 633,
766
+ "[Fe+2": 591,
767
+ "[Fe+3": 664,
768
+ "CC(O)": 362,
769
+ "[Dy": 765,
770
+ "[M": 348,
771
+ "[Ir+3": 710,
772
+ "[BH3-]": 899,
773
+ "[Mo]": 947,
774
+ "[Ni": 573,
775
+ "[N]": 962,
776
+ "[Se+]": 946,
777
+ "C(=O)O": 386,
778
+ "[In": 798,
779
+ "C2)": 460,
780
+ "c(C)": 377,
781
+ "[Be-2": 713,
782
+ "[Gd-4": 549,
783
+ "[Nd+": 792,
784
+ "(F)": 327,
785
+ "c3ccccc3": 342,
786
+ "[N+]": 379,
787
+ "[OH+]": 822,
788
+ "[cH": 542,
789
+ "[Ni+3]": 997,
790
+ "[Lu": 598,
791
+ "=O)": 260,
792
+ "[NH4+]": 935,
793
+ "nc(": 321,
794
+ "c1ncn": 314,
795
+ "[NH]": 891,
796
+ "H]": 293,
797
+ "C(O)C(O)": 426,
798
+ "[Pd": 588,
799
+ "[SiH3": 558,
800
+ "[N+](=O)": 446,
801
+ "%17": 914,
802
+ "CO": 274,
803
+ "[Ar]": 896,
804
+ "[Cd": 670,
805
+ "[Ga+3]": 980,
806
+ "C)": 265,
807
+ "[Pt-2]": 925,
808
+ "[Se]": 818,
809
+ "[Rh+3": 749,
810
+ "[Y": 695,
811
+ "%25": 922,
812
+ "[Mo-]": 1007,
813
+ "c1)": 411,
814
+ "c1ccccc1)": 453,
815
+ "Cl)cc": 395,
816
+ "[Al-3]": 927,
817
+ "[Sm+3]": 998,
818
+ "[Be-": 675,
819
+ "[Sb+]": 974,
820
+ "[B-": 532,
821
+ "[Tl": 690,
822
+ "c1ncnc2": 352,
823
+ "*": 194,
824
+ "[Os+4": 741,
825
+ "[N+2": 601,
826
+ "[SiH2]": 859,
827
+ "[Y+3": 809,
828
+ "y": 255,
829
+ "OP(=O)(O)OCC": 440,
830
+ "3OC(n4": 463,
831
+ "%12": 857,
832
+ "%20": 917,
833
+ "P": 225,
834
+ "[Au+3]": 1017,
835
+ "[Ho+": 736,
836
+ "[OH2+]": 867,
837
+ "[Y+3]": 1065,
838
+ "0.2": 486,
839
+ "[CH-]": 831,
840
+ "[Br+2]": 941,
841
+ "[CH2]": 885,
842
+ "[SeH": 562,
843
+ "D": 214,
844
+ "[A": 533,
845
+ "[Be+2]": 1027,
846
+ "[CH2": 540,
847
+ "(": 192,
848
+ "[Hg+]": 1064,
849
+ "<-": 862,
850
+ "[Li+": 629,
851
+ "[NH3": 491,
852
+ "[Nd+3]": 1050,
853
+ "B": 212,
854
+ "[Cr]": 1030,
855
+ "[Cs": 692,
856
+ "[Er+3]": 1015,
857
+ "C=": 316,
858
+ "[Eu+3]": 1025,
859
+ "[Fe-2": 647,
860
+ "C(=O)": 266,
861
+ "[NH+": 493,
862
+ "[O-])": 341,
863
+ "[Ir+3]": 985,
864
+ "[Te": 580,
865
+ "[BH-]": 908,
866
+ "[Cl+2]": 943,
867
+ "[Ti+4": 778,
868
+ "Z": 234,
869
+ "2)c1": 434,
870
+ "S": 227,
871
+ "[Ti+4]": 1033,
872
+ "[V+": 762,
873
+ "[P-": 628,
874
+ "[Zn-2]": 1062,
875
+ "c2n": 375,
876
+ "[Gd-5]": 850,
877
+ "[n+]": 469,
878
+ "n2": 329,
879
+ "(C": 441,
880
+ "[HH": 807,
881
+ "[Zn-": 805,
882
+ "[P": 512,
883
+ "[S-": 556,
884
+ "[Tl+": 691,
885
+ "Cc1": 409,
886
+ "[P-]": 932,
887
+ "[Cr+": 702,
888
+ "[I": 528,
889
+ "[U]": 993,
890
+ "[Al-": 620,
891
+ "[W": 593,
892
+ "[SeH]": 855,
893
+ ")N": 374,
894
+ "[R": 537,
895
+ "[Sc-]": 1019,
896
+ "[SiH2": 563,
897
+ "[cH-]": 845,
898
+ "[te": 634,
899
+ "[Na]": 856,
900
+ "[V+2]": 1031,
901
+ "c3ccc(": 353,
902
+ "c2)": 361,
903
+ "m": 247,
904
+ "n+2]": 392,
905
+ "p": 250,
906
+ "F)": 282,
907
+ "N": 223,
908
+ "[Ba+2]": 971,
909
+ "[Cs+]": 977,
910
+ "[Er+": 753,
911
+ "[H+": 757,
912
+ "[Cu+]": 956,
913
+ "[Cu-2]": 1042,
914
+ "[Hg-2": 797,
915
+ "[OH+": 506,
916
+ "[Os": 581,
917
+ "[Os+4]": 1006,
918
+ "[Hf": 717,
919
+ "[Pt+2": 734,
920
+ "[Ru": 538,
921
+ "[S]": 890,
922
+ "[Mn": 605,
923
+ "[Sr": 638,
924
+ "c3ccccc3)": 449,
925
+ "C1O": 335,
926
+ "n4": 429,
927
+ "[Au+3": 756,
928
+ "cn": 270,
929
+ "[Sb+3]": 975,
930
+ "CC(": 337,
931
+ "CC(C)": 334,
932
+ "[Pt+]": 965,
933
+ "[V+2": 774,
934
+ "[Z": 432,
935
+ "[nH": 487,
936
+ "[Be+2": 770,
937
+ "[Se-": 630,
938
+ "[Mn+3": 705,
939
+ "[": 235,
940
+ "[C+": 502,
941
+ "CCCN": 376,
942
+ "T": 228,
943
+ "[Eu+2": 733,
944
+ "[Fe+]": 1054,
945
+ "[NH+]": 814,
946
+ "%14": 911,
947
+ "[Os+3]": 1040,
948
+ "[Be+": 769,
949
+ "N1": 445,
950
+ "[Cr+]": 1011,
951
+ "[Ru+2]": 984,
952
+ "[Sm+3": 726,
953
+ "[Y]": 989,
954
+ "[c": 503,
955
+ "P(=O)(O)": 279,
956
+ "[CaH": 653,
957
+ "[Sc-": 759,
958
+ "[n": 292,
959
+ "c2cccc": 301,
960
+ "s": 252,
961
+ "[BiH3": 561,
962
+ "[IH]": 1044,
963
+ "[CaH2": 654,
964
+ "[se]": 830,
965
+ "[SeH2": 720,
966
+ "[Cl": 523,
967
+ "[Mn+3]": 983,
968
+ "CC": 258,
969
+ "O=": 286,
970
+ "[Cl-]": 835,
971
+ "C2O)": 360,
972
+ "G": 217,
973
+ "[K": 575,
974
+ "[Ce+3": 669,
975
+ "[Xe]": 967,
976
+ "[P-2]": 1048,
977
+ "[V": 569,
978
+ "[Os+3": 785,
979
+ "[Zn+2]": 433,
980
+ "[Fe-": 617,
981
+ "[Pt-]": 1045,
982
+ "[Hg+2]": 959,
983
+ "c(=O)": 313,
984
+ "[SH]": 864,
985
+ "[CH2+]": 887,
986
+ "c2cc": 272,
987
+ "[Gd-": 548,
988
+ "c2ccccc2)": 393,
989
+ "c4ccccc4": 470,
990
+ "[O-]": 319,
991
+ "OC)": 350,
992
+ "l": 246,
993
+ "%19": 916,
994
+ "[Ru+3": 755,
995
+ "[c+": 544,
996
+ "[Cu]": 874,
997
+ "[Pt+2]": 1001,
998
+ "[La+3]": 970,
999
+ "n": 248,
1000
+ "n2)": 483,
1001
+ "n3": 406,
1002
+ "%11": 848,
1003
+ "[H": 551,
1004
+ "[CH+]": 841,
1005
+ "cnc5": 457,
1006
+ "[La": 680,
1007
+ "r": 251,
1008
+ "NC(=O)": 298,
1009
+ "C": 213,
1010
+ "=": 209,
1011
+ "OC": 412
1012
+ },
1013
+ "merges": [
1014
+ "#version: 0.2",
1015
+ "[ n",
1016
+ "[n H",
1017
+ "[nH ]",
1018
+ "B r",
1019
+ "[ N",
1020
+ "[N +",
1021
+ "[N+ ]",
1022
+ "[ O",
1023
+ "[O -",
1024
+ "[O- ]",
1025
+ "C l",
1026
+ "[ N",
1027
+ "[N H",
1028
+ "[NH 3",
1029
+ "[NH3 +",
1030
+ "[NH3+ ]",
1031
+ "[ N",
1032
+ "[N H",
1033
+ "[NH +",
1034
+ "[NH+ ]",
1035
+ "[ N",
1036
+ "[N -",
1037
+ "[N- ]",
1038
+ "[ n",
1039
+ "[n +",
1040
+ "[n+ ]",
1041
+ "[ S",
1042
+ "[S i",
1043
+ "[Si ]",
1044
+ "[ N",
1045
+ "[N H",
1046
+ "[NH 2",
1047
+ "[NH2 +",
1048
+ "[NH2+ ]",
1049
+ "[ S",
1050
+ "[S e",
1051
+ "[Se ]",
1052
+ "[ n",
1053
+ "[n H",
1054
+ "[nH +",
1055
+ "[nH+ ]",
1056
+ "[ C",
1057
+ "[C +",
1058
+ "[C+ ]",
1059
+ "[ c",
1060
+ "[c -",
1061
+ "[c- ]",
1062
+ "[ O",
1063
+ "[O H",
1064
+ "[OH +",
1065
+ "[OH+ ]",
1066
+ "[ O",
1067
+ "[O +",
1068
+ "[O+ ]",
1069
+ "[ n",
1070
+ "[n -",
1071
+ "[n- ]",
1072
+ "[ o",
1073
+ "[o +",
1074
+ "[o+ ]",
1075
+ "[ C",
1076
+ "[C -",
1077
+ "[C- ]",
1078
+ "[ P",
1079
+ "[P H",
1080
+ "[PH ]",
1081
+ "[ S",
1082
+ "[S +",
1083
+ "[S+ ]",
1084
+ "[ N",
1085
+ "[N H",
1086
+ "[NH -",
1087
+ "[NH- ]",
1088
+ "[ s",
1089
+ "[s e",
1090
+ "[se ]",
1091
+ "[ C",
1092
+ "[C H",
1093
+ "[CH -",
1094
+ "[CH- ]",
1095
+ "[ P",
1096
+ "[P +",
1097
+ "[P+ ]",
1098
+ "[ S",
1099
+ "[S b",
1100
+ "[Sb ]",
1101
+ "[ F",
1102
+ "[F -",
1103
+ "[F- ]",
1104
+ "[ C",
1105
+ "[C l",
1106
+ "[Cl -",
1107
+ "[Cl- ]",
1108
+ "[ B",
1109
+ "[B r",
1110
+ "[Br -",
1111
+ "[Br- ]",
1112
+ "[ I",
1113
+ "[I -",
1114
+ "[I- ]",
1115
+ "[ S",
1116
+ "[S H",
1117
+ "[SH -",
1118
+ "[SH- ]",
1119
+ "[ B",
1120
+ "[B -",
1121
+ "[B- ]",
1122
+ "[ A",
1123
+ "[A s",
1124
+ "[As ]",
1125
+ "[ C",
1126
+ "[C H",
1127
+ "[CH +",
1128
+ "[CH+ ]",
1129
+ "[ s",
1130
+ "[s +",
1131
+ "[s+ ]",
1132
+ "[ R",
1133
+ "[R u",
1134
+ "[Ru ]",
1135
+ "[ N",
1136
+ "[N a",
1137
+ "[Na +",
1138
+ "[Na+ ]",
1139
+ "[ C",
1140
+ "[C H",
1141
+ "[CH 2",
1142
+ "[CH2 -",
1143
+ "[CH2- ]",
1144
+ "[ c",
1145
+ "[c H",
1146
+ "[cH -",
1147
+ "[cH- ]",
1148
+ "[ c",
1149
+ "[c +",
1150
+ "[c+ ]",
1151
+ "% 1",
1152
+ "%1 0",
1153
+ "% 1",
1154
+ "%1 1",
1155
+ "[ G",
1156
+ "[G d",
1157
+ "[Gd -",
1158
+ "[Gd- 4",
1159
+ "[Gd-4 ]",
1160
+ "[ G",
1161
+ "[G d",
1162
+ "[Gd -",
1163
+ "[Gd- 5",
1164
+ "[Gd-5 ]",
1165
+ "[ H",
1166
+ "[H g",
1167
+ "[Hg ]",
1168
+ "[ Z",
1169
+ "[Z n",
1170
+ "[Zn +",
1171
+ "[Zn+ 2",
1172
+ "[Zn+2 ]",
1173
+ "[ S",
1174
+ "[S -",
1175
+ "[S- ]",
1176
+ "[ S",
1177
+ "[S i",
1178
+ "[Si H",
1179
+ "[SiH 3",
1180
+ "[SiH3 ]",
1181
+ "[ B",
1182
+ "[B i",
1183
+ "[Bi H",
1184
+ "[BiH 3",
1185
+ "[BiH3 ]",
1186
+ "[ S",
1187
+ "[S e",
1188
+ "[Se H",
1189
+ "[SeH ]",
1190
+ "[ N",
1191
+ "[N a",
1192
+ "[Na ]",
1193
+ "% 1",
1194
+ "%1 2",
1195
+ "[ O",
1196
+ "[O ]",
1197
+ "[ S",
1198
+ "[S i",
1199
+ "[Si H",
1200
+ "[SiH 2",
1201
+ "[SiH2 ]",
1202
+ "[ P",
1203
+ "[P t",
1204
+ "[Pt ]",
1205
+ "[ F",
1206
+ "[F e",
1207
+ "[Fe ]",
1208
+ "[ R",
1209
+ "[R e",
1210
+ "[Re +",
1211
+ "[Re+ ]",
1212
+ "< -",
1213
+ "[ R",
1214
+ "[R e",
1215
+ "[Re ]",
1216
+ "[ S",
1217
+ "[S H",
1218
+ "[SH ]",
1219
+ "[ L",
1220
+ "[L i",
1221
+ "[Li ]",
1222
+ "[ V",
1223
+ "[V ]",
1224
+ "[ O",
1225
+ "[O H",
1226
+ "[OH 2",
1227
+ "[OH2 +",
1228
+ "[OH2+ ]",
1229
+ "[ S",
1230
+ "[S n",
1231
+ "[Sn ]",
1232
+ "[ N",
1233
+ "[N i",
1234
+ "[Ni ]",
1235
+ "[ S",
1236
+ "[S H",
1237
+ "[SH +",
1238
+ "[SH+ ]",
1239
+ "[ K",
1240
+ "[K ]",
1241
+ "[ A",
1242
+ "[A u",
1243
+ "[Au ]",
1244
+ "[ C",
1245
+ "[C o",
1246
+ "[Co ]",
1247
+ "[ C",
1248
+ "[C u",
1249
+ "[Cu ]",
1250
+ "[ T",
1251
+ "[T e",
1252
+ "[Te ]",
1253
+ "[ O",
1254
+ "[O s",
1255
+ "[Os ]",
1256
+ "[ R",
1257
+ "[R u",
1258
+ "[Ru -",
1259
+ "[Ru- ]",
1260
+ "[ C",
1261
+ "[C l",
1262
+ "[Cl +",
1263
+ "[Cl+ 3",
1264
+ "[Cl+3 ]",
1265
+ "[ R",
1266
+ "[R e",
1267
+ "[Re -",
1268
+ "[Re- ]",
1269
+ "[ Z",
1270
+ "[Z n",
1271
+ "[Zn +",
1272
+ "[Zn+ ]",
1273
+ "[ S",
1274
+ "[S i",
1275
+ "[Si H",
1276
+ "[SiH ]",
1277
+ "[ H",
1278
+ "[H ]",
1279
+ "[ O",
1280
+ "[O H",
1281
+ "[OH -",
1282
+ "[OH- ]",
1283
+ "[ C",
1284
+ "[C a",
1285
+ "[Ca ]",
1286
+ "[ C",
1287
+ "[C H",
1288
+ "[CH 2",
1289
+ "[CH2 ]",
1290
+ "[ P",
1291
+ "[P d",
1292
+ "[Pd ]",
1293
+ "[ C",
1294
+ "[C H",
1295
+ "[CH 2",
1296
+ "[CH2 +",
1297
+ "[CH2+ ]",
1298
+ "[ C",
1299
+ "[C ]",
1300
+ "[ C",
1301
+ "[C H",
1302
+ "[CH ]",
1303
+ "[ S",
1304
+ "[S ]",
1305
+ "[ N",
1306
+ "[N H",
1307
+ "[NH ]",
1308
+ "[ c",
1309
+ "[c ]",
1310
+ "[ Z",
1311
+ "[Z n",
1312
+ "[Zn ]",
1313
+ "[ F",
1314
+ "[F e",
1315
+ "[Fe +",
1316
+ "[Fe+ 2",
1317
+ "[Fe+2 ]",
1318
+ "[ I",
1319
+ "[I ]",
1320
+ "[ A",
1321
+ "[A r",
1322
+ "[Ar ]",
1323
+ "[ W",
1324
+ "[W ]",
1325
+ "[ P",
1326
+ "[P ]",
1327
+ "[ B",
1328
+ "[B H",
1329
+ "[BH 3",
1330
+ "[BH3 -",
1331
+ "[BH3- ]",
1332
+ "[ G",
1333
+ "[G d",
1334
+ "[Gd ]",
1335
+ "[ G",
1336
+ "[G a",
1337
+ "[Ga ]",
1338
+ "[ L",
1339
+ "[L u",
1340
+ "[Lu ]",
1341
+ "[ B",
1342
+ "[B i",
1343
+ "[Bi +",
1344
+ "[Bi+ 3",
1345
+ "[Bi+3 ]",
1346
+ "[ N",
1347
+ "[N +",
1348
+ "[N+ 2",
1349
+ "[N+2 ]",
1350
+ "[ B",
1351
+ "[B H",
1352
+ "[BH 2",
1353
+ "[BH2 -",
1354
+ "[BH2- ]",
1355
+ "[ K",
1356
+ "[K +",
1357
+ "[K+ ]",
1358
+ "[ M",
1359
+ "[M n",
1360
+ "[Mn ]",
1361
+ "[ B",
1362
+ "[B H",
1363
+ "[BH -",
1364
+ "[BH- ]",
1365
+ "[ C",
1366
+ "[C a",
1367
+ "[Ca +",
1368
+ "[Ca+ 2",
1369
+ "[Ca+2 ]",
1370
+ "[ P",
1371
+ "[P H",
1372
+ "[PH +",
1373
+ "[PH+ ]",
1374
+ "% 1",
1375
+ "%1 3",
1376
+ "% 1",
1377
+ "%1 4",
1378
+ "% 1",
1379
+ "%1 5",
1380
+ "% 1",
1381
+ "%1 6",
1382
+ "% 1",
1383
+ "%1 7",
1384
+ "% 1",
1385
+ "%1 8",
1386
+ "% 1",
1387
+ "%1 9",
1388
+ "% 2",
1389
+ "%2 0",
1390
+ "% 2",
1391
+ "%2 1",
1392
+ "% 2",
1393
+ "%2 2",
1394
+ "% 2",
1395
+ "%2 3",
1396
+ "% 2",
1397
+ "%2 4",
1398
+ "% 2",
1399
+ "%2 5",
1400
+ "[ A",
1401
+ "[A u",
1402
+ "[Au -",
1403
+ "[Au- ]",
1404
+ "[ N",
1405
+ "[N b",
1406
+ "[Nb -",
1407
+ "[Nb- 2",
1408
+ "[Nb-2 ]",
1409
+ "[ P",
1410
+ "[P t",
1411
+ "[Pt -",
1412
+ "[Pt- 2",
1413
+ "[Pt-2 ]",
1414
+ "[ F",
1415
+ "[F e",
1416
+ "[Fe -",
1417
+ "[Fe- 3",
1418
+ "[Fe-3 ]",
1419
+ "[ A",
1420
+ "[A l",
1421
+ "[Al -",
1422
+ "[Al- 3",
1423
+ "[Al-3 ]",
1424
+ "[ C",
1425
+ "[C u",
1426
+ "[Cu -",
1427
+ "[Cu- ]",
1428
+ "[ A",
1429
+ "[A g",
1430
+ "[Ag -",
1431
+ "[Ag- ]",
1432
+ "[ A",
1433
+ "[A s",
1434
+ "[As -",
1435
+ "[As- ]",
1436
+ "[ P",
1437
+ "[P d",
1438
+ "[Pd -",
1439
+ "[Pd- 2",
1440
+ "[Pd-2 ]",
1441
+ "[ P",
1442
+ "[P -",
1443
+ "[P- ]",
1444
+ "[ L",
1445
+ "[L i",
1446
+ "[Li +",
1447
+ "[Li+ ]",
1448
+ "[ S",
1449
+ "[S e",
1450
+ "[Se -",
1451
+ "[Se- ]",
1452
+ "[ N",
1453
+ "[N H",
1454
+ "[NH 4",
1455
+ "[NH4 +",
1456
+ "[NH4+ ]",
1457
+ "[ t",
1458
+ "[t e",
1459
+ "[te ]",
1460
+ "[ T",
1461
+ "[T c",
1462
+ "[Tc ]",
1463
+ "[ N",
1464
+ "[N i",
1465
+ "[Ni +",
1466
+ "[Ni+ 2",
1467
+ "[Ni+2 ]",
1468
+ "[ S",
1469
+ "[S r",
1470
+ "[Sr +",
1471
+ "[Sr+ 2",
1472
+ "[Sr+2 ]",
1473
+ "[ I",
1474
+ "[I +",
1475
+ "[I+ 3",
1476
+ "[I+3 ]",
1477
+ "[ B",
1478
+ "[B r",
1479
+ "[Br +",
1480
+ "[Br+ 2",
1481
+ "[Br+2 ]",
1482
+ "[ I",
1483
+ "[I +",
1484
+ "[I+ 2",
1485
+ "[I+2 ]",
1486
+ "[ C",
1487
+ "[C l",
1488
+ "[Cl +",
1489
+ "[Cl+ 2",
1490
+ "[Cl+2 ]",
1491
+ "[ F",
1492
+ "[F e",
1493
+ "[Fe -",
1494
+ "[Fe- 2",
1495
+ "[Fe-2 ]",
1496
+ "[ I",
1497
+ "[I +",
1498
+ "[I+ ]",
1499
+ "[ S",
1500
+ "[S e",
1501
+ "[Se +",
1502
+ "[Se+ ]",
1503
+ "[ M",
1504
+ "[M o",
1505
+ "[Mo ]",
1506
+ "[ c",
1507
+ "[c H",
1508
+ "[cH +",
1509
+ "[cH+ ]",
1510
+ "[ t",
1511
+ "[t e",
1512
+ "[te +",
1513
+ "[te+ ]",
1514
+ "[ s",
1515
+ "[s e",
1516
+ "[se +",
1517
+ "[se+ ]",
1518
+ "[ C",
1519
+ "[C a",
1520
+ "[Ca H",
1521
+ "[CaH 2",
1522
+ "[CaH2 ]",
1523
+ "[ A",
1524
+ "[A g",
1525
+ "[Ag +",
1526
+ "[Ag+ ]",
1527
+ "[ M",
1528
+ "[M n",
1529
+ "[Mn +",
1530
+ "[Mn+ 2",
1531
+ "[Mn+2 ]",
1532
+ "[ M",
1533
+ "[M g",
1534
+ "[Mg +",
1535
+ "[Mg+ 2",
1536
+ "[Mg+2 ]",
1537
+ "[ C",
1538
+ "[C o",
1539
+ "[Co +",
1540
+ "[Co+ 2",
1541
+ "[Co+2 ]",
1542
+ "[ C",
1543
+ "[C u",
1544
+ "[Cu +",
1545
+ "[Cu+ 2",
1546
+ "[Cu+2 ]",
1547
+ "[ C",
1548
+ "[C u",
1549
+ "[Cu +",
1550
+ "[Cu+ ]",
1551
+ "[ A",
1552
+ "[A l",
1553
+ "[Al ]",
1554
+ "[ F",
1555
+ "[F e",
1556
+ "[Fe +",
1557
+ "[Fe+ 3",
1558
+ "[Fe+3 ]",
1559
+ "[ H",
1560
+ "[H g",
1561
+ "[Hg +",
1562
+ "[Hg+ 2",
1563
+ "[Hg+2 ]",
1564
+ "[ A",
1565
+ "[A l",
1566
+ "[Al -",
1567
+ "[Al- ]",
1568
+ "[ C",
1569
+ "[C e",
1570
+ "[Ce +",
1571
+ "[Ce+ 3",
1572
+ "[Ce+3 ]",
1573
+ "[ N",
1574
+ "[N ]",
1575
+ "[ M",
1576
+ "[M g",
1577
+ "[Mg ]",
1578
+ "[ C",
1579
+ "[C d",
1580
+ "[Cd +",
1581
+ "[Cd+ 2",
1582
+ "[Cd+2 ]",
1583
+ "[ P",
1584
+ "[P t",
1585
+ "[Pt +",
1586
+ "[Pt+ ]",
1587
+ "[ B",
1588
+ "[B e",
1589
+ "[Be -",
1590
+ "[Be- ]",
1591
+ "[ X",
1592
+ "[X e",
1593
+ "[Xe ]",
1594
+ "[ K",
1595
+ "[K r",
1596
+ "[Kr ]",
1597
+ "[ T",
1598
+ "[T a",
1599
+ "[Ta ]",
1600
+ "[ L",
1601
+ "[L a",
1602
+ "[La +",
1603
+ "[La+ 3",
1604
+ "[La+3 ]",
1605
+ "[ B",
1606
+ "[B a",
1607
+ "[Ba +",
1608
+ "[Ba+ 2",
1609
+ "[Ba+2 ]",
1610
+ "[ W",
1611
+ "[W +",
1612
+ "[W+ 4",
1613
+ "[W+4 ]",
1614
+ "[ N",
1615
+ "[N i",
1616
+ "[Ni +",
1617
+ "[Ni+ ]",
1618
+ "[ S",
1619
+ "[S b",
1620
+ "[Sb +",
1621
+ "[Sb+ ]",
1622
+ "[ S",
1623
+ "[S b",
1624
+ "[Sb +",
1625
+ "[Sb+ 3",
1626
+ "[Sb+3 ]",
1627
+ "[ T",
1628
+ "[T l",
1629
+ "[Tl +",
1630
+ "[Tl+ ]",
1631
+ "[ C",
1632
+ "[C s",
1633
+ "[Cs +",
1634
+ "[Cs+ ]",
1635
+ "[ A",
1636
+ "[A u",
1637
+ "[Au +",
1638
+ "[Au+ ]",
1639
+ "[ Y",
1640
+ "[Y b",
1641
+ "[Yb +",
1642
+ "[Yb+ 3",
1643
+ "[Yb+3 ]",
1644
+ "[ G",
1645
+ "[G a",
1646
+ "[Ga +",
1647
+ "[Ga+ 3",
1648
+ "[Ga+3 ]",
1649
+ "[ C",
1650
+ "[C r",
1651
+ "[Cr +",
1652
+ "[Cr+ 3",
1653
+ "[Cr+3 ]",
1654
+ "[ A",
1655
+ "[A s",
1656
+ "[As +",
1657
+ "[As+ ]",
1658
+ "[ M",
1659
+ "[M n",
1660
+ "[Mn +",
1661
+ "[Mn+ 3",
1662
+ "[Mn+3 ]",
1663
+ "[ R",
1664
+ "[R u",
1665
+ "[Ru +",
1666
+ "[Ru+ 2",
1667
+ "[Ru+2 ]",
1668
+ "[ I",
1669
+ "[I r",
1670
+ "[Ir +",
1671
+ "[Ir+ 3",
1672
+ "[Ir+3 ]",
1673
+ "[ P",
1674
+ "[P d",
1675
+ "[Pd +",
1676
+ "[Pd+ 2",
1677
+ "[Pd+2 ]",
1678
+ "[ B",
1679
+ "[B e",
1680
+ "[Be ]",
1681
+ "[ B",
1682
+ "[B e",
1683
+ "[Be -",
1684
+ "[Be- 2",
1685
+ "[Be-2 ]",
1686
+ "[ Y",
1687
+ "[Y ]",
1688
+ "[ M",
1689
+ "[M o",
1690
+ "[Mo +",
1691
+ "[Mo+ 6",
1692
+ "[Mo+6 ]",
1693
+ "[ M",
1694
+ "[M o",
1695
+ "[Mo +",
1696
+ "[Mo+ 4",
1697
+ "[Mo+4 ]",
1698
+ "[ H",
1699
+ "[H f",
1700
+ "[Hf ]",
1701
+ "[ U",
1702
+ "[U ]",
1703
+ "[ M",
1704
+ "[M g",
1705
+ "[Mg -",
1706
+ "[Mg- ]",
1707
+ "[ S",
1708
+ "[S e",
1709
+ "[Se H",
1710
+ "[SeH 2",
1711
+ "[SeH2 ]",
1712
+ "[ L",
1713
+ "[L u",
1714
+ "[Lu +",
1715
+ "[Lu+ 3",
1716
+ "[Lu+3 ]",
1717
+ "[ N",
1718
+ "[N i",
1719
+ "[Ni +",
1720
+ "[Ni+ 3",
1721
+ "[Ni+3 ]",
1722
+ "[ S",
1723
+ "[S m",
1724
+ "[Sm +",
1725
+ "[Sm+ 3",
1726
+ "[Sm+3 ]",
1727
+ "[ P",
1728
+ "[P b",
1729
+ "[Pb +",
1730
+ "[Pb+ 2",
1731
+ "[Pb+2 ]",
1732
+ "[ E",
1733
+ "[E u",
1734
+ "[Eu +",
1735
+ "[Eu+ 2",
1736
+ "[Eu+2 ]",
1737
+ "[ P",
1738
+ "[P t",
1739
+ "[Pt +",
1740
+ "[Pt+ 2",
1741
+ "[Pt+2 ]",
1742
+ "[ H",
1743
+ "[H o",
1744
+ "[Ho +",
1745
+ "[Ho+ 3",
1746
+ "[Ho+3 ]",
1747
+ "[ M",
1748
+ "[M g",
1749
+ "[Mg -",
1750
+ "[Mg- 2",
1751
+ "[Mg-2 ]",
1752
+ "[ Z",
1753
+ "[Z r",
1754
+ "[Zr ]",
1755
+ "[ N",
1756
+ "[N H",
1757
+ "[NH 2",
1758
+ "[NH2 ]",
1759
+ "[ O",
1760
+ "[O s",
1761
+ "[Os +",
1762
+ "[Os+ 4",
1763
+ "[Os+4 ]",
1764
+ "[ M",
1765
+ "[M o",
1766
+ "[Mo -",
1767
+ "[Mo- ]",
1768
+ "[ C",
1769
+ "[C o",
1770
+ "[Co +",
1771
+ "[Co+ 3",
1772
+ "[Co+3 ]",
1773
+ "[ R",
1774
+ "[R h",
1775
+ "[Rh ]",
1776
+ "[ T",
1777
+ "[T b",
1778
+ "[Tb +",
1779
+ "[Tb+ 3",
1780
+ "[Tb+3 ]",
1781
+ "[ C",
1782
+ "[C r",
1783
+ "[Cr +",
1784
+ "[Cr+ ]",
1785
+ "[ R",
1786
+ "[R h",
1787
+ "[Rh +",
1788
+ "[Rh+ ]",
1789
+ "[ R",
1790
+ "[R h",
1791
+ "[Rh +",
1792
+ "[Rh+ 3",
1793
+ "[Rh+3 ]",
1794
+ "[ R",
1795
+ "[R b",
1796
+ "[Rb +",
1797
+ "[Rb+ ]",
1798
+ "[ E",
1799
+ "[E r",
1800
+ "[Er +",
1801
+ "[Er+ 3",
1802
+ "[Er+3 ]",
1803
+ "[ R",
1804
+ "[R u",
1805
+ "[Ru +",
1806
+ "[Ru+ 3",
1807
+ "[Ru+3 ]",
1808
+ "[ A",
1809
+ "[A u",
1810
+ "[Au +",
1811
+ "[Au+ 3",
1812
+ "[Au+3 ]",
1813
+ "[ H",
1814
+ "[H +",
1815
+ "[H+ ]",
1816
+ "[ S",
1817
+ "[S c",
1818
+ "[Sc -",
1819
+ "[Sc- ]",
1820
+ "[ V",
1821
+ "[V -",
1822
+ "[V- ]",
1823
+ "[ O",
1824
+ "[O H",
1825
+ "[OH +",
1826
+ "[OH+ 2",
1827
+ "[OH+2 ]",
1828
+ "[ V",
1829
+ "[V +",
1830
+ "[V+ 3",
1831
+ "[V+3 ]",
1832
+ "[ A",
1833
+ "[A g",
1834
+ "[Ag ]",
1835
+ "[ D",
1836
+ "[D y",
1837
+ "[Dy +",
1838
+ "[Dy+ 2",
1839
+ "[Dy+2 ]",
1840
+ "[ E",
1841
+ "[E u",
1842
+ "[Eu +",
1843
+ "[Eu+ 3",
1844
+ "[Eu+3 ]",
1845
+ "[ I",
1846
+ "[I r",
1847
+ "[Ir ]",
1848
+ "[ B",
1849
+ "[B e",
1850
+ "[Be +",
1851
+ "[Be+ 2",
1852
+ "[Be+2 ]",
1853
+ "[ A",
1854
+ "[A s",
1855
+ "[As H",
1856
+ "[AsH 2",
1857
+ "[AsH2 ]",
1858
+ "[ H",
1859
+ "[H -",
1860
+ "[H- ]",
1861
+ "[ C",
1862
+ "[C r",
1863
+ "[Cr ]",
1864
+ "[ V",
1865
+ "[V +",
1866
+ "[V+ 2",
1867
+ "[V+2 ]",
1868
+ "[ S",
1869
+ "[S H",
1870
+ "[SH 2",
1871
+ "[SH2 ]",
1872
+ "[ T",
1873
+ "[T i",
1874
+ "[Ti +",
1875
+ "[Ti+ 4",
1876
+ "[Ti+4 ]",
1877
+ "[ C",
1878
+ "[C H",
1879
+ "[CH 3",
1880
+ "[CH3 ]",
1881
+ "[ P",
1882
+ "[P H",
1883
+ "[PH 2",
1884
+ "[PH2 ]",
1885
+ "[ T",
1886
+ "[T b",
1887
+ "[Tb ]",
1888
+ "[ C",
1889
+ "[C +",
1890
+ "[C+ 2",
1891
+ "[C+2 ]",
1892
+ "[ P",
1893
+ "[P b",
1894
+ "[Pb ]",
1895
+ "[ P",
1896
+ "[P r",
1897
+ "[Pr +",
1898
+ "[Pr+ 3",
1899
+ "[Pr+3 ]",
1900
+ "[ O",
1901
+ "[O s",
1902
+ "[Os +",
1903
+ "[Os+ 3",
1904
+ "[Os+3 ]",
1905
+ "[ P",
1906
+ "[P r",
1907
+ "[Pr ]",
1908
+ "[ C",
1909
+ "[C u",
1910
+ "[Cu -",
1911
+ "[Cu- 2",
1912
+ "[Cu-2 ]",
1913
+ "[ O",
1914
+ "[O +",
1915
+ "[O+ 2",
1916
+ "[O+2 ]",
1917
+ "[ I",
1918
+ "[I H",
1919
+ "[IH ]",
1920
+ "[ P",
1921
+ "[P t",
1922
+ "[Pt -",
1923
+ "[Pt- ]",
1924
+ "[ Y",
1925
+ "[Y b",
1926
+ "[Yb ]",
1927
+ "[ I",
1928
+ "[I r",
1929
+ "[Ir +",
1930
+ "[Ir+ 4",
1931
+ "[Ir+4 ]",
1932
+ "[ P",
1933
+ "[P -",
1934
+ "[P- 2",
1935
+ "[P-2 ]",
1936
+ "[ T",
1937
+ "[T i",
1938
+ "[Ti ]",
1939
+ "[ N",
1940
+ "[N d",
1941
+ "[Nd +",
1942
+ "[Nd+ 3",
1943
+ "[Nd+3 ]",
1944
+ "[ A",
1945
+ "[A l",
1946
+ "[Al +",
1947
+ "[Al+ 3",
1948
+ "[Al+3 ]",
1949
+ "[ H",
1950
+ "[H g",
1951
+ "[Hg -",
1952
+ "[Hg- 2",
1953
+ "[Hg-2 ]",
1954
+ "[ I",
1955
+ "[I n",
1956
+ "[In ]",
1957
+ "[ F",
1958
+ "[F e",
1959
+ "[Fe +",
1960
+ "[Fe+ ]",
1961
+ "[ R",
1962
+ "[R u",
1963
+ "[Ru -",
1964
+ "[Ru- 2",
1965
+ "[Ru-2 ]",
1966
+ "[ I",
1967
+ "[I r",
1968
+ "[Ir +",
1969
+ "[Ir+ ]",
1970
+ "[ R",
1971
+ "[R h",
1972
+ "[Rh +",
1973
+ "[Rh+ 2",
1974
+ "[Rh+2 ]",
1975
+ "[ N",
1976
+ "[N a",
1977
+ "[Na H",
1978
+ "[NaH ]",
1979
+ "[ M",
1980
+ "[M g",
1981
+ "[Mg H",
1982
+ "[MgH 2",
1983
+ "[MgH2 ]",
1984
+ "[ A",
1985
+ "[A g",
1986
+ "[Ag -",
1987
+ "[Ag- 4",
1988
+ "[Ag-4 ]",
1989
+ "[ C",
1990
+ "[C l",
1991
+ "[Cl +",
1992
+ "[Cl+ ]",
1993
+ "[ Z",
1994
+ "[Z n",
1995
+ "[Zn -",
1996
+ "[Zn- 2",
1997
+ "[Zn-2 ]",
1998
+ "[ H",
1999
+ "[H H",
2000
+ "[HH ]",
2001
+ "[ H",
2002
+ "[H g",
2003
+ "[Hg +",
2004
+ "[Hg+ ]",
2005
+ "[ Y",
2006
+ "[Y +",
2007
+ "[Y+ 3",
2008
+ "[Y+3 ]",
2009
+ "[ P",
2010
+ "[P b",
2011
+ "[Pb +",
2012
+ "[Pb+ ]",
2013
+ "[ G",
2014
+ "[G d",
2015
+ "[Gd +",
2016
+ "[Gd+ 3",
2017
+ "[Gd+3 ]",
2018
+ "[ P",
2019
+ "[P t",
2020
+ "[Pt +",
2021
+ "[Pt+ 4",
2022
+ "[Pt+4 ]",
2023
+ "c c",
2024
+ "O )",
2025
+ "C C",
2026
+ "C (",
2027
+ "= O)",
2028
+ "c 1",
2029
+ "c (",
2030
+ "c 2",
2031
+ "( =O)",
2032
+ "C )",
2033
+ "C( =O)",
2034
+ "c1 cc",
2035
+ "c 3",
2036
+ "( O)",
2037
+ "c n",
2038
+ "C 1",
2039
+ "c2 cc",
2040
+ ") cc",
2041
+ "C O",
2042
+ "CC CC",
2043
+ "C( O)",
2044
+ "P (=O)",
2045
+ "C 2",
2046
+ "P(=O) (O)",
2047
+ "( C)",
2048
+ "c1 n",
2049
+ "F )",
2050
+ "c1cc c(",
2051
+ "C(=O) N",
2052
+ "C N",
2053
+ "O =",
2054
+ "O CC",
2055
+ "c3 cc",
2056
+ "2 )",
2057
+ "N )",
2058
+ "O C(",
2059
+ "[ n",
2060
+ "H ]",
2061
+ "c1cc cc",
2062
+ "[n H]",
2063
+ "O P(=O)(O)",
2064
+ "C l",
2065
+ "N C(=O)",
2066
+ "c 4",
2067
+ "3 )",
2068
+ "c2cc cc",
2069
+ ")cc 1",
2070
+ "n 1",
2071
+ "+ ]",
2072
+ "2 ]",
2073
+ "CC N",
2074
+ "+ 2]",
2075
+ "CC 1",
2076
+ "C( C)",
2077
+ "CO P(=O)(O)",
2078
+ "O) C(O)",
2079
+ "- ]",
2080
+ "c( =O)",
2081
+ "c1n cn",
2082
+ "C 3",
2083
+ "C =",
2084
+ "c2cc c(",
2085
+ "[ O",
2086
+ "[O -]",
2087
+ "CC (=O)",
2088
+ "n c(",
2089
+ "c1cccc c1",
2090
+ "c2cccc c2",
2091
+ "[ N",
2092
+ "S (=O)",
2093
+ "c3cc cc",
2094
+ "( F)",
2095
+ "S(=O) (=O)",
2096
+ "n 2",
2097
+ "OC( COP(=O)(O)",
2098
+ "n cn",
2099
+ "c2 c(",
2100
+ "CCN (",
2101
+ "CC (C)",
2102
+ "C1 O",
2103
+ "c 5",
2104
+ "CC (",
2105
+ "cc (",
2106
+ "4 )",
2107
+ "Cl )",
2108
+ "[O-] )",
2109
+ "c3cccc c3",
2110
+ "O= C(",
2111
+ "c1cc (",
2112
+ "cc 1",
2113
+ "OCC OCC",
2114
+ "OC(COP(=O)(O) OP(=O)(O)",
2115
+ "[ M",
2116
+ "c4 cc",
2117
+ "O C)",
2118
+ "O= C(O)",
2119
+ "c1ncn c2",
2120
+ "c3cc c(",
2121
+ "2 C1",
2122
+ "N c1ncnc2",
2123
+ "C c1ccc(",
2124
+ "C(=O) O)",
2125
+ "C( F)",
2126
+ "CCCC CCCC",
2127
+ "C2 O)",
2128
+ "c2 )",
2129
+ "CC (O)",
2130
+ "c(=O) [nH]",
2131
+ "n (",
2132
+ "CC O",
2133
+ "c1ncn 2C1",
2134
+ "c( N)",
2135
+ "O)C(O) C1O",
2136
+ "CC(=O) N",
2137
+ "F )cc",
2138
+ "c( -",
2139
+ "C(F) (F)",
2140
+ "Nc1ncnc2 c1ncn2C1",
2141
+ ") N",
2142
+ "c2 n",
2143
+ "CC CN",
2144
+ "c( C)",
2145
+ "C( C",
2146
+ "[N +]",
2147
+ "C( N)",
2148
+ "( [O-])",
2149
+ "[M g",
2150
+ "O)C(O) C2O)",
2151
+ "CC 2",
2152
+ "c1cccc (",
2153
+ "C(=O) O",
2154
+ "[Mg +2]",
2155
+ "C 4",
2156
+ "= O",
2157
+ "c( N",
2158
+ "CO c1ccc(",
2159
+ "n +2]",
2160
+ "c2ccccc2 )",
2161
+ "[ C",
2162
+ "Cl )cc",
2163
+ "[N a",
2164
+ "[Na +]",
2165
+ "C c1cc",
2166
+ "N c1n",
2167
+ "B r",
2168
+ "2 )cc1",
2169
+ "c2cc (",
2170
+ "CC OCC",
2171
+ "CN (",
2172
+ "C( =",
2173
+ "n 3",
2174
+ "c(N) ncn",
2175
+ "C S",
2176
+ "C c1",
2177
+ "c s",
2178
+ "c1 )",
2179
+ "O C",
2180
+ "N 2",
2181
+ "CC C(",
2182
+ "C c1n",
2183
+ "cc cc",
2184
+ "C2 OC(COP(=O)(O)OP(=O)(O)",
2185
+ "c(=O)[nH] 1",
2186
+ "c1 2",
2187
+ "Nc1ncnc2c1ncn2C1 OC(COP(=O)(O)OP(=O)(O)",
2188
+ "c2 nc(",
2189
+ "c3 )",
2190
+ "a +2]",
2191
+ "[C a+2]",
2192
+ "c( O)",
2193
+ "C(O) C(O)",
2194
+ "N =",
2195
+ "C #",
2196
+ "n 4",
2197
+ "(C) C)",
2198
+ "1 )",
2199
+ "[ Z",
2200
+ "[Z n+2]",
2201
+ "2) c1",
2202
+ "O=C( N",
2203
+ "= N",
2204
+ "c2 c1",
2205
+ "CN 1",
2206
+ "c4cc cc",
2207
+ "OP(=O)(O) OCC",
2208
+ "( C",
2209
+ "n cc",
2210
+ "OCCOCC OCCOCC",
2211
+ "C( CC",
2212
+ "N 1",
2213
+ "[N+] (=O)",
2214
+ "c o",
2215
+ "CC =",
2216
+ "c3ccccc3 )",
2217
+ "C n1",
2218
+ "CC C2",
2219
+ "C(F)(F) F)",
2220
+ "c1ccccc1 )",
2221
+ "[ F",
2222
+ "[F e",
2223
+ "c3 c(",
2224
+ "cn c5",
2225
+ "CC 2)",
2226
+ "CC 3",
2227
+ "C2 )",
2228
+ "OC( n4",
2229
+ "c5 4)",
2230
+ "3 OC(n4",
2231
+ "3OC(n4 cnc5",
2232
+ "n c2",
2233
+ "O)C(O)C2O) c(=O)[nH]1",
2234
+ "OP(=O)(O) O)",
2235
+ "c [nH]",
2236
+ "[n +]",
2237
+ "c4cccc c4",
2238
+ "CO C(=O)",
2239
+ "Nc1n c2c(",
2240
+ "S(=O)(=O) N",
2241
+ "ncn 2",
2242
+ "CC C1",
2243
+ "cc 2",
2244
+ "cn c3",
2245
+ "c(N)ncn c54)",
2246
+ "3OC(n4cnc5 c(N)ncnc54)",
2247
+ "c2cccc (",
2248
+ "n c3",
2249
+ "C(O) C3",
2250
+ "n 2)",
2251
+ "[Fe ]"
2252
+ ]
2253
+ }
2254
+ }
smiles/tokenizer_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "<mask>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
+ "eos_token": "</s>",
47
+ "mask_token": "<mask>",
48
+ "model_max_length": 1000000000000000019884624838656,
49
+ "model_type": "bpe",
50
+ "pad_token": "<pad>",
51
+ "tokenizer_class": "PreTrainedTokenizerFast",
52
+ "unk_token": "<unk>",
53
+ "vocab_size": 884
54
+ }
smiles/vocab.json ADDED
@@ -0,0 +1,886 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[Cu-": 622,
3
+ "[Pr]": 1041,
4
+ "V": 230,
5
+ "E": 215,
6
+ "[Be-2]": 988,
7
+ "[Br": 526,
8
+ "[Re": 565,
9
+ "[Rh+3]": 1013,
10
+ "[Se-]": 934,
11
+ "3)": 300,
12
+ "[Tl+]": 976,
13
+ "d": 240,
14
+ "%1": 545,
15
+ "e": 241,
16
+ "c(-": 371,
17
+ "c1cccc": 294,
18
+ "c(N": 390,
19
+ "%22": 919,
20
+ "%21": 918,
21
+ "[Li]": 865,
22
+ "[Rh": 744,
23
+ "[Ce+3]": 961,
24
+ "[Kr": 678,
25
+ "</s>": 1,
26
+ "O=C(N": 435,
27
+ "[N-": 494,
28
+ "[Au-": 611,
29
+ "F)cc": 370,
30
+ "-": 196,
31
+ "[Lu]": 902,
32
+ "[Pb+2]": 999,
33
+ "[Nb-": 613,
34
+ "[Re+]": 861,
35
+ "[Ru+3]": 1016,
36
+ "[Pd]": 886,
37
+ "[Rh+2]": 1057,
38
+ "[AsH": 771,
39
+ "Cc1ccc(": 356,
40
+ "C(CC": 444,
41
+ "[Ba": 683,
42
+ "[Gd-5": 550,
43
+ "CC2": 384,
44
+ "[Mg-2]": 1003,
45
+ "[P-2": 790,
46
+ "[SH+": 574,
47
+ "[W+4": 687,
48
+ "[AsH2": 772,
49
+ "[Eu": 731,
50
+ "[Mo+4]": 991,
51
+ "H": 218,
52
+ "C(O)C3": 482,
53
+ "[C-]": 826,
54
+ "[BH-": 606,
55
+ "M": 222,
56
+ "[Fe-3]": 926,
57
+ "[Li": 568,
58
+ "[MgH": 802,
59
+ "[NaH]": 1058,
60
+ "[Nb-2": 614,
61
+ "[P+": 520,
62
+ "[Pt+4]": 1068,
63
+ "[o": 509,
64
+ "[S": 496,
65
+ "C(": 259,
66
+ "1)": 431,
67
+ "[Zn+": 554,
68
+ "[Ag+]": 952,
69
+ "[Cr": 701,
70
+ "[o+]": 825,
71
+ "<": 208,
72
+ "Cc1n": 415,
73
+ "[se+]": 950,
74
+ "[n-]": 824,
75
+ "N2": 413,
76
+ "CCCC": 275,
77
+ "U": 229,
78
+ "[Mn+2": 657,
79
+ "[Au]": 872,
80
+ "[Ar": 592,
81
+ "[Si]": 816,
82
+ "[c-": 504,
83
+ "[c-]": 821,
84
+ "X": 232,
85
+ "0": 198,
86
+ "CCN": 306,
87
+ "[Cr+3": 703,
88
+ "[N": 324,
89
+ "[Rb+": 751,
90
+ "=N": 436,
91
+ "OC(": 291,
92
+ "[Al-3": 621,
93
+ "[SiH]": 881,
94
+ "[c+]": 846,
95
+ "a": 237,
96
+ "[Ni+2]": 938,
97
+ "[Dy+": 766,
98
+ "[As": 534,
99
+ "ncn": 331,
100
+ "[H]": 882,
101
+ "O=C(": 343,
102
+ "[Ca": 587,
103
+ "c2": 263,
104
+ "CCO": 365,
105
+ "[Ir+4": 789,
106
+ "[Sb+3": 689,
107
+ "o": 249,
108
+ "R": 226,
109
+ "CCN(": 333,
110
+ "[Yb": 696,
111
+ "[I+2]": 942,
112
+ "[Nb": 612,
113
+ "C(F)(F)": 372,
114
+ "[H+]": 1018,
115
+ "[Zn-2": 806,
116
+ "[MgH2": 803,
117
+ "[N+2]": 904,
118
+ "[Ti]": 1049,
119
+ "[te+": 651,
120
+ "[NH4": 631,
121
+ "[te]": 936,
122
+ "c3c(": 456,
123
+ "9": 207,
124
+ "[Cu": 578,
125
+ "[Au-]": 923,
126
+ "[B": 525,
127
+ "[Ga+": 699,
128
+ "[Rb+]": 1014,
129
+ "[Mg-2": 738,
130
+ "[CH3]": 1034,
131
+ "CN1": 438,
132
+ "[Fe-3": 618,
133
+ "[Pd-2]": 931,
134
+ "<pad>": 0,
135
+ "[BH": 594,
136
+ "[Si": 497,
137
+ "c5": 336,
138
+ "[IH": 788,
139
+ "[Pt+4": 812,
140
+ "[Re]": 863,
141
+ "[BH2-]": 905,
142
+ "c3cc": 288,
143
+ "[PH2": 780,
144
+ "[Al": 619,
145
+ "[O+": 507,
146
+ "[Pd-": 626,
147
+ "C#": 428,
148
+ "[Tb+3": 747,
149
+ "OCCOCC": 346,
150
+ "[O]": 858,
151
+ "[SH+]": 870,
152
+ "O=C(O)": 351,
153
+ "[Hg": 552,
154
+ "[Dy+2": 767,
155
+ ")cc": 273,
156
+ "COC(=O)": 471,
157
+ "[Be-]": 966,
158
+ "[Br-": 527,
159
+ "[CH3": 779,
160
+ "[Ga]": 901,
161
+ "[Mg+2]": 387,
162
+ "[Mo+6": 715,
163
+ "[Rh+]": 1012,
164
+ "[OH2": 570,
165
+ "[Mg+2": 659,
166
+ "[SiH": 557,
167
+ "[Yb+3]": 979,
168
+ "[Mg-": 719,
169
+ "[Mo+4": 716,
170
+ "[se+": 652,
171
+ "[Eu+": 732,
172
+ "a+2]": 423,
173
+ "c(N)ncn": 407,
174
+ "c1": 261,
175
+ "Nc1nc2c(": 472,
176
+ "c1cccc(": 385,
177
+ "c2c(": 332,
178
+ "cc1": 345,
179
+ "CN": 285,
180
+ "[La+3": 682,
181
+ "[NH3+]": 813,
182
+ "co": 447,
183
+ "[As+]": 982,
184
+ "[SeH2]": 995,
185
+ "[F": 454,
186
+ "[L": 567,
187
+ "[G": 546,
188
+ "(C)C)": 430,
189
+ "[N-]": 815,
190
+ "[MgH2]": 1059,
191
+ "[C": 394,
192
+ ")": 193,
193
+ "[Cl+]": 1061,
194
+ "[NH2": 498,
195
+ "[Hg-2]": 1052,
196
+ "[Ni+2": 637,
197
+ "[OH": 505,
198
+ "[Ru-2]": 1055,
199
+ "[Tb": 745,
200
+ "[Ba+": 684,
201
+ "[Zn": 553,
202
+ "[Lu+": 721,
203
+ "[Ca+": 607,
204
+ "2C1": 354,
205
+ "c54)": 462,
206
+ "t": 253,
207
+ "[Cl+": 583,
208
+ "[Ga": 597,
209
+ "[Ru+2": 707,
210
+ "[Ru-2": 799,
211
+ "[Sb+": 688,
212
+ "CCOCC": 403,
213
+ "[Gd+": 810,
214
+ "c1ccccc1": 322,
215
+ "C2": 278,
216
+ "[Fe]": 484,
217
+ "[Bi": 559,
218
+ "[Ru-]": 877,
219
+ "[Cu+2": 663,
220
+ "[K+": 604,
221
+ "c2ccc(": 317,
222
+ "[Ru]": 843,
223
+ "OP(=O)(O)O)": 467,
224
+ "2)": 289,
225
+ "[Sr+2]": 939,
226
+ "OC(n4": 461,
227
+ "c(": 262,
228
+ "[nH]": 295,
229
+ "[Al-]": 960,
230
+ "[PH]": 827,
231
+ "[Ag-": 624,
232
+ "[Fe+": 590,
233
+ "[BiH": 560,
234
+ "cc": 256,
235
+ "[Kr]": 968,
236
+ "i": 245,
237
+ "CCCCCCCC": 359,
238
+ "b": 238,
239
+ "[Ti": 776,
240
+ "([O-])": 381,
241
+ "[As+": 704,
242
+ "%15": 912,
243
+ "Nc1n": 399,
244
+ "Nc1ncnc2c1ncn2C1OC(COP(=O)(O)OP(=O)(O)": 420,
245
+ "[CH2+": 589,
246
+ "Cc1cc": 398,
247
+ "[H-": 773,
248
+ "[Lu+3": 722,
249
+ "[Hg+": 665,
250
+ "[Mo+6]": 990,
251
+ "7": 205,
252
+ "O)C(O)C1O": 368,
253
+ "[As-]": 930,
254
+ "[OH2+": 571,
255
+ "[BH3-": 596,
256
+ "[Pd+2": 712,
257
+ "[Re+": 566,
258
+ "[S-]": 852,
259
+ "[SiH3]": 853,
260
+ "[W+4]": 972,
261
+ "c3cccc": 326,
262
+ "%16": 913,
263
+ "[Ru-": 582,
264
+ "4": 202,
265
+ "[I]": 895,
266
+ "CC2)": 458,
267
+ "[Ce": 667,
268
+ "[cH-": 543,
269
+ "[K+]": 906,
270
+ "[Rh+": 748,
271
+ "c1ccc(": 283,
272
+ "[C+2": 781,
273
+ "[Pb]": 1038,
274
+ "cccc": 416,
275
+ "C4": 388,
276
+ "[BH2-": 603,
277
+ "I": 219,
278
+ "[La+": 681,
279
+ "[P+]": 832,
280
+ "[SH": 530,
281
+ "nc3": 481,
282
+ "[Gd+3]": 1067,
283
+ "[Nd+3": 793,
284
+ "F": 216,
285
+ "[C-": 511,
286
+ "[D": 764,
287
+ "[Mn+2]": 953,
288
+ "O)C(O)C2O)": 383,
289
+ "A": 211,
290
+ "[HH]": 1063,
291
+ "[Ir+4]": 1047,
292
+ "[Pb": 727,
293
+ "]": 236,
294
+ "CCC2": 451,
295
+ "[Mo+": 714,
296
+ "<s>": 185,
297
+ "[As-": 625,
298
+ "[Fe+3]": 958,
299
+ "[Pr+": 783,
300
+ "C2OC(COP(=O)(O)OP(=O)(O)": 417,
301
+ "OC(COP(=O)(O)": 330,
302
+ "[NH-": 515,
303
+ "Cn1": 450,
304
+ "[AsH2]": 1028,
305
+ "CC(=O)": 320,
306
+ "2]": 305,
307
+ "[Mo": 649,
308
+ "[Sr+2": 640,
309
+ "K": 220,
310
+ "[Fe+2]": 894,
311
+ "[Ti+": 777,
312
+ "[Cr+3]": 981,
313
+ "[K]": 871,
314
+ "CC(=O)N": 369,
315
+ "[Bi+3]": 903,
316
+ "C(=O)O)": 357,
317
+ "[Ag]": 1023,
318
+ "[BH3": 595,
319
+ "[CH2-": 541,
320
+ "[CaH2]": 951,
321
+ "[Cd+": 671,
322
+ "[SH-]": 838,
323
+ "C(=": 405,
324
+ "[Pt-2": 616,
325
+ "[I+3": 642,
326
+ "[Na+]": 397,
327
+ "%2": 610,
328
+ "[Cu+": 662,
329
+ "[Gd+3": 811,
330
+ "S(=O)": 325,
331
+ "%10": 847,
332
+ "[V+3": 763,
333
+ "[s": 516,
334
+ "c2cccc(": 480,
335
+ "[Ag-4": 804,
336
+ "[Co]": 873,
337
+ "[Ho+3": 737,
338
+ "g": 243,
339
+ "[Cl-": 524,
340
+ "[BH2": 602,
341
+ "[Gd-4]": 849,
342
+ "[Li+]": 933,
343
+ "[NH3+": 492,
344
+ "[I+2": 645,
345
+ "[NH4+": 632,
346
+ "[Rh+2": 800,
347
+ "[SH2": 775,
348
+ "[Sr+": 639,
349
+ "[Br+": 643,
350
+ "[W+": 686,
351
+ "[Ca+2]": 424,
352
+ "[Zn+2": 555,
353
+ "[n-": 508,
354
+ "cnc3": 477,
355
+ "u": 254,
356
+ "[PH+]": 909,
357
+ "COP(=O)(O)": 310,
358
+ "[B-]": 839,
359
+ "[Ta": 679,
360
+ "[Al]": 957,
361
+ "[Rh]": 1009,
362
+ "O": 224,
363
+ "[Ca+2": 608,
364
+ "[Dy+2]": 1024,
365
+ "6": 204,
366
+ "[Pt": 564,
367
+ "[Re-": 585,
368
+ "[Nd": 791,
369
+ "C(N)": 380,
370
+ "[PH+": 609,
371
+ "[Pb+2": 729,
372
+ "[Yb+": 697,
373
+ "[Hf]": 992,
374
+ "[Yb+3": 698,
375
+ "c(N)": 367,
376
+ "c2cc(": 402,
377
+ "OCCOCCOCCOCC": 443,
378
+ "[C+]": 820,
379
+ "O)C(O)C2O)c(=O)[nH]1": 466,
380
+ "[Ta]": 969,
381
+ "C(C)": 309,
382
+ "[Se+": 648,
383
+ "c4cc": 349,
384
+ "%23": 920,
385
+ "<unk>": 2,
386
+ "[O+2]": 1043,
387
+ "Cl": 297,
388
+ "3OC(n4cnc5": 464,
389
+ "[Zn]": 893,
390
+ "Nc1ncnc2c1ncn2C1": 373,
391
+ "[I+3]": 940,
392
+ "[Bi+3": 600,
393
+ "[Co+2": 661,
394
+ "[SH-": 531,
395
+ "[Pd+": 711,
396
+ "[Gd]": 900,
397
+ "W": 231,
398
+ "[PH2]": 1035,
399
+ "[T": 579,
400
+ ")cc1": 302,
401
+ "[Ni+": 636,
402
+ "c(=O)[nH]1": 418,
403
+ "[Bi+": 599,
404
+ "[CH2-]": 844,
405
+ "c(=O)[nH]": 363,
406
+ "[Co+2]": 954,
407
+ "[BiH3]": 854,
408
+ "[CH]": 889,
409
+ "[Gd": 547,
410
+ "[Hg-": 796,
411
+ "[Xe": 677,
412
+ "OC(COP(=O)(O)OP(=O)(O)": 347,
413
+ "c2nc(": 421,
414
+ "[Ni]": 869,
415
+ "C(F)(F)F)": 452,
416
+ "[I+": 641,
417
+ "[O+]": 823,
418
+ "[OH+2]": 1021,
419
+ "3": 201,
420
+ "c1ncn2C1": 366,
421
+ "[Pr": 782,
422
+ "[O+2": 787,
423
+ "c[nH]": 468,
424
+ "[c]": 892,
425
+ "c4": 299,
426
+ "[S+]": 828,
427
+ "[s+]": 842,
428
+ "[Ir]": 1026,
429
+ "%18": 915,
430
+ "CN(": 404,
431
+ "[H-]": 1029,
432
+ "<mask>": 189,
433
+ "[Ga+3": 700,
434
+ "[Pd-2": 627,
435
+ "[Rb": 750,
436
+ "C(C": 378,
437
+ "[Er+3": 754,
438
+ "[OH-]": 883,
439
+ "(C)": 280,
440
+ "(O)": 269,
441
+ "[Hg+2": 666,
442
+ "[Re-]": 879,
443
+ "[S+": 514,
444
+ "c1cc(": 344,
445
+ "#version:": 485,
446
+ "[cH+]": 948,
447
+ "S(=O)(=O)": 328,
448
+ "[Pd+2]": 986,
449
+ "5": 203,
450
+ "Nc1ncnc2": 355,
451
+ "[Au+": 694,
452
+ "#": 190,
453
+ "[Tb+3]": 1010,
454
+ "[Pt]": 860,
455
+ "[Al+": 794,
456
+ "CC3": 459,
457
+ "CC=": 448,
458
+ "[Tc]": 937,
459
+ "C(=O)N": 284,
460
+ "-]": 312,
461
+ "[Co+": 660,
462
+ "[Cu-2": 786,
463
+ "[Mg-]": 994,
464
+ "[Sb": 521,
465
+ "[Ca]": 884,
466
+ "+2]": 307,
467
+ "[CH-": 519,
468
+ "[Cu+2]": 955,
469
+ "[Ag-]": 929,
470
+ "[Cl+3": 584,
471
+ "CCC(": 414,
472
+ "N=": 427,
473
+ "[Cd+2]": 964,
474
+ "[Co+3]": 1008,
475
+ "[Cu-]": 928,
476
+ "P(=O)": 277,
477
+ "COc1ccc(": 391,
478
+ "[F-": 522,
479
+ "[Ho": 735,
480
+ "[Ho+3]": 1002,
481
+ "%": 191,
482
+ "[OH-": 586,
483
+ "[Al+3": 795,
484
+ "[Os+": 740,
485
+ "[Br+2": 644,
486
+ "[Fe": 455,
487
+ "[Sn]": 868,
488
+ "S(=O)(=O)N": 473,
489
+ "[Ir+": 709,
490
+ "[V-": 760,
491
+ "Br": 400,
492
+ "+]": 304,
493
+ "[CH": 518,
494
+ "[Fe-2]": 944,
495
+ "[Cs+": 693,
496
+ "2)cc1": 401,
497
+ "[Pb+": 728,
498
+ "[Cl+2": 646,
499
+ "[Pb+]": 1066,
500
+ "[NH2]": 1005,
501
+ "=O": 389,
502
+ "O)C(O)": 311,
503
+ "Y": 233,
504
+ "[Co+3": 743,
505
+ "[Mg]": 963,
506
+ "[Na+": 539,
507
+ "[W]": 897,
508
+ "%24": 921,
509
+ "[Na": 396,
510
+ "[X": 676,
511
+ "[As]": 840,
512
+ "[Br-]": 836,
513
+ "[F-]": 834,
514
+ "[CH+": 535,
515
+ "OP(=O)(O)": 296,
516
+ "[Y+": 808,
517
+ "C1": 271,
518
+ "[NH": 490,
519
+ "[Yb]": 1046,
520
+ "[NH2+": 499,
521
+ "[n+": 495,
522
+ "[nH+]": 819,
523
+ "[s+": 536,
524
+ "2": 200,
525
+ "[U": 718,
526
+ "c(N)ncnc54)": 478,
527
+ "c3": 268,
528
+ "f": 242,
529
+ "[V]": 866,
530
+ "cc2": 476,
531
+ "OCC": 287,
532
+ "[Cd+2": 672,
533
+ "[I-": 529,
534
+ "[In]": 1053,
535
+ "[Al+3]": 1051,
536
+ "c1n": 281,
537
+ "CC1": 308,
538
+ "+": 195,
539
+ "C(O)": 276,
540
+ "c(O)": 425,
541
+ "Cl)": 340,
542
+ "[Co": 577,
543
+ "[C+2]": 1037,
544
+ "c12": 419,
545
+ "C(F)": 358,
546
+ "[Hg]": 851,
547
+ "4)": 339,
548
+ "[Sm+": 725,
549
+ "[Zn+]": 880,
550
+ "3OC(n4cnc5c(N)ncnc54)": 479,
551
+ "[Mn]": 907,
552
+ "[I-]": 837,
553
+ "[N+": 488,
554
+ "[Ni+3": 723,
555
+ "[E": 730,
556
+ "[I+]": 945,
557
+ "[O-": 489,
558
+ "[Ce+": 668,
559
+ "[Sn": 572,
560
+ "O)": 257,
561
+ "[Eu+2]": 1000,
562
+ "8": 206,
563
+ "[Tb+": 746,
564
+ "[Zr]": 1004,
565
+ "c2c1": 437,
566
+ "cs": 410,
567
+ "[OH+2": 761,
568
+ "C3": 315,
569
+ "c1cc": 267,
570
+ "h": 244,
571
+ "nc2": 465,
572
+ "ncc": 442,
573
+ "[Ir+]": 1056,
574
+ "c4cccc": 439,
575
+ "[Sb]": 833,
576
+ "[NH-]": 829,
577
+ "n(": 364,
578
+ "CCC1": 475,
579
+ "[Pt+": 673,
580
+ "[Be]": 987,
581
+ "[Eu+3": 768,
582
+ "[Mo-": 742,
583
+ "L": 221,
584
+ "[P]": 898,
585
+ "[PH": 513,
586
+ "n1": 303,
587
+ "[Mg+": 658,
588
+ "[Nb-2]": 924,
589
+ "[Au+]": 978,
590
+ "[Mn+": 656,
591
+ "c2ccccc2": 323,
592
+ "%13": 910,
593
+ "ncn2": 474,
594
+ "[O": 318,
595
+ "[Pr+3": 784,
596
+ "1": 199,
597
+ "CS": 408,
598
+ "[Ba+2": 685,
599
+ "[Be": 674,
600
+ "[Cl+3]": 878,
601
+ ".": 197,
602
+ "[Ir": 708,
603
+ "[NH2+]": 817,
604
+ "[Pr+3]": 1039,
605
+ "[C]": 888,
606
+ "[Pt-": 615,
607
+ "[NaH": 801,
608
+ "[Tc": 635,
609
+ "[Te]": 875,
610
+ "[V+3]": 1022,
611
+ "[Zr": 739,
612
+ "[se": 517,
613
+ "[Sc": 758,
614
+ "N)": 290,
615
+ "[Mg": 382,
616
+ "[Ag": 623,
617
+ "[Tb]": 1036,
618
+ "[nH+": 501,
619
+ "[o+": 510,
620
+ "c": 239,
621
+ "[Ni+]": 973,
622
+ "[V-]": 1020,
623
+ "[Lu+3]": 996,
624
+ "[Ag+": 655,
625
+ "(=O)": 264,
626
+ "[SH2]": 1032,
627
+ "c3)": 422,
628
+ "[Au": 576,
629
+ ">": 210,
630
+ "[Ru+": 706,
631
+ "[Se": 500,
632
+ "[te+]": 949,
633
+ "cc(": 338,
634
+ "[Os]": 876,
635
+ "[Ag-4]": 1060,
636
+ "[Sm": 724,
637
+ "[cH+": 650,
638
+ "[Er": 752,
639
+ "[t": 633,
640
+ "[Fe+2": 591,
641
+ "[Fe+3": 664,
642
+ "CC(O)": 362,
643
+ "[Dy": 765,
644
+ "[M": 348,
645
+ "[Ir+3": 710,
646
+ "[BH3-]": 899,
647
+ "[Mo]": 947,
648
+ "[Ni": 573,
649
+ "[N]": 962,
650
+ "[Se+]": 946,
651
+ "C(=O)O": 386,
652
+ "[In": 798,
653
+ "C2)": 460,
654
+ "c(C)": 377,
655
+ "[Be-2": 713,
656
+ "[Gd-4": 549,
657
+ "[Nd+": 792,
658
+ "(F)": 327,
659
+ "c3ccccc3": 342,
660
+ "[N+]": 379,
661
+ "[OH+]": 822,
662
+ "[cH": 542,
663
+ "[Ni+3]": 997,
664
+ "[Lu": 598,
665
+ "=O)": 260,
666
+ "[NH4+]": 935,
667
+ "nc(": 321,
668
+ "c1ncn": 314,
669
+ "[NH]": 891,
670
+ "H]": 293,
671
+ "C(O)C(O)": 426,
672
+ "[Pd": 588,
673
+ "[SiH3": 558,
674
+ "[N+](=O)": 446,
675
+ "%17": 914,
676
+ "CO": 274,
677
+ "[Ar]": 896,
678
+ "[Cd": 670,
679
+ "[Ga+3]": 980,
680
+ "C)": 265,
681
+ "[Pt-2]": 925,
682
+ "[Se]": 818,
683
+ "[Rh+3": 749,
684
+ "[Y": 695,
685
+ "%25": 922,
686
+ "[Mo-]": 1007,
687
+ "c1)": 411,
688
+ "c1ccccc1)": 453,
689
+ "Cl)cc": 395,
690
+ "[Al-3]": 927,
691
+ "[Sm+3]": 998,
692
+ "[Be-": 675,
693
+ "[Sb+]": 974,
694
+ "[B-": 532,
695
+ "[Tl": 690,
696
+ "c1ncnc2": 352,
697
+ "*": 194,
698
+ "[Os+4": 741,
699
+ "[N+2": 601,
700
+ "[SiH2]": 859,
701
+ "[Y+3": 809,
702
+ "y": 255,
703
+ "OP(=O)(O)OCC": 440,
704
+ "3OC(n4": 463,
705
+ "%12": 857,
706
+ "%20": 917,
707
+ "P": 225,
708
+ "[Au+3]": 1017,
709
+ "[Ho+": 736,
710
+ "[OH2+]": 867,
711
+ "[Y+3]": 1065,
712
+ "0.2": 486,
713
+ "[CH-]": 831,
714
+ "[Br+2]": 941,
715
+ "[CH2]": 885,
716
+ "[SeH": 562,
717
+ "D": 214,
718
+ "[A": 533,
719
+ "[Be+2]": 1027,
720
+ "[CH2": 540,
721
+ "(": 192,
722
+ "[Hg+]": 1064,
723
+ "<-": 862,
724
+ "[Li+": 629,
725
+ "[NH3": 491,
726
+ "[Nd+3]": 1050,
727
+ "B": 212,
728
+ "[Cr]": 1030,
729
+ "[Cs": 692,
730
+ "[Er+3]": 1015,
731
+ "C=": 316,
732
+ "[Eu+3]": 1025,
733
+ "[Fe-2": 647,
734
+ "C(=O)": 266,
735
+ "[NH+": 493,
736
+ "[O-])": 341,
737
+ "[Ir+3]": 985,
738
+ "[Te": 580,
739
+ "[BH-]": 908,
740
+ "[Cl+2]": 943,
741
+ "[Ti+4": 778,
742
+ "Z": 234,
743
+ "2)c1": 434,
744
+ "S": 227,
745
+ "[Ti+4]": 1033,
746
+ "[V+": 762,
747
+ "[P-": 628,
748
+ "[Zn-2]": 1062,
749
+ "c2n": 375,
750
+ "[Gd-5]": 850,
751
+ "[n+]": 469,
752
+ "n2": 329,
753
+ "(C": 441,
754
+ "[HH": 807,
755
+ "[Zn-": 805,
756
+ "[P": 512,
757
+ "[S-": 556,
758
+ "[Tl+": 691,
759
+ "Cc1": 409,
760
+ "[P-]": 932,
761
+ "[Cr+": 702,
762
+ "[I": 528,
763
+ "[U]": 993,
764
+ "[Al-": 620,
765
+ "[W": 593,
766
+ "[SeH]": 855,
767
+ ")N": 374,
768
+ "[R": 537,
769
+ "[Sc-]": 1019,
770
+ "[SiH2": 563,
771
+ "[cH-]": 845,
772
+ "[te": 634,
773
+ "[Na]": 856,
774
+ "[V+2]": 1031,
775
+ "c3ccc(": 353,
776
+ "c2)": 361,
777
+ "m": 247,
778
+ "n+2]": 392,
779
+ "p": 250,
780
+ "F)": 282,
781
+ "N": 223,
782
+ "[Ba+2]": 971,
783
+ "[Cs+]": 977,
784
+ "[Er+": 753,
785
+ "[H+": 757,
786
+ "[Cu+]": 956,
787
+ "[Cu-2]": 1042,
788
+ "[Hg-2": 797,
789
+ "[OH+": 506,
790
+ "[Os": 581,
791
+ "[Os+4]": 1006,
792
+ "[Hf": 717,
793
+ "[Pt+2": 734,
794
+ "[Ru": 538,
795
+ "[S]": 890,
796
+ "[Mn": 605,
797
+ "[Sr": 638,
798
+ "c3ccccc3)": 449,
799
+ "C1O": 335,
800
+ "n4": 429,
801
+ "[Au+3": 756,
802
+ "cn": 270,
803
+ "[Sb+3]": 975,
804
+ "CC(": 337,
805
+ "CC(C)": 334,
806
+ "[Pt+]": 965,
807
+ "[V+2": 774,
808
+ "[Z": 432,
809
+ "[nH": 487,
810
+ "[Be+2": 770,
811
+ "[Se-": 630,
812
+ "[Mn+3": 705,
813
+ "[": 235,
814
+ "[C+": 502,
815
+ "CCCN": 376,
816
+ "T": 228,
817
+ "[Eu+2": 733,
818
+ "[Fe+]": 1054,
819
+ "[NH+]": 814,
820
+ "%14": 911,
821
+ "[Os+3]": 1040,
822
+ "[Be+": 769,
823
+ "N1": 445,
824
+ "[Cr+]": 1011,
825
+ "[Ru+2]": 984,
826
+ "[Sm+3": 726,
827
+ "[Y]": 989,
828
+ "[c": 503,
829
+ "P(=O)(O)": 279,
830
+ "[CaH": 653,
831
+ "[Sc-": 759,
832
+ "[n": 292,
833
+ "c2cccc": 301,
834
+ "s": 252,
835
+ "[BiH3": 561,
836
+ "[IH]": 1044,
837
+ "[CaH2": 654,
838
+ "[se]": 830,
839
+ "[SeH2": 720,
840
+ "[Cl": 523,
841
+ "[Mn+3]": 983,
842
+ "CC": 258,
843
+ "O=": 286,
844
+ "[Cl-]": 835,
845
+ "C2O)": 360,
846
+ "G": 217,
847
+ "[K": 575,
848
+ "[Ce+3": 669,
849
+ "[Xe]": 967,
850
+ "[P-2]": 1048,
851
+ "[V": 569,
852
+ "[Os+3": 785,
853
+ "[Zn+2]": 433,
854
+ "[Fe-": 617,
855
+ "[Pt-]": 1045,
856
+ "[Hg+2]": 959,
857
+ "c(=O)": 313,
858
+ "[SH]": 864,
859
+ "[CH2+]": 887,
860
+ "c2cc": 272,
861
+ "[Gd-": 548,
862
+ "c2ccccc2)": 393,
863
+ "c4ccccc4": 470,
864
+ "[O-]": 319,
865
+ "OC)": 350,
866
+ "l": 246,
867
+ "%19": 916,
868
+ "[Ru+3": 755,
869
+ "[c+": 544,
870
+ "[Cu]": 874,
871
+ "[Pt+2]": 1001,
872
+ "[La+3]": 970,
873
+ "n": 248,
874
+ "n2)": 483,
875
+ "n3": 406,
876
+ "%11": 848,
877
+ "[H": 551,
878
+ "[CH+]": 841,
879
+ "cnc5": 457,
880
+ "[La": 680,
881
+ "r": 251,
882
+ "NC(=O)": 298,
883
+ "C": 213,
884
+ "=": 209,
885
+ "OC": 412
886
+ }