VicenteAlex commited on
Commit
b66ca3e
·
verified ·
1 Parent(s): 63d86af

Updated large model + tokenizer

Browse files

Use with tokenizer_aa--ABPE_SMILES

checkpoint-90000/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/t5-efficient-large",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 4096,
8
+ "d_kv": 64,
9
+ "d_model": 1024,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 24,
22
+ "num_heads": 16,
23
+ "num_layers": 24,
24
+ "pad_token_id": 0,
25
+ "relative_attention_max_distance": 128,
26
+ "relative_attention_num_buckets": 32,
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.49.0",
29
+ "use_cache": true,
30
+ "vocab_size": 916
31
+ }
checkpoint-90000/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.49.0"
7
+ }
checkpoint-90000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49aa81e20d1e9c2ca6ee6e94c8f9940e27c9857d3d00639a2b974157a9e2d5d8
3
+ size 5646096481
checkpoint-90000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852ab527d6c8685c3661438ae205ba397d546d6c7fd5b9c9bd140beddbf36f31
3
+ size 2823004606
checkpoint-90000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aec490819c79a6f63fe57d7717945c2a62edd5bec5f7f87346f70d3f850fcb85
3
+ size 15984
checkpoint-90000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37e065b2911a334ed5a5026117629aa110be9d775100538d0c01453f82eee98d
3
+ size 15984
checkpoint-90000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f180712fe8cfdf84e5cb2ab7db18345adca83cd77ffb7ba8b0e6723e69a704d
3
+ size 15984
checkpoint-90000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f335f56641200416b7a555d1ecaa826c766e27375d7e530587882b2ff987415b
3
+ size 15984
checkpoint-90000/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b352deb29d54c538722b4b515ad977da373b52f817f296fadd884c68f611122f
3
+ size 15984
checkpoint-90000/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce9bf4d9e7df4bfb9d45b8d968d81b95d6ac022db70b2022784dbbc7efb2fee8
3
+ size 15984
checkpoint-90000/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc2336a849dd388832803ca1ae1551ee679915f7a474cc3e3fe34ceb7c7ddc02
3
+ size 15984
checkpoint-90000/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67d9c360ffd4194eb0856cbfed8e7ae8723cd696836761c8bdf375b97d527937
3
+ size 15984
checkpoint-90000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13808520a5a17a0cf056d8a9302f9e37bbae12098e8c1c6aabbc4c8fec47971c
3
+ size 1064
checkpoint-90000/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
checkpoint-90000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-90000/tokenizer_config.json ADDED
@@ -0,0 +1,940 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": null,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<pad>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<unk>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<extra_id_99>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<extra_id_98>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "5": {
45
+ "content": "<extra_id_97>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "6": {
53
+ "content": "<extra_id_96>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "7": {
61
+ "content": "<extra_id_95>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "8": {
69
+ "content": "<extra_id_94>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "9": {
77
+ "content": "<extra_id_93>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "10": {
85
+ "content": "<extra_id_92>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "11": {
93
+ "content": "<extra_id_91>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "12": {
101
+ "content": "<extra_id_90>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "13": {
109
+ "content": "<extra_id_89>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "14": {
117
+ "content": "<extra_id_88>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "15": {
125
+ "content": "<extra_id_87>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "16": {
133
+ "content": "<extra_id_86>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "17": {
141
+ "content": "<extra_id_85>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "18": {
149
+ "content": "<extra_id_84>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "19": {
157
+ "content": "<extra_id_83>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "20": {
165
+ "content": "<extra_id_82>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "21": {
173
+ "content": "<extra_id_81>",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "22": {
181
+ "content": "<extra_id_80>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "23": {
189
+ "content": "<extra_id_79>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "24": {
197
+ "content": "<extra_id_78>",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "25": {
205
+ "content": "<extra_id_77>",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "26": {
213
+ "content": "<extra_id_76>",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "27": {
221
+ "content": "<extra_id_75>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "28": {
229
+ "content": "<extra_id_74>",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "29": {
237
+ "content": "<extra_id_73>",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "30": {
245
+ "content": "<extra_id_72>",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "31": {
253
+ "content": "<extra_id_71>",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "32": {
261
+ "content": "<extra_id_70>",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "33": {
269
+ "content": "<extra_id_69>",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "34": {
277
+ "content": "<extra_id_68>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "35": {
285
+ "content": "<extra_id_67>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "36": {
293
+ "content": "<extra_id_66>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "37": {
301
+ "content": "<extra_id_65>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "38": {
309
+ "content": "<extra_id_64>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "39": {
317
+ "content": "<extra_id_63>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "40": {
325
+ "content": "<extra_id_62>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "41": {
333
+ "content": "<extra_id_61>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "42": {
341
+ "content": "<extra_id_60>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "43": {
349
+ "content": "<extra_id_59>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "44": {
357
+ "content": "<extra_id_58>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "45": {
365
+ "content": "<extra_id_57>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "46": {
373
+ "content": "<extra_id_56>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "47": {
381
+ "content": "<extra_id_55>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "48": {
389
+ "content": "<extra_id_54>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "49": {
397
+ "content": "<extra_id_53>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "50": {
405
+ "content": "<extra_id_52>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "51": {
413
+ "content": "<extra_id_51>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "52": {
421
+ "content": "<extra_id_50>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "53": {
429
+ "content": "<extra_id_49>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "54": {
437
+ "content": "<extra_id_48>",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "55": {
445
+ "content": "<extra_id_47>",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "56": {
453
+ "content": "<extra_id_46>",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "57": {
461
+ "content": "<extra_id_45>",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "58": {
469
+ "content": "<extra_id_44>",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "59": {
477
+ "content": "<extra_id_43>",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "60": {
485
+ "content": "<extra_id_42>",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "61": {
493
+ "content": "<extra_id_41>",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "62": {
501
+ "content": "<extra_id_40>",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "63": {
509
+ "content": "<extra_id_39>",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "64": {
517
+ "content": "<extra_id_38>",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "65": {
525
+ "content": "<extra_id_37>",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "66": {
533
+ "content": "<extra_id_36>",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "67": {
541
+ "content": "<extra_id_35>",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "68": {
549
+ "content": "<extra_id_34>",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "69": {
557
+ "content": "<extra_id_33>",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "70": {
565
+ "content": "<extra_id_32>",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "71": {
573
+ "content": "<extra_id_31>",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "72": {
581
+ "content": "<extra_id_30>",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "73": {
589
+ "content": "<extra_id_29>",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "74": {
597
+ "content": "<extra_id_28>",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "75": {
605
+ "content": "<extra_id_27>",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "76": {
613
+ "content": "<extra_id_26>",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "77": {
621
+ "content": "<extra_id_25>",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "78": {
629
+ "content": "<extra_id_24>",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "79": {
637
+ "content": "<extra_id_23>",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "80": {
645
+ "content": "<extra_id_22>",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "81": {
653
+ "content": "<extra_id_21>",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "82": {
661
+ "content": "<extra_id_20>",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "83": {
669
+ "content": "<extra_id_19>",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "84": {
677
+ "content": "<extra_id_18>",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "85": {
685
+ "content": "<extra_id_17>",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "86": {
693
+ "content": "<extra_id_16>",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "87": {
701
+ "content": "<extra_id_15>",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "88": {
709
+ "content": "<extra_id_14>",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "89": {
717
+ "content": "<extra_id_13>",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "90": {
725
+ "content": "<extra_id_12>",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "91": {
733
+ "content": "<extra_id_11>",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "92": {
741
+ "content": "<extra_id_10>",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "93": {
749
+ "content": "<extra_id_9>",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "94": {
757
+ "content": "<extra_id_8>",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "95": {
765
+ "content": "<extra_id_7>",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "96": {
773
+ "content": "<extra_id_6>",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "97": {
781
+ "content": "<extra_id_5>",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "98": {
789
+ "content": "<extra_id_4>",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "99": {
797
+ "content": "<extra_id_3>",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "100": {
805
+ "content": "<extra_id_2>",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "101": {
813
+ "content": "<extra_id_1>",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "102": {
821
+ "content": "<extra_id_0>",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ }
828
+ },
829
+ "additional_special_tokens": [
830
+ "<extra_id_0>",
831
+ "<extra_id_1>",
832
+ "<extra_id_2>",
833
+ "<extra_id_3>",
834
+ "<extra_id_4>",
835
+ "<extra_id_5>",
836
+ "<extra_id_6>",
837
+ "<extra_id_7>",
838
+ "<extra_id_8>",
839
+ "<extra_id_9>",
840
+ "<extra_id_10>",
841
+ "<extra_id_11>",
842
+ "<extra_id_12>",
843
+ "<extra_id_13>",
844
+ "<extra_id_14>",
845
+ "<extra_id_15>",
846
+ "<extra_id_16>",
847
+ "<extra_id_17>",
848
+ "<extra_id_18>",
849
+ "<extra_id_19>",
850
+ "<extra_id_20>",
851
+ "<extra_id_21>",
852
+ "<extra_id_22>",
853
+ "<extra_id_23>",
854
+ "<extra_id_24>",
855
+ "<extra_id_25>",
856
+ "<extra_id_26>",
857
+ "<extra_id_27>",
858
+ "<extra_id_28>",
859
+ "<extra_id_29>",
860
+ "<extra_id_30>",
861
+ "<extra_id_31>",
862
+ "<extra_id_32>",
863
+ "<extra_id_33>",
864
+ "<extra_id_34>",
865
+ "<extra_id_35>",
866
+ "<extra_id_36>",
867
+ "<extra_id_37>",
868
+ "<extra_id_38>",
869
+ "<extra_id_39>",
870
+ "<extra_id_40>",
871
+ "<extra_id_41>",
872
+ "<extra_id_42>",
873
+ "<extra_id_43>",
874
+ "<extra_id_44>",
875
+ "<extra_id_45>",
876
+ "<extra_id_46>",
877
+ "<extra_id_47>",
878
+ "<extra_id_48>",
879
+ "<extra_id_49>",
880
+ "<extra_id_50>",
881
+ "<extra_id_51>",
882
+ "<extra_id_52>",
883
+ "<extra_id_53>",
884
+ "<extra_id_54>",
885
+ "<extra_id_55>",
886
+ "<extra_id_56>",
887
+ "<extra_id_57>",
888
+ "<extra_id_58>",
889
+ "<extra_id_59>",
890
+ "<extra_id_60>",
891
+ "<extra_id_61>",
892
+ "<extra_id_62>",
893
+ "<extra_id_63>",
894
+ "<extra_id_64>",
895
+ "<extra_id_65>",
896
+ "<extra_id_66>",
897
+ "<extra_id_67>",
898
+ "<extra_id_68>",
899
+ "<extra_id_69>",
900
+ "<extra_id_70>",
901
+ "<extra_id_71>",
902
+ "<extra_id_72>",
903
+ "<extra_id_73>",
904
+ "<extra_id_74>",
905
+ "<extra_id_75>",
906
+ "<extra_id_76>",
907
+ "<extra_id_77>",
908
+ "<extra_id_78>",
909
+ "<extra_id_79>",
910
+ "<extra_id_80>",
911
+ "<extra_id_81>",
912
+ "<extra_id_82>",
913
+ "<extra_id_83>",
914
+ "<extra_id_84>",
915
+ "<extra_id_85>",
916
+ "<extra_id_86>",
917
+ "<extra_id_87>",
918
+ "<extra_id_88>",
919
+ "<extra_id_89>",
920
+ "<extra_id_90>",
921
+ "<extra_id_91>",
922
+ "<extra_id_92>",
923
+ "<extra_id_93>",
924
+ "<extra_id_94>",
925
+ "<extra_id_95>",
926
+ "<extra_id_96>",
927
+ "<extra_id_97>",
928
+ "<extra_id_98>",
929
+ "<extra_id_99>"
930
+ ],
931
+ "clean_up_tokenization_spaces": false,
932
+ "eos_token": "</s>",
933
+ "extra_ids": 100,
934
+ "extra_special_tokens": {},
935
+ "model_max_length": 1000000000000000019884624838656,
936
+ "pad_token": "<pad>",
937
+ "sp_model_kwargs": {},
938
+ "tokenizer_class": "T5TokenizerFast",
939
+ "unk_token": "<unk>"
940
+ }
checkpoint-90000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-90000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b836e9efdf191d1692da0b6716c415a3c5104c52b2aa29a4463befc8d62aaa5
3
+ size 5752
tokenizer_aa--ABPE_SMILES/tokenizer_ABPE_rexzyme_offset/merges.txt ADDED
@@ -0,0 +1,881 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #version: 0.2
2
+ [ C
3
+ [C @
4
+ [C@ H
5
+ [C@H ]
6
+ [ O
7
+ [O -
8
+ [O- ]
9
+ [ C
10
+ [C @
11
+ [C@ @
12
+ [C@@ H
13
+ [C@@H ]
14
+ [ H
15
+ [H +
16
+ [H+ ]
17
+ [ n
18
+ [n H
19
+ [nH ]
20
+ [ S
21
+ [S +
22
+ [S+ ]
23
+ [ N
24
+ [N H
25
+ [NH 3
26
+ [NH3 +
27
+ [NH3+ ]
28
+ [ N
29
+ [N @
30
+ [N@ +
31
+ [N@+ ]
32
+ [ n
33
+ [n +
34
+ [n+ ]
35
+ [ P
36
+ [P ]
37
+ [ H
38
+ [H ]
39
+ [ C
40
+ [C @
41
+ [C@ @
42
+ [C@@ ]
43
+ [ C
44
+ [C @
45
+ [C@ ]
46
+ [ N
47
+ [N H
48
+ [NH 2
49
+ [NH2 +
50
+ [NH2+ ]
51
+ [ n
52
+ [n -
53
+ [n- ]
54
+ [ N
55
+ [N a
56
+ [Na +
57
+ [Na+ ]
58
+ [ N
59
+ [N +
60
+ [N+ ]
61
+ [ K
62
+ [K +
63
+ [K+ ]
64
+ [ F
65
+ [F e
66
+ [Fe ]
67
+ [ F
68
+ [F e
69
+ [Fe +
70
+ [Fe+ ]
71
+ [ S
72
+ [S H
73
+ [SH -
74
+ [SH- ]
75
+ [ N
76
+ [N H
77
+ [NH 4
78
+ [NH4 +
79
+ [NH4+ ]
80
+ [ C
81
+ [C o
82
+ [Co -
83
+ [Co- 2
84
+ [Co-2 ]
85
+ [ N
86
+ [N H
87
+ [NH 2
88
+ [NH2 -
89
+ [NH2- ]
90
+ [ 1
91
+ [1 *
92
+ [1* ]
93
+ [ N
94
+ [N H
95
+ [NH +
96
+ [NH+ ]
97
+ [ 2
98
+ [2 *
99
+ [2* ]
100
+ [ C
101
+ [C a
102
+ [Ca +
103
+ [Ca+ 2
104
+ [Ca+2 ]
105
+ [ 3
106
+ [3 *
107
+ [3* ]
108
+ [ 4
109
+ [4 *
110
+ [4* ]
111
+ C l
112
+ [ o
113
+ [o +
114
+ [o+ ]
115
+ [ M
116
+ [M g
117
+ [Mg -
118
+ [Mg- 2
119
+ [Mg-2 ]
120
+ [ C
121
+ [C -
122
+ [C- ]
123
+ % 1
124
+ %1 0
125
+ [ F
126
+ [F e
127
+ [Fe +
128
+ [Fe+ 3
129
+ [Fe+3 ]
130
+ [ F
131
+ [F e
132
+ [Fe +
133
+ [Fe+ 2
134
+ [Fe+2 ]
135
+ B r
136
+ [ C
137
+ [C u
138
+ [Cu ]
139
+ [ F
140
+ [F -
141
+ [F- ]
142
+ [ M
143
+ [M n
144
+ [Mn +
145
+ [Mn+ 2
146
+ [Mn+2 ]
147
+ [ S
148
+ [S H
149
+ [SH ]
150
+ [ S
151
+ [S -
152
+ [S- ]
153
+ [ F
154
+ [F e
155
+ [Fe -
156
+ [Fe- 2
157
+ [Fe-2 ]
158
+ [ S
159
+ [S @
160
+ [S@ ]
161
+ [ S
162
+ [S e
163
+ [Se ]
164
+ [ C
165
+ [C l
166
+ [Cl -
167
+ [Cl- ]
168
+ [ S
169
+ [S ]
170
+ [ C
171
+ [C o
172
+ [Co +
173
+ [Co+ ]
174
+ [ C
175
+ [C o
176
+ [Co +
177
+ [Co+ 2
178
+ [Co+2 ]
179
+ [ C
180
+ [C o
181
+ [Co -
182
+ [Co- 3
183
+ [Co-3 ]
184
+ [ C
185
+ [C d
186
+ [Cd +
187
+ [Cd+ 2
188
+ [Cd+2 ]
189
+ [ A
190
+ [A s
191
+ [As ]
192
+ [ B
193
+ [B r
194
+ [Br -
195
+ [Br- ]
196
+ [ C
197
+ [C o
198
+ [Co -
199
+ [Co- 4
200
+ [Co-4 ]
201
+ [ F
202
+ [F e
203
+ [Fe -
204
+ [Fe- 3
205
+ [Fe-3 ]
206
+ [ M
207
+ [M g
208
+ [Mg +
209
+ [Mg+ +
210
+ [Mg++ ]
211
+ [ N
212
+ [N ]
213
+ [ O
214
+ [O +
215
+ [O+ ]
216
+ [ C
217
+ [C u
218
+ [Cu +
219
+ [Cu+ 2
220
+ [Cu+2 ]
221
+ [ C
222
+ [C u
223
+ [Cu +
224
+ [Cu+ ]
225
+ [ P
226
+ [P b
227
+ [Pb +
228
+ [Pb+ 2
229
+ [Pb+2 ]
230
+ [ F
231
+ [F e
232
+ [Fe -
233
+ [Fe- ]
234
+ [ M
235
+ [M g
236
+ [Mg +
237
+ [Mg+ 2
238
+ [Mg+2 ]
239
+ [ n
240
+ [n H
241
+ [nH +
242
+ [nH+ ]
243
+ [ M
244
+ [M o
245
+ [Mo ]
246
+ [ R
247
+ [R b
248
+ [Rb +
249
+ [Rb+ ]
250
+ [ C
251
+ [C H
252
+ [CH +
253
+ [CH+ ]
254
+ % 1
255
+ %1 1
256
+ % 1
257
+ %1 2
258
+ % 1
259
+ %1 3
260
+ % 1
261
+ %1 4
262
+ % 1
263
+ %1 5
264
+ % 1
265
+ %1 6
266
+ % 1
267
+ %1 7
268
+ % 1
269
+ %1 8
270
+ % 1
271
+ %1 9
272
+ % 2
273
+ %2 0
274
+ % 2
275
+ %2 1
276
+ % 2
277
+ %2 2
278
+ % 2
279
+ %2 3
280
+ % 2
281
+ %2 4
282
+ % 2
283
+ %2 5
284
+ % 2
285
+ %2 6
286
+ % 2
287
+ %2 7
288
+ % 2
289
+ %2 8
290
+ % 2
291
+ %2 9
292
+ % 3
293
+ %3 0
294
+ % 3
295
+ %3 1
296
+ % 3
297
+ %3 2
298
+ % 3
299
+ %3 3
300
+ % 3
301
+ %3 4
302
+ % 3
303
+ %3 5
304
+ % 3
305
+ %3 6
306
+ % 3
307
+ %3 7
308
+ % 3
309
+ %3 8
310
+ [ N
311
+ [N i
312
+ [Ni +
313
+ [Ni+ 2
314
+ [Ni+2 ]
315
+ [ S
316
+ [S @
317
+ [S@ @
318
+ [S@@ +
319
+ [S@@+ ]
320
+ [ P
321
+ [P H
322
+ [PH ]
323
+ [ *
324
+ [* :
325
+ [*: 0
326
+ [*:0 ]
327
+ [ Z
328
+ [Z n
329
+ [Zn +
330
+ [Zn+ 2
331
+ [Zn+2 ]
332
+ [ A
333
+ [A g
334
+ [Ag +
335
+ [Ag+ ]
336
+ [ S
337
+ [S e
338
+ [Se H
339
+ [SeH -
340
+ [SeH- ]
341
+ [ c
342
+ [c -
343
+ [c- ]
344
+ [ H
345
+ [H g
346
+ [Hg ]
347
+ [ C
348
+ [C l
349
+ [Cl +
350
+ [Cl+ ]
351
+ [ *
352
+ [* -
353
+ [*- ]
354
+ [ O
355
+ [O ]
356
+ [ S
357
+ [S @
358
+ [S@ @
359
+ [S@@ ]
360
+ [ S
361
+ [S e
362
+ [Se H
363
+ [SeH ]
364
+ [ I
365
+ [I n
366
+ [In +
367
+ [In+ 3
368
+ [In+3 ]
369
+ [ S
370
+ [S b
371
+ [Sb ]
372
+ [ c
373
+ [c H
374
+ [cH -
375
+ [cH- ]
376
+ [ S
377
+ [S @
378
+ [S@ +
379
+ [S@+ ]
380
+ [ I
381
+ [I -
382
+ [I- ]
383
+ [ N
384
+ [N @
385
+ [N@ @
386
+ [N@@ H
387
+ [N@@H +
388
+ [N@@H+ ]
389
+ [ N
390
+ [N -
391
+ [N- ]
392
+ [ C
393
+ [C l
394
+ [Cl +
395
+ [Cl+ 2
396
+ [Cl+2 ]
397
+ [ F
398
+ [F e
399
+ [Fe +
400
+ [Fe+ 4
401
+ [Fe+4 ]
402
+ [ N
403
+ [N i
404
+ [Ni -
405
+ [Ni- 2
406
+ [Ni-2 ]
407
+ [ M
408
+ [M n
409
+ [Mn +
410
+ [Mn+ 3
411
+ [Mn+3 ]
412
+ [ C
413
+ [C s
414
+ [Cs +
415
+ [Cs+ ]
416
+ [ T
417
+ [T e
418
+ [Te ]
419
+ [ A
420
+ [A s
421
+ [As H
422
+ [AsH 2
423
+ [AsH2 ]
424
+ [ C
425
+ [C r
426
+ [Cr +
427
+ [Cr+ 6
428
+ [Cr+6 ]
429
+ [ C
430
+ [C r
431
+ [Cr +
432
+ [Cr+ 3
433
+ [Cr+3 ]
434
+ [ M
435
+ [M n
436
+ [Mn ]
437
+ [ L
438
+ [L i
439
+ [Li +
440
+ [Li+ ]
441
+ [ 9
442
+ [9 *
443
+ [9* ]
444
+ [ 8
445
+ [8 *
446
+ [8* ]
447
+ [ 7
448
+ [7 *
449
+ [7* ]
450
+ [ 6
451
+ [6 *
452
+ [6* ]
453
+ [ 5
454
+ [5 *
455
+ [5* ]
456
+ [ M
457
+ [M g
458
+ [Mg ]
459
+ < -
460
+ [ N
461
+ [N @
462
+ [N@ H
463
+ [N@H +
464
+ [N@H+ ]
465
+ [ S
466
+ [S i
467
+ [Si ]
468
+ [ W
469
+ [W ]
470
+ [ S
471
+ [S -
472
+ [S- -
473
+ [S-- ]
474
+ [ C
475
+ [C H
476
+ [CH ]
477
+ [ B
478
+ [B -
479
+ [B- ]
480
+ [ N
481
+ [N @
482
+ [N@ @
483
+ [N@@ +
484
+ [N@@+ ]
485
+ [ H
486
+ [H g
487
+ [Hg +
488
+ [Hg+ 2
489
+ [Hg+2 ]
490
+ [ S
491
+ [S e
492
+ [Se -
493
+ [Se- ]
494
+ [ C
495
+ [C H
496
+ [CH -
497
+ [CH- ]
498
+ [ C
499
+ [C o
500
+ [Co +
501
+ [Co+ 3
502
+ [Co+3 ]
503
+ [ C
504
+ [C H
505
+ [CH 2
506
+ [CH2 -
507
+ [CH2- ]
508
+ [ P
509
+ [P +
510
+ [P+ ]
511
+ [ C
512
+ [C o
513
+ [Co +
514
+ [Co+ +
515
+ [Co++ ]
516
+ [ N
517
+ [N i
518
+ [Ni -
519
+ [Ni- ]
520
+ [ O
521
+ [O H
522
+ [OH -
523
+ [OH- ]
524
+ [ P
525
+ [P H
526
+ [PH 2
527
+ [PH2 ]
528
+ [ O
529
+ [O H
530
+ [OH 2
531
+ [OH2 +
532
+ [OH2+ ]
533
+ [ C
534
+ [C l
535
+ [Cl ]
536
+ [ H
537
+ [H g
538
+ [Hg +
539
+ [Hg+ ]
540
+ [ C
541
+ [C H
542
+ [CH 2
543
+ [CH2 +
544
+ [CH2+ ]
545
+ H ]
546
+ O )
547
+ [ C
548
+ [C @
549
+ = O)
550
+ - ]
551
+ [ O
552
+ [O -]
553
+ ( =O)
554
+ H] (
555
+ [O-] )
556
+ [C@ @
557
+ ( [O-])
558
+ C C
559
+ n c
560
+ O P
561
+ (=O) ([O-])
562
+ OP (=O)([O-])
563
+ [C@ H](
564
+ [C@@ H](
565
+ H] 1
566
+ C (=O)
567
+ c 1
568
+ H]1 O
569
+ + ]
570
+ C )
571
+ [C@@H]( O)
572
+ C (
573
+ C OP(=O)([O-])
574
+ . [
575
+ c1 nc
576
+ c (
577
+ [C@H]( O)
578
+ n 2
579
+ [C@ H]1O
580
+ H] 2
581
+ COP(=O)([O-]) OP(=O)([O-])
582
+ > >
583
+ [C@@ H]1O
584
+ . O
585
+ = C
586
+ H +]
587
+ .[ H+]
588
+ c c
589
+ [O-]) [C@@H](O)
590
+ N )
591
+ [C@H]( COP(=O)([O-])OP(=O)([O-])
592
+ nc 3
593
+ O C
594
+ N c1nc
595
+ [ n
596
+ N H
597
+ c( N)
598
+ nc 2
599
+ H] 3
600
+ c(N) nc
601
+ * )
602
+ [O-])[C@@H](O) [C@H]1O
603
+ 3 +]
604
+ NH 3+]
605
+ c1nc n2
606
+ nc2 c1ncn2
607
+ nc2c1ncn2 [C@@H]1O
608
+ [C@ H]2
609
+ Nc1nc nc2c1ncn2[C@@H]1O
610
+ C( C)
611
+ = O
612
+ C(=O) [O-]
613
+ CC CC
614
+ [C@@ H]2
615
+ N C(=O)
616
+ CC /
617
+ nc 5
618
+ ( C)
619
+ C =
620
+ ) [C@H](O)
621
+ C(=O) N
622
+ c (=O)
623
+ C(=O) [O-])
624
+ .O =
625
+ O) [C@@H](O)
626
+ . Nc1ncnc2c1ncn2[C@@H]1O
627
+ [C@@H]2 O
628
+ [ NH3+]
629
+ [C@H]2 O)
630
+ [C@@ H]1
631
+ ([O-]) ([O-])
632
+ P ([O-])([O-])
633
+ C( *)
634
+ CC (=O)
635
+ n2 c
636
+ nc3 2
637
+ [NH3+] )
638
+ c1 cc
639
+ n2c nc3
640
+ c(=O) [n
641
+ C(C) =C
642
+ [C@ H]1
643
+ [C@H]1O [C@@H](
644
+ [C@@H]2O [C@H](COP(=O)([O-])OP(=O)([O-])
645
+ [C@ H]3
646
+ n2cnc3 c(N)nc
647
+ * N
648
+ n2cnc3c(N)nc nc32
649
+ .O= P([O-])([O-])
650
+ .[H+] .[H+]
651
+ O [C@@H](
652
+ [C@H]1O[C@@H]( n2cnc3c(N)ncnc32
653
+ [C@H]1O[C@@H](n2cnc3c(N)ncnc32 )[C@H](O)
654
+ O[C@@H]( n
655
+ C= C(
656
+ [C@@ H]3
657
+ C(*) =O
658
+ .Nc1ncnc2c1ncn2[C@@H]1O [C@H](COP(=O)([O-])OP(=O)([O-])
659
+ ( [C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])
660
+ CC/ C(C)=C
661
+ [C@H]( [NH3+])
662
+ *N [C@@H](
663
+ 4 c
664
+ [C@H]3 O[C@@H](n
665
+ 4c nc5
666
+ O)[C@@H](O) [C@H]2O)
667
+ F e
668
+ [C@H]3O[C@@H](n 4cnc5
669
+ c(N)nc nc5
670
+ c(N)ncnc5 4
671
+ [C@H]3O[C@@H](n4cnc5 c(N)ncnc54
672
+ OC [C@H]3O[C@@H](n4cnc5c(N)ncnc54
673
+ OP(=O)([O-]) [O-])[C@@H](O)[C@H]1O
674
+ OP(=O)([O-]) [O-])
675
+ [C@H]( C
676
+ ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-]) OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54
677
+ [C@@H]3 O)[C@@H](O)[C@H]2O)
678
+ CC/ C=C(
679
+ C) CC/C=C(
680
+ OC [C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)
681
+ C 1
682
+ C(=O)N CC
683
+ c 2
684
+ [C@H]( COP(=O)([O-])
685
+ / CC/C(C)=C
686
+ [C@@H]( CC
687
+ CC (C)
688
+ CC [C@H]([NH3+])
689
+ [n H]
690
+ .[ NH3+]
691
+ c(=O)[n H]1
692
+ .O =C
693
+ C [C@H](O)
694
+ >> *
695
+ * ]
696
+ .Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-]) OP(=O)([O-])[O-])[C@@H](O)[C@H]1O
697
+ /CC/C(C)=C /CC/C(C)=C
698
+ O [C@@H]1
699
+ CC 1
700
+ OP(=O)([O-]) O
701
+ O [C@H]1
702
+ >> O
703
+ C S
704
+ C(*)=O .
705
+ Fe +
706
+ .[ Fe+
707
+ [n +]
708
+ CC NC(=O)
709
+ c(=O)[n H]
710
+ C O)
711
+ .O=P([O-])([O-]) O
712
+ c1cc c
713
+ OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O) [C@@H]1
714
+ 2 c(
715
+ C1 =C
716
+ [C@H]( OP(=O)([O-])[O-])
717
+ ( *)
718
+ COP(=O)([O-])OP(=O)([O-]) OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1
719
+ OP(=O)([O-]) [O-]
720
+ CC C
721
+ [O-])[C@@H](O) [C@H]2O)
722
+ .O=P([O-])([O-]) OP(=O)([O-])O
723
+ / C)CC/C=C(
724
+ (=O) [O-]
725
+ Nc1nc 2c(
726
+ ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54 )[C@H](O)
727
+ OP (*)
728
+ NC(=O) c1ccc
729
+ Nc1ncnc2c1ncn2[C@@H]1O [C@H](COP(=O)([O-])OP(=O)([O-])
730
+ C =C
731
+ ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O) [C@@H]3O)[C@@H](O)[C@H]2O)
732
+ NC(=O) C1=C
733
+ NC(=O)C1=C N
734
+ CCCC CCCC
735
+ C= CC1
736
+ NC(=O)c1ccc [n+]
737
+ C OP(*)
738
+ .O >>
739
+ . C
740
+ O[C@@H]1 COP(*)
741
+ .Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-]) [O-])[C@@H](O)[C@H]1O
742
+ O[C@@H]1COP(*) (=O)[O-]
743
+ n (
744
+ ) [C@H](OP(=O)([O-])[O-])
745
+ ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54 )[C@H](OP(=O)([O-])[O-])
746
+ [C@@H](O) [C@H](
747
+ [C@H](C O)[C@@H](O)
748
+ [ S
749
+ ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-]) [C@@H]3O)[C@@H](O)[C@H]2O)
750
+ /C)CC/C=C( /C)CC/C=C(
751
+ = C(
752
+ [C@@H](CC C(=O)[O-])
753
+ [ Fe
754
+ c( O)
755
+ .O=C ([O-])
756
+ C[C@H](O) [C@@H](
757
+ [C@@H]( C)
758
+ C(=O)NCC S
759
+ C(=O)NCC C(=O)NCCS
760
+ [ N
761
+ [C@H](COP(=O)([O-]) [O-])[C@@H](O)[C@H]1O
762
+ ] .[Fe+
763
+ CC(=O) [O-])
764
+ *] )
765
+ [C@H](O) [C@H](O)
766
+ C)CC/C=C( \
767
+ C(=O)[O-]) C
768
+ c c1
769
+ n 1
770
+ [N +]
771
+ O[C@H]1 [C@@H](O)[C@H](
772
+ C N
773
+ [C@ ]
774
+ c 3
775
+ .[H+].[H+] .[H+].[H+]
776
+ [C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O) [C@@H]1O
777
+ P (=O)([O-])
778
+ O [C@H](CO)[C@@H](O)
779
+ .Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O >>
780
+ / C=C
781
+ CC(=O) N
782
+ OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O) [C@@H]1O
783
+ [C@H]( C)
784
+ c1 c
785
+ C(*) =O)
786
+ COP(=O)([O-]) [O-]
787
+ *N[C@@H]( CS
788
+ P(=O)([O-]) OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O
789
+ [O-])[C@@H](O)[C@H]2O) c(=O)[nH]1
790
+ N) =O)
791
+ 2 )
792
+ C(=O)[O-] >>
793
+ CC(C) (
794
+ C(C) (C)
795
+ COP(=O)([O-]) [O-])
796
+ [S +]
797
+ [S+] (
798
+ OP(=O)([O-]) OC
799
+ C O
800
+ C(=O)[O-])C [C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O
801
+ [S+]( CC[C@H]([NH3+])
802
+ [S+](CC[C@H]([NH3+]) C(=O)[O-])C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O
803
+ ( O)
804
+ CC C(=O)[O-])
805
+ 1 *]
806
+ c1 [nH]
807
+ nc n2
808
+ OP(=O)([O-]) [O-])[C@@H](O)
809
+ C [C@H](
810
+ Nc1nc2c( ncn2
811
+ = P([O-])([O-])
812
+ [C@H](O) C(C)(C)
813
+ CCNC(=O) CCNC(=O)
814
+ S CCNC(=O)CCNC(=O)
815
+ SCCNC(=O)CCNC(=O) [C@H](O)C(C)(C)
816
+ ) C(*)=O.
817
+ c( C)
818
+ SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C) COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1
819
+ C(=O)[O-] .
820
+ C)CC/C=C(\ C)CC/C=C(\
821
+ SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1 OP(=O)([O-])[O-]
822
+ [Fe +]
823
+ CC (
824
+ .[ 1*]
825
+ CC(C)( COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1
826
+ [C@H](C S
827
+ =C ([O-])
828
+ [C@@ ]
829
+ CC[C@H]([NH3+]) C(=O)
830
+ [C@H](CS CC[C@H]([NH3+])C(=O)
831
+ [C@H](CSCC[C@H]([NH3+])C(=O) [O-])[C@@H](O)[C@H]1O
832
+ Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-]) OP(=O)([O-])[O-])[C@@H](O)[C@H]1O
833
+ ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O) c1
834
+ NC(=O)c1ccc[n+] ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1
835
+ OP(=O)([O-])[O-])[C@@H](O) C(=O)NCCC(=O)NCCS
836
+ CC(C)(COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1 OP(=O)([O-])[O-])[C@@H](O)C(=O)NCCC(=O)NCCS
837
+ .Nc1ncnc2c1ncn2[C@@H]1O [C@H](CSCC[C@H]([NH3+])C(=O)[O-])[C@@H](O)[C@H]1O
838
+ \ CC/C(C)=C
839
+ .[H+] >>
840
+ C c1c
841
+ CC(=O) [O-]
842
+ .C [S+](CC[C@H]([NH3+])C(=O)[O-])C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O
843
+ NC(=O)C1=CN ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)
844
+ OP(=O)([O-]) OP(=O)([O-])OC
845
+ CC(=O)N [C@H]1
846
+ /CC/C(C)=C/CC/C(C)=C /CC/C(C)=C/CC/C(C)=C
847
+ *N [C@H](
848
+ O 1
849
+ NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O) C=CC1
850
+ =O) O[C@@H]1COP(*)(=O)[O-]
851
+ C[C@H](O)[C@@H]( COP(=O)([O-])OP(=O)([O-])
852
+ .O=P([O-])([O-])O .[H+]
853
+ .Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O .[H+]
854
+ .Nc1ncnc2c1ncn2[C@@H]1O [C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O
855
+ cc c(=O)[nH]
856
+ O 2)
857
+ ) N
858
+ *N[C@H]( C(*)=O)
859
+ NC(=O)C1=CN ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)
860
+ COP(=O)([O-]) O
861
+ * O[C@H]1[C@@H](O)[C@H](
862
+ [C@@H]2 O)
863
+ *N[C@H](C(*)=O) [C@@H](C)
864
+ c1 O
865
+ NC(=O)c1ccc[n+] ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)
866
+ NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O) c1
867
+ NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O) C=CC1
868
+ Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-]) [O-])[C@@H](O)[C@H]1O
869
+ CC(C) =
870
+ [C@@H]( [NH3+])
871
+ \CC/C(C)=C \CC/C(C)=C
872
+ O[C@H](CO)[C@@H](O) [C@@H]1O
873
+ [C@H](O) [C@@H](O)
874
+ Nc1nc2c( c(=O)[nH]1
875
+ 3 ].[Fe+
876
+ / C(C)=C
877
+ [C@H]2 O[C@@H](n
878
+ =O) [C@H](O)
879
+ 2 ].[Fe+
880
+ [C@@H]2O [C@H](COP(=O)([O-])
881
+ C(=O)N [C@@H](
tokenizer_aa--ABPE_SMILES/tokenizer_ABPE_rexzyme_offset/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "mask_token": {
17
+ "content": "<mask>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "pad_token": {
24
+ "content": "<pad>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "<unk>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer_aa--ABPE_SMILES/tokenizer_ABPE_rexzyme_offset/tokenizer.json ADDED
@@ -0,0 +1,1744 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 185,
8
+ "content": "<s>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 0,
17
+ "content": "<pad>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 1,
26
+ "content": "</s>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 2,
35
+ "content": "<unk>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 189,
44
+ "content": "<mask>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ }
51
+ ],
52
+ "normalizer": null,
53
+ "pre_tokenizer": null,
54
+ "post_processor": {
55
+ "type": "TemplateProcessing",
56
+ "single": [
57
+ {
58
+ "SpecialToken": {
59
+ "id": "<s>",
60
+ "type_id": 0
61
+ }
62
+ },
63
+ {
64
+ "Sequence": {
65
+ "id": "A",
66
+ "type_id": 0
67
+ }
68
+ },
69
+ {
70
+ "SpecialToken": {
71
+ "id": "</s>",
72
+ "type_id": 0
73
+ }
74
+ }
75
+ ],
76
+ "pair": [
77
+ {
78
+ "Sequence": {
79
+ "id": "A",
80
+ "type_id": 0
81
+ }
82
+ },
83
+ {
84
+ "Sequence": {
85
+ "id": "B",
86
+ "type_id": 1
87
+ }
88
+ }
89
+ ],
90
+ "special_tokens": {
91
+ "</s>": {
92
+ "id": "</s>",
93
+ "ids": [
94
+ 1
95
+ ],
96
+ "tokens": [
97
+ "</s>"
98
+ ]
99
+ },
100
+ "<s>": {
101
+ "id": "<s>",
102
+ "ids": [
103
+ 185
104
+ ],
105
+ "tokens": [
106
+ "<s>"
107
+ ]
108
+ }
109
+ }
110
+ },
111
+ "decoder": {
112
+ "type": "Replace",
113
+ "pattern": {
114
+ "String": ""
115
+ },
116
+ "content": ""
117
+ },
118
+ "model": {
119
+ "type": "BPE",
120
+ "dropout": null,
121
+ "unk_token": "<unk>",
122
+ "continuing_subword_prefix": null,
123
+ "end_of_word_suffix": null,
124
+ "fuse_unk": false,
125
+ "byte_fallback": false,
126
+ "ignore_merges": false,
127
+ "vocab": {
128
+ "[Si": 896,
129
+ "[S@@": 834,
130
+ "[C@H](CO)[C@@H](O)": 450,
131
+ "[N@@+]": 717,
132
+ "C(=O)": 269,
133
+ "NH3+]": 307,
134
+ "%37": 670,
135
+ "%36": 669,
136
+ "/CC/C(C)=C": 388,
137
+ "[2*": 770,
138
+ "C[C@H](O)": 396,
139
+ "[AsH": 875,
140
+ "C(=O)[O-])": 326,
141
+ "K": 220,
142
+ "NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)c1": 569,
143
+ "[Fe": 456,
144
+ "[N@@H": 863,
145
+ "(": 192,
146
+ "COP(=O)([O-])[O-]": 489,
147
+ "[Na": 751,
148
+ "[SeH-]": 678,
149
+ "[In+": 855,
150
+ "[Co++]": 724,
151
+ ".[Fe+": 409,
152
+ "C)CC/C=C(\\": 469,
153
+ "[SeH]": 685,
154
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)": 434,
155
+ "OP(=O)([O-])[O-])[C@@H](O)C(=O)NCCC(=O)NCCS": 538,
156
+ "[Zn": 841,
157
+ "[Co-3]": 626,
158
+ "[H]": 590,
159
+ "[Fe-3]": 631,
160
+ "[O]": 683,
161
+ "Br": 613,
162
+ ".[H+]>>": 542,
163
+ "C(=O)NCC": 385,
164
+ "CC(": 526,
165
+ "[C@H](COP(=O)([O-])OP(=O)([O-])": 294,
166
+ "[Co+2]": 625,
167
+ "[N@H+]": 711,
168
+ "[Pb": 818,
169
+ "<unk>": 2,
170
+ "[7*]": 706,
171
+ "T": 228,
172
+ "[C@H](C)": 486,
173
+ "[n-": 750,
174
+ "[Fe-]": 638,
175
+ "N)": 293,
176
+ ".O>>": 441,
177
+ "[7*": 889,
178
+ "[Fe+4]": 694,
179
+ "[O+": 815,
180
+ "[C@@H](CC": 389,
181
+ "CCCCCCCC": 437,
182
+ "4": 203,
183
+ "[Cr+3]": 701,
184
+ "I": 219,
185
+ "[C@H]1": 345,
186
+ "[Na+]": 593,
187
+ "[Co+": 800,
188
+ "=O)[C@H](O)": 581,
189
+ "COP(=O)([O-])[O-])": 498,
190
+ "NC(=O)C1=CN": 436,
191
+ "[O-": 735,
192
+ "n1": 472,
193
+ ".": 197,
194
+ "2c(": 417,
195
+ "c1": 270,
196
+ ".O=": 327,
197
+ "[PH]": 674,
198
+ "[C": 250,
199
+ "[C@": 251,
200
+ "H+]": 289,
201
+ "[AsH2]": 699,
202
+ "[C-]": 609,
203
+ "[CH2-": 907,
204
+ "OP(=O)([O-])[O-]": 422,
205
+ "[Rb+]": 642,
206
+ "[Mn": 790,
207
+ "[Se-]": 719,
208
+ "C(": 275,
209
+ "[CH-]": 720,
210
+ "nc5": 320,
211
+ "[S+](CC[C@H]([NH3+])C(=O)[O-])C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 505,
212
+ "[S@+": 860,
213
+ "[NH2+]": 591,
214
+ ")[C@H](OP(=O)([O-])[O-])": 447,
215
+ "|": 247,
216
+ "[C@H](CSCC[C@H]([NH3+])C(=O)[O-])[C@@H](O)[C@H]1O": 534,
217
+ "[3": 774,
218
+ "[B-]": 716,
219
+ "[Li": 882,
220
+ "[cH-]": 688,
221
+ "M": 222,
222
+ "n2c": 338,
223
+ "C(=O)[O-])C": 470,
224
+ "[S@]": 620,
225
+ "[C@H]3O[C@@H](n": 367,
226
+ "[Co+3": 905,
227
+ "[S@+]": 689,
228
+ ".O=C([O-])": 458,
229
+ "[Mg+": 813,
230
+ "%3": 830,
231
+ "[Mg+2": 821,
232
+ "c1c": 487,
233
+ "[c-]": 679,
234
+ "[3*]": 604,
235
+ "[Co+2": 801,
236
+ "Z": 230,
237
+ "\\CC/C(C)=C\\CC/C(C)=C": 574,
238
+ "%21": 654,
239
+ "[CH2+]": 731,
240
+ "[Mg]": 709,
241
+ ".[H+]": 290,
242
+ "CCCC": 316,
243
+ "[6": 890,
244
+ "%31": 664,
245
+ "[C@@]": 531,
246
+ "CC(C)(": 496,
247
+ "[S@@+": 835,
248
+ "[Se-": 903,
249
+ ".[NH3+]": 393,
250
+ "[Cr": 877,
251
+ "c1ncn2": 308,
252
+ "c1nc": 278,
253
+ "C(=O)[O-]>>": 495,
254
+ "[Ca": 771,
255
+ "O[C@H]1[C@@H](O)[C@H](": 474,
256
+ "[As": 807,
257
+ "OP": 263,
258
+ "[S+](": 500,
259
+ "[Rb": 825,
260
+ "i": 240,
261
+ "[W]": 713,
262
+ ")N": 560,
263
+ "[8": 886,
264
+ "[Co-4": 811,
265
+ "H": 218,
266
+ "O)": 249,
267
+ "[Mg": 781,
268
+ "%35": 668,
269
+ "[NH3+": 743,
270
+ "c(=O)[n": 343,
271
+ "CC(=O)N[C@H]1": 548,
272
+ "[3*": 775,
273
+ "c1[nH]": 509,
274
+ "P(=O)([O-])": 480,
275
+ "[Mn]": 702,
276
+ "O[C@@H]1COP(*)": 443,
277
+ "[Fe+": 757,
278
+ "[Cl-": 799,
279
+ "[K+": 755,
280
+ "C[C@H](O)[C@@H](": 459,
281
+ "[*:": 838,
282
+ "[Hg+]": 730,
283
+ "%19": 652,
284
+ "[AsH2": 876,
285
+ "[S--": 898,
286
+ "O[C@H](CO)[C@@H](O)[C@@H]1O": 575,
287
+ "[Sb": 857,
288
+ "[S": 451,
289
+ "COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1": 421,
290
+ "[4*": 777,
291
+ "[N@+": 745,
292
+ "H]1O": 271,
293
+ "[*": 837,
294
+ "[Cl+2": 866,
295
+ "[Fe]": 595,
296
+ "cc1": 471,
297
+ "l": 241,
298
+ "[C@H]1O[C@@H](": 346,
299
+ "NH": 299,
300
+ "[C-": 784,
301
+ "[NH+]": 601,
302
+ "-]": 253,
303
+ "W": 229,
304
+ "[PH": 836,
305
+ "C": 216,
306
+ "[Se]": 621,
307
+ "[9": 884,
308
+ "%22": 655,
309
+ "5": 204,
310
+ "[Fe+3": 786,
311
+ "[C@@H](CCC(=O)[O-])": 455,
312
+ "[Ni-2]": 695,
313
+ "[OH-]": 726,
314
+ "c(": 279,
315
+ "cc": 291,
316
+ "=O": 314,
317
+ "%11": 644,
318
+ "/C(C)=C": 579,
319
+ "P(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 491,
320
+ "[Co-": 763,
321
+ "*N[C@H](C(*)=O)": 561,
322
+ "3].[Fe+": 578,
323
+ "[7": 888,
324
+ "[Mo]": 641,
325
+ "ncn2": 510,
326
+ "[F-]": 615,
327
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O.[H+]": 556,
328
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 399,
329
+ "NC(=O)c1ccc[n+]": 439,
330
+ "[5": 892,
331
+ "[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)": 356,
332
+ ">>": 285,
333
+ "c(O)": 457,
334
+ "[Fe+2": 787,
335
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1": 536,
336
+ "[Cd+": 804,
337
+ "OP(=O)([O-])OC": 501,
338
+ "[Mg-2]": 608,
339
+ "SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)([O-])[O-]": 524,
340
+ "/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C": 549,
341
+ "CC1": 402,
342
+ "[CH2": 906,
343
+ "[Z": 840,
344
+ "([O-])([O-])": 334,
345
+ ".Nc1ncnc2c1ncn2[C@@H]1O": 329,
346
+ ">>*": 397,
347
+ "C)": 273,
348
+ "[Br-]": 629,
349
+ "%15": 648,
350
+ "</s>": 1,
351
+ "[*-": 852,
352
+ "[H": 737,
353
+ ")[C@H](O)": 323,
354
+ "[NH+": 768,
355
+ "[Mg+2]": 639,
356
+ "CC": 261,
357
+ "[n": 298,
358
+ "[S@@+]": 673,
359
+ "[Pb+2": 820,
360
+ "[n+]": 410,
361
+ "[nH+": 822,
362
+ "[N+": 753,
363
+ "[OH2+": 914,
364
+ "nc2c1ncn2": 309,
365
+ "OP(=O)([O-])[O-])": 377,
366
+ "#version:": 732,
367
+ "[1": 766,
368
+ "[Se": 797,
369
+ "H]2": 283,
370
+ "NC(=O)C1=C": 435,
371
+ "CO": 502,
372
+ "[C@]": 476,
373
+ "[B-": 899,
374
+ "(=O)[O-]": 427,
375
+ "[SeH-": 847,
376
+ "[C@@H]([NH3+])": 573,
377
+ "[K": 754,
378
+ "<s>": 185,
379
+ "CC(=O)": 337,
380
+ "[9*": 885,
381
+ "[cH": 858,
382
+ "OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 485,
383
+ "[O-])[C@@H](O)[C@H]2O)": 424,
384
+ "C(=O)[O-])C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 503,
385
+ "[*-]": 682,
386
+ "[N@H": 894,
387
+ "[As]": 628,
388
+ "c(N)ncnc54": 373,
389
+ "[S-]": 618,
390
+ "n": 242,
391
+ "[": 231,
392
+ "<-": 710,
393
+ "CS": 406,
394
+ "*]": 398,
395
+ "O[C@H](CO)[C@@H](O)": 481,
396
+ "O[C@H]1": 404,
397
+ "[C@H](C": 378,
398
+ "[Cl+2]": 693,
399
+ "8": 207,
400
+ "COP(=O)([O-])": 276,
401
+ "/CC/C(C)=C/CC/C(C)=C": 400,
402
+ "R": 226,
403
+ "*])": 467,
404
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H]([NH3+])C(=O)[O-])[C@@H](O)[C@H]1O": 540,
405
+ "[Br-": 810,
406
+ "[Cu": 788,
407
+ "[Fe+2]": 612,
408
+ "[H+": 738,
409
+ "[C@H": 734,
410
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 557,
411
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O>>": 482,
412
+ "F": 217,
413
+ "[Mo": 823,
414
+ "[NH3+])": 340,
415
+ "[Cr+": 878,
416
+ "[S]": 623,
417
+ "Nc1ncnc2c1ncn2[C@@H]1O": 312,
418
+ "c(=O)": 325,
419
+ "d": 237,
420
+ "C=C": 433,
421
+ "(*)": 420,
422
+ "[C@@H": 736,
423
+ "[Cr+6": 879,
424
+ "[F-": 789,
425
+ "C1=C": 418,
426
+ "CC[C@H]([NH3+])C(=O)": 532,
427
+ "(=O)([O-])": 264,
428
+ "[I": 853,
429
+ "[C@H]1O[C@@H](n2cnc3c(N)ncnc32": 355,
430
+ "O)[C@@H](O)[C@H]2O)": 369,
431
+ "c(=O)[nH]": 412,
432
+ "O": 224,
433
+ "[C@@H]3": 359,
434
+ "CC(C)(COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)([O-])[O-])[C@@H](O)C(=O)NCCC(=O)NCCS": 539,
435
+ "N": 223,
436
+ "=C([O-])": 530,
437
+ "[Cl-]": 622,
438
+ "[N@@+": 900,
439
+ "[Rb+": 826,
440
+ "CC(=O)[O-]": 544,
441
+ "A": 214,
442
+ "[Co-2": 764,
443
+ "+": 195,
444
+ "C(=O)[O-].": 522,
445
+ "C)CC/C=C(\\C)CC/C=C(\\": 523,
446
+ "[Co+3]": 721,
447
+ "[Fe+4": 867,
448
+ "[Zn+2": 843,
449
+ "C(=O)N[C@@H](": 584,
450
+ "[C@H](CSCC[C@H]([NH3+])C(=O)": 533,
451
+ "c2": 386,
452
+ "[N-": 865,
453
+ "[Si]": 712,
454
+ "(O)": 506,
455
+ "].[Fe+": 465,
456
+ "b": 235,
457
+ "[CH]": 715,
458
+ ".O=P([O-])([O-])": 352,
459
+ "[F": 756,
460
+ "%28": 661,
461
+ "g": 239,
462
+ "%26": 659,
463
+ "C(C)": 313,
464
+ "[SeH": 846,
465
+ "n2cnc3": 342,
466
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])": 361,
467
+ "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 571,
468
+ "/C)CC/C=C(": 426,
469
+ "[C@@H]": 586,
470
+ "[Sb]": 687,
471
+ "[C@@H](C)": 460,
472
+ "[NH2-": 765,
473
+ "=C": 288,
474
+ "[nH+]": 640,
475
+ "[NH4+": 761,
476
+ "n2": 281,
477
+ "[C@H]2O[C@@H](n": 580,
478
+ "c(N)ncnc5": 372,
479
+ "9": 208,
480
+ ".O": 287,
481
+ "[C@@H]1": 333,
482
+ "[Fe-2]": 619,
483
+ "[Li+": 883,
484
+ "c(C)": 520,
485
+ "3": 202,
486
+ "CC(C)": 390,
487
+ "OP(=O)([O-])O": 403,
488
+ "[N@+]": 588,
489
+ "NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1": 537,
490
+ "[C@@H]2O": 330,
491
+ "[Cl+": 851,
492
+ ")C(*)=O.": 519,
493
+ "[Hg+2": 902,
494
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])": 362,
495
+ "[Ni-]": 725,
496
+ "CC(=O)[O-])": 466,
497
+ "*N[C@H](C(*)=O)[C@@H](C)": 566,
498
+ "CC[C@H]([NH3+])": 391,
499
+ "[C@@H](": 267,
500
+ "2].[Fe+": 582,
501
+ ">>O": 405,
502
+ "[Cd+2": 805,
503
+ "=": 211,
504
+ "[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 479,
505
+ "nc3": 295,
506
+ "[N@@H+]": 691,
507
+ ".O=P([O-])([O-])OP(=O)([O-])O": 425,
508
+ "%16": 649,
509
+ "COP(*)": 440,
510
+ "%10": 610,
511
+ "*": 194,
512
+ "NC(=O)": 318,
513
+ "[C@@": 259,
514
+ "[C@H]1O": 282,
515
+ "[CH": 827,
516
+ "CC(C)(COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1": 528,
517
+ "C[C@H](": 512,
518
+ "C(*)=O)": 488,
519
+ "[C@H](CS": 529,
520
+ "[Li+]": 703,
521
+ "[NH3+]": 331,
522
+ "/C)CC/C=C(/C)CC/C=C(": 453,
523
+ "[C@@H]1O": 286,
524
+ "C=": 322,
525
+ "H](": 257,
526
+ "[O-])[C@@H](O)[C@H]1O": 305,
527
+ "[Pb+": 819,
528
+ "[T": 873,
529
+ "[R": 824,
530
+ "[Te]": 698,
531
+ "[n+": 746,
532
+ "[H+]": 587,
533
+ "C[C@H](O)[C@@H](COP(=O)([O-])OP(=O)([O-])": 554,
534
+ "[o+": 779,
535
+ "n(": 446,
536
+ "nc2c1ncn2[C@@H]1O": 310,
537
+ "nc32": 339,
538
+ "7": 206,
539
+ "[Co": 762,
540
+ "[Ag+]": 677,
541
+ "SCCNC(=O)CCNC(=O)": 517,
542
+ ".C": 442,
543
+ "[C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])": 347,
544
+ "[Ni+2": 833,
545
+ "CCC(=O)[O-])": 507,
546
+ "[8*]": 705,
547
+ "[S@@]": 684,
548
+ "\\": 232,
549
+ "r": 244,
550
+ "(C)": 321,
551
+ "%20": 653,
552
+ "0.2": 733,
553
+ "CC/C(C)=C": 363,
554
+ "%2": 829,
555
+ "NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1": 570,
556
+ "%25": 658,
557
+ "C(*)=O.": 407,
558
+ "[B": 808,
559
+ "%14": 647,
560
+ "[Cl": 798,
561
+ "[Mg++]": 632,
562
+ "B": 215,
563
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])": 448,
564
+ "[CH+": 828,
565
+ "[CH-": 904,
566
+ "[Zn+2]": 676,
567
+ ".O=P([O-])([O-])O": 414,
568
+ "s": 245,
569
+ "P([O-])([O-])": 335,
570
+ "S": 227,
571
+ ".C[S+](CC[C@H]([NH3+])C(=O)[O-])C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 545,
572
+ "[Na+": 752,
573
+ "=O)O[C@@H]1COP(*)(=O)[O-]": 553,
574
+ "[Co-3": 802,
575
+ "/C=C": 483,
576
+ "SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1": 521,
577
+ "COP(=O)([O-])O": 563,
578
+ "[Cd+2]": 627,
579
+ "c1O": 567,
580
+ "[9*]": 704,
581
+ "O2)": 559,
582
+ "0": 199,
583
+ "[Mn+3": 870,
584
+ "[O-]": 255,
585
+ "[OH-": 911,
586
+ "[P": 747,
587
+ "C(*)=O": 360,
588
+ "OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)": 383,
589
+ "[4*]": 605,
590
+ "[C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 464,
591
+ "[Cd": 803,
592
+ "ccc(=O)[nH]": 558,
593
+ "[*:0]": 675,
594
+ "[W": 897,
595
+ "COP(=O)([O-])OP(=O)([O-])": 284,
596
+ "[N@H+": 895,
597
+ "Nc1nc2c(ncn2": 513,
598
+ "[NH2+": 749,
599
+ "[NH4+]": 597,
600
+ "[Br": 809,
601
+ "/": 198,
602
+ "[C@H]3O[C@@H](n4cnc5": 371,
603
+ "[OH2+]": 728,
604
+ "[o+]": 607,
605
+ "<": 210,
606
+ "OP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 376,
607
+ "CC(=O)N": 484,
608
+ "[Ag+": 845,
609
+ "H]1": 268,
610
+ "Nc1nc": 297,
611
+ "[M": 780,
612
+ "[S+](CC[C@H]([NH3+])": 504,
613
+ "CN": 475,
614
+ "OP(=O)([O-])[O-])[C@@H](O)": 511,
615
+ "Nc1nc2c(c(=O)[nH]1": 577,
616
+ "[Ni+": 832,
617
+ "C1": 384,
618
+ "[C@H]2": 311,
619
+ "\\CC/C(C)=C": 541,
620
+ "%13": 646,
621
+ "c(=O)[nH]1": 394,
622
+ "[4": 776,
623
+ "[C@H]3O[C@@H](n4cnc5c(N)ncnc54": 374,
624
+ "CC/": 319,
625
+ "[Cr+6]": 700,
626
+ "[P+": 908,
627
+ "[C@@H](O)": 274,
628
+ "[I-]": 690,
629
+ "[N]": 633,
630
+ "%29": 662,
631
+ "*N[C@H](": 550,
632
+ "[Ni-2": 869,
633
+ "[S+": 740,
634
+ "[I-": 861,
635
+ ">": 212,
636
+ "%30": 663,
637
+ "[Mg-": 782,
638
+ "[Cu+": 816,
639
+ "[1*]": 600,
640
+ "4cnc5": 368,
641
+ "[O-])[C@@H](O)[C@H]2O)c(=O)[nH]1": 492,
642
+ "[cH-": 859,
643
+ "%24": 657,
644
+ ".[1*]": 527,
645
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)": 429,
646
+ "[Co++": 909,
647
+ ".O=C": 395,
648
+ "C(=O)NCCC(=O)NCCS": 462,
649
+ "<mask>": 189,
650
+ "OP(*)": 430,
651
+ "[Mn+2": 792,
652
+ "[P+]": 723,
653
+ "[PH2": 912,
654
+ "[Ca+2]": 603,
655
+ "2": 201,
656
+ "Cc1c": 543,
657
+ "*)": 304,
658
+ "1*]": 508,
659
+ "OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54": 375,
660
+ "2)": 494,
661
+ "%17": 650,
662
+ "=O)": 252,
663
+ "[Ni": 831,
664
+ "H]3": 302,
665
+ "[C@H](O)[C@H](O)": 468,
666
+ "[C@H]([NH3+])": 364,
667
+ ".[H+].[H+].[H+].[H+]": 478,
668
+ "([O-])": 260,
669
+ "[A": 806,
670
+ "c(N)nc": 303,
671
+ "*N[C@@H](CS": 490,
672
+ "[5*": 893,
673
+ "+]": 272,
674
+ ".O=P([O-])([O-])O.[H+]": 555,
675
+ "%1": 785,
676
+ "C(=O)NCCS": 461,
677
+ "O)[C@@H](O)": 328,
678
+ "[Mg++": 814,
679
+ "Fe": 370,
680
+ "[S-": 793,
681
+ "[N@": 744,
682
+ "c1cc": 341,
683
+ "O[C@@H](": 354,
684
+ "[Cl]": 729,
685
+ "1": 200,
686
+ "[c-": 849,
687
+ "[Cu+2": 817,
688
+ ".[": 277,
689
+ "%34": 667,
690
+ "[In": 854,
691
+ "[o": 778,
692
+ "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 535,
693
+ "n2cnc3c(N)ncnc32": 351,
694
+ "[Ca+2": 773,
695
+ "[Mn+": 791,
696
+ "[nH": 739,
697
+ "[Cs": 871,
698
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54": 379,
699
+ "4c": 366,
700
+ "<pad>": 0,
701
+ "P": 225,
702
+ "[Co-2]": 598,
703
+ "*N[C@@H](": 365,
704
+ "[8*": 887,
705
+ "[Fe-2": 795,
706
+ "c(N)": 300,
707
+ "[NH4": 760,
708
+ "[S+]": 499,
709
+ "[CH2-]": 722,
710
+ "CC/C=C(": 381,
711
+ "O1": 551,
712
+ "CCNC(=O)CCNC(=O)": 516,
713
+ "[C@H](O)C(C)(C)": 515,
714
+ "[Fe+3]": 611,
715
+ "[O-])": 258,
716
+ "[Te": 874,
717
+ "[K+]": 594,
718
+ "]": 233,
719
+ "c1ccc": 415,
720
+ "OP(=O)([O-])OP(=O)([O-])OC": 547,
721
+ "(=O)": 256,
722
+ "-": 196,
723
+ "L": 221,
724
+ "OP(=O)([O-])": 265,
725
+ "[Mg-2": 783,
726
+ ")": 193,
727
+ "%23": 656,
728
+ "[Hg+2]": 718,
729
+ "C=C(": 358,
730
+ "[C@@H]2O)": 565,
731
+ "N)=O)": 493,
732
+ "[Ag": 844,
733
+ "[Mn+2]": 616,
734
+ "a": 234,
735
+ "Fe+": 408,
736
+ "CC(C)=": 572,
737
+ "[Mn+3]": 696,
738
+ "C(*)": 336,
739
+ "NC(=O)c1ccc": 431,
740
+ "%12": 645,
741
+ ":": 209,
742
+ "@": 213,
743
+ "H]": 248,
744
+ "OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1": 416,
745
+ "%18": 651,
746
+ "[6*]": 707,
747
+ "*O[C@H]1[C@@H](O)[C@H](": 564,
748
+ "[CH2+": 915,
749
+ "C(=O)N": 324,
750
+ "6": 205,
751
+ "[C@@H](O)[C@H](": 449,
752
+ "[C@H](": 266,
753
+ ".[H+].[H+]": 353,
754
+ "O[C@@H]1COP(*)(=O)[O-]": 445,
755
+ "[C@@H]3O)[C@@H](O)[C@H]2O)": 380,
756
+ "CCC": 423,
757
+ "[Fe-": 794,
758
+ "[Fe-3": 812,
759
+ "[NH2-]": 599,
760
+ "[OH2": 913,
761
+ "[Ca+": 772,
762
+ "[PH2]": 727,
763
+ "[C@H](O)[C@@H](O)": 576,
764
+ "%27": 660,
765
+ "NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)": 562,
766
+ "C=CC1": 438,
767
+ "Cl": 606,
768
+ "OC": 296,
769
+ "[5*]": 708,
770
+ "[C@H](COP(=O)([O-])": 387,
771
+ "[nH]": 392,
772
+ "c": 236,
773
+ "[Cu+]": 636,
774
+ "c3": 477,
775
+ "C(C)(C)": 497,
776
+ "%38": 671,
777
+ "n2cnc3c(N)nc": 349,
778
+ "u": 246,
779
+ "C)CC/C=C(": 382,
780
+ "Nc1nc2c(": 428,
781
+ "[P]": 589,
782
+ "[2": 769,
783
+ "#": 190,
784
+ "[OH": 910,
785
+ "C(C)=C": 344,
786
+ "3+]": 306,
787
+ "[SH-]": 596,
788
+ "[*:0": 839,
789
+ "[Co+]": 624,
790
+ "%33": 666,
791
+ "O[C@@H]1": 401,
792
+ "[Hg+": 901,
793
+ "[Hg]": 680,
794
+ "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])": 432,
795
+ "[6*": 891,
796
+ "[NH2": 748,
797
+ "%32": 665,
798
+ "[Cr+3": 880,
799
+ "[Ni+2]": 672,
800
+ "[Ni-": 868,
801
+ "[O+]": 634,
802
+ "[C@@H]2O[C@H](COP(=O)([O-])": 583,
803
+ "[Zn+": 842,
804
+ "[N": 463,
805
+ "nc": 262,
806
+ "[O-])[C@@H](O)": 292,
807
+ "[C@H](O)": 280,
808
+ "nc2": 301,
809
+ "[C@H]2O)": 332,
810
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 444,
811
+ "[Cs+]": 697,
812
+ "[SH-": 759,
813
+ "[O": 254,
814
+ "=C(": 454,
815
+ "C(=O)[O-]": 315,
816
+ "[C@H]": 585,
817
+ "%": 191,
818
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)": 452,
819
+ "[Cu+2]": 635,
820
+ "NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1": 552,
821
+ "NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)": 568,
822
+ "[Fe+]": 525,
823
+ "e": 238,
824
+ "[Cu]": 614,
825
+ "[1*": 767,
826
+ "SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)": 518,
827
+ "[CH+]": 643,
828
+ "CO)": 413,
829
+ "o": 243,
830
+ "*N": 350,
831
+ "NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)": 546,
832
+ "[Cl+]": 681,
833
+ "[2*]": 602,
834
+ "[In+3]": 686,
835
+ "[C@@H]2": 317,
836
+ "[SH]": 617,
837
+ "[Cs+": 872,
838
+ "[c": 848,
839
+ "[C@H](OP(=O)([O-])[O-])": 419,
840
+ "=P([O-])([O-])": 514,
841
+ "[N+]": 473,
842
+ "[N-]": 692,
843
+ "[Co-4]": 630,
844
+ "[N@@": 862,
845
+ "[NH": 741,
846
+ "[NH3": 742,
847
+ "[Hg": 850,
848
+ "O[C@@H](n": 357,
849
+ "[Pb+2]": 637,
850
+ "[C@H]3": 348,
851
+ "[n-]": 592,
852
+ "[In+3": 856,
853
+ "[L": 881,
854
+ "[S--]": 714,
855
+ "CCNC(=O)": 411,
856
+ "[S@": 796,
857
+ "[N@@H+": 864,
858
+ "[SH": 758
859
+ },
860
+ "merges": [
861
+ "#version: 0.2",
862
+ "[ C",
863
+ "[C @",
864
+ "[C@ H",
865
+ "[C@H ]",
866
+ "[ O",
867
+ "[O -",
868
+ "[O- ]",
869
+ "[ C",
870
+ "[C @",
871
+ "[C@ @",
872
+ "[C@@ H",
873
+ "[C@@H ]",
874
+ "[ H",
875
+ "[H +",
876
+ "[H+ ]",
877
+ "[ n",
878
+ "[n H",
879
+ "[nH ]",
880
+ "[ S",
881
+ "[S +",
882
+ "[S+ ]",
883
+ "[ N",
884
+ "[N H",
885
+ "[NH 3",
886
+ "[NH3 +",
887
+ "[NH3+ ]",
888
+ "[ N",
889
+ "[N @",
890
+ "[N@ +",
891
+ "[N@+ ]",
892
+ "[ n",
893
+ "[n +",
894
+ "[n+ ]",
895
+ "[ P",
896
+ "[P ]",
897
+ "[ H",
898
+ "[H ]",
899
+ "[ C",
900
+ "[C @",
901
+ "[C@ @",
902
+ "[C@@ ]",
903
+ "[ C",
904
+ "[C @",
905
+ "[C@ ]",
906
+ "[ N",
907
+ "[N H",
908
+ "[NH 2",
909
+ "[NH2 +",
910
+ "[NH2+ ]",
911
+ "[ n",
912
+ "[n -",
913
+ "[n- ]",
914
+ "[ N",
915
+ "[N a",
916
+ "[Na +",
917
+ "[Na+ ]",
918
+ "[ N",
919
+ "[N +",
920
+ "[N+ ]",
921
+ "[ K",
922
+ "[K +",
923
+ "[K+ ]",
924
+ "[ F",
925
+ "[F e",
926
+ "[Fe ]",
927
+ "[ F",
928
+ "[F e",
929
+ "[Fe +",
930
+ "[Fe+ ]",
931
+ "[ S",
932
+ "[S H",
933
+ "[SH -",
934
+ "[SH- ]",
935
+ "[ N",
936
+ "[N H",
937
+ "[NH 4",
938
+ "[NH4 +",
939
+ "[NH4+ ]",
940
+ "[ C",
941
+ "[C o",
942
+ "[Co -",
943
+ "[Co- 2",
944
+ "[Co-2 ]",
945
+ "[ N",
946
+ "[N H",
947
+ "[NH 2",
948
+ "[NH2 -",
949
+ "[NH2- ]",
950
+ "[ 1",
951
+ "[1 *",
952
+ "[1* ]",
953
+ "[ N",
954
+ "[N H",
955
+ "[NH +",
956
+ "[NH+ ]",
957
+ "[ 2",
958
+ "[2 *",
959
+ "[2* ]",
960
+ "[ C",
961
+ "[C a",
962
+ "[Ca +",
963
+ "[Ca+ 2",
964
+ "[Ca+2 ]",
965
+ "[ 3",
966
+ "[3 *",
967
+ "[3* ]",
968
+ "[ 4",
969
+ "[4 *",
970
+ "[4* ]",
971
+ "C l",
972
+ "[ o",
973
+ "[o +",
974
+ "[o+ ]",
975
+ "[ M",
976
+ "[M g",
977
+ "[Mg -",
978
+ "[Mg- 2",
979
+ "[Mg-2 ]",
980
+ "[ C",
981
+ "[C -",
982
+ "[C- ]",
983
+ "% 1",
984
+ "%1 0",
985
+ "[ F",
986
+ "[F e",
987
+ "[Fe +",
988
+ "[Fe+ 3",
989
+ "[Fe+3 ]",
990
+ "[ F",
991
+ "[F e",
992
+ "[Fe +",
993
+ "[Fe+ 2",
994
+ "[Fe+2 ]",
995
+ "B r",
996
+ "[ C",
997
+ "[C u",
998
+ "[Cu ]",
999
+ "[ F",
1000
+ "[F -",
1001
+ "[F- ]",
1002
+ "[ M",
1003
+ "[M n",
1004
+ "[Mn +",
1005
+ "[Mn+ 2",
1006
+ "[Mn+2 ]",
1007
+ "[ S",
1008
+ "[S H",
1009
+ "[SH ]",
1010
+ "[ S",
1011
+ "[S -",
1012
+ "[S- ]",
1013
+ "[ F",
1014
+ "[F e",
1015
+ "[Fe -",
1016
+ "[Fe- 2",
1017
+ "[Fe-2 ]",
1018
+ "[ S",
1019
+ "[S @",
1020
+ "[S@ ]",
1021
+ "[ S",
1022
+ "[S e",
1023
+ "[Se ]",
1024
+ "[ C",
1025
+ "[C l",
1026
+ "[Cl -",
1027
+ "[Cl- ]",
1028
+ "[ S",
1029
+ "[S ]",
1030
+ "[ C",
1031
+ "[C o",
1032
+ "[Co +",
1033
+ "[Co+ ]",
1034
+ "[ C",
1035
+ "[C o",
1036
+ "[Co +",
1037
+ "[Co+ 2",
1038
+ "[Co+2 ]",
1039
+ "[ C",
1040
+ "[C o",
1041
+ "[Co -",
1042
+ "[Co- 3",
1043
+ "[Co-3 ]",
1044
+ "[ C",
1045
+ "[C d",
1046
+ "[Cd +",
1047
+ "[Cd+ 2",
1048
+ "[Cd+2 ]",
1049
+ "[ A",
1050
+ "[A s",
1051
+ "[As ]",
1052
+ "[ B",
1053
+ "[B r",
1054
+ "[Br -",
1055
+ "[Br- ]",
1056
+ "[ C",
1057
+ "[C o",
1058
+ "[Co -",
1059
+ "[Co- 4",
1060
+ "[Co-4 ]",
1061
+ "[ F",
1062
+ "[F e",
1063
+ "[Fe -",
1064
+ "[Fe- 3",
1065
+ "[Fe-3 ]",
1066
+ "[ M",
1067
+ "[M g",
1068
+ "[Mg +",
1069
+ "[Mg+ +",
1070
+ "[Mg++ ]",
1071
+ "[ N",
1072
+ "[N ]",
1073
+ "[ O",
1074
+ "[O +",
1075
+ "[O+ ]",
1076
+ "[ C",
1077
+ "[C u",
1078
+ "[Cu +",
1079
+ "[Cu+ 2",
1080
+ "[Cu+2 ]",
1081
+ "[ C",
1082
+ "[C u",
1083
+ "[Cu +",
1084
+ "[Cu+ ]",
1085
+ "[ P",
1086
+ "[P b",
1087
+ "[Pb +",
1088
+ "[Pb+ 2",
1089
+ "[Pb+2 ]",
1090
+ "[ F",
1091
+ "[F e",
1092
+ "[Fe -",
1093
+ "[Fe- ]",
1094
+ "[ M",
1095
+ "[M g",
1096
+ "[Mg +",
1097
+ "[Mg+ 2",
1098
+ "[Mg+2 ]",
1099
+ "[ n",
1100
+ "[n H",
1101
+ "[nH +",
1102
+ "[nH+ ]",
1103
+ "[ M",
1104
+ "[M o",
1105
+ "[Mo ]",
1106
+ "[ R",
1107
+ "[R b",
1108
+ "[Rb +",
1109
+ "[Rb+ ]",
1110
+ "[ C",
1111
+ "[C H",
1112
+ "[CH +",
1113
+ "[CH+ ]",
1114
+ "% 1",
1115
+ "%1 1",
1116
+ "% 1",
1117
+ "%1 2",
1118
+ "% 1",
1119
+ "%1 3",
1120
+ "% 1",
1121
+ "%1 4",
1122
+ "% 1",
1123
+ "%1 5",
1124
+ "% 1",
1125
+ "%1 6",
1126
+ "% 1",
1127
+ "%1 7",
1128
+ "% 1",
1129
+ "%1 8",
1130
+ "% 1",
1131
+ "%1 9",
1132
+ "% 2",
1133
+ "%2 0",
1134
+ "% 2",
1135
+ "%2 1",
1136
+ "% 2",
1137
+ "%2 2",
1138
+ "% 2",
1139
+ "%2 3",
1140
+ "% 2",
1141
+ "%2 4",
1142
+ "% 2",
1143
+ "%2 5",
1144
+ "% 2",
1145
+ "%2 6",
1146
+ "% 2",
1147
+ "%2 7",
1148
+ "% 2",
1149
+ "%2 8",
1150
+ "% 2",
1151
+ "%2 9",
1152
+ "% 3",
1153
+ "%3 0",
1154
+ "% 3",
1155
+ "%3 1",
1156
+ "% 3",
1157
+ "%3 2",
1158
+ "% 3",
1159
+ "%3 3",
1160
+ "% 3",
1161
+ "%3 4",
1162
+ "% 3",
1163
+ "%3 5",
1164
+ "% 3",
1165
+ "%3 6",
1166
+ "% 3",
1167
+ "%3 7",
1168
+ "% 3",
1169
+ "%3 8",
1170
+ "[ N",
1171
+ "[N i",
1172
+ "[Ni +",
1173
+ "[Ni+ 2",
1174
+ "[Ni+2 ]",
1175
+ "[ S",
1176
+ "[S @",
1177
+ "[S@ @",
1178
+ "[S@@ +",
1179
+ "[S@@+ ]",
1180
+ "[ P",
1181
+ "[P H",
1182
+ "[PH ]",
1183
+ "[ *",
1184
+ "[* :",
1185
+ "[*: 0",
1186
+ "[*:0 ]",
1187
+ "[ Z",
1188
+ "[Z n",
1189
+ "[Zn +",
1190
+ "[Zn+ 2",
1191
+ "[Zn+2 ]",
1192
+ "[ A",
1193
+ "[A g",
1194
+ "[Ag +",
1195
+ "[Ag+ ]",
1196
+ "[ S",
1197
+ "[S e",
1198
+ "[Se H",
1199
+ "[SeH -",
1200
+ "[SeH- ]",
1201
+ "[ c",
1202
+ "[c -",
1203
+ "[c- ]",
1204
+ "[ H",
1205
+ "[H g",
1206
+ "[Hg ]",
1207
+ "[ C",
1208
+ "[C l",
1209
+ "[Cl +",
1210
+ "[Cl+ ]",
1211
+ "[ *",
1212
+ "[* -",
1213
+ "[*- ]",
1214
+ "[ O",
1215
+ "[O ]",
1216
+ "[ S",
1217
+ "[S @",
1218
+ "[S@ @",
1219
+ "[S@@ ]",
1220
+ "[ S",
1221
+ "[S e",
1222
+ "[Se H",
1223
+ "[SeH ]",
1224
+ "[ I",
1225
+ "[I n",
1226
+ "[In +",
1227
+ "[In+ 3",
1228
+ "[In+3 ]",
1229
+ "[ S",
1230
+ "[S b",
1231
+ "[Sb ]",
1232
+ "[ c",
1233
+ "[c H",
1234
+ "[cH -",
1235
+ "[cH- ]",
1236
+ "[ S",
1237
+ "[S @",
1238
+ "[S@ +",
1239
+ "[S@+ ]",
1240
+ "[ I",
1241
+ "[I -",
1242
+ "[I- ]",
1243
+ "[ N",
1244
+ "[N @",
1245
+ "[N@ @",
1246
+ "[N@@ H",
1247
+ "[N@@H +",
1248
+ "[N@@H+ ]",
1249
+ "[ N",
1250
+ "[N -",
1251
+ "[N- ]",
1252
+ "[ C",
1253
+ "[C l",
1254
+ "[Cl +",
1255
+ "[Cl+ 2",
1256
+ "[Cl+2 ]",
1257
+ "[ F",
1258
+ "[F e",
1259
+ "[Fe +",
1260
+ "[Fe+ 4",
1261
+ "[Fe+4 ]",
1262
+ "[ N",
1263
+ "[N i",
1264
+ "[Ni -",
1265
+ "[Ni- 2",
1266
+ "[Ni-2 ]",
1267
+ "[ M",
1268
+ "[M n",
1269
+ "[Mn +",
1270
+ "[Mn+ 3",
1271
+ "[Mn+3 ]",
1272
+ "[ C",
1273
+ "[C s",
1274
+ "[Cs +",
1275
+ "[Cs+ ]",
1276
+ "[ T",
1277
+ "[T e",
1278
+ "[Te ]",
1279
+ "[ A",
1280
+ "[A s",
1281
+ "[As H",
1282
+ "[AsH 2",
1283
+ "[AsH2 ]",
1284
+ "[ C",
1285
+ "[C r",
1286
+ "[Cr +",
1287
+ "[Cr+ 6",
1288
+ "[Cr+6 ]",
1289
+ "[ C",
1290
+ "[C r",
1291
+ "[Cr +",
1292
+ "[Cr+ 3",
1293
+ "[Cr+3 ]",
1294
+ "[ M",
1295
+ "[M n",
1296
+ "[Mn ]",
1297
+ "[ L",
1298
+ "[L i",
1299
+ "[Li +",
1300
+ "[Li+ ]",
1301
+ "[ 9",
1302
+ "[9 *",
1303
+ "[9* ]",
1304
+ "[ 8",
1305
+ "[8 *",
1306
+ "[8* ]",
1307
+ "[ 7",
1308
+ "[7 *",
1309
+ "[7* ]",
1310
+ "[ 6",
1311
+ "[6 *",
1312
+ "[6* ]",
1313
+ "[ 5",
1314
+ "[5 *",
1315
+ "[5* ]",
1316
+ "[ M",
1317
+ "[M g",
1318
+ "[Mg ]",
1319
+ "< -",
1320
+ "[ N",
1321
+ "[N @",
1322
+ "[N@ H",
1323
+ "[N@H +",
1324
+ "[N@H+ ]",
1325
+ "[ S",
1326
+ "[S i",
1327
+ "[Si ]",
1328
+ "[ W",
1329
+ "[W ]",
1330
+ "[ S",
1331
+ "[S -",
1332
+ "[S- -",
1333
+ "[S-- ]",
1334
+ "[ C",
1335
+ "[C H",
1336
+ "[CH ]",
1337
+ "[ B",
1338
+ "[B -",
1339
+ "[B- ]",
1340
+ "[ N",
1341
+ "[N @",
1342
+ "[N@ @",
1343
+ "[N@@ +",
1344
+ "[N@@+ ]",
1345
+ "[ H",
1346
+ "[H g",
1347
+ "[Hg +",
1348
+ "[Hg+ 2",
1349
+ "[Hg+2 ]",
1350
+ "[ S",
1351
+ "[S e",
1352
+ "[Se -",
1353
+ "[Se- ]",
1354
+ "[ C",
1355
+ "[C H",
1356
+ "[CH -",
1357
+ "[CH- ]",
1358
+ "[ C",
1359
+ "[C o",
1360
+ "[Co +",
1361
+ "[Co+ 3",
1362
+ "[Co+3 ]",
1363
+ "[ C",
1364
+ "[C H",
1365
+ "[CH 2",
1366
+ "[CH2 -",
1367
+ "[CH2- ]",
1368
+ "[ P",
1369
+ "[P +",
1370
+ "[P+ ]",
1371
+ "[ C",
1372
+ "[C o",
1373
+ "[Co +",
1374
+ "[Co+ +",
1375
+ "[Co++ ]",
1376
+ "[ N",
1377
+ "[N i",
1378
+ "[Ni -",
1379
+ "[Ni- ]",
1380
+ "[ O",
1381
+ "[O H",
1382
+ "[OH -",
1383
+ "[OH- ]",
1384
+ "[ P",
1385
+ "[P H",
1386
+ "[PH 2",
1387
+ "[PH2 ]",
1388
+ "[ O",
1389
+ "[O H",
1390
+ "[OH 2",
1391
+ "[OH2 +",
1392
+ "[OH2+ ]",
1393
+ "[ C",
1394
+ "[C l",
1395
+ "[Cl ]",
1396
+ "[ H",
1397
+ "[H g",
1398
+ "[Hg +",
1399
+ "[Hg+ ]",
1400
+ "[ C",
1401
+ "[C H",
1402
+ "[CH 2",
1403
+ "[CH2 +",
1404
+ "[CH2+ ]",
1405
+ "H ]",
1406
+ "O )",
1407
+ "[ C",
1408
+ "[C @",
1409
+ "= O)",
1410
+ "- ]",
1411
+ "[ O",
1412
+ "[O -]",
1413
+ "( =O)",
1414
+ "H] (",
1415
+ "[O-] )",
1416
+ "[C@ @",
1417
+ "( [O-])",
1418
+ "C C",
1419
+ "n c",
1420
+ "O P",
1421
+ "(=O) ([O-])",
1422
+ "OP (=O)([O-])",
1423
+ "[C@ H](",
1424
+ "[C@@ H](",
1425
+ "H] 1",
1426
+ "C (=O)",
1427
+ "c 1",
1428
+ "H]1 O",
1429
+ "+ ]",
1430
+ "C )",
1431
+ "[C@@H]( O)",
1432
+ "C (",
1433
+ "C OP(=O)([O-])",
1434
+ ". [",
1435
+ "c1 nc",
1436
+ "c (",
1437
+ "[C@H]( O)",
1438
+ "n 2",
1439
+ "[C@ H]1O",
1440
+ "H] 2",
1441
+ "COP(=O)([O-]) OP(=O)([O-])",
1442
+ "> >",
1443
+ "[C@@ H]1O",
1444
+ ". O",
1445
+ "= C",
1446
+ "H +]",
1447
+ ".[ H+]",
1448
+ "c c",
1449
+ "[O-]) [C@@H](O)",
1450
+ "N )",
1451
+ "[C@H]( COP(=O)([O-])OP(=O)([O-])",
1452
+ "nc 3",
1453
+ "O C",
1454
+ "N c1nc",
1455
+ "[ n",
1456
+ "N H",
1457
+ "c( N)",
1458
+ "nc 2",
1459
+ "H] 3",
1460
+ "c(N) nc",
1461
+ "* )",
1462
+ "[O-])[C@@H](O) [C@H]1O",
1463
+ "3 +]",
1464
+ "NH 3+]",
1465
+ "c1nc n2",
1466
+ "nc2 c1ncn2",
1467
+ "nc2c1ncn2 [C@@H]1O",
1468
+ "[C@ H]2",
1469
+ "Nc1nc nc2c1ncn2[C@@H]1O",
1470
+ "C( C)",
1471
+ "= O",
1472
+ "C(=O) [O-]",
1473
+ "CC CC",
1474
+ "[C@@ H]2",
1475
+ "N C(=O)",
1476
+ "CC /",
1477
+ "nc 5",
1478
+ "( C)",
1479
+ "C =",
1480
+ ") [C@H](O)",
1481
+ "C(=O) N",
1482
+ "c (=O)",
1483
+ "C(=O) [O-])",
1484
+ ".O =",
1485
+ "O) [C@@H](O)",
1486
+ ". Nc1ncnc2c1ncn2[C@@H]1O",
1487
+ "[C@@H]2 O",
1488
+ "[ NH3+]",
1489
+ "[C@H]2 O)",
1490
+ "[C@@ H]1",
1491
+ "([O-]) ([O-])",
1492
+ "P ([O-])([O-])",
1493
+ "C( *)",
1494
+ "CC (=O)",
1495
+ "n2 c",
1496
+ "nc3 2",
1497
+ "[NH3+] )",
1498
+ "c1 cc",
1499
+ "n2c nc3",
1500
+ "c(=O) [n",
1501
+ "C(C) =C",
1502
+ "[C@ H]1",
1503
+ "[C@H]1O [C@@H](",
1504
+ "[C@@H]2O [C@H](COP(=O)([O-])OP(=O)([O-])",
1505
+ "[C@ H]3",
1506
+ "n2cnc3 c(N)nc",
1507
+ "* N",
1508
+ "n2cnc3c(N)nc nc32",
1509
+ ".O= P([O-])([O-])",
1510
+ ".[H+] .[H+]",
1511
+ "O [C@@H](",
1512
+ "[C@H]1O[C@@H]( n2cnc3c(N)ncnc32",
1513
+ "[C@H]1O[C@@H](n2cnc3c(N)ncnc32 )[C@H](O)",
1514
+ "O[C@@H]( n",
1515
+ "C= C(",
1516
+ "[C@@ H]3",
1517
+ "C(*) =O",
1518
+ ".Nc1ncnc2c1ncn2[C@@H]1O [C@H](COP(=O)([O-])OP(=O)([O-])",
1519
+ "( [C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])",
1520
+ "CC/ C(C)=C",
1521
+ "[C@H]( [NH3+])",
1522
+ "*N [C@@H](",
1523
+ "4 c",
1524
+ "[C@H]3 O[C@@H](n",
1525
+ "4c nc5",
1526
+ "O)[C@@H](O) [C@H]2O)",
1527
+ "F e",
1528
+ "[C@H]3O[C@@H](n 4cnc5",
1529
+ "c(N)nc nc5",
1530
+ "c(N)ncnc5 4",
1531
+ "[C@H]3O[C@@H](n4cnc5 c(N)ncnc54",
1532
+ "OC [C@H]3O[C@@H](n4cnc5c(N)ncnc54",
1533
+ "OP(=O)([O-]) [O-])[C@@H](O)[C@H]1O",
1534
+ "OP(=O)([O-]) [O-])",
1535
+ "[C@H]( C",
1536
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-]) OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54",
1537
+ "[C@@H]3 O)[C@@H](O)[C@H]2O)",
1538
+ "CC/ C=C(",
1539
+ "C) CC/C=C(",
1540
+ "OC [C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)",
1541
+ "C 1",
1542
+ "C(=O)N CC",
1543
+ "c 2",
1544
+ "[C@H]( COP(=O)([O-])",
1545
+ "/ CC/C(C)=C",
1546
+ "[C@@H]( CC",
1547
+ "CC (C)",
1548
+ "CC [C@H]([NH3+])",
1549
+ "[n H]",
1550
+ ".[ NH3+]",
1551
+ "c(=O)[n H]1",
1552
+ ".O =C",
1553
+ "C [C@H](O)",
1554
+ ">> *",
1555
+ "* ]",
1556
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-]) OP(=O)([O-])[O-])[C@@H](O)[C@H]1O",
1557
+ "/CC/C(C)=C /CC/C(C)=C",
1558
+ "O [C@@H]1",
1559
+ "CC 1",
1560
+ "OP(=O)([O-]) O",
1561
+ "O [C@H]1",
1562
+ ">> O",
1563
+ "C S",
1564
+ "C(*)=O .",
1565
+ "Fe +",
1566
+ ".[ Fe+",
1567
+ "[n +]",
1568
+ "CC NC(=O)",
1569
+ "c(=O)[n H]",
1570
+ "C O)",
1571
+ ".O=P([O-])([O-]) O",
1572
+ "c1cc c",
1573
+ "OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O) [C@@H]1",
1574
+ "2 c(",
1575
+ "C1 =C",
1576
+ "[C@H]( OP(=O)([O-])[O-])",
1577
+ "( *)",
1578
+ "COP(=O)([O-])OP(=O)([O-]) OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1",
1579
+ "OP(=O)([O-]) [O-]",
1580
+ "CC C",
1581
+ "[O-])[C@@H](O) [C@H]2O)",
1582
+ ".O=P([O-])([O-]) OP(=O)([O-])O",
1583
+ "/ C)CC/C=C(",
1584
+ "(=O) [O-]",
1585
+ "Nc1nc 2c(",
1586
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54 )[C@H](O)",
1587
+ "OP (*)",
1588
+ "NC(=O) c1ccc",
1589
+ "Nc1ncnc2c1ncn2[C@@H]1O [C@H](COP(=O)([O-])OP(=O)([O-])",
1590
+ "C =C",
1591
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O) [C@@H]3O)[C@@H](O)[C@H]2O)",
1592
+ "NC(=O) C1=C",
1593
+ "NC(=O)C1=C N",
1594
+ "CCCC CCCC",
1595
+ "C= CC1",
1596
+ "NC(=O)c1ccc [n+]",
1597
+ "C OP(*)",
1598
+ ".O >>",
1599
+ ". C",
1600
+ "O[C@@H]1 COP(*)",
1601
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-]) [O-])[C@@H](O)[C@H]1O",
1602
+ "O[C@@H]1COP(*) (=O)[O-]",
1603
+ "n (",
1604
+ ") [C@H](OP(=O)([O-])[O-])",
1605
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54 )[C@H](OP(=O)([O-])[O-])",
1606
+ "[C@@H](O) [C@H](",
1607
+ "[C@H](C O)[C@@H](O)",
1608
+ "[ S",
1609
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-]) [C@@H]3O)[C@@H](O)[C@H]2O)",
1610
+ "/C)CC/C=C( /C)CC/C=C(",
1611
+ "= C(",
1612
+ "[C@@H](CC C(=O)[O-])",
1613
+ "[ Fe",
1614
+ "c( O)",
1615
+ ".O=C ([O-])",
1616
+ "C[C@H](O) [C@@H](",
1617
+ "[C@@H]( C)",
1618
+ "C(=O)NCC S",
1619
+ "C(=O)NCC C(=O)NCCS",
1620
+ "[ N",
1621
+ "[C@H](COP(=O)([O-]) [O-])[C@@H](O)[C@H]1O",
1622
+ "] .[Fe+",
1623
+ "CC(=O) [O-])",
1624
+ "*] )",
1625
+ "[C@H](O) [C@H](O)",
1626
+ "C)CC/C=C( \\",
1627
+ "C(=O)[O-]) C",
1628
+ "c c1",
1629
+ "n 1",
1630
+ "[N +]",
1631
+ "O[C@H]1 [C@@H](O)[C@H](",
1632
+ "C N",
1633
+ "[C@ ]",
1634
+ "c 3",
1635
+ ".[H+].[H+] .[H+].[H+]",
1636
+ "[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O) [C@@H]1O",
1637
+ "P (=O)([O-])",
1638
+ "O [C@H](CO)[C@@H](O)",
1639
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O >>",
1640
+ "/ C=C",
1641
+ "CC(=O) N",
1642
+ "OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O) [C@@H]1O",
1643
+ "[C@H]( C)",
1644
+ "c1 c",
1645
+ "C(*) =O)",
1646
+ "COP(=O)([O-]) [O-]",
1647
+ "*N[C@@H]( CS",
1648
+ "P(=O)([O-]) OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O",
1649
+ "[O-])[C@@H](O)[C@H]2O) c(=O)[nH]1",
1650
+ "N) =O)",
1651
+ "2 )",
1652
+ "C(=O)[O-] >>",
1653
+ "CC(C) (",
1654
+ "C(C) (C)",
1655
+ "COP(=O)([O-]) [O-])",
1656
+ "[S +]",
1657
+ "[S+] (",
1658
+ "OP(=O)([O-]) OC",
1659
+ "C O",
1660
+ "C(=O)[O-])C [C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O",
1661
+ "[S+]( CC[C@H]([NH3+])",
1662
+ "[S+](CC[C@H]([NH3+]) C(=O)[O-])C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O",
1663
+ "( O)",
1664
+ "CC C(=O)[O-])",
1665
+ "1 *]",
1666
+ "c1 [nH]",
1667
+ "nc n2",
1668
+ "OP(=O)([O-]) [O-])[C@@H](O)",
1669
+ "C [C@H](",
1670
+ "Nc1nc2c( ncn2",
1671
+ "= P([O-])([O-])",
1672
+ "[C@H](O) C(C)(C)",
1673
+ "CCNC(=O) CCNC(=O)",
1674
+ "S CCNC(=O)CCNC(=O)",
1675
+ "SCCNC(=O)CCNC(=O) [C@H](O)C(C)(C)",
1676
+ ") C(*)=O.",
1677
+ "c( C)",
1678
+ "SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C) COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1",
1679
+ "C(=O)[O-] .",
1680
+ "C)CC/C=C(\\ C)CC/C=C(\\",
1681
+ "SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1 OP(=O)([O-])[O-]",
1682
+ "[Fe +]",
1683
+ "CC (",
1684
+ ".[ 1*]",
1685
+ "CC(C)( COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1",
1686
+ "[C@H](C S",
1687
+ "=C ([O-])",
1688
+ "[C@@ ]",
1689
+ "CC[C@H]([NH3+]) C(=O)",
1690
+ "[C@H](CS CC[C@H]([NH3+])C(=O)",
1691
+ "[C@H](CSCC[C@H]([NH3+])C(=O) [O-])[C@@H](O)[C@H]1O",
1692
+ "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-]) OP(=O)([O-])[O-])[C@@H](O)[C@H]1O",
1693
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O) c1",
1694
+ "NC(=O)c1ccc[n+] ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1",
1695
+ "OP(=O)([O-])[O-])[C@@H](O) C(=O)NCCC(=O)NCCS",
1696
+ "CC(C)(COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1 OP(=O)([O-])[O-])[C@@H](O)C(=O)NCCC(=O)NCCS",
1697
+ ".Nc1ncnc2c1ncn2[C@@H]1O [C@H](CSCC[C@H]([NH3+])C(=O)[O-])[C@@H](O)[C@H]1O",
1698
+ "\\ CC/C(C)=C",
1699
+ ".[H+] >>",
1700
+ "C c1c",
1701
+ "CC(=O) [O-]",
1702
+ ".C [S+](CC[C@H]([NH3+])C(=O)[O-])C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O",
1703
+ "NC(=O)C1=CN ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)",
1704
+ "OP(=O)([O-]) OP(=O)([O-])OC",
1705
+ "CC(=O)N [C@H]1",
1706
+ "/CC/C(C)=C/CC/C(C)=C /CC/C(C)=C/CC/C(C)=C",
1707
+ "*N [C@H](",
1708
+ "O 1",
1709
+ "NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O) C=CC1",
1710
+ "=O) O[C@@H]1COP(*)(=O)[O-]",
1711
+ "C[C@H](O)[C@@H]( COP(=O)([O-])OP(=O)([O-])",
1712
+ ".O=P([O-])([O-])O .[H+]",
1713
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O .[H+]",
1714
+ ".Nc1ncnc2c1ncn2[C@@H]1O [C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O",
1715
+ "cc c(=O)[nH]",
1716
+ "O 2)",
1717
+ ") N",
1718
+ "*N[C@H]( C(*)=O)",
1719
+ "NC(=O)C1=CN ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)",
1720
+ "COP(=O)([O-]) O",
1721
+ "* O[C@H]1[C@@H](O)[C@H](",
1722
+ "[C@@H]2 O)",
1723
+ "*N[C@H](C(*)=O) [C@@H](C)",
1724
+ "c1 O",
1725
+ "NC(=O)c1ccc[n+] ([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)",
1726
+ "NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O) c1",
1727
+ "NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O) C=CC1",
1728
+ "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-]) [O-])[C@@H](O)[C@H]1O",
1729
+ "CC(C) =",
1730
+ "[C@@H]( [NH3+])",
1731
+ "\\CC/C(C)=C \\CC/C(C)=C",
1732
+ "O[C@H](CO)[C@@H](O) [C@@H]1O",
1733
+ "[C@H](O) [C@@H](O)",
1734
+ "Nc1nc2c( c(=O)[nH]1",
1735
+ "3 ].[Fe+",
1736
+ "/ C(C)=C",
1737
+ "[C@H]2 O[C@@H](n",
1738
+ "=O) [C@H](O)",
1739
+ "2 ].[Fe+",
1740
+ "[C@@H]2O [C@H](COP(=O)([O-])",
1741
+ "C(=O)N [C@@H]("
1742
+ ]
1743
+ }
1744
+ }
tokenizer_aa--ABPE_SMILES/tokenizer_ABPE_rexzyme_offset/tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "bpe",
3
+ "tokenizer_class": "PreTrainedTokenizerFast",
4
+ "vocab_size": 732,
5
+ "bos_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "unk_token": "<unk>",
8
+ "pad_token": "<pad>",
9
+ "mask_token": "<mask>",
10
+ "special_tokens_map_file": "special_tokens_map.json"
11
+ }
tokenizer_aa--ABPE_SMILES/tokenizer_ABPE_rexzyme_offset/vocab.json ADDED
@@ -0,0 +1,733 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[Si": 896,
3
+ "[S@@": 834,
4
+ "[C@H](CO)[C@@H](O)": 450,
5
+ "[N@@+]": 717,
6
+ "C(=O)": 269,
7
+ "NH3+]": 307,
8
+ "%37": 670,
9
+ "%36": 669,
10
+ "/CC/C(C)=C": 388,
11
+ "[2*": 770,
12
+ "C[C@H](O)": 396,
13
+ "[AsH": 875,
14
+ "C(=O)[O-])": 326,
15
+ "K": 220,
16
+ "NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)c1": 569,
17
+ "[Fe": 456,
18
+ "[N@@H": 863,
19
+ "(": 192,
20
+ "COP(=O)([O-])[O-]": 489,
21
+ "[Na": 751,
22
+ "[SeH-]": 678,
23
+ "[In+": 855,
24
+ "[Co++]": 724,
25
+ ".[Fe+": 409,
26
+ "C)CC/C=C(\\": 469,
27
+ "[SeH]": 685,
28
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)": 434,
29
+ "OP(=O)([O-])[O-])[C@@H](O)C(=O)NCCC(=O)NCCS": 538,
30
+ "[Zn": 841,
31
+ "[Co-3]": 626,
32
+ "[H]": 590,
33
+ "[Fe-3]": 631,
34
+ "[O]": 683,
35
+ "Br": 613,
36
+ ".[H+]>>": 542,
37
+ "C(=O)NCC": 385,
38
+ "CC(": 526,
39
+ "[C@H](COP(=O)([O-])OP(=O)([O-])": 294,
40
+ "[Co+2]": 625,
41
+ "[N@H+]": 711,
42
+ "[Pb": 818,
43
+ "<unk>": 2,
44
+ "[7*]": 706,
45
+ "T": 228,
46
+ "[C@H](C)": 486,
47
+ "[n-": 750,
48
+ "[Fe-]": 638,
49
+ "N)": 293,
50
+ ".O>>": 441,
51
+ "[7*": 889,
52
+ "[Fe+4]": 694,
53
+ "[O+": 815,
54
+ "[C@@H](CC": 389,
55
+ "CCCCCCCC": 437,
56
+ "4": 203,
57
+ "[Cr+3]": 701,
58
+ "I": 219,
59
+ "[C@H]1": 345,
60
+ "[Na+]": 593,
61
+ "[Co+": 800,
62
+ "=O)[C@H](O)": 581,
63
+ "COP(=O)([O-])[O-])": 498,
64
+ "NC(=O)C1=CN": 436,
65
+ "[O-": 735,
66
+ "n1": 472,
67
+ ".": 197,
68
+ "2c(": 417,
69
+ "c1": 270,
70
+ ".O=": 327,
71
+ "[PH]": 674,
72
+ "[C": 250,
73
+ "[C@": 251,
74
+ "H+]": 289,
75
+ "[AsH2]": 699,
76
+ "[C-]": 609,
77
+ "[CH2-": 907,
78
+ "OP(=O)([O-])[O-]": 422,
79
+ "[Rb+]": 642,
80
+ "[Mn": 790,
81
+ "[Se-]": 719,
82
+ "C(": 275,
83
+ "[CH-]": 720,
84
+ "nc5": 320,
85
+ "[S+](CC[C@H]([NH3+])C(=O)[O-])C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 505,
86
+ "[S@+": 860,
87
+ "[NH2+]": 591,
88
+ ")[C@H](OP(=O)([O-])[O-])": 447,
89
+ "|": 247,
90
+ "[C@H](CSCC[C@H]([NH3+])C(=O)[O-])[C@@H](O)[C@H]1O": 534,
91
+ "[3": 774,
92
+ "[B-]": 716,
93
+ "[Li": 882,
94
+ "[cH-]": 688,
95
+ "M": 222,
96
+ "n2c": 338,
97
+ "C(=O)[O-])C": 470,
98
+ "[S@]": 620,
99
+ "[C@H]3O[C@@H](n": 367,
100
+ "[Co+3": 905,
101
+ "[S@+]": 689,
102
+ ".O=C([O-])": 458,
103
+ "[Mg+": 813,
104
+ "%3": 830,
105
+ "[Mg+2": 821,
106
+ "c1c": 487,
107
+ "[c-]": 679,
108
+ "[3*]": 604,
109
+ "[Co+2": 801,
110
+ "Z": 230,
111
+ "\\CC/C(C)=C\\CC/C(C)=C": 574,
112
+ "%21": 654,
113
+ "[CH2+]": 731,
114
+ "[Mg]": 709,
115
+ ".[H+]": 290,
116
+ "CCCC": 316,
117
+ "[6": 890,
118
+ "%31": 664,
119
+ "[C@@]": 531,
120
+ "CC(C)(": 496,
121
+ "[S@@+": 835,
122
+ "[Se-": 903,
123
+ ".[NH3+]": 393,
124
+ "[Cr": 877,
125
+ "c1ncn2": 308,
126
+ "c1nc": 278,
127
+ "C(=O)[O-]>>": 495,
128
+ "[Ca": 771,
129
+ "O[C@H]1[C@@H](O)[C@H](": 474,
130
+ "[As": 807,
131
+ "OP": 263,
132
+ "[S+](": 500,
133
+ "[Rb": 825,
134
+ "i": 240,
135
+ "[W]": 713,
136
+ ")N": 560,
137
+ "[8": 886,
138
+ "[Co-4": 811,
139
+ "H": 218,
140
+ "O)": 249,
141
+ "[Mg": 781,
142
+ "%35": 668,
143
+ "[NH3+": 743,
144
+ "c(=O)[n": 343,
145
+ "CC(=O)N[C@H]1": 548,
146
+ "[3*": 775,
147
+ "c1[nH]": 509,
148
+ "P(=O)([O-])": 480,
149
+ "[Mn]": 702,
150
+ "O[C@@H]1COP(*)": 443,
151
+ "[Fe+": 757,
152
+ "[Cl-": 799,
153
+ "[K+": 755,
154
+ "C[C@H](O)[C@@H](": 459,
155
+ "[*:": 838,
156
+ "[Hg+]": 730,
157
+ "%19": 652,
158
+ "[AsH2": 876,
159
+ "[S--": 898,
160
+ "O[C@H](CO)[C@@H](O)[C@@H]1O": 575,
161
+ "[Sb": 857,
162
+ "[S": 451,
163
+ "COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1": 421,
164
+ "[4*": 777,
165
+ "[N@+": 745,
166
+ "H]1O": 271,
167
+ "[*": 837,
168
+ "[Cl+2": 866,
169
+ "[Fe]": 595,
170
+ "cc1": 471,
171
+ "l": 241,
172
+ "[C@H]1O[C@@H](": 346,
173
+ "NH": 299,
174
+ "[C-": 784,
175
+ "[NH+]": 601,
176
+ "-]": 253,
177
+ "W": 229,
178
+ "[PH": 836,
179
+ "C": 216,
180
+ "[Se]": 621,
181
+ "[9": 884,
182
+ "%22": 655,
183
+ "5": 204,
184
+ "[Fe+3": 786,
185
+ "[C@@H](CCC(=O)[O-])": 455,
186
+ "[Ni-2]": 695,
187
+ "[OH-]": 726,
188
+ "c(": 279,
189
+ "cc": 291,
190
+ "=O": 314,
191
+ "%11": 644,
192
+ "/C(C)=C": 579,
193
+ "P(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 491,
194
+ "[Co-": 763,
195
+ "*N[C@H](C(*)=O)": 561,
196
+ "3].[Fe+": 578,
197
+ "[7": 888,
198
+ "[Mo]": 641,
199
+ "ncn2": 510,
200
+ "[F-]": 615,
201
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O.[H+]": 556,
202
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 399,
203
+ "NC(=O)c1ccc[n+]": 439,
204
+ "[5": 892,
205
+ "[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)": 356,
206
+ ">>": 285,
207
+ "c(O)": 457,
208
+ "[Fe+2": 787,
209
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1": 536,
210
+ "[Cd+": 804,
211
+ "OP(=O)([O-])OC": 501,
212
+ "[Mg-2]": 608,
213
+ "SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)([O-])[O-]": 524,
214
+ "/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C/CC/C(C)=C": 549,
215
+ "CC1": 402,
216
+ "[CH2": 906,
217
+ "[Z": 840,
218
+ "([O-])([O-])": 334,
219
+ ".Nc1ncnc2c1ncn2[C@@H]1O": 329,
220
+ ">>*": 397,
221
+ "C)": 273,
222
+ "[Br-]": 629,
223
+ "%15": 648,
224
+ "</s>": 1,
225
+ "[*-": 852,
226
+ "[H": 737,
227
+ ")[C@H](O)": 323,
228
+ "[NH+": 768,
229
+ "[Mg+2]": 639,
230
+ "CC": 261,
231
+ "[n": 298,
232
+ "[S@@+]": 673,
233
+ "[Pb+2": 820,
234
+ "[n+]": 410,
235
+ "[nH+": 822,
236
+ "[N+": 753,
237
+ "[OH2+": 914,
238
+ "nc2c1ncn2": 309,
239
+ "OP(=O)([O-])[O-])": 377,
240
+ "#version:": 732,
241
+ "[1": 766,
242
+ "[Se": 797,
243
+ "H]2": 283,
244
+ "NC(=O)C1=C": 435,
245
+ "CO": 502,
246
+ "[C@]": 476,
247
+ "[B-": 899,
248
+ "(=O)[O-]": 427,
249
+ "[SeH-": 847,
250
+ "[C@@H]([NH3+])": 573,
251
+ "[K": 754,
252
+ "<s>": 185,
253
+ "CC(=O)": 337,
254
+ "[9*": 885,
255
+ "[cH": 858,
256
+ "OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 485,
257
+ "[O-])[C@@H](O)[C@H]2O)": 424,
258
+ "C(=O)[O-])C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 503,
259
+ "[*-]": 682,
260
+ "[N@H": 894,
261
+ "[As]": 628,
262
+ "c(N)ncnc54": 373,
263
+ "[S-]": 618,
264
+ "n": 242,
265
+ "[": 231,
266
+ "<-": 710,
267
+ "CS": 406,
268
+ "*]": 398,
269
+ "O[C@H](CO)[C@@H](O)": 481,
270
+ "O[C@H]1": 404,
271
+ "[C@H](C": 378,
272
+ "[Cl+2]": 693,
273
+ "8": 207,
274
+ "COP(=O)([O-])": 276,
275
+ "/CC/C(C)=C/CC/C(C)=C": 400,
276
+ "R": 226,
277
+ "*])": 467,
278
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](CSCC[C@H]([NH3+])C(=O)[O-])[C@@H](O)[C@H]1O": 540,
279
+ "[Br-": 810,
280
+ "[Cu": 788,
281
+ "[Fe+2]": 612,
282
+ "[H+": 738,
283
+ "[C@H": 734,
284
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 557,
285
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O>>": 482,
286
+ "F": 217,
287
+ "[Mo": 823,
288
+ "[NH3+])": 340,
289
+ "[Cr+": 878,
290
+ "[S]": 623,
291
+ "Nc1ncnc2c1ncn2[C@@H]1O": 312,
292
+ "c(=O)": 325,
293
+ "d": 237,
294
+ "C=C": 433,
295
+ "(*)": 420,
296
+ "[C@@H": 736,
297
+ "[Cr+6": 879,
298
+ "[F-": 789,
299
+ "C1=C": 418,
300
+ "CC[C@H]([NH3+])C(=O)": 532,
301
+ "(=O)([O-])": 264,
302
+ "[I": 853,
303
+ "[C@H]1O[C@@H](n2cnc3c(N)ncnc32": 355,
304
+ "O)[C@@H](O)[C@H]2O)": 369,
305
+ "c(=O)[nH]": 412,
306
+ "O": 224,
307
+ "[C@@H]3": 359,
308
+ "CC(C)(COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1OP(=O)([O-])[O-])[C@@H](O)C(=O)NCCC(=O)NCCS": 539,
309
+ "N": 223,
310
+ "=C([O-])": 530,
311
+ "[Cl-]": 622,
312
+ "[N@@+": 900,
313
+ "[Rb+": 826,
314
+ "CC(=O)[O-]": 544,
315
+ "A": 214,
316
+ "[Co-2": 764,
317
+ "+": 195,
318
+ "C(=O)[O-].": 522,
319
+ "C)CC/C=C(\\C)CC/C=C(\\": 523,
320
+ "[Co+3]": 721,
321
+ "[Fe+4": 867,
322
+ "[Zn+2": 843,
323
+ "C(=O)N[C@@H](": 584,
324
+ "[C@H](CSCC[C@H]([NH3+])C(=O)": 533,
325
+ "c2": 386,
326
+ "[N-": 865,
327
+ "[Si]": 712,
328
+ "(O)": 506,
329
+ "].[Fe+": 465,
330
+ "b": 235,
331
+ "[CH]": 715,
332
+ ".O=P([O-])([O-])": 352,
333
+ "[F": 756,
334
+ "%28": 661,
335
+ "g": 239,
336
+ "%26": 659,
337
+ "C(C)": 313,
338
+ "[SeH": 846,
339
+ "n2cnc3": 342,
340
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])": 361,
341
+ "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 571,
342
+ "/C)CC/C=C(": 426,
343
+ "[C@@H]": 586,
344
+ "[Sb]": 687,
345
+ "[C@@H](C)": 460,
346
+ "[NH2-": 765,
347
+ "=C": 288,
348
+ "[nH+]": 640,
349
+ "[NH4+": 761,
350
+ "n2": 281,
351
+ "[C@H]2O[C@@H](n": 580,
352
+ "c(N)ncnc5": 372,
353
+ "9": 208,
354
+ ".O": 287,
355
+ "[C@@H]1": 333,
356
+ "[Fe-2]": 619,
357
+ "[Li+": 883,
358
+ "c(C)": 520,
359
+ "3": 202,
360
+ "CC(C)": 390,
361
+ "OP(=O)([O-])O": 403,
362
+ "[N@+]": 588,
363
+ "NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)c1": 537,
364
+ "[C@@H]2O": 330,
365
+ "[Cl+": 851,
366
+ ")C(*)=O.": 519,
367
+ "[Hg+2": 902,
368
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])": 362,
369
+ "[Ni-]": 725,
370
+ "CC(=O)[O-])": 466,
371
+ "*N[C@H](C(*)=O)[C@@H](C)": 566,
372
+ "CC[C@H]([NH3+])": 391,
373
+ "[C@@H](": 267,
374
+ "2].[Fe+": 582,
375
+ ">>O": 405,
376
+ "[Cd+2": 805,
377
+ "=": 211,
378
+ "[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 479,
379
+ "nc3": 295,
380
+ "[N@@H+]": 691,
381
+ ".O=P([O-])([O-])OP(=O)([O-])O": 425,
382
+ "%16": 649,
383
+ "COP(*)": 440,
384
+ "%10": 610,
385
+ "*": 194,
386
+ "NC(=O)": 318,
387
+ "[C@@": 259,
388
+ "[C@H]1O": 282,
389
+ "[CH": 827,
390
+ "CC(C)(COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1": 528,
391
+ "C[C@H](": 512,
392
+ "C(*)=O)": 488,
393
+ "[C@H](CS": 529,
394
+ "[Li+]": 703,
395
+ "[NH3+]": 331,
396
+ "/C)CC/C=C(/C)CC/C=C(": 453,
397
+ "[C@@H]1O": 286,
398
+ "C=": 322,
399
+ "H](": 257,
400
+ "[O-])[C@@H](O)[C@H]1O": 305,
401
+ "[Pb+": 819,
402
+ "[T": 873,
403
+ "[R": 824,
404
+ "[Te]": 698,
405
+ "[n+": 746,
406
+ "[H+]": 587,
407
+ "C[C@H](O)[C@@H](COP(=O)([O-])OP(=O)([O-])": 554,
408
+ "[o+": 779,
409
+ "n(": 446,
410
+ "nc2c1ncn2[C@@H]1O": 310,
411
+ "nc32": 339,
412
+ "7": 206,
413
+ "[Co": 762,
414
+ "[Ag+]": 677,
415
+ "SCCNC(=O)CCNC(=O)": 517,
416
+ ".C": 442,
417
+ "[C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])": 347,
418
+ "[Ni+2": 833,
419
+ "CCC(=O)[O-])": 507,
420
+ "[8*]": 705,
421
+ "[S@@]": 684,
422
+ "\\": 232,
423
+ "r": 244,
424
+ "(C)": 321,
425
+ "%20": 653,
426
+ "0.2": 733,
427
+ "CC/C(C)=C": 363,
428
+ "%2": 829,
429
+ "NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1": 570,
430
+ "%25": 658,
431
+ "C(*)=O.": 407,
432
+ "[B": 808,
433
+ "%14": 647,
434
+ "[Cl": 798,
435
+ "[Mg++]": 632,
436
+ "B": 215,
437
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])": 448,
438
+ "[CH+": 828,
439
+ "[CH-": 904,
440
+ "[Zn+2]": 676,
441
+ ".O=P([O-])([O-])O": 414,
442
+ "s": 245,
443
+ "P([O-])([O-])": 335,
444
+ "S": 227,
445
+ ".C[S+](CC[C@H]([NH3+])C(=O)[O-])C[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1O": 545,
446
+ "[Na+": 752,
447
+ "=O)O[C@@H]1COP(*)(=O)[O-]": 553,
448
+ "[Co-3": 802,
449
+ "/C=C": 483,
450
+ "SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)COP(=O)([O-])OP(=O)([O-])OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1": 521,
451
+ "COP(=O)([O-])O": 563,
452
+ "[Cd+2]": 627,
453
+ "c1O": 567,
454
+ "[9*]": 704,
455
+ "O2)": 559,
456
+ "0": 199,
457
+ "[Mn+3": 870,
458
+ "[O-]": 255,
459
+ "[OH-": 911,
460
+ "[P": 747,
461
+ "C(*)=O": 360,
462
+ "OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)": 383,
463
+ "[4*]": 605,
464
+ "[C@H](COP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 464,
465
+ "[Cd": 803,
466
+ "ccc(=O)[nH]": 558,
467
+ "[*:0]": 675,
468
+ "[W": 897,
469
+ "COP(=O)([O-])OP(=O)([O-])": 284,
470
+ "[N@H+": 895,
471
+ "Nc1nc2c(ncn2": 513,
472
+ "[NH2+": 749,
473
+ "[NH4+]": 597,
474
+ "[Br": 809,
475
+ "/": 198,
476
+ "[C@H]3O[C@@H](n4cnc5": 371,
477
+ "[OH2+]": 728,
478
+ "[o+]": 607,
479
+ "<": 210,
480
+ "OP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 376,
481
+ "CC(=O)N": 484,
482
+ "[Ag+": 845,
483
+ "H]1": 268,
484
+ "Nc1nc": 297,
485
+ "[M": 780,
486
+ "[S+](CC[C@H]([NH3+])": 504,
487
+ "CN": 475,
488
+ "OP(=O)([O-])[O-])[C@@H](O)": 511,
489
+ "Nc1nc2c(c(=O)[nH]1": 577,
490
+ "[Ni+": 832,
491
+ "C1": 384,
492
+ "[C@H]2": 311,
493
+ "\\CC/C(C)=C": 541,
494
+ "%13": 646,
495
+ "c(=O)[nH]1": 394,
496
+ "[4": 776,
497
+ "[C@H]3O[C@@H](n4cnc5c(N)ncnc54": 374,
498
+ "CC/": 319,
499
+ "[Cr+6]": 700,
500
+ "[P+": 908,
501
+ "[C@@H](O)": 274,
502
+ "[I-]": 690,
503
+ "[N]": 633,
504
+ "%29": 662,
505
+ "*N[C@H](": 550,
506
+ "[Ni-2": 869,
507
+ "[S+": 740,
508
+ "[I-": 861,
509
+ ">": 212,
510
+ "%30": 663,
511
+ "[Mg-": 782,
512
+ "[Cu+": 816,
513
+ "[1*]": 600,
514
+ "4cnc5": 368,
515
+ "[O-])[C@@H](O)[C@H]2O)c(=O)[nH]1": 492,
516
+ "[cH-": 859,
517
+ "%24": 657,
518
+ ".[1*]": 527,
519
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)": 429,
520
+ "[Co++": 909,
521
+ ".O=C": 395,
522
+ "C(=O)NCCC(=O)NCCS": 462,
523
+ "<mask>": 189,
524
+ "OP(*)": 430,
525
+ "[Mn+2": 792,
526
+ "[P+]": 723,
527
+ "[PH2": 912,
528
+ "[Ca+2]": 603,
529
+ "2": 201,
530
+ "Cc1c": 543,
531
+ "*)": 304,
532
+ "1*]": 508,
533
+ "OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54": 375,
534
+ "2)": 494,
535
+ "%17": 650,
536
+ "=O)": 252,
537
+ "[Ni": 831,
538
+ "H]3": 302,
539
+ "[C@H](O)[C@H](O)": 468,
540
+ "[C@H]([NH3+])": 364,
541
+ ".[H+].[H+].[H+].[H+]": 478,
542
+ "([O-])": 260,
543
+ "[A": 806,
544
+ "c(N)nc": 303,
545
+ "*N[C@@H](CS": 490,
546
+ "[5*": 893,
547
+ "+]": 272,
548
+ ".O=P([O-])([O-])O.[H+]": 555,
549
+ "%1": 785,
550
+ "C(=O)NCCS": 461,
551
+ "O)[C@@H](O)": 328,
552
+ "[Mg++": 814,
553
+ "Fe": 370,
554
+ "[S-": 793,
555
+ "[N@": 744,
556
+ "c1cc": 341,
557
+ "O[C@@H](": 354,
558
+ "[Cl]": 729,
559
+ "1": 200,
560
+ "[c-": 849,
561
+ "[Cu+2": 817,
562
+ ".[": 277,
563
+ "%34": 667,
564
+ "[In": 854,
565
+ "[o": 778,
566
+ "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 535,
567
+ "n2cnc3c(N)ncnc32": 351,
568
+ "[Ca+2": 773,
569
+ "[Mn+": 791,
570
+ "[nH": 739,
571
+ "[Cs": 871,
572
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54": 379,
573
+ "4c": 366,
574
+ "<pad>": 0,
575
+ "P": 225,
576
+ "[Co-2]": 598,
577
+ "*N[C@@H](": 365,
578
+ "[8*": 887,
579
+ "[Fe-2": 795,
580
+ "c(N)": 300,
581
+ "[NH4": 760,
582
+ "[S+]": 499,
583
+ "[CH2-]": 722,
584
+ "CC/C=C(": 381,
585
+ "O1": 551,
586
+ "CCNC(=O)CCNC(=O)": 516,
587
+ "[C@H](O)C(C)(C)": 515,
588
+ "[Fe+3]": 611,
589
+ "[O-])": 258,
590
+ "[Te": 874,
591
+ "[K+]": 594,
592
+ "]": 233,
593
+ "c1ccc": 415,
594
+ "OP(=O)([O-])OP(=O)([O-])OC": 547,
595
+ "(=O)": 256,
596
+ "-": 196,
597
+ "L": 221,
598
+ "OP(=O)([O-])": 265,
599
+ "[Mg-2": 783,
600
+ ")": 193,
601
+ "%23": 656,
602
+ "[Hg+2]": 718,
603
+ "C=C(": 358,
604
+ "[C@@H]2O)": 565,
605
+ "N)=O)": 493,
606
+ "[Ag": 844,
607
+ "[Mn+2]": 616,
608
+ "a": 234,
609
+ "Fe+": 408,
610
+ "CC(C)=": 572,
611
+ "[Mn+3]": 696,
612
+ "C(*)": 336,
613
+ "NC(=O)c1ccc": 431,
614
+ "%12": 645,
615
+ ":": 209,
616
+ "@": 213,
617
+ "H]": 248,
618
+ "OC[C@H]1O[C@@H](n2cnc3c(N)ncnc32)[C@H](O)[C@@H]1": 416,
619
+ "%18": 651,
620
+ "[6*]": 707,
621
+ "*O[C@H]1[C@@H](O)[C@H](": 564,
622
+ "[CH2+": 915,
623
+ "C(=O)N": 324,
624
+ "6": 205,
625
+ "[C@@H](O)[C@H](": 449,
626
+ "[C@H](": 266,
627
+ ".[H+].[H+]": 353,
628
+ "O[C@@H]1COP(*)(=O)[O-]": 445,
629
+ "[C@@H]3O)[C@@H](O)[C@H]2O)": 380,
630
+ "CCC": 423,
631
+ "[Fe-": 794,
632
+ "[Fe-3": 812,
633
+ "[NH2-]": 599,
634
+ "[OH2": 913,
635
+ "[Ca+": 772,
636
+ "[PH2]": 727,
637
+ "[C@H](O)[C@@H](O)": 576,
638
+ "%27": 660,
639
+ "NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)": 562,
640
+ "C=CC1": 438,
641
+ "Cl": 606,
642
+ "OC": 296,
643
+ "[5*]": 708,
644
+ "[C@H](COP(=O)([O-])": 387,
645
+ "[nH]": 392,
646
+ "c": 236,
647
+ "[Cu+]": 636,
648
+ "c3": 477,
649
+ "C(C)(C)": 497,
650
+ "%38": 671,
651
+ "n2cnc3c(N)nc": 349,
652
+ "u": 246,
653
+ "C)CC/C=C(": 382,
654
+ "Nc1nc2c(": 428,
655
+ "[P]": 589,
656
+ "[2": 769,
657
+ "#": 190,
658
+ "[OH": 910,
659
+ "C(C)=C": 344,
660
+ "3+]": 306,
661
+ "[SH-]": 596,
662
+ "[*:0": 839,
663
+ "[Co+]": 624,
664
+ "%33": 666,
665
+ "O[C@@H]1": 401,
666
+ "[Hg+": 901,
667
+ "[Hg]": 680,
668
+ "Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])": 432,
669
+ "[6*": 891,
670
+ "[NH2": 748,
671
+ "%32": 665,
672
+ "[Cr+3": 880,
673
+ "[Ni+2]": 672,
674
+ "[Ni-": 868,
675
+ "[O+]": 634,
676
+ "[C@@H]2O[C@H](COP(=O)([O-])": 583,
677
+ "[Zn+": 842,
678
+ "[N": 463,
679
+ "nc": 262,
680
+ "[O-])[C@@H](O)": 292,
681
+ "[C@H](O)": 280,
682
+ "nc2": 301,
683
+ "[C@H]2O)": 332,
684
+ ".Nc1ncnc2c1ncn2[C@@H]1O[C@H](COP(=O)([O-])OP(=O)([O-])[O-])[C@@H](O)[C@H]1O": 444,
685
+ "[Cs+]": 697,
686
+ "[SH-": 759,
687
+ "[O": 254,
688
+ "=C(": 454,
689
+ "C(=O)[O-]": 315,
690
+ "[C@H]": 585,
691
+ "%": 191,
692
+ "([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)": 452,
693
+ "[Cu+2]": 635,
694
+ "NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)C=CC1": 552,
695
+ "NC(=O)c1ccc[n+]([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](OP(=O)([O-])[O-])[C@@H]3O)[C@@H](O)[C@H]2O)": 568,
696
+ "[Fe+]": 525,
697
+ "e": 238,
698
+ "[Cu]": 614,
699
+ "[1*": 767,
700
+ "SCCNC(=O)CCNC(=O)[C@H](O)C(C)(C)": 518,
701
+ "[CH+]": 643,
702
+ "CO)": 413,
703
+ "o": 243,
704
+ "*N": 350,
705
+ "NC(=O)C1=CN([C@@H]2O[C@H](COP(=O)([O-])OP(=O)([O-])OC[C@H]3O[C@@H](n4cnc5c(N)ncnc54)[C@H](O)[C@@H]3O)[C@@H](O)[C@H]2O)": 546,
706
+ "[Cl+]": 681,
707
+ "[2*]": 602,
708
+ "[In+3]": 686,
709
+ "[C@@H]2": 317,
710
+ "[SH]": 617,
711
+ "[Cs+": 872,
712
+ "[c": 848,
713
+ "[C@H](OP(=O)([O-])[O-])": 419,
714
+ "=P([O-])([O-])": 514,
715
+ "[N+]": 473,
716
+ "[N-]": 692,
717
+ "[Co-4]": 630,
718
+ "[N@@": 862,
719
+ "[NH": 741,
720
+ "[NH3": 742,
721
+ "[Hg": 850,
722
+ "O[C@@H](n": 357,
723
+ "[Pb+2]": 637,
724
+ "[C@H]3": 348,
725
+ "[n-]": 592,
726
+ "[In+3": 856,
727
+ "[L": 881,
728
+ "[S--]": 714,
729
+ "CCNC(=O)": 411,
730
+ "[S@": 796,
731
+ "[N@@H+": 864,
732
+ "[SH": 758
733
+ }
tokenizer_aa--ABPE_SMILES/tokenizer_aa/special_tokens_map.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
+ }
tokenizer_aa--ABPE_SMILES/tokenizer_aa/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_aa--ABPE_SMILES/tokenizer_aa/tokenizer_config.json ADDED
@@ -0,0 +1,938 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "6": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "7": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "8": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "9": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "10": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "11": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "12": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "13": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "14": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "15": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "16": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "17": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "18": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "19": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "20": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "21": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "22": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "23": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "24": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "25": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "26": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "27": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "28": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "29": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "30": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "31": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "33": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "34": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "35": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "36": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "37": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "38": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "39": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "40": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "41": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "42": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "43": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "44": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "45": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "46": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "47": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "48": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "49": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "50": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "51": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "52": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "53": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "54": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "55": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "56": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "57": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "58": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "59": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "60": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "61": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "62": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "63": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "64": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "65": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "66": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "67": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "68": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "69": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "70": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "71": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "72": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "73": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "74": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "75": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "76": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "77": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "78": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "79": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "80": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "81": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "82": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "83": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "84": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "85": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "86": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "87": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "88": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "89": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "90": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "91": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "92": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "93": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "94": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "95": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "96": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "97": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "98": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "99": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "100": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "101": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "102": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": false,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 1000000000000000019884624838656,
934
+ "pad_token": "<pad>",
935
+ "sp_model_kwargs": {},
936
+ "tokenizer_class": "T5Tokenizer",
937
+ "unk_token": "<unk>"
938
+ }