gbyuvd committed on
Commit
a3e71f1
·
verified ·
1 Parent(s): a8868fc

First commit

Browse files
Files changed (2) hide show
  1. FastChemTokenizer.py +322 -0
  2. smitok/vocab.json +1240 -0
FastChemTokenizer.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import json
3
+ import os
4
+ from typing import List, Union, Optional, Tuple
5
+ from transformers.tokenization_utils_base import BatchEncoding
6
+ from functools import lru_cache
7
+
8
+ # Copyright 2025 Genta Pramillean Bayu (@gbyuvd)
9
+ #
10
+ # Licensed under the Apache License, Version 2.0 (the "License");
11
+ # you may not use this file except in compliance with the License.
12
+ # You may obtain a copy of the License at
13
+ #
14
+ # http://www.apache.org/licenses/LICENSE-2.0
15
+ #
16
+ # Unless required by applicable law or agreed to in writing, software
17
+ # distributed under the License is distributed on an "AS IS" BASIS,
18
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ # See the License for the specific language governing permissions and
20
+ # limitations under the License.
21
+
22
class TrieNode:
    """One node of a character trie.

    Each node maps the next character to a child node and optionally
    records the ID of the token spelled out by the path that ends here.
    """

    # Fixed attribute set: no per-instance __dict__ is allocated.
    __slots__ = ("children", "token_id")

    def __init__(self):
        # None means "the path ending at this node is not a complete token".
        self.token_id = None
        # Outgoing edges: next character -> child TrieNode.
        self.children = {}
27
+
28
+
29
class FastChemTokenizer:
    """Greedy longest-match tokenizer backed by a character trie.

    The vocabulary is a ``{token_string: token_id}`` mapping.  Encoding walks
    the input left to right and, at every position, emits the ID of the
    longest vocabulary token that starts there; a character that starts no
    token is emitted as ``<unk>``.

    The public surface mirrors a subset of the Hugging Face tokenizer API
    (``__call__``, ``encode_plus``, ``batch_encode_plus``, ``decode``,
    ``save_pretrained`` / ``from_pretrained``) and returns ``BatchEncoding``
    objects.
    """

    # Special tokens that every vocabulary must define.
    _REQUIRED_SPECIAL_TOKENS = ("<s>", "</s>", "<pad>", "<unk>", "<mask>")
    # Maximum number of distinct input strings memoized per tokenizer instance.
    _ENCODE_CACHE_SIZE = 10000

    def __init__(self, token_to_id, model_max_length=512):
        """Build the tokenizer from a vocabulary mapping.

        Args:
            token_to_id: Mapping from token string to integer ID.  Must
                contain ``<s>``, ``</s>``, ``<pad>``, ``<unk>`` and ``<mask>``.
            model_max_length: Default ``max_length`` used by ``encode_plus``
                for truncation and padding when none is given.

        Raises:
            KeyError: If a required special token is missing from the vocab.
        """
        # Validate before doing any work so that an incomplete (or empty)
        # vocab reports the missing token instead of failing inside max().
        for tok in self._REQUIRED_SPECIAL_TOKENS:
            if tok not in token_to_id:
                raise KeyError(f"Required special token '{tok}' not found in vocab.")

        self.token_to_id = token_to_id
        self.id_to_token = {v: k for k, v in token_to_id.items()}
        self.model_max_length = model_max_length

        # Length (in characters) of the longest vocabulary token.
        self.max_token_len = max(len(t) for t in token_to_id)

        # Trie used for longest-match lookup during encoding.
        self.trie_root = self._build_trie(token_to_id)

        # Special token IDs
        self.bos_token_id = token_to_id["<s>"]
        self.eos_token_id = token_to_id["</s>"]
        self.pad_token_id = token_to_id["<pad>"]
        self.unk_token_id = token_to_id["<unk>"]
        self.mask_token_id = token_to_id["<mask>"]

        # Special tokens for convenience
        self.bos_token = "<s>"
        self.eos_token = "</s>"
        self.pad_token = "<pad>"
        self.unk_token = "<unk>"
        self.mask_token = "<mask>"

    def __getstate__(self):
        """Return picklable state; the memoization cache is dropped and rebuilt lazily."""
        state = self.__dict__.copy()
        state.pop("_encode_cache", None)
        return state

    def _build_trie(self, token_to_id):
        """Return the root of a trie containing every token in ``token_to_id``."""
        root = TrieNode()
        for token, tid in token_to_id.items():
            node = root
            for char in token:
                child = node.children.get(char)
                if child is None:
                    child = node.children[char] = TrieNode()
                node = child
            # Mark the end of a complete token.
            node.token_id = tid
        return root

    def __len__(self):
        """Return vocab size — REQUIRED for HF compatibility."""
        return len(self.token_to_id)

    def __call__(self, text: Union[str, List[str]], text_pair: Optional[Union[str, List[str]]] = None, **kwargs) -> BatchEncoding:
        """Encode a single string or a list of strings.

        Args:
            text: One string, or a list of strings for batch encoding.
            text_pair: Optional second segment(s); for a batch, a list that is
                paired element-wise with ``text`` (``None`` entries mean
                "no pair" for that element).
            **kwargs: Forwarded to ``encode_plus`` / ``batch_encode_plus``.

        Raises:
            ValueError: If ``text`` and ``text_pair`` are lists of different
                lengths (previously the longer one was silently truncated).
        """
        if not isinstance(text, list):
            return self.encode_plus(text=text, text_pair=text_pair, **kwargs)

        if not text_pair:
            return self.batch_encode_plus(text, **kwargs)

        if len(text) != len(text_pair):
            raise ValueError(
                f"text and text_pair must have the same length, "
                f"got {len(text)} and {len(text_pair)}."
            )
        batch = [(t, p) if p is not None else t for t, p in zip(text, text_pair)]
        return self.batch_encode_plus(batch, **kwargs)

    def _encode_to_tuple(self, s: str) -> Tuple[int, ...]:
        """Uncached encode returning an immutable tuple (safe to store in the cache)."""
        return tuple(self._encode_core(s))

    def _cached_encode_str(self, s: str) -> Tuple[int, ...]:
        """Memoized encode of ``s``.

        The cache is created lazily and stored on the instance.  Decorating
        the method itself with ``lru_cache`` would key on ``self``, keep every
        tokenizer alive for the lifetime of the cache, and make all instances
        compete for the same slots.
        """
        cache = self.__dict__.get("_encode_cache")
        if cache is None:
            cache = lru_cache(maxsize=self._ENCODE_CACHE_SIZE)(self._encode_to_tuple)
            self._encode_cache = cache
        return cache(s)

    def _encode_core(self, text: str) -> List[int]:
        """Core encoding logic using Trie — no caching.

        At each position the trie is followed as far as the input allows and
        the last complete token seen on the way is emitted (greedy longest
        match).  If no token starts at the position, one character is consumed
        and mapped through ``token_to_id`` with ``<unk>`` as fallback.
        """
        result_ids = []
        i = 0
        n = len(text)

        while i < n:
            node = self.trie_root
            j = i
            last_match_id = None
            last_match_end = i

            # Traverse trie while characters match
            while j < n and text[j] in node.children:
                node = node.children[text[j]]
                j += 1
                if node.token_id is not None:
                    last_match_id = node.token_id
                    last_match_end = j  # Remember end of valid token

            if last_match_id is not None:
                result_ids.append(last_match_id)
                i = last_match_end
            else:
                # Fallback: encode single char
                result_ids.append(self.token_to_id.get(text[i], self.unk_token_id))
                i += 1

        return result_ids

    def encode(self, text: str) -> List[int]:
        """Public encode method — strips input and uses cache.

        Returns a fresh list on every call, so callers may mutate it.
        """
        return list(self._cached_encode_str(text.strip()))

    def decode(self, token_ids: Union[List[int], torch.Tensor], skip_special_tokens: bool = False) -> str:
        """Join the token strings for ``token_ids`` into one string.

        Args:
            token_ids: Flat sequence of IDs (list or 1-D tensor).
            skip_special_tokens: If true, drop ``<s>``, ``</s>``, ``<pad>``
                and ``<mask>``.  ``<unk>`` is kept.

        IDs that are not in the vocabulary decode to the ``<unk>`` string.
        """
        if isinstance(token_ids, torch.Tensor):
            token_ids = token_ids.tolist()

        if skip_special_tokens:
            special_ids = {
                self.bos_token_id,
                self.eos_token_id,
                self.pad_token_id,
                self.mask_token_id,
            }
        else:
            special_ids = set()

        return "".join(
            self.id_to_token.get(tid, self.unk_token)
            for tid in token_ids
            if tid not in special_ids
        )

    def decode_with_trace(self, token_ids: List[int]) -> None:
        """Print one line per token ID showing its position, ID and token string."""
        print(f"\n🔍 Decoding {len(token_ids)} tokens:")
        for i, tid in enumerate(token_ids):
            token = self.id_to_token.get(tid, self.unk_token)
            print(f" [{i:03d}] ID={tid:5d} → '{token}'")

    def convert_ids_to_tokens(self, ids: List[int]) -> List[str]:
        """Map IDs to token strings; unknown IDs become the ``<unk>`` string."""
        return [self.id_to_token.get(i, self.unk_token) for i in ids]

    def convert_tokens_to_ids(self, tokens: List[str]) -> List[int]:
        """Map token strings to IDs; unknown tokens become the ``<unk>`` ID."""
        return [self.token_to_id.get(t, self.unk_token_id) for t in tokens]

    def _encode_to_lists(
        self,
        text: str,
        text_pair: Optional[str] = None,
        add_special_tokens: bool = True,
        padding: Union[bool, str] = False,
        truncation: bool = False,
        max_length: Optional[int] = None,
        return_attention_mask: bool = True,
        return_token_type_ids: bool = True,
    ) -> dict:
        """Encode one example into plain Python lists (no tensors, no BatchEncoding).

        Shared by ``encode_plus`` and ``batch_encode_plus`` so that batching
        always works on 1-D lists.  Parameters have the same meaning as in
        ``encode_plus``.
        """
        if max_length is None:
            max_length = self.model_max_length

        ids_a = self.encode(text)
        ids_b = self.encode(text_pair) if text_pair is not None else None

        if add_special_tokens:
            # Layout: <s> A </s>            (segment 0)
            #         <s> A </s> B </s>     (segment 0, then segment 1)
            input_ids = [self.bos_token_id] + ids_a + [self.eos_token_id]
            token_type_ids = [0] * len(input_ids)
            if ids_b is not None:
                input_ids += ids_b + [self.eos_token_id]
                token_type_ids += [1] * (len(ids_b) + 1)
        else:
            input_ids = ids_a
            token_type_ids = [0] * len(input_ids)
            if ids_b is not None:
                input_ids.extend(ids_b)
                token_type_ids.extend([1] * len(ids_b))

        # NOTE: truncation is a plain right-cut, so trailing special tokens
        # (e.g. the final </s>) are dropped when the sequence is too long.
        if truncation and len(input_ids) > max_length:
            input_ids = input_ids[:max_length]
            token_type_ids = token_type_ids[:max_length]

        # Real tokens are attended to; the mask is built from positions, not
        # from IDs, so a literal "<pad>" in the input is not masked out.
        attention_mask = [1] * len(input_ids)

        # Any truthy padding value pads to max_length, except the explicit
        # "do_not_pad" strategy string.
        if padding and padding != "do_not_pad":
            pad_len = max_length - len(input_ids)
            if pad_len > 0:
                input_ids.extend([self.pad_token_id] * pad_len)
                token_type_ids.extend([0] * pad_len)
                attention_mask.extend([0] * pad_len)

        encoded = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
        }
        if return_token_type_ids:
            encoded["token_type_ids"] = token_type_ids
        return encoded

    def encode_plus(
        self,
        text: str,
        text_pair: Optional[str] = None,
        add_special_tokens: bool = True,
        padding: Union[bool, str] = False,
        truncation: bool = False,
        max_length: Optional[int] = None,
        return_tensors: Optional[str] = None,
        return_attention_mask: bool = True,
        return_token_type_ids: bool = True,
    ) -> BatchEncoding:
        """Encode one string (or string pair) into model inputs.

        Args:
            text: First segment.
            text_pair: Optional second segment.
            add_special_tokens: Wrap segments as ``<s> A </s>`` /
                ``<s> A </s> B </s>``.
            padding: Any truthy value other than ``"do_not_pad"`` pads on the
                right to ``max_length``.
            truncation: If truthy, cut the sequence to ``max_length``.
            max_length: Target length; defaults to ``model_max_length``.
            return_tensors: ``"pt"`` returns ``torch.long`` tensors of shape
                ``(1, seq_len)``; other values are passed to ``BatchEncoding``
                as ``tensor_type``.
            return_attention_mask: Accepted for API compatibility; the
                attention mask is always returned.
            return_token_type_ids: Include ``token_type_ids`` in the result.

        Returns:
            ``BatchEncoding`` with ``input_ids``, ``attention_mask`` and,
            optionally, ``token_type_ids``.
        """
        encoded = self._encode_to_lists(
            text=text,
            text_pair=text_pair,
            add_special_tokens=add_special_tokens,
            padding=padding,
            truncation=truncation,
            max_length=max_length,
            return_attention_mask=return_attention_mask,
            return_token_type_ids=return_token_type_ids,
        )

        if return_tensors == "pt":
            # Wrap in an outer list to add the leading batch dimension.
            encoded = {k: torch.tensor([v], dtype=torch.long) for k, v in encoded.items()}

        return BatchEncoding(encoded, tensor_type=return_tensors)

    @staticmethod
    def _pad_rows_to_tensor(rows: List[List[int]], padding_value: int) -> torch.Tensor:
        """Right-pad 1-D integer rows to a common length and stack them as ``(batch, seq)``."""
        if not rows:
            return torch.empty((0, 0), dtype=torch.long)
        return torch.nn.utils.rnn.pad_sequence(
            [torch.tensor(row, dtype=torch.long) for row in rows],
            batch_first=True,
            padding_value=padding_value,
        )

    def batch_encode_plus(
        self,
        batch_text_or_text_pairs: List[Union[str, Tuple[str, str]]],
        **kwargs
    ) -> BatchEncoding:
        """Encode a list of strings and/or ``(text, text_pair)`` tuples.

        Keyword arguments are the same as for ``encode_plus``.  With
        ``return_tensors="pt"`` the rows are right-padded to the longest row
        in the batch (``pad_token_id`` for ``input_ids``, ``0`` for the other
        fields) and returned as ``(batch, seq_len)`` tensors.
        """
        # Encode every item to plain lists first; converting per item would
        # give (1, L) tensors that cannot be padded along the sequence axis.
        return_tensors = kwargs.pop("return_tensors", None)

        all_input_ids = []
        all_attention_masks = []
        all_token_type_ids = []

        for item in batch_text_or_text_pairs:
            if isinstance(item, tuple):
                text, text_pair = item
            else:
                text, text_pair = item, None

            encoded = self._encode_to_lists(text=text, text_pair=text_pair, **kwargs)
            all_input_ids.append(encoded["input_ids"])
            all_attention_masks.append(encoded["attention_mask"])
            if "token_type_ids" in encoded:
                all_token_type_ids.append(encoded["token_type_ids"])

        batched = {
            "input_ids": all_input_ids,
            "attention_mask": all_attention_masks,
        }
        if all_token_type_ids:
            batched["token_type_ids"] = all_token_type_ids

        if return_tensors == "pt":
            batched = {
                k: self._pad_rows_to_tensor(
                    v, self.pad_token_id if k == "input_ids" else 0
                )
                for k, v in batched.items()
            }

        return BatchEncoding(batched, tensor_type=return_tensors)

    # Save vocab to directory
    def save_pretrained(self, save_directory: str):
        """
        Save tokenizer vocab as `vocab.json` in target directory.
        Mimics Hugging Face convention.
        """
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(save_directory, exist_ok=True)

        vocab_file = os.path.join(save_directory, "vocab.json")

        # Keys are strings, values are ints — JSON-safe
        with open(vocab_file, "w", encoding="utf-8") as f:
            json.dump(self.token_to_id, f, ensure_ascii=False, indent=2)

        print(f"✅ Tokenizer vocab saved to: {vocab_file}")

    # Load from pretrained directory
    @classmethod
    def from_pretrained(cls, pretrained_directory: str, model_max_length=512):
        """
        Load tokenizer from directory containing `vocab.json`.

        Raises:
            FileNotFoundError: If `vocab.json` is not present in the directory.
        """
        vocab_file = os.path.join(pretrained_directory, "vocab.json")

        if not os.path.exists(vocab_file):
            raise FileNotFoundError(f"Vocab file not found: {vocab_file}")

        with open(vocab_file, "r", encoding="utf-8") as f:
            token_to_id = json.load(f)

        # Convert keys to str (JSON loads as str anyway), values to int
        token_to_id = {str(k): int(v) for k, v in token_to_id.items()}

        return cls(token_to_id=token_to_id, model_max_length=model_max_length)
smitok/vocab.json ADDED
@@ -0,0 +1,1240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<s>": 0,
3
+ "<pad>": 1,
4
+ "</s>": 2,
5
+ "<unk>": 3,
6
+ "<mask>": 4,
7
+ "cccc([N+](=O)[O-])": 5,
8
+ "ccc([N+](=O)[O-])": 6,
9
+ "([N+](=O)[O-])cc": 7,
10
+ "cc([N+](=O)[O-])": 8,
11
+ ")[C@H](CCCNC(=N": 9,
12
+ ")[C@H](CCCCN)NC": 10,
13
+ "[CH][CH][CH][CH": 11,
14
+ "C@@H](CCCNC(=N)": 12,
15
+ "c([N+](=O)[O-])": 13,
16
+ "](CCCNC(=N)N)NC": 14,
17
+ "N[C@@H](CCCNC(=": 15,
18
+ "[C@@H](CCCNC(=N": 16,
19
+ "O)[C@H](CCCNC(=": 17,
20
+ "@@H](CCCNC(=N)N": 18,
21
+ "([N+](=O)[O-])c": 19,
22
+ "COc1ccc(-c2coc3": 20,
23
+ "(=O)[C@H](CCCNC": 21,
24
+ "[C@@H](Cc1ccccc": 22,
25
+ "1ccc([N+](=O)[O": 23,
26
+ "(=O)[C@H](CCCCN": 24,
27
+ "N+](=O)[O-])cc1": 25,
28
+ "H](CCCNC(=N)N)": 26,
29
+ "c[nH]c2ccccc12": 27,
30
+ "N+](=O)[O-])cc": 28,
31
+ ")N[C@@H](CCCNC": 29,
32
+ "c[nH]c3ccccc23": 30,
33
+ ")N[C@@H](CCCCN": 31,
34
+ "@H](CCCNC(=N)N": 32,
35
+ "([N+](=O)[O-])": 33,
36
+ "COc1ccc(NC(=O)": 34,
37
+ "][CH][CH][CH][": 35,
38
+ "N[C@@H](CCCCN)": 36,
39
+ "O=[N+]([O-])c1": 37,
40
+ "C@H](CCCNC(=N)": 38,
41
+ "CH][CH][CH][C]": 39,
42
+ "COc1ccc(-c2ccc": 40,
43
+ "[nH]c2ccccc12)": 41,
44
+ "](CO)[C@@H](O": 42,
45
+ "COc1ccc(-c2cc": 43,
46
+ "ccc(S(=O)(=O)": 44,
47
+ "C@@H](NC(=O)[": 45,
48
+ "=[N+]([O-])c1": 46,
49
+ "H](CO)[C@@H](": 47,
50
+ "NC(=O)[C@@H](": 48,
51
+ "COc1ccc(C(=O)": 49,
52
+ "H](CCCNC(=N)N": 50,
53
+ "N[C@@H](CCCNC": 51,
54
+ "(=O)[C@@H](NC": 52,
55
+ "1ccc(S(=O)(=O": 53,
56
+ "O)[C@@H](NC(=": 54,
57
+ "[CH][CH][CH][": 55,
58
+ "COc1ccc2[nH]c": 56,
59
+ "COc1ccc(/C=C/": 57,
60
+ "[nH]c2ccccc12": 58,
61
+ ")[C@@H](NC(=O": 59,
62
+ "CO)[C@@H](O)[": 60,
63
+ "1c[nH]c2ccccc": 61,
64
+ "N+](=O)[O-])c": 62,
65
+ "@@H](NC(=O)[C": 63,
66
+ "](CCCNC(=N)N)": 64,
67
+ "COc1ccc(-c2nc": 65,
68
+ "COc1cc(C(=O)": 66,
69
+ "(=O)[C@H](Cc": 67,
70
+ "COc1ccc2[nH]": 68,
71
+ "N+](=O)[O-])": 69,
72
+ "COc1ccc(/C=C": 70,
73
+ "CH][CH][CH][": 71,
74
+ "COc1ccc(OC)c": 72,
75
+ "NC(=O)[C@H](": 73,
76
+ "COC(=O)[C@@H": 74,
77
+ "COC(=O)c1ccc": 75,
78
+ "CC(=O)Nc1ccc": 76,
79
+ "COc1cc(/C=C/": 77,
80
+ "H](O)[C@@H](": 78,
81
+ ")[C@@H](O)[C": 79,
82
+ "(=O)[C@H](CC": 80,
83
+ "COc1cccc(-c2": 81,
84
+ "@H](CO)[C@@H": 82,
85
+ "@H](NC(=O)[C": 83,
86
+ "CCCCCCCCCCCC": 84,
87
+ "COc1cc2c(cc1": 85,
88
+ ")NC(=O)[C@@H": 86,
89
+ "CCOC(=O)c1cc": 87,
90
+ "CC(=O)OC1CC2": 88,
91
+ "C@H](CO)[C@@": 89,
92
+ "Cc1ccc(C(=O)": 90,
93
+ "COc1ccc2c(c1": 91,
94
+ "@@H](O)[C@@H": 92,
95
+ "NC(=O)[C@@H]": 93,
96
+ ".[Br-].[Br-]": 94,
97
+ "C@H](NC(=O)[": 95,
98
+ ")[C@@H](NC(=": 96,
99
+ "](NC(=O)[C@H": 97,
100
+ "H](O)[C@H](": 98,
101
+ "](NC(=O)[C@": 99,
102
+ "[C@H](NC(=O": 100,
103
+ "[CH][CH][CH": 101,
104
+ ")NC(=O)[C@@": 102,
105
+ "C@H](O)[C@@": 103,
106
+ "@H](O)[C@@H": 104,
107
+ "COc1ccc(-c2": 105,
108
+ "ccc(C(F)(F)": 106,
109
+ "O=C(Nc1ccc(": 107,
110
+ ")N[C@@H](CC": 108,
111
+ ")[C@H](O)[C": 109,
112
+ ")N[C@@H](Cc": 110,
113
+ "COC(=O)c1cc": 111,
114
+ "C@@H](O)[C@": 112,
115
+ "O)[C@H](Cc1": 113,
116
+ "(=O)[C@@H](": 114,
117
+ "COc1ccc(-n2": 115,
118
+ "O)[C@H](O)[": 116,
119
+ ")NC(=O)[C@H": 117,
120
+ "COc1ccc(/C=": 118,
121
+ "+](=O)[O-])": 119,
122
+ "Cc1cc(C(=O)": 120,
123
+ "COC(=O)[C@H": 121,
124
+ "[C@H](CO)[C": 122,
125
+ "](O)[C@H](O": 123,
126
+ "COc1cc(/C=C": 124,
127
+ "H](Cc1c[nH]": 125,
128
+ "CCS(=O)(=O)": 126,
129
+ "(S(=O)(=O)N": 127,
130
+ "C[C@]12CC[C": 128,
131
+ "COc1cc(OC)c": 129,
132
+ "O=C(Nc1cccc": 130,
133
+ "CCOC(=O)c1c": 131,
134
+ "H](O)[C@@H]": 132,
135
+ "N[C@@H](CC(": 133,
136
+ "O[C@H](CO)[": 134,
137
+ "C@@H](NC(=O": 135,
138
+ "O)[C@H](CC(": 136,
139
+ ")[C@@H](O)[": 137,
140
+ "CC(=O)OC1CC": 138,
141
+ "COc1ccccc1C": 139,
142
+ "@@H](O)[C@H": 140,
143
+ "C=CC(=O)Nc1": 141,
144
+ "N[C@@H](Cc1": 142,
145
+ "O=C(COc1ccc": 143,
146
+ "(=O)N[C@@H": 144,
147
+ "COc1cc(/C=": 145,
148
+ "CC(C)(C)c1": 146,
149
+ "Na+].[Na+]": 147,
150
+ "(=O)[C@H](": 148,
151
+ "C@H](C(=O)": 149,
152
+ "Cc1c[nH]c2": 150,
153
+ "a+].[Na+].": 151,
154
+ "](O)[C@H](": 152,
155
+ "O=S(=O)(c1": 153,
156
+ "C[C@@H]1CN": 154,
157
+ "COc1cc2c(c": 155,
158
+ "COc1cc(OC)": 156,
159
+ "[C@H](C(=O": 157,
160
+ "CS(=O)(=O)": 158,
161
+ "@@H](O)[C@": 159,
162
+ ")[C@H](Cc1": 160,
163
+ "O)N[C@@H](": 161,
164
+ "NC(=O)[C@H": 162,
165
+ "C(=O)N[C@@": 163,
166
+ "C@H](NC(=O": 164,
167
+ "O=C(Nc1ccc": 165,
168
+ "S(=O)(=O)c": 166,
169
+ "COc1ccc2nc": 167,
170
+ "CC(C)[C@@H": 168,
171
+ "COc1ccc(C=": 169,
172
+ "COc1ccccc1": 170,
173
+ "+].[Na+].[": 171,
174
+ ")NC(=O)[C@": 172,
175
+ "CCOC(=O)[C": 173,
176
+ "COc1ccc(CC": 174,
177
+ "(=O)[C@@H]": 175,
178
+ "COc1ccc(C2": 176,
179
+ "(F)(F)F)cc": 177,
180
+ "COc1cc(O)c": 178,
181
+ "C@H](O)[C@": 179,
182
+ "CC(=O)c1c(": 180,
183
+ "COc1cccc(-": 181,
184
+ "N[C@H](C(=": 182,
185
+ "(S(=O)(=O)": 183,
186
+ "CN(C)c1ccc": 184,
187
+ "H](O)[C@H]": 185,
188
+ "COc1ccc([C": 186,
189
+ "CC(C)=CCc1": 187,
190
+ "CCOC(=O)C1": 188,
191
+ "Cc1ccc(-c2": 189,
192
+ "O=c1cc(-c2": 190,
193
+ "Nc1ncnc2c1": 191,
194
+ "COc1cccc(C": 192,
195
+ "COc1cc(-c2": 193,
196
+ "NC(=O)[C@@": 194,
197
+ "CC(=O)OCC1": 195,
198
+ "COc1ccc(CN": 196,
199
+ "COc1ccc(N2": 197,
200
+ "CCOC(=O)c1": 198,
201
+ "S(=O)(=O)N": 199,
202
+ "O=C(c1ccc(": 200,
203
+ "OC(CO)C(O)": 201,
204
+ "[C@H](NC(=": 202,
205
+ "CC(=O)OC1C": 203,
206
+ "CC(C)c1ccc": 204,
207
+ "@H](O)[C@H": 205,
208
+ "O)[C@H](Cc": 206,
209
+ "FC(F)(F)c1": 207,
210
+ "O)[C@H](CC": 208,
211
+ "N[C@@H](CC": 209,
212
+ "[C@@H](Cc1": 210,
213
+ "COc1ccc(-": 211,
214
+ "CC(=O)Oc1": 212,
215
+ "O)[C@@H]1": 213,
216
+ "Cc1cccc(C": 214,
217
+ "COC(=O)CC": 215,
218
+ "Cc1cc(=O)": 216,
219
+ "O)N[C@H](": 217,
220
+ "CCOc1ccc(": 218,
221
+ "N+](=O)[O": 219,
222
+ "(F)(F)F)c": 220,
223
+ "N#Cc1cccc": 221,
224
+ "[C@@H](Cc": 222,
225
+ "COc1cccc2": 223,
226
+ "COC(=O)C(": 224,
227
+ "(=O)N[C@H": 225,
228
+ ")NC(=O)[C": 226,
229
+ "COC(=O)[C": 227,
230
+ "O=C(c1ccc": 228,
231
+ "COc1ccc(N": 229,
232
+ ")[C@H](Cc": 230,
233
+ ")C(=O)N[C": 231,
234
+ "CC(C)(C)C": 232,
235
+ "CNC(=O)c1": 233,
236
+ ")C(F)(F)F": 234,
237
+ "CC(C)[C@H": 235,
238
+ "Cc1ccccc1": 236,
239
+ "CC(=O)N[C": 237,
240
+ ")N[C@@H](": 238,
241
+ "@H](NC(=O": 239,
242
+ "CC1(C)CCC": 240,
243
+ "C=C1C(=O)": 241,
244
+ ")[C@@H](O": 242,
245
+ "ccc(Cl)cc": 243,
246
+ "COC(=O)C1": 244,
247
+ "COC(=O)c1": 245,
248
+ "CC(=O)OC1": 246,
249
+ "COc1ccc(C": 247,
250
+ "COc1cc(C2": 248,
251
+ "S(=O)(=O)": 249,
252
+ "Cc1cc(C)c": 250,
253
+ "CCOC(=O)C": 251,
254
+ "COc1cccc(": 252,
255
+ "Cc1cc(O)c": 253,
256
+ "O)[C@@H](": 254,
257
+ "F)(F)F)cc": 255,
258
+ "COc1cc(O)": 256,
259
+ "C(=O)N[C@": 257,
260
+ "CC(=O)OCC": 258,
261
+ "(=O)N[C@@": 259,
262
+ "(C(F)(F)F": 260,
263
+ "[Na+].[Na": 261,
264
+ "CC(=O)Nc1": 262,
265
+ "C(F)(F)F)": 263,
266
+ "a+].[Na+]": 264,
267
+ "@H](O)[C@": 265,
268
+ "](O)[C@H]": 266,
269
+ "(=O)(=O)c": 267,
270
+ "O=c1[nH]c": 268,
271
+ "COc1cc2c(": 269,
272
+ "COc1ccc2c": 270,
273
+ ".[Na+].[N": 271,
274
+ ")N[C@H](C": 272,
275
+ "CC(=O)O[C": 273,
276
+ "O=C(COc1": 274,
277
+ "(=O)(=O)": 275,
278
+ "O=C1NC(=": 276,
279
+ "C=C(C)C1": 277,
280
+ "C[C@@H](": 278,
281
+ ")N[C@H](": 279,
282
+ "O=C(O)c1": 280,
283
+ "Cc1cc(O)": 281,
284
+ "CC(=O)NC": 282,
285
+ "CC(=O)N1": 283,
286
+ "Cc1cc2c(": 284,
287
+ "[C@@H](C": 285,
288
+ "(=O)N[C@": 286,
289
+ "COC(=O)N": 287,
290
+ ")[C@@H](": 288,
291
+ "COc1cc(C": 289,
292
+ "CCOC(=O)": 290,
293
+ "O=C(NCc1": 291,
294
+ "CCCC(=O)": 292,
295
+ "CCOc1ccc": 293,
296
+ "O=C(CCc1": 294,
297
+ "O=C(O)CC": 295,
298
+ "O=C(O)[C": 296,
299
+ "N#Cc1ccc": 297,
300
+ "CN1C(=O)": 298,
301
+ "COC(=O)C": 299,
302
+ "CCc1ccc(": 300,
303
+ "COc1cccc": 301,
304
+ ".[Na+].[": 302,
305
+ "(C(F)(F)": 303,
306
+ "O=C(CSc1": 304,
307
+ "Cc1ccc(C": 305,
308
+ "Cc1cccc(": 306,
309
+ "Cc1cc(C)": 307,
310
+ "(O)C(O)C": 308,
311
+ "Cc1ccc(N": 309,
312
+ "CC(=O)c1": 310,
313
+ "COc1ccc(": 311,
314
+ "C=CC(=O)": 312,
315
+ "CCNC(=O)": 313,
316
+ "CCC(=O)N": 314,
317
+ "COc1c2c(": 315,
318
+ "CC1CCC2(": 316,
319
+ "C(=O)N[C": 317,
320
+ "(=O)[C@H": 318,
321
+ "O)N[C@@H": 319,
322
+ ")[C@@H]1": 320,
323
+ "O=C(c1cc": 321,
324
+ "CCCCCCCC": 322,
325
+ "COc1ccc2": 323,
326
+ "CC(C)(C)": 324,
327
+ "C(F)(F)F": 325,
328
+ "NC(=O)c1": 326,
329
+ "F)(F)F)c": 327,
330
+ "CC(=O)OC": 328,
331
+ ")[C@H](O": 329,
332
+ "O)[C@H](": 330,
333
+ "(F)(F)F)": 331,
334
+ "CC(C)C[C": 332,
335
+ "Cc1ccc(-": 333,
336
+ "O=S(=O)(": 334,
337
+ "C[C@@H]1": 335,
338
+ "O)[C@@H]": 336,
339
+ "COc1c(O)": 337,
340
+ "H](O)[C@": 338,
341
+ "CC1(C)CC": 339,
342
+ "CC1(C)[C": 340,
343
+ "(NC(=O)": 341,
344
+ "CC[C@@H": 342,
345
+ "S(=O)(=": 343,
346
+ "C[C@H](": 344,
347
+ "CCCc1cc": 345,
348
+ "CCN(CC)": 346,
349
+ "C@@H](C": 347,
350
+ "a+].[Na": 348,
351
+ "CCC(=O)": 349,
352
+ "CC(=O)N": 350,
353
+ "[C@@H](": 351,
354
+ "(=O)N[C": 352,
355
+ "Clc1ccc": 353,
356
+ "CC(C)CN": 354,
357
+ "CCCCCCC": 355,
358
+ "CC1CCC2": 356,
359
+ "Cc1cc(N": 357,
360
+ "Cc1nc(N": 358,
361
+ "CC(C)c1": 359,
362
+ "O)N[C@@": 360,
363
+ "C(F)(F)": 361,
364
+ "C=C1CCC": 362,
365
+ "2ccccc2": 363,
366
+ "CC(C)CC": 364,
367
+ "O=C1Nc2": 365,
368
+ "O=C(NC1": 366,
369
+ "C[C@H]1": 367,
370
+ "[C@H](C": 368,
371
+ "(F)(F)F": 369,
372
+ ")[C@H](": 370,
373
+ ")N[C@@H": 371,
374
+ "F)(F)F)": 372,
375
+ "CCOc1cc": 373,
376
+ "Cc1ccc2": 374,
377
+ "CCC(C)C": 375,
378
+ "C(=O)N[": 376,
379
+ "CC1=C(C": 377,
380
+ "Cc1cccc": 378,
381
+ "COc1cc2": 379,
382
+ "Cc1cc(C": 380,
383
+ "COC(=O)": 381,
384
+ "O=C(NCC": 382,
385
+ "O=C(Cc1": 383,
386
+ ")C(=O)N": 384,
387
+ "C=C(C)C": 385,
388
+ "O)C(O)C": 386,
389
+ "O=C(Nc1": 387,
390
+ "CN1CCN(": 388,
391
+ "O=C(N[C": 389,
392
+ "Cc1ccc(": 390,
393
+ "C[C@]12": 391,
394
+ "CN(C)c1": 392,
395
+ ")[C@@H]": 393,
396
+ "CCc1ccc": 394,
397
+ "(=O)[C@": 395,
398
+ "CC1(C)C": 396,
399
+ "O=C(CN1": 397,
400
+ "COc1cc(": 398,
401
+ "N[C@H](": 399,
402
+ "COc1ccc": 400,
403
+ "O=C1C(=": 401,
404
+ "CC12CCC": 402,
405
+ "O=C(O)C": 403,
406
+ "[C@@H]2": 404,
407
+ "CNC(=O)": 405,
408
+ "O)N[C@H": 406,
409
+ "O)[C@H]": 407,
410
+ "1ccccc1": 408,
411
+ "[C@@H]1": 409,
412
+ "CC(C)[C": 410,
413
+ "CC(C)C(": 411,
414
+ "COc1c(": 412,
415
+ ")C(=O)": 413,
416
+ "CC(C)N": 414,
417
+ "O)(=O)": 415,
418
+ "CC1CCC": 416,
419
+ "F)(F)F": 417,
420
+ "[C@H]2": 418,
421
+ "C=C(C)": 419,
422
+ "[C@@H]": 420,
423
+ "CC(C)C": 421,
424
+ "CCc1nc": 422,
425
+ "CC(C)=": 423,
426
+ "CCCCCC": 424,
427
+ "C@H](C": 425,
428
+ "CCCCc1": 426,
429
+ "Cc1nc2": 427,
430
+ "Cc1nnc": 428,
431
+ "CCc1cc": 429,
432
+ "c1ccc(": 430,
433
+ "O=C1C=": 431,
434
+ "CC1OC(": 432,
435
+ "Fc1ccc": 433,
436
+ "(=O)c1": 434,
437
+ "C@@H]2": 435,
438
+ "C@H](O": 436,
439
+ "C[C@@H": 437,
440
+ "NC(=O)": 438,
441
+ "CCCCN(": 439,
442
+ "C=CCc1": 440,
443
+ "Cc1oc2": 441,
444
+ "O)[C@@": 442,
445
+ "C=C1CC": 443,
446
+ "CC[C@H": 444,
447
+ ")N[C@@": 445,
448
+ ")[C@H]": 446,
449
+ "CC(C)(": 447,
450
+ "(C)(C)": 448,
451
+ "O=C(O)": 449,
452
+ "(=O)[C": 450,
453
+ "Cc1cc(": 451,
454
+ "C[C@@]": 452,
455
+ "N=C(N)": 453,
456
+ "Cc1cc2": 454,
457
+ "N#Cc1c": 455,
458
+ "C@@H]1": 456,
459
+ "(F)(F)": 457,
460
+ "O=C1c2": 458,
461
+ "(C(=O)": 459,
462
+ "CN(Cc1": 460,
463
+ "C(=O)N": 461,
464
+ "CCCC[C": 462,
465
+ "O=c1cc": 463,
466
+ "Cc1nc(": 464,
467
+ "O=C(NC": 465,
468
+ "CC(=O)": 466,
469
+ "CN(C)C": 467,
470
+ ")C(O)C": 468,
471
+ ")(F)F)": 469,
472
+ "O=C(c1": 470,
473
+ "[C@H](": 471,
474
+ "C@@H](": 472,
475
+ "Nc1ccc": 473,
476
+ "COc1cc": 474,
477
+ "O=C(CC": 475,
478
+ "CC1(C)": 476,
479
+ ")NC(=O": 477,
480
+ "Cc1ccc": 478,
481
+ "O=c1c2": 479,
482
+ "CC1=CC": 480,
483
+ "O=C1NC": 481,
484
+ "Oc1ccc": 482,
485
+ "[C@H]1": 483,
486
+ "(=O)N[": 484,
487
+ "Cc1c(C": 485,
488
+ "O=C1CC": 486,
489
+ "O=C(O": 487,
490
+ "N[C@H": 488,
491
+ "c1ccc": 489,
492
+ "CCN(C": 490,
493
+ "CC(O)": 491,
494
+ ")(F)F": 492,
495
+ "@@H]1": 493,
496
+ "COCCN": 494,
497
+ "O=C(C": 495,
498
+ "CCCc1": 496,
499
+ "CC1CC": 497,
500
+ "Cn1nc": 498,
501
+ "CN1CC": 499,
502
+ "Cc1oc": 500,
503
+ "(=O)N": 501,
504
+ "O=C(N": 502,
505
+ "@@H](": 503,
506
+ "Cc1cn": 504,
507
+ "C@@H]": 505,
508
+ "C=C1C": 506,
509
+ "O=c1c": 507,
510
+ "CCn1c": 508,
511
+ "COC1=": 509,
512
+ "[C@H]": 510,
513
+ "CC1(C": 511,
514
+ "Cc1cc": 512,
515
+ "CCC[C": 513,
516
+ "Cc1c(": 514,
517
+ "CCC(C": 515,
518
+ "C(=O)": 516,
519
+ "C[C@H": 517,
520
+ "C@H](": 518,
521
+ "F)(F)": 519,
522
+ ")[C@@": 520,
523
+ "[nH]c": 521,
524
+ "C/C(=": 522,
525
+ "C[C@]": 523,
526
+ "C(O)C": 524,
527
+ "C=CC1": 525,
528
+ "CN(C)": 526,
529
+ "Cc1nn": 527,
530
+ "CCCN(": 528,
531
+ "N#Cc1": 529,
532
+ "CC1=C": 530,
533
+ "ccccc": 531,
534
+ "(=O)c": 532,
535
+ "COc1c": 533,
536
+ "Cn1cc": 534,
537
+ "C=C[C": 535,
538
+ "Cc1nc": 536,
539
+ "CC(C)": 537,
540
+ "CCOc1": 538,
541
+ "CCCCC": 539,
542
+ "O)[C@": 540,
543
+ "C@H]1": 541,
544
+ "1ccc(": 542,
545
+ "(=O)O": 543,
546
+ "(=O)C": 544,
547
+ "Nc1nc": 545,
548
+ "C=C(C": 546,
549
+ "CCCCN": 547,
550
+ "C/C=C": 548,
551
+ "(=O)[": 549,
552
+ "O)N[C": 550,
553
+ "C@H]2": 551,
554
+ "COC[C": 552,
555
+ "O=C([": 553,
556
+ "C=CCN": 554,
557
+ "CCC1(": 555,
558
+ ")C(O)": 556,
559
+ "N#CC1": 557,
560
+ "O=C1C": 558,
561
+ "COC1C": 559,
562
+ "COCC": 560,
563
+ "COC1": 561,
564
+ "O=C1": 562,
565
+ "COc1": 563,
566
+ "CC(C": 564,
567
+ "(Cl)": 565,
568
+ "CCC(": 566,
569
+ "CN(C": 567,
570
+ "[nH]": 568,
571
+ ")cc1": 569,
572
+ "OC[C": 570,
573
+ "O)[C": 571,
574
+ "@H]1": 572,
575
+ "O)C(": 573,
576
+ "CN1C": 574,
577
+ "(C)C": 575,
578
+ ")[C@": 576,
579
+ "1ccc": 577,
580
+ "C=CC": 578,
581
+ "C=C1": 579,
582
+ "CSc1": 580,
583
+ "@@H]": 581,
584
+ "CO[C": 582,
585
+ "CC1=": 583,
586
+ "CCCC": 584,
587
+ "(O)C": 585,
588
+ "O)c1": 586,
589
+ "CCn1": 587,
590
+ "[C@@": 588,
591
+ "ccc(": 589,
592
+ "CCN1": 590,
593
+ "CC/C": 591,
594
+ "O=C(": 592,
595
+ "C@H]": 593,
596
+ "O)N[": 594,
597
+ "Cn1c": 595,
598
+ "CCc1": 596,
599
+ "CCC1": 597,
600
+ "CC12": 598,
601
+ "CN[C": 599,
602
+ "CC1C": 600,
603
+ "CC=C": 601,
604
+ "C=C(": 602,
605
+ "CCCN": 603,
606
+ "Cc1c": 604,
607
+ "CC1(": 605,
608
+ "@H](": 606,
609
+ "CNc1": 607,
610
+ "CNCC": 608,
611
+ "O=c1": 609,
612
+ "Clc1": 610,
613
+ "N#CC": 611,
614
+ "(=O)": 612,
615
+ ")N[C": 613,
616
+ "C(O)": 614,
617
+ "CC(O": 615,
618
+ ")C(=": 616,
619
+ "CC[C": 617,
620
+ "CCN(": 618,
621
+ "c1cc": 619,
622
+ "(C(=": 620,
623
+ "nH]c": 621,
624
+ "(-c2": 622,
625
+ "N[C": 623,
626
+ "O=C": 624,
627
+ "cc(": 625,
628
+ "CCN": 626,
629
+ ")[C": 627,
630
+ "O)N": 628,
631
+ ")c2": 629,
632
+ "=C(": 630,
633
+ "O)[": 631,
634
+ "(-c": 632,
635
+ ")CC": 633,
636
+ "(O)": 634,
637
+ "[C@": 635,
638
+ "C@@": 636,
639
+ "@H]": 637,
640
+ "H]1": 638,
641
+ ")C(": 639,
642
+ "cc1": 640,
643
+ "C)C": 641,
644
+ "C/C": 642,
645
+ "O=S": 643,
646
+ "Cc1": 644,
647
+ "CCC": 645,
648
+ "CC=": 646,
649
+ "NC(": 647,
650
+ "CNC": 648,
651
+ "C[C": 649,
652
+ "nH]": 650,
653
+ "C(C": 651,
654
+ "N#C": 652,
655
+ "CO/": 653,
656
+ ")c(": 654,
657
+ "ccc": 655,
658
+ "H](": 656,
659
+ ")cc": 657,
660
+ "O)c": 658,
661
+ "(C)": 659,
662
+ ")c1": 660,
663
+ "O)C": 661,
664
+ "COC": 662,
665
+ "CC(": 663,
666
+ "O[C": 664,
667
+ "C=C": 665,
668
+ "CN(": 666,
669
+ "Cl.": 667,
670
+ "Fc1": 668,
671
+ "CC1": 669,
672
+ "Nc1": 670,
673
+ "CCO": 671,
674
+ "Cn1": 672,
675
+ "Oc1": 673,
676
+ "CN1": 674,
677
+ "C[N": 675,
678
+ "C@": 676,
679
+ "H]": 677,
680
+ ")[": 678,
681
+ "OC": 679,
682
+ "CC": 680,
683
+ "C[": 681,
684
+ "O=": 682,
685
+ "CO": 683,
686
+ "NC": 684,
687
+ "3)": 685,
688
+ "=C": 686,
689
+ "@@": 687,
690
+ "F)": 688,
691
+ "c3": 689,
692
+ ")N": 690,
693
+ "](": 691,
694
+ "(O": 692,
695
+ "c(": 693,
696
+ "c1": 694,
697
+ "N#": 695,
698
+ "CN": 696,
699
+ "C=": 697,
700
+ "2)": 698,
701
+ "[C": 699,
702
+ ")C": 700,
703
+ "(C": 701,
704
+ "(=": 702,
705
+ "@H": 703,
706
+ "c2": 704,
707
+ "C(": 705,
708
+ "cc": 706,
709
+ "C)": 707,
710
+ ")c": 708,
711
+ "O)": 709,
712
+ "C#": 710,
713
+ "2c": 711,
714
+ "@": 712,
715
+ "]": 713,
716
+ "4": 714,
717
+ "F": 715,
718
+ "H": 716,
719
+ "=": 717,
720
+ "2": 718,
721
+ "1": 719,
722
+ "(": 720,
723
+ "3": 721,
724
+ ")": 722,
725
+ "N": 723,
726
+ "[": 724,
727
+ "C": 725,
728
+ "CNN": 726,
729
+ "CCCCO": 727,
730
+ "NCCOc": 728,
731
+ "ccncn": 729,
732
+ "CCNCC": 730,
733
+ "COCCCCN": 731,
734
+ "IC": 732,
735
+ "COCCOCCNC": 733,
736
+ "CCCCCn": 734,
737
+ "CCCNCC": 735,
738
+ "COCCSc": 736,
739
+ "(-[": 737,
740
+ "CCCCCc": 738,
741
+ "OCCOCC": 739,
742
+ "CSCC": 740,
743
+ "OCCS": 741,
744
+ ")(=": 742,
745
+ "Sc": 743,
746
+ "OCCN": 744,
747
+ "NCCCNC": 745,
748
+ "NCCSc": 746,
749
+ "nsc": 747,
750
+ "OCCSCC": 748,
751
+ "([": 749,
752
+ ")=": 750,
753
+ "SCC": 751,
754
+ "35": 752,
755
+ "NCCCc": 753,
756
+ "-])=": 754,
757
+ "OCCOCCS": 755,
758
+ "CCOCCNc": 756,
759
+ "OCCNc": 757,
760
+ "]([": 758,
761
+ "ccoc": 759,
762
+ "SCCCOc": 760,
763
+ "CCCCCCCCCCC": 761,
764
+ "CSc": 762,
765
+ "42": 763,
766
+ "Cc": 764,
767
+ "scnc": 765,
768
+ "+][": 766,
769
+ "ssc": 767,
770
+ "CSCCNC": 768,
771
+ "Br": 769,
772
+ "NCCCS": 770,
773
+ "ncoc": 771,
774
+ "CSCc": 772,
775
+ "31": 773,
776
+ "A": 774,
777
+ "COCCOCCN": 775,
778
+ "OCOC": 776,
779
+ "OP": 777,
780
+ "I": 778,
781
+ "cnccc": 779,
782
+ "OCCCCN": 780,
783
+ "21": 781,
784
+ "ccsc": 782,
785
+ "NCCSC": 783,
786
+ "nncs": 784,
787
+ "SCCNC": 785,
788
+ "COc": 786,
789
+ "CCCCCSc": 787,
790
+ "CCCl": 788,
791
+ "CCOCCSc": 789,
792
+ "nnn": 790,
793
+ "ncnn": 791,
794
+ "OCc": 792,
795
+ "OCCCc": 793,
796
+ "+]=[": 794,
797
+ "ClCC": 795,
798
+ "NCCS": 796,
799
+ "cccn": 797,
800
+ "nn": 798,
801
+ "CCCCCOc": 799,
802
+ "COCCCN": 800,
803
+ "NCCCCn": 801,
804
+ "CCCCSC": 802,
805
+ "NCCO": 803,
806
+ "]/": 804,
807
+ "CSC": 805,
808
+ "B": 806,
809
+ "CCCS": 807,
810
+ "@@]([": 808,
811
+ "CCCSCC": 809,
812
+ "6": 810,
813
+ "nscc": 811,
814
+ "M": 812,
815
+ "p": 813,
816
+ "COCCOCC": 814,
817
+ "NCN": 815,
818
+ "cs": 816,
819
+ "OCCOCCOCCOCCO": 817,
820
+ "NCC": 818,
821
+ "CCOCCCC": 819,
822
+ "COCCCC": 820,
823
+ "Nc": 821,
824
+ "NCCC": 822,
825
+ "nsnc": 823,
826
+ "CCCOc": 824,
827
+ "OCCSCc": 825,
828
+ "K": 826,
829
+ "ncncc": 827,
830
+ "@]": 828,
831
+ "COCCCNc": 829,
832
+ "CSCCc": 830,
833
+ "CS": 831,
834
+ "CCCCCO": 832,
835
+ "COCCOc": 833,
836
+ "CCNCc": 834,
837
+ "onc": 835,
838
+ "e": 836,
839
+ "CCOCCCNc": 837,
840
+ "OCn": 838,
841
+ "+]=": 839,
842
+ "CCOCCCc": 840,
843
+ "NCCNC": 841,
844
+ "CNn": 842,
845
+ "Nn": 843,
846
+ "CCOCCS": 844,
847
+ "SCCN": 845,
848
+ "CCCCl": 846,
849
+ "cncs": 847,
850
+ "CONC": 848,
851
+ "CCOCCNC": 849,
852
+ "nnnn": 850,
853
+ "R": 851,
854
+ "ncc": 852,
855
+ "CCCNc": 853,
856
+ "-])(": 854,
857
+ "CCCCNS": 855,
858
+ "o": 856,
859
+ "SCCSc": 857,
860
+ "CSCCOc": 858,
861
+ "9": 859,
862
+ "NCCCNS": 860,
863
+ "+]": 861,
864
+ "COCCc": 862,
865
+ "CNCCc": 863,
866
+ "COn": 864,
867
+ "Z": 865,
868
+ "CCl": 866,
869
+ "coc": 867,
870
+ "CCCSc": 868,
871
+ "NOC": 869,
872
+ "NS": 870,
873
+ ")([": 871,
874
+ "CCBr": 872,
875
+ "5": 873,
876
+ "nncc": 874,
877
+ "CI": 875,
878
+ "OCN": 876,
879
+ "CCCCCS": 877,
880
+ "NCCCCOc": 878,
881
+ "CCCSCc": 879,
882
+ "CSCCCNC": 880,
883
+ "COCCS": 881,
884
+ "CSCCCCNC": 882,
885
+ "BrCC": 883,
886
+ "(/": 884,
887
+ "CCNC": 885,
888
+ "On": 886,
889
+ "NCCc": 887,
890
+ "+": 888,
891
+ "#": 889,
892
+ "\\[": 890,
893
+ "CSCCCC": 891,
894
+ "CCOc": 892,
895
+ "NSC": 893,
896
+ "SH": 894,
897
+ "@](=": 895,
898
+ "-][": 896,
899
+ "COCCCOc": 897,
900
+ "ccnc": 898,
901
+ "CCCCSc": 899,
902
+ "FCCC": 900,
903
+ "CSCCN": 901,
904
+ "cncc": 902,
905
+ "SCCS": 903,
906
+ "sccc": 904,
907
+ "CSCCCN": 905,
908
+ "Cn": 906,
909
+ "nccs": 907,
910
+ "CSCCC": 908,
911
+ "OCCCO": 909,
912
+ "SCCCO": 910,
913
+ "COCOc": 911,
914
+ "OCCNC": 912,
915
+ "CCOCCOCC": 913,
916
+ "NCCSCC": 914,
917
+ "ccncc": 915,
918
+ "NO": 916,
919
+ "OCCBr": 917,
920
+ "+])(": 918,
921
+ "snnc": 919,
922
+ "SCCO": 920,
923
+ "g": 921,
924
+ "8": 922,
925
+ ")(": 923,
926
+ "ON": 924,
927
+ "ClCCCSc": 925,
928
+ "(-": 926,
929
+ "OCCOC": 927,
930
+ "CCCCCCN": 928,
931
+ "NNc": 929,
932
+ "CSCCOC": 930,
933
+ "NOCC": 931,
934
+ "occc": 932,
935
+ "nH": 933,
936
+ "COCCCn": 934,
937
+ "/[": 935,
938
+ "CCSc": 936,
939
+ "cccnc": 937,
940
+ "NCCOCCO": 938,
941
+ "ccs": 939,
942
+ "ncco": 940,
943
+ "CCOCCOC": 941,
944
+ "ONC": 942,
945
+ "l": 943,
946
+ "ccno": 944,
947
+ "SCCc": 945,
948
+ "nncn": 946,
949
+ "OCCSC": 947,
950
+ "csc": 948,
951
+ "T": 949,
952
+ "-])/": 950,
953
+ "CCCSC": 951,
954
+ "COCCOC": 952,
955
+ "nccn": 953,
956
+ "OCC": 954,
957
+ "r": 955,
958
+ "ccnnc": 956,
959
+ "][": 957,
960
+ "SCCOC": 958,
961
+ "CCOC": 959,
962
+ "nccc": 960,
963
+ "CCCCOCc": 961,
964
+ "SCCCS": 962,
965
+ "CCSCCOC": 963,
966
+ "ccnn": 964,
967
+ "NCc": 965,
968
+ "CCCCOC": 966,
969
+ "t": 967,
970
+ "CCOCCCn": 968,
971
+ "312": 969,
972
+ "OH": 970,
973
+ "cnnc": 971,
974
+ "CCCCn": 972,
975
+ "NCCNc": 973,
976
+ "CCSCCOc": 974,
977
+ "CCOCCOc": 975,
978
+ "+]\\": 976,
979
+ "OS": 977,
980
+ "43": 978,
981
+ "CCCF": 979,
982
+ "45": 980,
983
+ "34": 981,
984
+ "13": 982,
985
+ "COCCO": 983,
986
+ "NCCCCN": 984,
987
+ "nnccc": 985,
988
+ "SCc": 986,
989
+ "CH": 987,
990
+ "CCCNCc": 988,
991
+ "+](/[": 989,
992
+ "SCn": 990,
993
+ "](/": 991,
994
+ "OCCF": 992,
995
+ "cncnc": 993,
996
+ "snc": 994,
997
+ "OCCCC": 995,
998
+ "OCCCn": 996,
999
+ "Brc": 997,
1000
+ "ncn": 998,
1001
+ "csnn": 999,
1002
+ "SCCOc": 1000,
1003
+ "+](=": 1001,
1004
+ "CCOCCN": 1002,
1005
+ "+](-": 1003,
1006
+ "sccn": 1004,
1007
+ "CCCCS": 1005,
1008
+ "scc": 1006,
1009
+ "nc": 1007,
1010
+ "cncn": 1008,
1011
+ "+](/": 1009,
1012
+ "CNc": 1010,
1013
+ "CCCCCCO": 1011,
1014
+ "%": 1012,
1015
+ "BrCCC": 1013,
1016
+ "-]/": 1014,
1017
+ "NNN": 1015,
1018
+ "@@](=": 1016,
1019
+ "-])[": 1017,
1020
+ "NOCc": 1018,
1021
+ "CCCOCc": 1019,
1022
+ "ClC": 1020,
1023
+ "cnccn": 1021,
1024
+ "nnco": 1022,
1025
+ "CCCOC": 1023,
1026
+ "CSCCO": 1024,
1027
+ "NCCCn": 1025,
1028
+ "L": 1026,
1029
+ ")(/": 1027,
1030
+ "OCCCNC": 1028,
1031
+ "occ": 1029,
1032
+ "CCCNS": 1030,
1033
+ "OCCc": 1031,
1034
+ "41": 1032,
1035
+ "-[": 1033,
1036
+ "O": 1034,
1037
+ "OCCCCC": 1035,
1038
+ "CCc": 1036,
1039
+ "COCCCS": 1037,
1040
+ "CCSCC": 1038,
1041
+ "NCCCOC": 1039,
1042
+ "CCCBr": 1040,
1043
+ "CCCCNC": 1041,
1044
+ "NCCCSC": 1042,
1045
+ "COCCNS": 1043,
1046
+ "nccnc": 1044,
1047
+ "SCCn": 1045,
1048
+ "NOc": 1046,
1049
+ "@](": 1047,
1050
+ "OCCCl": 1048,
1051
+ "=[": 1049,
1052
+ "COCCCCC": 1050,
1053
+ "CON": 1051,
1054
+ "0": 1052,
1055
+ ")/[": 1053,
1056
+ "ccn": 1054,
1057
+ "CCCc": 1055,
1058
+ "a": 1056,
1059
+ "NN": 1057,
1060
+ "on": 1058,
1061
+ ")/": 1059,
1062
+ "+]/": 1060,
1063
+ "ocnc": 1061,
1064
+ "cnco": 1062,
1065
+ "CNS": 1063,
1066
+ "nnsc": 1064,
1067
+ ")=[": 1065,
1068
+ "CCSCc": 1066,
1069
+ "-]=[": 1067,
1070
+ "-": 1068,
1071
+ "CCNc": 1069,
1072
+ "CCCn": 1070,
1073
+ "conc": 1071,
1074
+ "CCOCc": 1072,
1075
+ "ssnc": 1073,
1076
+ "NCCCOCC": 1074,
1077
+ "OCCCS": 1075,
1078
+ "oc": 1076,
1079
+ "S": 1077,
1080
+ "CCONC": 1078,
1081
+ "CCCCOc": 1079,
1082
+ "CCCOCC": 1080,
1083
+ "+])": 1081,
1084
+ "CBr": 1082,
1085
+ "BrCc": 1083,
1086
+ "SCCC": 1084,
1087
+ "CCOCCn": 1085,
1088
+ "CCn": 1086,
1089
+ "NNC": 1087,
1090
+ "CCOP": 1088,
1091
+ "CCOCCCNC": 1089,
1092
+ "co": 1090,
1093
+ "NCCCSc": 1091,
1094
+ "cnsn": 1092,
1095
+ "COCCNc": 1093,
1096
+ "OCCSc": 1094,
1097
+ "COCO": 1095,
1098
+ "COS": 1096,
1099
+ "NCCCCC": 1097,
1100
+ ".": 1098,
1101
+ "CCCCCCS": 1099,
1102
+ "CSCCCNc": 1100,
1103
+ "-])([": 1101,
1104
+ ")-": 1102,
1105
+ "NCCCO": 1103,
1106
+ "b": 1104,
1107
+ "NNS": 1105,
1108
+ "CSCCn": 1106,
1109
+ "sn": 1107,
1110
+ "cnns": 1108,
1111
+ "G": 1109,
1112
+ "COCCn": 1110,
1113
+ "CCS": 1111,
1114
+ "OCCCSC": 1112,
1115
+ "CCCCCOC": 1113,
1116
+ "NCCOCC": 1114,
1117
+ "OCCNS": 1115,
1118
+ "CCOCCO": 1116,
1119
+ "cscc": 1117,
1120
+ "23": 1118,
1121
+ "cnsc": 1119,
1122
+ "(\\": 1120,
1123
+ "@@]": 1121,
1124
+ "COCCCNC": 1122,
1125
+ "OCO": 1123,
1126
+ "CCOCCC": 1124,
1127
+ "FC": 1125,
1128
+ "ccon": 1126,
1129
+ "-])=[": 1127,
1130
+ "OCCCOc": 1128,
1131
+ "OCCCSc": 1129,
1132
+ "c": 1130,
1133
+ "CCCCCNC": 1131,
1134
+ "cnnn": 1132,
1135
+ "OCCn": 1133,
1136
+ "CNCc": 1134,
1137
+ "cccs": 1135,
1138
+ "SC": 1136,
1139
+ "24": 1137,
1140
+ "OCCCN": 1138,
1141
+ "NCCCN": 1139,
1142
+ "-])": 1140,
1143
+ "n": 1141,
1144
+ "cnn": 1142,
1145
+ "OCCOc": 1143,
1146
+ "NCCCOc": 1144,
1147
+ "CCSC": 1145,
1148
+ "OCCCNc": 1146,
1149
+ "+]([": 1147,
1150
+ "NH": 1148,
1151
+ "sc": 1149,
1152
+ "ccco": 1150,
1153
+ "CSCN": 1151,
1154
+ "CCSCCC": 1152,
1155
+ "oncc": 1153,
1156
+ "OCCC": 1154,
1157
+ "CCOCCOCCOCC": 1155,
1158
+ "SCCCC": 1156,
1159
+ "CCOCC": 1157,
1160
+ "cccnn": 1158,
1161
+ "CCOCCOCc": 1159,
1162
+ "cn": 1160,
1163
+ "NOCCc": 1161,
1164
+ "CSCCSC": 1162,
1165
+ "cscn": 1163,
1166
+ "COCCSCc": 1164,
1167
+ "CCSS": 1165,
1168
+ "CCCNC": 1166,
1169
+ "P": 1167,
1170
+ "CCCCCCNC": 1168,
1171
+ "+](": 1169,
1172
+ "ncsc": 1170,
1173
+ "CCNS": 1171,
1174
+ "ncccc": 1172,
1175
+ "noc": 1173,
1176
+ "E": 1174,
1177
+ "COP": 1175,
1178
+ "12": 1176,
1179
+ "cccc": 1177,
1180
+ "123": 1178,
1181
+ "CCOCCCN": 1179,
1182
+ "7": 1180,
1183
+ ")\\": 1181,
1184
+ "@@](": 1182,
1185
+ "i": 1183,
1186
+ "ClCc": 1184,
1187
+ "54": 1185,
1188
+ "NCCNS": 1186,
1189
+ "COCc": 1187,
1190
+ "COCCC": 1188,
1191
+ "(/[": 1189,
1192
+ "X": 1190,
1193
+ "NCCN": 1191,
1194
+ "OCCCCn": 1192,
1195
+ "CSCCS": 1193,
1196
+ "no": 1194,
1197
+ "NCCCC": 1195,
1198
+ "CCCCc": 1196,
1199
+ "\\": 1197,
1200
+ "NCCSCc": 1198,
1201
+ "-]": 1199,
1202
+ "Ic": 1200,
1203
+ "nonc": 1201,
1204
+ "s": 1202,
1205
+ "COCCNCc": 1203,
1206
+ "CCCCCN": 1204,
1207
+ "SCN": 1205,
1208
+ "ncccn": 1206,
1209
+ "NCCOCc": 1207,
1210
+ "cnoc": 1208,
1211
+ "cnc": 1209,
1212
+ "SN": 1210,
1213
+ "/": 1211,
1214
+ "ccccn": 1212,
1215
+ "COCCNC": 1213,
1216
+ "COCCCOC": 1214,
1217
+ "CNCCN": 1215,
1218
+ "nnc": 1216,
1219
+ "NCCOC": 1217,
1220
+ "ncon": 1218,
1221
+ "ns": 1219,
1222
+ "Cl": 1220,
1223
+ "CNCCC": 1221,
1224
+ "OCCO": 1222,
1225
+ "NCCn": 1223,
1226
+ "nnnc": 1224,
1227
+ "OCCOCCOCCO": 1225,
1228
+ "SCCCc": 1226,
1229
+ "CCCCOCC": 1227,
1230
+ "(=[": 1228,
1231
+ "ncnc": 1229,
1232
+ "Oc": 1230,
1233
+ "@]([": 1231,
1234
+ "NCCCCl": 1232,
1235
+ "NCCCNc": 1233,
1236
+ "NCCCCCC": 1234,
1237
+ "CCCO": 1235,
1238
+ "CCCCNc": 1236,
1239
+ "32": 1237
1240
+ }