GreatCaptainNemo commited on
Commit
0d68acf
·
verified ·
1 Parent(s): 3de86ca

Upload folder using huggingface_hub

Browse files
adapter_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": [
22
+ "feature_fuser"
23
+ ],
24
+ "peft_type": "LORA",
25
+ "r": 16,
26
+ "rank_pattern": {},
27
+ "revision": null,
28
+ "target_modules": [
29
+ "o_proj",
30
+ "v_proj",
31
+ "gate_proj",
32
+ "down_proj",
33
+ "up_proj",
34
+ "k_proj",
35
+ "q_proj"
36
+ ],
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_dora": false,
40
+ "use_rslora": false
41
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5538f84dd99b4f985b8abf08d80d1536ed4a121c99824d8d07cf3ceb6e99963
3
+ size 612436144
special_tokens_map.json ADDED
@@ -0,0 +1,828 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<molecule_2d>",
4
+ "<molecule_3d>",
5
+ "<protein>",
6
+ "<|CNC[C@H]1Oc2ccccc2C(=O)N([C@H](C)CO)C[C@@H]1C|>",
7
+ "<|Nc1cc(=O)[nH]c(=O)n1Cc1ccccc1|>",
8
+ "<|O=C(NCc1ccccc1)c1ccccc1|>",
9
+ "<|O=C(Nc1ccccc1)c1ccccc1|>",
10
+ "<|O=C(c1ccccc1)N1CCNCC1|>",
11
+ "<|Cc1cc(=O)n(-c2ccccc2)n1C|>",
12
+ "<|O=C(c1ccccc1)N1CCCCC1|>",
13
+ "<|CC(=O)Nc1ccc(S(N)(=O)=O)cc1|>",
14
+ "<|CC(=O)Nc1ccc2c(c1)OCCO2|>",
15
+ "<|CCN(CC)S(=O)(=O)c1ccccc1|>",
16
+ "<|COc1ccc(N2CCNCC2)cc1|>",
17
+ "<|O=C(c1ccccc1)c1ccccc1|>",
18
+ "<|O=C(c1ccccc1)N1CCOCC1|>",
19
+ "<|O=C(Nc1ccccc1)c1ccco1|>",
20
+ "<|NC(=O)COC(=O)c1ccccc1|>",
21
+ "<|CCNS(=O)(=O)c1ccc(C)cc1|>",
22
+ "<|Cc1cc(=O)n(-c2ccccc2)n1|>",
23
+ "<|COc1cc(C)cc(OC)c1OC|>",
24
+ "<|c1ccc(Cc2ccccc2)cc1|>",
25
+ "<|CCNC(=O)c1cccc(OC)c1|>",
26
+ "<|COc1ccc(C(N)=O)cc1OC|>",
27
+ "<|C[C@H](CO)NC(=O)c1ccccc1|>",
28
+ "<|Cn1c(=O)c2ncnc2n(C)c1=O|>",
29
+ "<|CCOc1ccc(NC(C)=O)cc1|>",
30
+ "<|O=CCNS(=O)(=O)c1ccccc1|>",
31
+ "<|NC(=O)CNC(=O)c1ccccc1|>",
32
+ "<|NC(=O)c1ccnc2ccccc12|>",
33
+ "<|CCN1C(=O)c2ccccc2C1=O|>",
34
+ "<|NC(=O)C1COc2ccccc2O1|>",
35
+ "<|c1ccc(Oc2ccccc2)cc1|>",
36
+ "<|CCCCn1c(N)cc(=O)[nH]c1=O|>",
37
+ "<|c1ccc2nc3nccc3nc2c1|>",
38
+ "<|c1ccc(CN2CCNCC2)cc1|>",
39
+ "<|CCNC(=O)c1ccc(OC)cc1|>",
40
+ "<|CCNS(=O)(=O)c1ccccc1|>",
41
+ "<|O=CCNC(=O)c1ccccc1|>",
42
+ "<|Cc1ccn(-c2ccccc2)n1|>",
43
+ "<|COc1cccc(OC)c1OC|>",
44
+ "<|COc1ccc(NC(C)=O)cc1|>",
45
+ "<|CC(=O)NCCc1ccccc1|>",
46
+ "<|NC(=O)C1CCN([SH](=O)=O)CC1|>",
47
+ "<|ccc1cnc2ccccc2n1|>",
48
+ "<|NC(=O)c1cccc([N+](=O)[O-])c1|>",
49
+ "<|Cc1cc(=O)oc2ccccc12|>",
50
+ "<|CCN(C)C(=O)c1ccccc1|>",
51
+ "<|CCCNC(=O)c1ccccc1|>",
52
+ "<|COc1ccc2ccccc2c1|>",
53
+ "<|c1ccc(N2CCNCC2)cc1|>",
54
+ "<|COc1ccccc1NC(C)=O|>",
55
+ "<|CCNC(=O)c1ccc(C)cc1|>",
56
+ "<|CCOc1ccccc1OCC|>",
57
+ "<|c1ccc(-c2ccccc2)cc1|>",
58
+ "<|Cc1ccc2ccc(=O)oc2c1|>",
59
+ "<|O=c1nc(=O)c2ccccc2n1|>",
60
+ "<|CCNC(=O)c1ccc(Cl)cc1|>",
61
+ "<|CC1NC(=O)N(CC(N)=O)C1=O|>",
62
+ "<|Cn1c(=O)[nH]c(=O)c2ncnc21|>",
63
+ "<|O=CCOC(=O)c1ccccc1|>",
64
+ "<|Cc1nc(=O)c2ccccc2n1|>",
65
+ "<|CCNC(=O)c1ccccc1|>",
66
+ "<|O=c1ncnc2ccccc12|>",
67
+ "<|COc1ccc(C)cc1OC|>",
68
+ "<|O=c1ccc2ccccc2o1|>",
69
+ "<|COc1cccc(C(N)=O)c1|>",
70
+ "<|CC(=O)NCc1ccccc1|>",
71
+ "<|CCOC(=O)c1ccccc1|>",
72
+ "<|CCC(=O)Nc1ccccc1|>",
73
+ "<|O=C(CS)Nc1ccccc1|>",
74
+ "<|Cc1ccc(S(N)(=O)=O)cc1|>",
75
+ "<|c1ccc(-n2cccn2)cc1|>",
76
+ "<|COc1ccc(C(N)=O)cc1|>",
77
+ "<|NC(=O)COc1ccccc1|>",
78
+ "<|O=C(CO)Nc1ccccc1|>",
79
+ "<|O=c1ccoc2ccccc12|>",
80
+ "<|O=c1nccc2ccccc12|>",
81
+ "<|O=c1nncc2ccccc12|>",
82
+ "<|Cc1ccc2ccccc2n1|>",
83
+ "<|CCN(C)Cc1ccccc1|>",
84
+ "<|CC(=O)Nc1ccc(C)cc1|>",
85
+ "<|O=c1ccnc2ccccc12|>",
86
+ "<|O=c1cnc2cncnc2n1|>",
87
+ "<|Oc1ccc2ccccc2n1|>",
88
+ "<|COc1ccccc1C(N)=O|>",
89
+ "<|O=c1ccc2ccccc2n1|>",
90
+ "<|Cc1ccc2ncccc2c1|>",
91
+ "<|Oc1ccc2ccccc2c1|>",
92
+ "<|Oc1ncnc2ccccc12|>",
93
+ "<|Cc1cccc2ccccc12|>",
94
+ "<|Cn1cnnc1SCC(N)=O|>",
95
+ "<|Cc1sc2ncncc2c1C|>",
96
+ "<|COc1ccc(C)c(OC)c1|>",
97
+ "<|O=C1NC(=O)c2ccccc21|>",
98
+ "<|Cc1ccnc2ccccc12|>",
99
+ "<|O=C1COc2ccccc2N1|>",
100
+ "<|Cn1cnc(=O)c2ncnc21|>",
101
+ "<|O=CNCCc1ccccc1|>",
102
+ "<|COc1cccc(C)c1OC|>",
103
+ "<|CNS(=O)(=O)c1ccccc1|>",
104
+ "<|CC1NC(=O)N(CC=O)C1=O|>",
105
+ "<|Nc1ccc(S(N)(=O)=O)cc1|>",
106
+ "<|O=c1ccnc2ccccn12|>",
107
+ "<|O=CN1CCN([SH](=O)=O)CC1|>",
108
+ "<|CC(=O)Nc1ccccc1C|>",
109
+ "<|CCNC(=O)C1CCNCC1|>",
110
+ "<|OCC1OCC(O)C(O)C1O|>",
111
+ "<|FC(F)(F)c1cncc(Cl)c1|>",
112
+ "<|COc1ccc(OC)c(C)c1|>",
113
+ "<|CC(=O)Nc1ccc(F)cc1|>",
114
+ "<|Cc1nncc2nnc(C)c12|>",
115
+ "<|c1ccc(-c2cscn2)cc1|>",
116
+ "<|Cc1cnc2ccccc2c1|>",
117
+ "<|c1ccc(-c2nnco2)cc1|>",
118
+ "<|FC(F)(F)Oc1ccccc1|>",
119
+ "<|O=[SH](=O)NCc1ccccc1|>",
120
+ "<|O=c1cnc2ccccc2n1|>",
121
+ "<|NC(=O)CCc1ccccc1|>",
122
+ "<|CCOC(=O)C1CCNCC1|>",
123
+ "<|Cc1cc(C)n2ncnc2n1|>",
124
+ "<|CCNCCc1ccccc1|>",
125
+ "<|O=CCNC(=O)c1ccco1|>",
126
+ "<|COc1ccc(NC=O)cc1|>",
127
+ "<|COc1ccc2ncsc2c1|>",
128
+ "<|COc1ccc2[nH]ccc2c1|>",
129
+ "<|COc1ccc(C(=O)O)cc1|>",
130
+ "<|NCC(=O)Nc1ccccc1|>",
131
+ "<|Nc1ncnc2ccccc12|>",
132
+ "<|Oc1cccc2cccnc12|>",
133
+ "<|NS(=O)(=O)c1ccc(Cl)cc1|>",
134
+ "<|CC(=O)Nc1ccccc1|>",
135
+ "<|c1ccc2ncccc2c1|>",
136
+ "<|COc1ccccc1OC|>",
137
+ "<|FC(F)(F)c1ccccc1|>",
138
+ "<|c1ccc2ccccc2c1|>",
139
+ "<|c1ccc2ncncc2c1|>",
140
+ "<|NS(=O)(=O)c1ccccc1|>",
141
+ "<|Cc1ccc2c(c1)OCO2|>",
142
+ "<|COc1cccc(OC)c1|>",
143
+ "<|O=cccc1ccccc1|>",
144
+ "<|COC(=O)c1ccccc1|>",
145
+ "<|CNC(=O)c1ccccc1|>",
146
+ "<|c1ccc2cnncc2c1|>",
147
+ "<|c1ccc2nccnc2c1|>",
148
+ "<|NC(=O)CSc1ncnn1|>",
149
+ "<|CCNCc1ccccc1|>",
150
+ "<|COc1ccc(OC)cc1|>",
151
+ "<|Cc1ccc([SH](=O)=O)cc1|>",
152
+ "<|c1ccc2c(c1)OCCO2|>",
153
+ "<|Cc1cc2ccccc2[nH]1|>",
154
+ "<|c1ccc2occcc2c1|>",
155
+ "<|O=CNCc1ccccc1|>",
156
+ "<|CCNC(=O)c1ccco1|>",
157
+ "<|O=c1ncnc2sccc12|>",
158
+ "<|FC(F)Oc1ccccc1|>",
159
+ "<|COc1cc(C)ccc1O|>",
160
+ "<|NC(=O)c1ccccc1Cl|>",
161
+ "<|Cc1ccc(C(N)=O)cc1|>",
162
+ "<|C1C2CC3CC1CC(C2)C3|>",
163
+ "<|O=COCc1ccccc1|>",
164
+ "<|Cn1ccc2ccccc21|>",
165
+ "<|Cc1cc2ccccc2n1|>",
166
+ "<|NC(=O)Cc1ccccc1|>",
167
+ "<|NC(=O)c1ccccc1F|>",
168
+ "<|NC(=O)Nc1ccccc1|>",
169
+ "<|COc1cccc(C=O)c1|>",
170
+ "<|O=CCOc1ccccc1|>",
171
+ "<|O=c1ccnc2scnn12|>",
172
+ "<|Cc1ccc([N+](=O)[O-])cc1|>",
173
+ "<|NNC(=O)c1ccccc1|>",
174
+ "<|CCC(=O)N1CCNCC1|>",
175
+ "<|Cn1cnc2ccccc21|>",
176
+ "<|Cc1cccc([N+](=O)[O-])c1|>",
177
+ "<|Cc1ccn2ncnc2n1|>",
178
+ "<|CCCCc1ccccc1|>",
179
+ "<|Cc1coc2ccccc12|>",
180
+ "<|Cn1ccc(=O)n(C)c1=O|>",
181
+ "<|CN1CCN([SH](=O)=O)CC1|>",
182
+ "<|CCOc1ccc(C)cc1|>",
183
+ "<|NC(=O)CSc1nnco1|>",
184
+ "<|O=[SH](=O)Nc1ccccc1|>",
185
+ "<|O=[SH](=O)N1CCCCCC1|>",
186
+ "<|NC(=O)c1ccc(Cl)cc1|>",
187
+ "<|Cc1cc2ccccc2o1|>",
188
+ "<|Cc1ccc(N(C)C)cc1|>",
189
+ "<|c1ccc2cnccc2c1|>",
190
+ "<|Cc1cc2cncnc2s1|>",
191
+ "<|CC(=O)NC1CCCCC1|>",
192
+ "<|CC1CCN([SH](=O)=O)CC1|>",
193
+ "<|O=c1ccnc2sccn12|>",
194
+ "<|O=[SH](=O)c1ccc(Cl)cc1|>",
195
+ "<|Cc1ccccc1[N+](=O)[O-]|>",
196
+ "<|NC(=O)c1ccc(F)cc1|>",
197
+ "<|COc1ccc(C=O)cc1|>",
198
+ "<|Cc1ccc2ncsc2c1|>",
199
+ "<|Nc1ncnc2ncnc12|>",
200
+ "<|NC(=O)CSc1nnnn1|>",
201
+ "<|CCNC(=O)c1cccs1|>",
202
+ "<|CC(=O)N1CCCCCC1|>",
203
+ "<|Nc1ccc([N+](=O)[O-])cc1|>",
204
+ "<|O=C1Cc2ccccc2N1|>",
205
+ "<|CCOc1ccc(N)cc1|>",
206
+ "<|FC(F)(F)c1ccncn1|>",
207
+ "<|CC(=O)NCc1ccco1|>",
208
+ "<|Cc1c[nH]c2ccccc12|>",
209
+ "<|NC(=O)CN1CCNCC1|>",
210
+ "<|CCNC(=O)c1ccon1|>",
211
+ "<|Cc1ccc(OCF)cc1|>",
212
+ "<|CCOC(=O)c1ccsc1|>",
213
+ "<|O=CCCc1ccccc1|>",
214
+ "<|CS(=O)(=O)c1ccccc1|>",
215
+ "<|CCCOc1ccccc1|>",
216
+ "<|NC(=O)c1ccccc1O|>",
217
+ "<|NC(=O)c1ccccc1|>",
218
+ "<|FC(F)c1ccccc1|>",
219
+ "<|COc1cccc(C)c1|>",
220
+ "<|c1ccc2[nH]ccc2c1|>",
221
+ "<|COc1ccc(C)cc1|>",
222
+ "<|CCOc1ccccc1|>",
223
+ "<|O=C(O)c1ccccc1|>",
224
+ "<|O=[SH](=O)c1ccccc1|>",
225
+ "<|c1ccc2nccc2c1|>",
226
+ "<|O=CNc1ccccc1|>",
227
+ "<|c1ccc2ncnc2c1|>",
228
+ "<|O=[SH](=O)N1CCNCC1|>",
229
+ "<|c1ccc2scnc2c1|>",
230
+ "<|c1ccc2occc2c1|>",
231
+ "<|O=[N+]([O-])c1ccccc1|>",
232
+ "<|c1ccc2[nH]cnc2c1|>",
233
+ "<|O=[SH](=O)N1CCOCC1|>",
234
+ "<|cccc1ccccc1|>",
235
+ "<|NC(=O)C1CCNCC1|>",
236
+ "<|c1ccn2ccnc2c1|>",
237
+ "<|c1ccc2c(c1)OCO2|>",
238
+ "<|CC(=O)N1CCNCC1|>",
239
+ "<|c1ncc2ccsc2n1|>",
240
+ "<|COc1ccccc1C|>",
241
+ "<|CC(=O)N1CCOCC1|>",
242
+ "<|O=[SH](=O)N1CCCCC1|>",
243
+ "<|FCOc1ccccc1|>",
244
+ "<|Cccc1ccccc1|>",
245
+ "<|CC(=O)c1ccccc1|>",
246
+ "<|c1ccc2sccc2c1|>",
247
+ "<|c1ccc2ocnc2c1|>",
248
+ "<|Nc1cc(=O)[nH]c(=O)n1|>",
249
+ "<|c1cnc2ncnn2c1|>",
250
+ "<|COc1ccc(Cl)cc1|>",
251
+ "<|NC(=O)c1cccnc1|>",
252
+ "<|c1cnc2ccnn2c1|>",
253
+ "<|c1ncc2ncnc2n1|>",
254
+ "<|Cc1cc(C)cc(C)c1|>",
255
+ "<|c1cnn2cnnc2c1|>",
256
+ "<|Cn1c(=O)ccnc1=O|>",
257
+ "<|O=CCN1CCNCC1|>",
258
+ "<|c1ncc2cnnc2n1|>",
259
+ "<|N=Cc1ccccc1O|>",
260
+ "<|NCCc1ccccc1|>",
261
+ "<|COc1ccccc1O|>",
262
+ "<|COc1ccccc1Cl|>",
263
+ "<|CCCc1ccccc1|>",
264
+ "<|Cc1ccc(Cl)cc1Cl|>",
265
+ "<|CC(=O)N1CCCCC1|>",
266
+ "<|c1cc2c(s1)CCCC2|>",
267
+ "<|CCN1CCN(C)CC1|>",
268
+ "<|Cc1ccc(C)c(C)c1|>",
269
+ "<|CN(C)c1ccccc1|>",
270
+ "<|c1cnc2ncnc2c1|>",
271
+ "<|CNCc1ccccc1|>",
272
+ "<|O=COc1ccccc1|>",
273
+ "<|CCNc1ccccc1|>",
274
+ "<|c1ccc2c(c1)CCC2|>",
275
+ "<|CC(=O)NC1CCCC1|>",
276
+ "<|O=CNC1CCCCC1|>",
277
+ "<|NC(=O)c1ccncc1|>",
278
+ "<|O=Cc1ccccc1O|>",
279
+ "<|CC(=O)Nc1nccs1|>",
280
+ "<|CC(=O)Nc1nncs1|>",
281
+ "<|NC(=O)C1CCCNC1|>",
282
+ "<|c1cnc2sccc2c1|>",
283
+ "<|O=Cc1ccc(Cl)cc1|>",
284
+ "<|c1ncc2nnnc2n1|>",
285
+ "<|Cc1c(F)cccc1Cl|>",
286
+ "<|CC(=N)c1ccccc1|>",
287
+ "<|c1ncc2sccc2n1|>",
288
+ "<|c1ccc2nnnc2c1|>",
289
+ "<|Cc1ccc(Cl)c(Cl)c1|>",
290
+ "<|CC(C)c1ccccc1|>",
291
+ "<|COc1ccc(N)cc1|>",
292
+ "<|c1cnc2nccn2c1|>",
293
+ "<|O=C1CC(=O)NC(=O)N1|>",
294
+ "<|O=CNCc1ccco1|>",
295
+ "<|CCN1C(=O)CSC1=S|>",
296
+ "<|CCNC(=O)COC=O|>",
297
+ "<|c1ccn2cnnc2c1|>",
298
+ "<|N=Cc1ccc(O)cc1|>",
299
+ "<|COc1ccccc1F|>",
300
+ "<|CCCN1CCOCC1|>",
301
+ "<|CCOC(=O)N(C)CC|>",
302
+ "<|O=CC1=C(O)C(=O)NC1|>",
303
+ "<|CCC(=O)OCC(N)=O|>",
304
+ "<|COc1ccccc1|>",
305
+ "<|O=Cc1ccccc1|>",
306
+ "<|FCc1ccccc1|>",
307
+ "<|ccc1ccccc1|>",
308
+ "<|Cc1cccc(C)c1|>",
309
+ "<|Cc1ccccc1Cl|>",
310
+ "<|CCc1ccccc1|>",
311
+ "<|Cc1ccccc1C|>",
312
+ "<|N=Cc1ccccc1|>",
313
+ "<|Cc1ccc(C)cc1|>",
314
+ "<|CCN1CCOCC1|>",
315
+ "<|ccc1cncnc1|>",
316
+ "<|Cc1ccc(Cl)cc1|>",
317
+ "<|CCN1CCNCC1|>",
318
+ "<|Cc1ccc(F)cc1|>",
319
+ "<|O=CN1CCNCC1|>",
320
+ "<|NCc1ccccc1|>",
321
+ "<|OCc1ccccc1|>",
322
+ "<|CCCCCCCC|>",
323
+ "<|Cc1ccccc1F|>",
324
+ "<|Clc1cccc(Cl)c1|>",
325
+ "<|NC(=O)c1ccco1|>",
326
+ "<|O=c1ccnc(=O)[nH]1|>",
327
+ "<|Cc1ccc(O)cc1|>",
328
+ "<|Fc1cccc(F)c1|>",
329
+ "<|Cc1cccc(Cl)c1|>",
330
+ "<|CCCC(=O)NCC|>",
331
+ "<|N#Cc1ccccc1|>",
332
+ "<|N#Cc1cccnc1|>",
333
+ "<|CCCC(=O)OCC|>",
334
+ "<|Clc1ccccc1Cl|>",
335
+ "<|NC(=O)c1cccs1|>",
336
+ "<|CC(=O)OCC(N)=O|>",
337
+ "<|O=[SH](=O)N1CCCC1|>",
338
+ "<|CCN(CC)[SH](=O)=O|>",
339
+ "<|Cc1ccccc1O|>",
340
+ "<|CCN1CSCC1=O|>",
341
+ "<|O=c1ccnc(=O)n1|>",
342
+ "<|CC1CCCC(C)C1|>",
343
+ "<|SCc1ccccc1|>",
344
+ "<|nc1cncnc1n|>",
345
+ "<|Cc1cc(C)ncn1|>",
346
+ "<|CCN1CCCCC1|>",
347
+ "<|Cnc(=O)n(C)c=O|>",
348
+ "<|Cc1cccc(F)c1|>",
349
+ "<|CC(=O)NCC(N)=O|>",
350
+ "<|CNc1ccccc1|>",
351
+ "<|Cc1ccnc(C)c1|>",
352
+ "<|O=C1CCNC(=O)N1|>",
353
+ "<|NC(=O)CN[SH](=O)=O|>",
354
+ "<|CCC1=CCCCC1|>",
355
+ "<|Cc1cc(O)ncn1|>",
356
+ "<|O=cc1ccccc1|>",
357
+ "<|Cc1cccc(Br)c1|>",
358
+ "<|Cc1cccc(O)c1|>",
359
+ "<|O=CN1CCCCC1|>",
360
+ "<|NC(=O)c1ccon1|>",
361
+ "<|cc1ccccc1n|>",
362
+ "<|CC1NC(=O)NC1=O|>",
363
+ "<|Cc1ccccc1N|>",
364
+ "<|CN1CCN(C)CC1|>",
365
+ "<|Fc1ccccc1Cl|>",
366
+ "<|Clc1ccc(Cl)cc1|>",
367
+ "<|NC(=O)c1ccsc1|>",
368
+ "<|NC(=O)c1cnnn1|>",
369
+ "<|Fc1ccccc1F|>",
370
+ "<|Cc1ccc(Br)cc1|>",
371
+ "<|CCCCNC(C)=O|>",
372
+ "<|CSc1ccccc1|>",
373
+ "<|CNC[C@@H](O)C(C)C|>",
374
+ "<|O=Pc1ccccc1|>",
375
+ "<|Cc1ccc(N)cc1|>",
376
+ "<|c1nc2sccn2n1|>",
377
+ "<|Fc1cccc(Cl)c1|>",
378
+ "<|CCC(=O)N(C)CC|>",
379
+ "<|Cc1cc(=O)ncn1|>",
380
+ "<|Oc1ccccc1O|>",
381
+ "<|CCNC(=O)C(N)=O|>",
382
+ "<|CC1CNCC(C)C1|>",
383
+ "<|Oc1ccc(Cl)cc1|>",
384
+ "<|CCN(CC)C(C)=O|>",
385
+ "<|O=CC1CCCCC1|>",
386
+ "<|Oc1cccc(O)c1|>",
387
+ "<|NC(=O)C1CC=NO1|>",
388
+ "<|CCn1cnnc1S|>",
389
+ "<|CCNC(=O)NCC|>",
390
+ "<|NCc1cccnc1|>",
391
+ "<|Cc1ccccc1|>",
392
+ "<|Clc1ccccc1|>",
393
+ "<|Fc1ccccc1|>",
394
+ "<|Oc1ccccc1|>",
395
+ "<|Nc1ccccc1|>",
396
+ "<|CN1CCNCC1|>",
397
+ "<|CCNC(=O)CC|>",
398
+ "<|Cc1ccncn1|>",
399
+ "<|CC1CCCCC1|>",
400
+ "<|O=c1ccnc[nH]1|>",
401
+ "<|Cc1ccccn1|>",
402
+ "<|Brc1ccccc1|>",
403
+ "<|O=c1ccncn1|>",
404
+ "<|O=S1(=O)CCCC1|>",
405
+ "<|CCNC(=O)CS|>",
406
+ "<|Nc1ccncn1|>",
407
+ "<|Cc1cc(C)nn1|>",
408
+ "<|O=CCN[SH](=O)=O|>",
409
+ "<|CC(C)(C)OC=O|>",
410
+ "<|CCNC(=O)CO|>",
411
+ "<|CCOC(=O)CC|>",
412
+ "<|NC(=O)CNC=O|>",
413
+ "<|CCCNC(C)=O|>",
414
+ "<|Oc1ccncn1|>",
415
+ "<|Clc1cccnc1|>",
416
+ "<|Cc1ccc(C)n1|>",
417
+ "<|nc1ccccc1|>",
418
+ "<|Cc1cccnc1|>",
419
+ "<|nc1ncccn1|>",
420
+ "<|CNCCC(C)C|>",
421
+ "<|CN1CCOCC1|>",
422
+ "<|Cc1cc(C)on1|>",
423
+ "<|NC(=O)COC=O|>",
424
+ "<|CC(=O)NCC=O|>",
425
+ "<|Sc1ccccc1|>",
426
+ "<|CCNCC(N)=O|>",
427
+ "<|CCCC(=O)OC|>",
428
+ "<|CC1CCCNC1|>",
429
+ "<|Cnc(=O)nc=O|>",
430
+ "<|CN1CCCCC1|>",
431
+ "<|Nc1ncncn1|>",
432
+ "<|cc1ncnc1n|>",
433
+ "<|CC(=O)NC(C)C|>",
434
+ "<|CC1CCNCC1|>",
435
+ "<|O=c1ccccn1|>",
436
+ "<|Cc1ccsc1C|>",
437
+ "<|oc1ccccc1|>",
438
+ "<|O=cc1cncn1|>",
439
+ "<|CCNC(=O)CN|>",
440
+ "<|CCN(C)[SH](=O)=O|>",
441
+ "<|Cc1cccnn1|>",
442
+ "<|O=c1cccnn1|>",
443
+ "<|NC1CCCCC1|>",
444
+ "<|Cn1cnnc1S|>",
445
+ "<|Clc1ccccn1|>",
446
+ "<|CCCCC(N)=O|>",
447
+ "<|Oc1ccccn1|>",
448
+ "<|NCc1ccco1|>",
449
+ "<|OCCOCCO|>",
450
+ "<|CC(C)CC(N)=O|>",
451
+ "<|CCCCCCC|>",
452
+ "<|CCCCNC=O|>",
453
+ "<|Cc1ccncc1|>",
454
+ "<|CCCN[SH](=O)=O|>",
455
+ "<|O=Cc1cccs1|>",
456
+ "<|Cc1c[nH]c(C)c1|>",
457
+ "<|O=Cc1ccco1|>",
458
+ "<|O=C1CNC(=O)N1|>",
459
+ "<|O=C1CCC(=O)N1|>",
460
+ "<|O=C1CNCCN1|>",
461
+ "<|O=C1CSC(=S)N1|>",
462
+ "<|Nc1ncccn1|>",
463
+ "<|NC1=NC(=O)CS1|>",
464
+ "<|CCN(C=O)CC|>",
465
+ "<|Cc1ccc(C)o1|>",
466
+ "<|N#Cc1cocn1|>",
467
+ "<|CSc1ncnn1|>",
468
+ "<|CCN1CCCC1|>",
469
+ "<|C1CCCNCC1|>",
470
+ "<|CCC1OCCO1|>",
471
+ "<|CC(=O)OCC=O|>",
472
+ "<|C1CCCCCC1|>",
473
+ "<|CC(=O)CC(C)C|>",
474
+ "<|CCCCNCC|>",
475
+ "<|CC(=O)OCCO|>",
476
+ "<|CCNS(C)(=O)=O|>",
477
+ "<|Nn1cnnc1S|>",
478
+ "<|CC(=O)NCCO|>",
479
+ "<|Cc1cccn1C|>",
480
+ "<|CCOC(=O)CN|>",
481
+ "<|CCCCC(=O)O|>",
482
+ "<|Brc1cccnc1|>",
483
+ "<|nc1ccncn1|>",
484
+ "<|O=c1ncccn1|>",
485
+ "<|CCN(CC)CC|>",
486
+ "<|OCc1cnnn1|>",
487
+ "<|CCCCCCO|>",
488
+ "<|CCc1nncs1|>",
489
+ "<|sc1ccccc1|>",
490
+ "<|O=C1CSC(=O)N1|>",
491
+ "<|O=C1NCC=C1O|>",
492
+ "<|Cc1ccn(C)n1|>",
493
+ "<|c1ccccc1|>",
494
+ "<|c1ccncc1|>",
495
+ "<|c1cncnc1|>",
496
+ "<|CCN[SH](=O)=O|>",
497
+ "<|CCNC(C)=O|>",
498
+ "<|C1CNCCN1|>",
499
+ "<|Cc1ccco1|>",
500
+ "<|Cc1ccnn1|>",
501
+ "<|Cc1cccs1|>",
502
+ "<|C1COCCN1|>",
503
+ "<|CCOC(C)=O|>",
504
+ "<|CCCC(N)=O|>",
505
+ "<|C1CCNCC1|>",
506
+ "<|Sc1ncnn1|>",
507
+ "<|Cc1ccno1|>",
508
+ "<|O=CCNC=O|>",
509
+ "<|C1CCCCC1|>",
510
+ "<|Cc1cccn1|>",
511
+ "<|CCCC(C)C|>",
512
+ "<|c1ccnnc1|>",
513
+ "<|CCNC(N)=O|>",
514
+ "<|cccncn|>",
515
+ "<|c1cnccn1|>",
516
+ "<|cccccn|>",
517
+ "<|CCCNCC|>",
518
+ "<|Cc1cscn1|>",
519
+ "<|CCNCC=O|>",
520
+ "<|Cc1ccc[nH]1|>",
521
+ "<|CCCNC=O|>",
522
+ "<|CC1CCCO1|>",
523
+ "<|c1ncncn1|>",
524
+ "<|CNCC(N)=O|>",
525
+ "<|c1cnncn1|>",
526
+ "<|CCCCCC|>",
527
+ "<|c1cc[n+]cc1|>",
528
+ "<|nc(=O)nc=O|>",
529
+ "<|Cn1cccn1|>",
530
+ "<|c1ccocc1|>",
531
+ "<|CCC(=O)NC|>",
532
+ "<|O=C1CSCN1|>",
533
+ "<|CCOCCO|>",
534
+ "<|CCC(=O)OC|>",
535
+ "<|CC(C)NC=O|>",
536
+ "<|O=C1CCCN1|>",
537
+ "<|Cc1ccsc1|>",
538
+ "<|Cn1ccnc1|>",
539
+ "<|Nc1nccs1|>",
540
+ "<|Cc1cnco1|>",
541
+ "<|CCN(C)CC|>",
542
+ "<|CN(C)[SH](=O)=O|>",
543
+ "<|cccc(n)=O|>",
544
+ "<|CCCC(=O)O|>",
545
+ "<|COC(=O)CN|>",
546
+ "<|cc1cccs1|>",
547
+ "<|CC(C)C(N)=O|>",
548
+ "<|CCN(C)C=O|>",
549
+ "<|CCN=C(N)S|>",
550
+ "<|N#CCC(N)=O|>",
551
+ "<|Cn1cccc1|>",
552
+ "<|O=CCCC=O|>",
553
+ "<|Cc1cc[nH]n1|>",
554
+ "<|NC(=O)C(N)=O|>",
555
+ "<|CCNC(=N)S|>",
556
+ "<|Nc1cnon1|>",
557
+ "<|Cc1nncs1|>",
558
+ "<|O=CCOC=O|>",
559
+ "<|CCCCCO|>",
560
+ "<|ccccco|>",
561
+ "<|Cc1ccon1|>",
562
+ "<|cccc(C)n|>",
563
+ "<|Nc1nncs1|>",
564
+ "<|CCNC(N)=S|>",
565
+ "<|Brc1cccs1|>",
566
+ "<|Nc1ncnn1|>",
567
+ "<|Clc1cccs1|>",
568
+ "<|C1=CCCCC1|>",
569
+ "<|COC(C)(C)C|>",
570
+ "<|CC(C)CCO|>",
571
+ "<|NC(=O)C1CC1|>",
572
+ "<|cccc(c)C|>",
573
+ "<|CCCN(C)C|>",
574
+ "<|CCC(C)CC|>",
575
+ "<|CN1CCCC1|>",
576
+ "<|CNC(=O)CS|>",
577
+ "<|NCCCCF|>",
578
+ "<|CC(C)CCN|>",
579
+ "<|Cc1nccs1|>",
580
+ "<|CCNCCO|>",
581
+ "<|CC(C)OC=O|>",
582
+ "<|OCC(O)CO|>",
583
+ "<|Sc1nnco1|>",
584
+ "<|ccccC=O|>",
585
+ "<|CCNCC|>",
586
+ "<|ccccC|>",
587
+ "<|CCNC=O|>",
588
+ "<|ccccn|>",
589
+ "<|c1cnnc1|>",
590
+ "<|c1ccsc1|>",
591
+ "<|c1ncnn1|>",
592
+ "<|c1cscn1|>",
593
+ "<|NC(=O)CS|>",
594
+ "<|c1cnoc1|>",
595
+ "<|c1ccoc1|>",
596
+ "<|CCCC=O|>",
597
+ "<|CCC(N)=O|>",
598
+ "<|CCCCC|>",
599
+ "<|c1nncs1|>",
600
+ "<|c1nnnn1|>",
601
+ "<|NC(=O)CO|>",
602
+ "<|CCOC=O|>",
603
+ "<|ccccc|>",
604
+ "<|c1cncn1|>",
605
+ "<|c1nnco1|>",
606
+ "<|c1cn[nH]c1|>",
607
+ "<|ncnc=O|>",
608
+ "<|c1cnnn1|>",
609
+ "<|C1CCCC1|>",
610
+ "<|c1cocn1|>",
611
+ "<|CNC(C)=O|>",
612
+ "<|CCN(C)C|>",
613
+ "<|c1ncon1|>",
614
+ "<|c1ccnc1|>",
615
+ "<|CCCCO|>",
616
+ "<|COC(C)=O|>",
617
+ "<|CN[SH](=O)=O|>",
618
+ "<|NCC(N)=O|>",
619
+ "<|CCCCN|>",
620
+ "<|CCNCS|>",
621
+ "<|c1c[nH]cn1|>",
622
+ "<|CCC(C)C|>",
623
+ "<|CNCC=O|>",
624
+ "<|CCC(=O)O|>",
625
+ "<|CCC(C)=O|>",
626
+ "<|CCCNC|>",
627
+ "<|C1CCNC1|>",
628
+ "<|c1cnon1|>",
629
+ "<|cccc=O|>",
630
+ "<|nccc=O|>",
631
+ "<|CC(N)C=O|>",
632
+ "<|ncncn|>",
633
+ "<|c1cc[nH]c1|>",
634
+ "<|c1nc[nH]n1|>",
635
+ "<|c1csnn1|>",
636
+ "<|CC(=O)NN|>",
637
+ "<|CS(N)(=O)=O|>",
638
+ "<|CC(O)CO|>",
639
+ "<|c1cnsc1|>",
640
+ "<|NC(=O)C=O|>",
641
+ "<|ccncn|>",
642
+ "<|COCCO|>",
643
+ "<|ncc(n)=O|>",
644
+ "<|CC(C)(C)N|>",
645
+ "<|NCC(=O)O|>",
646
+ "<|CCOP=O|>",
647
+ "<|CCCOC|>",
648
+ "<|CC(O)CF|>",
649
+ "<|ncncs|>",
650
+ "<|ccc(=O)o|>",
651
+ "<|COCCN|>",
652
+ "<|cccc[nH]|>",
653
+ "<|O=C(O)CS|>",
654
+ "<|CC[SH](=O)=O|>",
655
+ "<|O=C(O)CO|>",
656
+ "<|O=ccco|>",
657
+ "<|OCC(F)F|>",
658
+ "<|CC(C)CN|>",
659
+ "<|CCOCC|>",
660
+ "<|OCCCO|>",
661
+ "<|CCNC=S|>",
662
+ "<|ccc(C)n|>",
663
+ "<|[nH]cnc=O|>",
664
+ "<|Cnc([nH])=O|>",
665
+ "<|CC(C)C=O|>",
666
+ "<|c1ncsn1|>",
667
+ "<|NC(=O)CCl|>",
668
+ "<|NCC(F)F|>",
669
+ "<|CN(C)C=O|>",
670
+ "<|CC(C)(C)O|>",
671
+ "<|CNCCO|>",
672
+ "<|c1nn[nH]n1|>",
673
+ "<|cccc|>",
674
+ "<|CC(N)=O|>",
675
+ "<|CCCC|>",
676
+ "<|CCNC|>",
677
+ "<|cncn|>",
678
+ "<|CCC=O|>",
679
+ "<|cccn|>",
680
+ "<|N[SH](=O)=O|>",
681
+ "<|CC(C)C|>",
682
+ "<|CC(=O)O|>",
683
+ "<|ncnn|>",
684
+ "<|NCC=O|>",
685
+ "<|CCCO|>",
686
+ "<|nccn|>",
687
+ "<|OCCO|>",
688
+ "<|O=CCO|>",
689
+ "<|ccc=O|>",
690
+ "<|CC(C)O|>",
691
+ "<|CCCN|>",
692
+ "<|COC=O|>",
693
+ "<|FC(F)F|>",
694
+ "<|CC(C)N|>",
695
+ "<|CNC=O|>",
696
+ "<|NC(N)=O|>",
697
+ "<|C[SH](=O)=O|>",
698
+ "<|O=CCS|>",
699
+ "<|CCOC|>",
700
+ "<|CCC#N|>",
701
+ "<|NNC=O|>",
702
+ "<|NCCO|>",
703
+ "<|N=C(N)S|>",
704
+ "<|Cncn|>",
705
+ "<|CC(C)=O|>",
706
+ "<|CN(C)C|>",
707
+ "<|NC(N)=S|>",
708
+ "<|OCCF|>",
709
+ "<|[nH]c(n)=O|>",
710
+ "<|cc(C)n|>",
711
+ "<|CCCS|>",
712
+ "<|O=[PH](O)O|>",
713
+ "<|cc(n)=O|>",
714
+ "<|CCSC|>",
715
+ "<|NCCF|>",
716
+ "<|nc(=O)o|>",
717
+ "<|Ccnn|>",
718
+ "<|O=[SH](=O)O|>",
719
+ "<|N=C(N)N|>",
720
+ "<|C[Si]C|>",
721
+ "<|ncc=O|>",
722
+ "<|C=CCO|>",
723
+ "<|ccnn|>",
724
+ "<|CCN|>",
725
+ "<|CCC|>",
726
+ "<|CCO|>",
727
+ "<|ccn|>",
728
+ "<|O=S=O|>",
729
+ "<|NC=O|>",
730
+ "<|ncn|>",
731
+ "<|CC=O|>",
732
+ "<|O=CO|>",
733
+ "<|ncs|>",
734
+ "<|O=[N+][O-]|>",
735
+ "<|ccc|>",
736
+ "<|CNC|>",
737
+ "<|nco|>",
738
+ "<|FCF|>",
739
+ "<|nc[nH]|>",
740
+ "<|NC=S|>",
741
+ "<|CC=N|>",
742
+ "<|[nH]cn|>",
743
+ "<|NCS|>",
744
+ "<|C=CC|>",
745
+ "<|CCS|>",
746
+ "<|ccs|>",
747
+ "<|O=PO|>",
748
+ "<|nc=O|>",
749
+ "<|N=CN|>",
750
+ "<|CC#N|>",
751
+ "<|cc=O|>",
752
+ "<|C1CC1|>",
753
+ "<|Ccn|>",
754
+ "<|C[N+]C|>",
755
+ "<|O=[S+][O-]|>",
756
+ "<|cc[n+]|>",
757
+ "<|C[Si]|>",
758
+ "<|[nH]c=O|>",
759
+ "<|OCO|>",
760
+ "<|NCO|>",
761
+ "<|nsn|>",
762
+ "<|cnn|>",
763
+ "<|CCCl|>",
764
+ "<|COC|>",
765
+ "<|cco|>",
766
+ "<|nnn|>",
767
+ "<|cc[nH]|>",
768
+ "<|O=co|>",
769
+ "<|non|>",
770
+ "<|cc|>",
771
+ "<|CC|>",
772
+ "<|cn|>",
773
+ "<|CO|>",
774
+ "<|O=S|>",
775
+ "<|CN|>",
776
+ "<|[N+][O-]|>",
777
+ "<|CF|>",
778
+ "<|CS|>",
779
+ "<|C=O|>",
780
+ "<|c=O|>",
781
+ "<|C#N|>",
782
+ "<|nn|>",
783
+ "<|C=N|>",
784
+ "<|O=P|>",
785
+ "<|NN|>",
786
+ "<|ns|>",
787
+ "<|[O-][S+]|>",
788
+ "<|C[N+]|>",
789
+ "<|NO|>",
790
+ "<|c[nH]|>",
791
+ "<|no|>",
792
+ "<|N=N|>",
793
+ "<|cs|>",
794
+ "<|[n+][O-]|>",
795
+ "<|CCl|>",
796
+ "<|[B]|>",
797
+ "<|[Br]|>",
798
+ "<|[C]|>",
799
+ "<|[Cl]|>",
800
+ "<|[F]|>",
801
+ "<|[I]|>",
802
+ "<|[N]|>",
803
+ "<|[O]|>",
804
+ "<|[P]|>",
805
+ "<|[S]|>"
806
+ ],
807
+ "bos_token": {
808
+ "content": "<|begin_of_text|>",
809
+ "lstrip": false,
810
+ "normalized": false,
811
+ "rstrip": false,
812
+ "single_word": false
813
+ },
814
+ "eos_token": {
815
+ "content": "<|eot_id|>",
816
+ "lstrip": false,
817
+ "normalized": false,
818
+ "rstrip": false,
819
+ "single_word": false
820
+ },
821
+ "pad_token": {
822
+ "content": "<pad>",
823
+ "lstrip": false,
824
+ "normalized": false,
825
+ "rstrip": false,
826
+ "single_word": false
827
+ }
828
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff