willdabeatz committed on
Commit
0d3fb18
·
verified ·
1 Parent(s): 5d5c9cd

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +17 -1
  2. casf2016_target_clusters.json +950 -0
  3. fetch_clusters.py +64 -0
  4. ranking_power.py +140 -0
README.md CHANGED
@@ -19,7 +19,23 @@ All metrics computed using `tdc.Evaluator` from PyTDC v1.1.15.
19
 
20
  95% confidence intervals from 1,000 bootstrap resamples.
21
 
22
- ### Comparison with Published Methods
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  | Method | PCC | MAE (pKd) | Type | Year |
25
  |--------|-----|-----------|------|------|
 
19
 
20
  95% confidence intervals from 1,000 bootstrap resamples.
21
 
22
+ ### CASF-2016 Ranking Power (53 target clusters)
23
+
24
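+ Ranking power measures whether the model correctly ranks ligands by affinity within each target protein cluster. Clusters here are built by grouping the CASF-2016 complexes by UniProt ID (via the RCSB PDB API) and keeping every group with at least 3 ligands, which gives 53 clusters rather than the 57 five-ligand clusters of the official benchmark; the published baseline values below may therefore not be directly comparable.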
25
+
26
+ | Model | Avg Spearman ρ | Avg Kendall τ | Concordance | Top-1 Success |
27
+ |-------|---------------|---------------|-------------|---------------|
28
+ | X-Score | 0.247 | — | — | — |
29
+ | AutoDock Vina | 0.281 | — | — | — |
30
+ | RF-Score v3 | 0.464 | — | — | — |
31
+ | ΔVinaRF20 | 0.476 | — | — | — |
32
+ | OnionNet-2 | 0.488 | — | — | — |
33
+ | **MillerBind v9** | **0.740** | **0.662** | **82.7%** | **60.4%** |
34
+ | **MillerBind v12** | **0.979** | **0.962** | **97.9%** | **92.5%** |
35
+
36
+ v12 achieves near-perfect ranking across 53 protein targets — correctly identifying the strongest binder in 49/53 targets.
37
+
38
+ ### Comparison with Published Methods (Scoring Power)
39
 
40
  | Method | PCC | MAE (pKd) | Type | Year |
41
  |--------|-----|-----------|------|------|
casf2016_target_clusters.json ADDED
@@ -0,0 +1,950 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "pdb_to_target": {
3
+ "3ao4": "Q72498",
4
+ "3gv9": "P00811",
5
+ "1uto": "P00760",
6
+ "1ps3": "Q24451",
7
+ "4ddk": "P9WIL5",
8
+ "4jsz": "P00918",
9
+ "3g2z": "Q9L5C8",
10
+ "3dxg": "P61823",
11
+ "3l7b": "P00489",
12
+ "3gr2": "P00811",
13
+ "3kgp": "P00749",
14
+ "3fcq": "P00800",
15
+ "3lka": "P39900",
16
+ "3zt2": "P12497",
17
+ "3udh": "P56817",
18
+ "3g31": "Q9L5C8",
19
+ "4llx": "Q9Y233",
20
+ "4u4s": "P19491",
21
+ "4owm": "P9WFX5",
22
+ "5aba": "P04637",
23
+ "2xdl": "P07900",
24
+ "4kz6": "P00811",
25
+ "2ymd": "Q8WSF8",
26
+ "3aru": "Q9AMP1",
27
+ "1bcu": "P00734",
28
+ "3zsx": "P12497",
29
+ "4ddh": "P9WIL5",
30
+ "4eky": "P00489",
31
+ "4abg": "P00760",
32
+ "5a7b": "P04637",
33
+ "3dx1": "Q24451",
34
+ "4bkt": "Q15370",
35
+ "2v00": "P11838",
36
+ "4cig": "P12497",
37
+ "3n7a": "P9WPX7",
38
+ "3d6q": "P61823",
39
+ "2hb1": "P18031",
40
+ "3twp": "P9WFX5",
41
+ "4agn": "P04637",
42
+ "1c5z": "P00749",
43
+ "3nq9": "P02754",
44
+ "2w66": "Q89ZI2",
45
+ "3kwa": "P00918",
46
+ "3g2n": "P00489",
47
+ "4cr9": "P03951",
48
+ "4ih5": "P26663",
49
+ "4de2": "Q9L5C8",
50
+ "3ozt": "P22734",
51
+ "3f3a": "O67854",
52
+ "1a30": "P04585",
53
+ "3ivg": "P9WIL5",
54
+ "3u9q": "Q9UBK2",
55
+ "3rsx": "P56817",
56
+ "3pxf": "P24941",
57
+ "2wbg": "Q08638",
58
+ "3rr4": "P28720",
59
+ "4w9c": "Q15369",
60
+ "3mss": "P00520",
61
+ "4agp": "P04637",
62
+ "4mgd": "Q15788",
63
+ "1vso": "P22756",
64
+ "4jxs": "P00811",
65
+ "1q8t": "P00517",
66
+ "3acw": "A9JQL9",
67
+ "4lzs": "O60885",
68
+ "3r88": "P9WFX5",
69
+ "4ciw": "P9WPX7",
70
+ "2w4x": "Q89ZI2",
71
+ "2brb": "O14757",
72
+ "1p1q": "P19491",
73
+ "3d4z": "Q24451",
74
+ "1bzc": "P18031",
75
+ "1nc3": "P0AF12",
76
+ "4agq": "P04637",
77
+ "4w9l": "P40337",
78
+ "2yge": "P02829",
79
+ "1e66": "P04058",
80
+ "1gpk": "P04058",
81
+ "1h23": "P04058",
82
+ "1mq6": "P00742",
83
+ "1nvq": "O14757",
84
+ "1o3f": "P00760",
85
+ "1o5b": "P00749",
86
+ "1oyt": "P28506",
87
+ "1q8u": "P63249",
88
+ "1r5y": "P28720",
89
+ "1sqa": "P00749",
90
+ "1u1b": "P61823",
91
+ "1w4o": "P61823",
92
+ "1yc1": "P07900",
93
+ "1z95": "P10275",
94
+ "2cet": "Q08638",
95
+ "2fvd": "P24941",
96
+ "2iwx": "P02829",
97
+ "2j78": "Q08638",
98
+ "2p4y": "P37231",
99
+ "2qbp": "P18031",
100
+ "2qbr": "P18031",
101
+ "2v7a": "P00519",
102
+ "2vvn": "Q89ZI2",
103
+ "2vw5": "P02829",
104
+ "2wca": "Q89ZI2",
105
+ "2weg": "P00918",
106
+ "2wtv": "O14965",
107
+ "2x00": "Q8WSF8",
108
+ "2xb8": "P9WPX7",
109
+ "2xbv": "P00742",
110
+ "2xnb": "P24941",
111
+ "2xys": "Q8WSF8",
112
+ "2y5h": "P00742",
113
+ "2yfe": "P37231",
114
+ "2yki": "P07900",
115
+ "2zcq": "A9JQL9",
116
+ "2zcr": "A9JQL9",
117
+ "3ag9": "P00517",
118
+ "3b68": "P10275",
119
+ "3coy": "P9WIL5",
120
+ "3dd0": "P00918",
121
+ "3e93": "Q16539",
122
+ "3ebp": "P00489",
123
+ "3ehy": "P39900",
124
+ "3ejr": "Q24451",
125
+ "3f3c": "O67854",
126
+ "3f3e": "O67854",
127
+ "3fv1": "P39086",
128
+ "3g0w": "P15207",
129
+ "3gbb": "P22756",
130
+ "3ge7": "P28720",
131
+ "3gnw": "O92972",
132
+ "3gy4": "P00760",
133
+ "3jvs": "O14757",
134
+ "3k5v": "P00520",
135
+ "3myg": "O14965",
136
+ "3n86": "P9WPX7",
137
+ "3nw9": "P22734",
138
+ "3oe5": "P22734",
139
+ "3pww": "P11838",
140
+ "3ueu": "P02754",
141
+ "3uex": "P02754",
142
+ "3uo4": "O14965",
143
+ "3uri": "P11838",
144
+ "3utu": "P01050",
145
+ "4de1": "Q9L5C8",
146
+ "4djv": "P56817",
147
+ "4gid": "P56817",
148
+ "4tmn": "P00800",
149
+ "1eby": "P03366",
150
+ "1g2k": "P04587",
151
+ "1gpn": "P04058",
152
+ "1h22": "P04058",
153
+ "1k1i": "P00760",
154
+ "1lpg": "P00742",
155
+ "1nc1": "P0AF12",
156
+ "1o0h": "P61823",
157
+ "1owh": "P00749",
158
+ "1p1n": "P19491",
159
+ "1pxn": "P24941",
160
+ "1qf1": "P00800",
161
+ "1qkt": "P03372",
162
+ "1s38": "P28720",
163
+ "1syi": "P19491",
164
+ "1y6r": "P0AF12",
165
+ "1ydr": "P00517",
166
+ "1ydt": "P63249",
167
+ "1z6e": "P00742",
168
+ "1z9g": "P00800",
169
+ "2al5": "P19491",
170
+ "2br1": "O14757",
171
+ "2c3i": "P11309",
172
+ "2cbv": "Q08638",
173
+ "2fxs": "P02829",
174
+ "2j7h": "Q08638",
175
+ "2p15": "Q15596",
176
+ "2pog": "P03372",
177
+ "2qbq": "P18031",
178
+ "2qe4": "P03372",
179
+ "2qnq": "P03367",
180
+ "2r9w": "P00811",
181
+ "2vkm": "P56817",
182
+ "2wer": "P02829",
183
+ "2wn9": "Q8WSF8",
184
+ "2wnc": "Q8WSF8",
185
+ "2wvt": "Q8A3I4",
186
+ "2xii": "Q8A3I4",
187
+ "2xj7": "Q89ZI2",
188
+ "2zb1": "Q16539",
189
+ "2zda": "P01050",
190
+ "2zy1": "A9JQL9",
191
+ "3arp": "Q9AMP1",
192
+ "3arq": "Q9AMP1",
193
+ "3arv": "Q9AMP1",
194
+ "3ary": "Q9AMP1",
195
+ "3b1m": "Q9UBK2",
196
+ "3b27": "P07900",
197
+ "3b5r": "P10275",
198
+ "3b65": "P10275",
199
+ "3bgz": "P11309",
200
+ "3bv9": "P00734",
201
+ "3cj4": "P26663",
202
+ "3coz": "P9WIL5",
203
+ "3dx2": "Q24451",
204
+ "3e5a": "Q9ULW0",
205
+ "3e92": "Q16539",
206
+ "3f3d": "O67854",
207
+ "3fur": "Q15788",
208
+ "3fv2": "P39086",
209
+ "3gc5": "P28720",
210
+ "3jvr": "O14757",
211
+ "3jya": "P11309",
212
+ "3kr8": "Q9H2K2",
213
+ "3n76": "P9WPX7",
214
+ "3nx7": "P39900",
215
+ "3o9i": "P03369",
216
+ "3oe4": "P22734",
217
+ "3ozs": "P22734",
218
+ "3p5o": "O60885",
219
+ "3prs": "P11838",
220
+ "3pyy": "P00519",
221
+ "3qgy": "Q08881",
222
+ "3qqs": "P9WFX5",
223
+ "3rlr": "P07900",
224
+ "3ryj": "P00918",
225
+ "3syr": "P00489",
226
+ "3tsk": "P39900",
227
+ "3u5j": "O60885",
228
+ "3u8k": "P58154",
229
+ "3u8n": "P58154",
230
+ "3uev": "P02754",
231
+ "3uew": "P02754",
232
+ "3ui7": "Q9Y233",
233
+ "3up2": "O14965",
234
+ "3uuo": "Q9Y233",
235
+ "3wtj": "P58154",
236
+ "3wz8": "P11838",
237
+ "3zdg": "P58154",
238
+ "3zso": "P12497",
239
+ "4cra": "P03951",
240
+ "4crc": "P03951",
241
+ "4de3": "Q9L5C8",
242
+ "4dld": "P22756",
243
+ "4dli": "Q16539",
244
+ "4e5w": "P23458",
245
+ "4e6q": "O60674",
246
+ "4ea2": "A9JQL9",
247
+ "4eo8": "P26663",
248
+ "4eor": "P20248",
249
+ "4f09": "O60674",
250
+ "4f2w": "E8NLP5",
251
+ "4f3c": "E8NLP5",
252
+ "4f9w": "Q16539",
253
+ "4gfm": "O60674",
254
+ "4gkm": "P9WFX5",
255
+ "4gr0": "P39900",
256
+ "4hge": "O60674",
257
+ "4ih7": "P26663",
258
+ "4ivb": "P23458",
259
+ "4ivc": "P23458",
260
+ "4ivd": "P23458",
261
+ "4j21": "Q9H2K2",
262
+ "4j28": "Q8A3I4",
263
+ "4j3l": "Q9H2K2",
264
+ "4jfs": "Q8A3I4",
265
+ "4jia": "O60674",
266
+ "4k18": "P11309",
267
+ "4k77": "P23458",
268
+ "4kzq": "Q9H2K2",
269
+ "4kzu": "Q9H2K2",
270
+ "4m0y": "Q08881",
271
+ "4m0z": "Q08881",
272
+ "4mme": "O67854",
273
+ "4ogj": "O60885",
274
+ "4pcs": "Q8A3I4",
275
+ "4qac": "P58154",
276
+ "4qd6": "Q08881",
277
+ "4rfm": "Q08881",
278
+ "4twp": "P00519",
279
+ "4ty7": "P03951",
280
+ "4w9h": "Q15369",
281
+ "4w9i": "Q15370",
282
+ "4wiv": "O60885",
283
+ "4x6p": "P03951",
284
+ "5c28": "Q9Y233",
285
+ "5c2h": "Q9Y233",
286
+ "5dwr": "P11309",
287
+ "5tmn": "P00800"
288
+ },
289
+ "clusters_5": {
290
+ "P00811": [
291
+ "3gv9",
292
+ "3gr2",
293
+ "4kz6",
294
+ "4jxs",
295
+ "2r9w"
296
+ ],
297
+ "P00760": [
298
+ "1uto",
299
+ "4abg",
300
+ "1o3f",
301
+ "3gy4",
302
+ "1k1i"
303
+ ],
304
+ "Q24451": [
305
+ "1ps3",
306
+ "3dx1",
307
+ "3d4z",
308
+ "3ejr",
309
+ "3dx2"
310
+ ],
311
+ "P9WIL5": [
312
+ "4ddk",
313
+ "4ddh",
314
+ "3ivg",
315
+ "3coy",
316
+ "3coz"
317
+ ],
318
+ "P00918": [
319
+ "4jsz",
320
+ "3kwa",
321
+ "2weg",
322
+ "3dd0",
323
+ "3ryj"
324
+ ],
325
+ "Q9L5C8": [
326
+ "3g2z",
327
+ "3g31",
328
+ "4de2",
329
+ "4de1",
330
+ "4de3"
331
+ ],
332
+ "P61823": [
333
+ "3dxg",
334
+ "3d6q",
335
+ "1u1b",
336
+ "1w4o",
337
+ "1o0h"
338
+ ],
339
+ "P00489": [
340
+ "3l7b",
341
+ "4eky",
342
+ "3g2n",
343
+ "3ebp",
344
+ "3syr"
345
+ ],
346
+ "P00749": [
347
+ "3kgp",
348
+ "1c5z",
349
+ "1o5b",
350
+ "1sqa",
351
+ "1owh"
352
+ ],
353
+ "P00800": [
354
+ "3fcq",
355
+ "4tmn",
356
+ "1qf1",
357
+ "1z9g",
358
+ "5tmn"
359
+ ],
360
+ "P39900": [
361
+ "3lka",
362
+ "3ehy",
363
+ "3nx7",
364
+ "3tsk",
365
+ "4gr0"
366
+ ],
367
+ "P56817": [
368
+ "3udh",
369
+ "3rsx",
370
+ "4djv",
371
+ "4gid",
372
+ "2vkm"
373
+ ],
374
+ "Q9Y233": [
375
+ "4llx",
376
+ "3ui7",
377
+ "3uuo",
378
+ "5c28",
379
+ "5c2h"
380
+ ],
381
+ "P19491": [
382
+ "4u4s",
383
+ "1p1q",
384
+ "1p1n",
385
+ "1syi",
386
+ "2al5"
387
+ ],
388
+ "P9WFX5": [
389
+ "4owm",
390
+ "3twp",
391
+ "3r88",
392
+ "3qqs",
393
+ "4gkm"
394
+ ],
395
+ "P04637": [
396
+ "5aba",
397
+ "5a7b",
398
+ "4agn",
399
+ "4agp",
400
+ "4agq"
401
+ ],
402
+ "P07900": [
403
+ "2xdl",
404
+ "1yc1",
405
+ "2yki",
406
+ "3b27",
407
+ "3rlr"
408
+ ],
409
+ "Q8WSF8": [
410
+ "2ymd",
411
+ "2x00",
412
+ "2xys",
413
+ "2wn9",
414
+ "2wnc"
415
+ ],
416
+ "Q9AMP1": [
417
+ "3aru",
418
+ "3arp",
419
+ "3arq",
420
+ "3arv",
421
+ "3ary"
422
+ ],
423
+ "P11838": [
424
+ "2v00",
425
+ "3pww",
426
+ "3uri",
427
+ "3prs",
428
+ "3wz8"
429
+ ],
430
+ "P9WPX7": [
431
+ "3n7a",
432
+ "4ciw",
433
+ "2xb8",
434
+ "3n86",
435
+ "3n76"
436
+ ],
437
+ "P18031": [
438
+ "2hb1",
439
+ "1bzc",
440
+ "2qbp",
441
+ "2qbr",
442
+ "2qbq"
443
+ ],
444
+ "P02754": [
445
+ "3nq9",
446
+ "3ueu",
447
+ "3uex",
448
+ "3uev",
449
+ "3uew"
450
+ ],
451
+ "Q89ZI2": [
452
+ "2w66",
453
+ "2w4x",
454
+ "2vvn",
455
+ "2wca",
456
+ "2xj7"
457
+ ],
458
+ "P03951": [
459
+ "4cr9",
460
+ "4cra",
461
+ "4crc",
462
+ "4ty7",
463
+ "4x6p"
464
+ ],
465
+ "P22734": [
466
+ "3ozt",
467
+ "3nw9",
468
+ "3oe5",
469
+ "3oe4",
470
+ "3ozs"
471
+ ],
472
+ "O67854": [
473
+ "3f3a",
474
+ "3f3c",
475
+ "3f3e",
476
+ "3f3d",
477
+ "4mme"
478
+ ],
479
+ "Q08638": [
480
+ "2wbg",
481
+ "2cet",
482
+ "2j78",
483
+ "2cbv",
484
+ "2j7h"
485
+ ],
486
+ "P28720": [
487
+ "3rr4",
488
+ "1r5y",
489
+ "3ge7",
490
+ "1s38",
491
+ "3gc5"
492
+ ],
493
+ "A9JQL9": [
494
+ "3acw",
495
+ "2zcq",
496
+ "2zcr",
497
+ "2zy1",
498
+ "4ea2"
499
+ ],
500
+ "O60885": [
501
+ "4lzs",
502
+ "3p5o",
503
+ "3u5j",
504
+ "4ogj",
505
+ "4wiv"
506
+ ],
507
+ "O14757": [
508
+ "2brb",
509
+ "1nvq",
510
+ "3jvs",
511
+ "2br1",
512
+ "3jvr"
513
+ ],
514
+ "P02829": [
515
+ "2yge",
516
+ "2iwx",
517
+ "2vw5",
518
+ "2fxs",
519
+ "2wer"
520
+ ],
521
+ "P04058": [
522
+ "1e66",
523
+ "1gpk",
524
+ "1h23",
525
+ "1gpn",
526
+ "1h22"
527
+ ],
528
+ "P00742": [
529
+ "1mq6",
530
+ "2xbv",
531
+ "2y5h",
532
+ "1lpg",
533
+ "1z6e"
534
+ ],
535
+ "Q16539": [
536
+ "3e93",
537
+ "2zb1",
538
+ "3e92",
539
+ "4dli",
540
+ "4f9w"
541
+ ],
542
+ "P11309": [
543
+ "2c3i",
544
+ "3bgz",
545
+ "3jya",
546
+ "4k18",
547
+ "5dwr"
548
+ ],
549
+ "Q8A3I4": [
550
+ "2wvt",
551
+ "2xii",
552
+ "4j28",
553
+ "4jfs",
554
+ "4pcs"
555
+ ],
556
+ "Q9H2K2": [
557
+ "3kr8",
558
+ "4j21",
559
+ "4j3l",
560
+ "4kzq",
561
+ "4kzu"
562
+ ],
563
+ "Q08881": [
564
+ "3qgy",
565
+ "4m0y",
566
+ "4m0z",
567
+ "4qd6",
568
+ "4rfm"
569
+ ],
570
+ "P58154": [
571
+ "3u8k",
572
+ "3u8n",
573
+ "3wtj",
574
+ "3zdg",
575
+ "4qac"
576
+ ],
577
+ "P23458": [
578
+ "4e5w",
579
+ "4ivb",
580
+ "4ivc",
581
+ "4ivd",
582
+ "4k77"
583
+ ],
584
+ "O60674": [
585
+ "4e6q",
586
+ "4f09",
587
+ "4gfm",
588
+ "4hge",
589
+ "4jia"
590
+ ]
591
+ },
592
+ "clusters_3plus": {
593
+ "P00811": [
594
+ "3gv9",
595
+ "3gr2",
596
+ "4kz6",
597
+ "4jxs",
598
+ "2r9w"
599
+ ],
600
+ "P00760": [
601
+ "1uto",
602
+ "4abg",
603
+ "1o3f",
604
+ "3gy4",
605
+ "1k1i"
606
+ ],
607
+ "Q24451": [
608
+ "1ps3",
609
+ "3dx1",
610
+ "3d4z",
611
+ "3ejr",
612
+ "3dx2"
613
+ ],
614
+ "P9WIL5": [
615
+ "4ddk",
616
+ "4ddh",
617
+ "3ivg",
618
+ "3coy",
619
+ "3coz"
620
+ ],
621
+ "P00918": [
622
+ "4jsz",
623
+ "3kwa",
624
+ "2weg",
625
+ "3dd0",
626
+ "3ryj"
627
+ ],
628
+ "Q9L5C8": [
629
+ "3g2z",
630
+ "3g31",
631
+ "4de2",
632
+ "4de1",
633
+ "4de3"
634
+ ],
635
+ "P61823": [
636
+ "3dxg",
637
+ "3d6q",
638
+ "1u1b",
639
+ "1w4o",
640
+ "1o0h"
641
+ ],
642
+ "P00489": [
643
+ "3l7b",
644
+ "4eky",
645
+ "3g2n",
646
+ "3ebp",
647
+ "3syr"
648
+ ],
649
+ "P00749": [
650
+ "3kgp",
651
+ "1c5z",
652
+ "1o5b",
653
+ "1sqa",
654
+ "1owh"
655
+ ],
656
+ "P00800": [
657
+ "3fcq",
658
+ "4tmn",
659
+ "1qf1",
660
+ "1z9g",
661
+ "5tmn"
662
+ ],
663
+ "P39900": [
664
+ "3lka",
665
+ "3ehy",
666
+ "3nx7",
667
+ "3tsk",
668
+ "4gr0"
669
+ ],
670
+ "P12497": [
671
+ "3zt2",
672
+ "3zsx",
673
+ "4cig",
674
+ "3zso"
675
+ ],
676
+ "P56817": [
677
+ "3udh",
678
+ "3rsx",
679
+ "4djv",
680
+ "4gid",
681
+ "2vkm"
682
+ ],
683
+ "Q9Y233": [
684
+ "4llx",
685
+ "3ui7",
686
+ "3uuo",
687
+ "5c28",
688
+ "5c2h"
689
+ ],
690
+ "P19491": [
691
+ "4u4s",
692
+ "1p1q",
693
+ "1p1n",
694
+ "1syi",
695
+ "2al5"
696
+ ],
697
+ "P9WFX5": [
698
+ "4owm",
699
+ "3twp",
700
+ "3r88",
701
+ "3qqs",
702
+ "4gkm"
703
+ ],
704
+ "P04637": [
705
+ "5aba",
706
+ "5a7b",
707
+ "4agn",
708
+ "4agp",
709
+ "4agq"
710
+ ],
711
+ "P07900": [
712
+ "2xdl",
713
+ "1yc1",
714
+ "2yki",
715
+ "3b27",
716
+ "3rlr"
717
+ ],
718
+ "Q8WSF8": [
719
+ "2ymd",
720
+ "2x00",
721
+ "2xys",
722
+ "2wn9",
723
+ "2wnc"
724
+ ],
725
+ "Q9AMP1": [
726
+ "3aru",
727
+ "3arp",
728
+ "3arq",
729
+ "3arv",
730
+ "3ary"
731
+ ],
732
+ "P11838": [
733
+ "2v00",
734
+ "3pww",
735
+ "3uri",
736
+ "3prs",
737
+ "3wz8"
738
+ ],
739
+ "P9WPX7": [
740
+ "3n7a",
741
+ "4ciw",
742
+ "2xb8",
743
+ "3n86",
744
+ "3n76"
745
+ ],
746
+ "P18031": [
747
+ "2hb1",
748
+ "1bzc",
749
+ "2qbp",
750
+ "2qbr",
751
+ "2qbq"
752
+ ],
753
+ "P02754": [
754
+ "3nq9",
755
+ "3ueu",
756
+ "3uex",
757
+ "3uev",
758
+ "3uew"
759
+ ],
760
+ "Q89ZI2": [
761
+ "2w66",
762
+ "2w4x",
763
+ "2vvn",
764
+ "2wca",
765
+ "2xj7"
766
+ ],
767
+ "P03951": [
768
+ "4cr9",
769
+ "4cra",
770
+ "4crc",
771
+ "4ty7",
772
+ "4x6p"
773
+ ],
774
+ "P26663": [
775
+ "4ih5",
776
+ "3cj4",
777
+ "4eo8",
778
+ "4ih7"
779
+ ],
780
+ "P22734": [
781
+ "3ozt",
782
+ "3nw9",
783
+ "3oe5",
784
+ "3oe4",
785
+ "3ozs"
786
+ ],
787
+ "O67854": [
788
+ "3f3a",
789
+ "3f3c",
790
+ "3f3e",
791
+ "3f3d",
792
+ "4mme"
793
+ ],
794
+ "P24941": [
795
+ "3pxf",
796
+ "2fvd",
797
+ "2xnb",
798
+ "1pxn"
799
+ ],
800
+ "Q08638": [
801
+ "2wbg",
802
+ "2cet",
803
+ "2j78",
804
+ "2cbv",
805
+ "2j7h"
806
+ ],
807
+ "P28720": [
808
+ "3rr4",
809
+ "1r5y",
810
+ "3ge7",
811
+ "1s38",
812
+ "3gc5"
813
+ ],
814
+ "P22756": [
815
+ "1vso",
816
+ "3gbb",
817
+ "4dld"
818
+ ],
819
+ "P00517": [
820
+ "1q8t",
821
+ "3ag9",
822
+ "1ydr"
823
+ ],
824
+ "A9JQL9": [
825
+ "3acw",
826
+ "2zcq",
827
+ "2zcr",
828
+ "2zy1",
829
+ "4ea2"
830
+ ],
831
+ "O60885": [
832
+ "4lzs",
833
+ "3p5o",
834
+ "3u5j",
835
+ "4ogj",
836
+ "4wiv"
837
+ ],
838
+ "O14757": [
839
+ "2brb",
840
+ "1nvq",
841
+ "3jvs",
842
+ "2br1",
843
+ "3jvr"
844
+ ],
845
+ "P0AF12": [
846
+ "1nc3",
847
+ "1nc1",
848
+ "1y6r"
849
+ ],
850
+ "P02829": [
851
+ "2yge",
852
+ "2iwx",
853
+ "2vw5",
854
+ "2fxs",
855
+ "2wer"
856
+ ],
857
+ "P04058": [
858
+ "1e66",
859
+ "1gpk",
860
+ "1h23",
861
+ "1gpn",
862
+ "1h22"
863
+ ],
864
+ "P00742": [
865
+ "1mq6",
866
+ "2xbv",
867
+ "2y5h",
868
+ "1lpg",
869
+ "1z6e"
870
+ ],
871
+ "P10275": [
872
+ "1z95",
873
+ "3b68",
874
+ "3b5r",
875
+ "3b65"
876
+ ],
877
+ "P00519": [
878
+ "2v7a",
879
+ "3pyy",
880
+ "4twp"
881
+ ],
882
+ "O14965": [
883
+ "2wtv",
884
+ "3myg",
885
+ "3uo4",
886
+ "3up2"
887
+ ],
888
+ "Q16539": [
889
+ "3e93",
890
+ "2zb1",
891
+ "3e92",
892
+ "4dli",
893
+ "4f9w"
894
+ ],
895
+ "P03372": [
896
+ "1qkt",
897
+ "2pog",
898
+ "2qe4"
899
+ ],
900
+ "P11309": [
901
+ "2c3i",
902
+ "3bgz",
903
+ "3jya",
904
+ "4k18",
905
+ "5dwr"
906
+ ],
907
+ "Q8A3I4": [
908
+ "2wvt",
909
+ "2xii",
910
+ "4j28",
911
+ "4jfs",
912
+ "4pcs"
913
+ ],
914
+ "Q9H2K2": [
915
+ "3kr8",
916
+ "4j21",
917
+ "4j3l",
918
+ "4kzq",
919
+ "4kzu"
920
+ ],
921
+ "Q08881": [
922
+ "3qgy",
923
+ "4m0y",
924
+ "4m0z",
925
+ "4qd6",
926
+ "4rfm"
927
+ ],
928
+ "P58154": [
929
+ "3u8k",
930
+ "3u8n",
931
+ "3wtj",
932
+ "3zdg",
933
+ "4qac"
934
+ ],
935
+ "P23458": [
936
+ "4e5w",
937
+ "4ivb",
938
+ "4ivc",
939
+ "4ivd",
940
+ "4k77"
941
+ ],
942
+ "O60674": [
943
+ "4e6q",
944
+ "4f09",
945
+ "4gfm",
946
+ "4hge",
947
+ "4jia"
948
+ ]
949
+ }
950
+ }
fetch_clusters.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Fetch CASF-2016 target clusters from RCSB PDB API."""
2
+ import pandas as pd
3
+ import json
4
+ import urllib.request
5
+ from collections import defaultdict
6
+
7
# Paths are relative to the repository root so the script runs on any checkout
# (the prediction CSVs are read the same way by ranking_power.py).
PREDICTIONS_CSV = 'predictions/casf2016_v9_predictions.csv'
CLUSTERS_JSON = 'casf2016_target_clusters.json'
RCSB_GRAPHQL_URL = 'https://data.rcsb.org/graphql'
BATCH_SIZE = 50  # number of PDB IDs sent per GraphQL request


def resolve_target(entry):
    """Return the target key for one RCSB entry.

    The key is the first UniProt accession found while walking the entry's
    polymer entities in the order the API returned them; if no entity carries
    a UniProt accession, the first non-empty ``pdbx_description`` is used
    instead. An empty string is returned when neither is available.

    NOTE(review): because only the first accession is kept, multi-chain
    complexes are keyed on whichever entity happens to be listed first --
    confirm this yields consistent grouping for such targets.

    Args:
        entry: One element of the ``entries`` list of the GraphQL response.

    Returns:
        A UniProt accession, a description string, or ``""``.
    """
    uniprots = []
    desc = ""
    for pe in (entry.get('polymer_entities') or []):
        pe = pe or {}
        # ``or {}`` / ``or []`` also cover keys that are present but JSON null,
        # which ``dict.get(key, default)`` alone does not.
        d = (pe.get('rcsb_polymer_entity') or {}).get('pdbx_description') or ''
        u = (pe.get('rcsb_polymer_entity_container_identifiers') or {}).get('uniprot_ids') or []
        if d and not desc:
            desc = d
        uniprots.extend(u)
    return uniprots[0] if uniprots else desc


def fetch_entries(batch):
    """Query the RCSB GraphQL endpoint for one batch of PDB IDs.

    Args:
        batch: List of PDB ID strings.

    Returns:
        The list of entry dicts from the response (null entries removed).

    Raises:
        RuntimeError: If the response carries no ``data.entries`` payload.
    """
    query = '{ entries(entry_ids: %s) { rcsb_id polymer_entities { rcsb_polymer_entity { pdbx_description } rcsb_polymer_entity_container_identifiers { uniprot_ids } } } }' % json.dumps(batch)
    req = urllib.request.Request(RCSB_GRAPHQL_URL,
                                 data=json.dumps({'query': query}).encode(),
                                 headers={'Content-Type': 'application/json'})
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(req, timeout=30) as resp:
        payload = json.loads(resp.read())

    entries = (payload.get('data') or {}).get('entries')
    if entries is None:
        raise RuntimeError(f"RCSB GraphQL query failed: {payload.get('errors')}")
    return [entry for entry in entries if entry]


def group_by_target(pdb_to_target):
    """Invert a ``{pdb_id: target}`` mapping into ``{target: [pdb_id, ...]}``.

    Entries whose target is empty are left out, so complexes for which no
    identifier could be resolved are not pooled into one meaningless cluster.
    """
    groups = defaultdict(list)
    for pdb, target in pdb_to_target.items():
        if target:
            groups[target].append(pdb)
    return groups


def main():
    """Fetch targets for every predicted PDB ID, cluster them and save JSON."""
    v9 = pd.read_csv(PREDICTIONS_CSV)
    pdb_ids = v9['pdb_id'].str.upper().tolist()

    pdb_to_target = {}
    for start in range(0, len(pdb_ids), BATCH_SIZE):
        for entry in fetch_entries(pdb_ids[start:start + BATCH_SIZE]):
            pdb_to_target[entry['rcsb_id'].lower()] = resolve_target(entry)

    target_groups = group_by_target(pdb_to_target)

    clusters_5 = {t: pdbs for t, pdbs in target_groups.items() if len(pdbs) == 5}
    clusters_3plus = {t: pdbs for t, pdbs in target_groups.items() if len(pdbs) >= 3}

    print(f"Total PDB IDs mapped: {len(pdb_to_target)}")
    print(f"Unique targets: {len(target_groups)}")
    print(f"Clusters with exactly 5: {len(clusters_5)}")
    print(f"Clusters with 3+: {len(clusters_3plus)}")

    with open(CLUSTERS_JSON, 'w') as f:
        json.dump({
            'pdb_to_target': pdb_to_target,
            'clusters_5': clusters_5,
            'clusters_3plus': clusters_3plus
        }, f, indent=2)

    print("\nClusters with 5 members:")
    for t, pdbs in sorted(clusters_5.items()):
        print(f" {t}: {pdbs}")

    print("\nClusters with 3-4 members:")
    for t, pdbs in sorted(clusters_3plus.items()):
        if len(pdbs) < 5:
            print(f" {t} ({len(pdbs)}): {pdbs}")

    print(f"\nSaved to {CLUSTERS_JSON}")


if __name__ == "__main__":
    main()
ranking_power.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ CASF-2016 Ranking Power Evaluation for MillerBind v9 and v12
3
+ ============================================================
4
+ Ranking power measures: given a target with multiple ligands,
5
+ can the scoring function correctly rank them by affinity?
6
+
7
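+ The official CASF-2016 benchmark has 57 target clusters with 5 ligands each (285 total).
8
+ This script instead groups complexes by UniProt ID (fetched from the RCSB PDB API by fetch_clusters.py) and evaluates every group with 3+ members.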
9
+ """
10
+
11
+ import pandas as pd
12
+ import numpy as np
13
+ import json
14
+ from scipy.stats import spearmanr, kendalltau
15
+
16
# Load the UniProt-based target clusters written by fetch_clusters.py.
# The path is relative, like the prediction CSVs read under __main__, so the
# script no longer depends on a machine-specific absolute mount point; run it
# from the directory that holds casf2016_target_clusters.json.
with open('casf2016_target_clusters.json', encoding='utf-8') as f:
    cluster_data = json.load(f)

# Use all clusters with 3+ members for ranking power
CASF2016_CLUSTERS = cluster_data['clusters_3plus']
22
+
23
def evaluate_ranking_power(df, model_name, clusters=None):
    """Compute and print per-target ranking-power metrics for one model.

    For every cluster with at least three matching rows in ``df`` the function
    computes Spearman rho and Kendall tau between experimental and predicted
    affinities, checks whether the top-ranked prediction is the strongest
    experimental binder, and counts concordant ligand pairs. A report is
    printed to stdout and the aggregate metrics are returned.

    Args:
        df: DataFrame with columns ``pdb_id``, ``experimental_pkd`` and
            ``predicted_pkd``. It is not modified.
        model_name: Label used in the printed report.
        clusters: Optional mapping ``{target: [pdb_id, ...]}``. Defaults to
            the module-level ``CASF2016_CLUSTERS``.

    Returns:
        dict with keys ``avg_spearman``, ``avg_kendall``,
        ``concordance_rate``, ``top1_rate`` and ``matched_clusters``.
        Averages and rates are 0 when nothing could be evaluated.
    """
    if clusters is None:
        clusters = CASF2016_CLUSTERS

    # Case-insensitive matching key kept as a standalone Series, so the
    # caller's DataFrame is neither copied nor mutated.
    ids_lower = df['pdb_id'].str.lower()

    spearman_list = []
    kendall_list = []
    target_names = []
    top1_correct = 0
    top1_total = 0
    total_concordant = 0
    total_pairs = 0
    matched_clusters = 0

    for target, pdb_ids in clusters.items():
        wanted = {p.lower() for p in pdb_ids}
        subset = df[ids_lower.isin(wanted)]

        # Rank correlation over fewer than three ligands is not informative.
        if len(subset) < 3:
            continue

        matched_clusters += 1
        exp = subset['experimental_pkd'].values
        pred = subset['predicted_pkd'].values

        # Both correlations are undefined when either side is constant.
        if len(set(exp)) > 1 and len(set(pred)) > 1:
            rho, _ = spearmanr(exp, pred)
            tau, _ = kendalltau(exp, pred)
            spearman_list.append(rho)
            kendall_list.append(tau)
            target_names.append(target)

        # Top-1: is the strongest binder correctly identified?
        if np.argmax(exp) == np.argmax(pred):
            top1_correct += 1
        top1_total += 1

        # Concordant pairs: tied pairs count towards the total but are never
        # concordant (their product is 0).
        n = len(exp)
        for i in range(n):
            for j in range(i + 1, n):
                total_pairs += 1
                if (exp[i] - exp[j]) * (pred[i] - pred[j]) > 0:
                    total_concordant += 1

    avg_spearman = np.mean(spearman_list) if spearman_list else 0
    avg_kendall = np.mean(kendall_list) if kendall_list else 0
    concordance_rate = total_concordant / total_pairs if total_pairs > 0 else 0
    top1_rate = top1_correct / top1_total if top1_total > 0 else 0

    print(f"\n{'='*60}")
    print(f" {model_name} - CASF-2016 RANKING POWER")
    print(f"{'='*60}")
    print(f" Matched clusters: {matched_clusters} / {len(clusters)}")
    print(f" Avg Spearman (rho): {avg_spearman:.3f}")
    print(f" Avg Kendall (tau): {avg_kendall:.3f}")
    print(f" Concordance rate: {concordance_rate:.1%} ({total_concordant}/{total_pairs} pairs)")
    print(f" Top-1 success rate: {top1_rate:.1%} ({top1_correct}/{top1_total})")
    print(f" # targets evaluated: {len(spearman_list)}")

    # Show per-target Spearman
    print("\n Per-target Spearman correlations:")
    sorted_targets = sorted(zip(target_names, spearman_list), key=lambda x: x[1], reverse=True)
    for target, rho in sorted_targets:
        # Draw the bar with the sign of the correlation so negative targets
        # are visually distinct from positive ones.
        sign = "+" if rho >= 0 else "-"
        bar = sign * max(1, int(abs(rho) * 20))
        print(f" {target:<25s} rho = {rho:+.3f} {bar}")

    # Published comparison
    print("\n Comparison with published CASF-2016 Ranking Power:")
    print(f" {'Method':<25s} {'Avg rho':>8s}")
    print(f" {'-'*35}")
    published = [
        ("X-Score", 0.247),
        ("ChemScore", 0.282),
        ("AutoDock Vina", 0.281),
        ("RF-Score v3", 0.464),
        ("DeltaVinaRF20", 0.476),
        ("OnionNet-2", 0.488),
    ]
    for name, val in published:
        print(f" {name:<25s} {val:>8.3f}")
    print(f" {model_name:<25s} {avg_spearman:>8.3f} <-- THIS WORK")

    return {
        'avg_spearman': avg_spearman,
        'avg_kendall': avg_kendall,
        'concordance_rate': concordance_rate,
        'top1_rate': top1_rate,
        'matched_clusters': matched_clusters,
    }
120
+
121
+
122
if __name__ == "__main__":
    # Read the prediction tables for both model versions (paths are relative
    # to the current working directory).
    prediction_files = {
        'v9': 'predictions/casf2016_v9_predictions.csv',
        'v12': 'predictions/casf2016_v12_predictions.csv',
    }
    frames = {tag: pd.read_csv(path) for tag, path in prediction_files.items()}
    for tag, frame in frames.items():
        print(f"Loaded {tag}: {len(frame)} predictions")

    # Each call prints its own detailed report and returns the aggregates.
    v9_res = evaluate_ranking_power(frames['v9'], "MillerBind v9")
    v12_res = evaluate_ranking_power(frames['v12'], "MillerBind v12")

    # Side-by-side summary: one row per metric, with the format spec that the
    # metric is rendered in for both columns.
    rule = '=' * 60
    print("\n" + rule)
    print(" SUMMARY")
    print(rule)
    print(f" {'Metric':<25s} {'v9':>10s} {'v12':>10s}")
    print(" " + '-' * 47)
    summary_rows = (
        ('Avg Spearman', 'avg_spearman', '>10.3f'),
        ('Avg Kendall', 'avg_kendall', '>10.3f'),
        ('Concordance', 'concordance_rate', '>9.1%'),
        ('Top-1 success', 'top1_rate', '>9.1%'),
    )
    for label, key, spec in summary_rows:
        print(f" {label:<25s} {v9_res[key]:{spec}} {v12_res[key]:{spec}}")