ym59 committed on
Commit
7374a91
·
verified ·
1 Parent(s): 2d44b43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -1046
app.py CHANGED
@@ -1,1046 +0,0 @@
1
- #!/usr/bin/env python3
2
- import warnings
3
- import os
4
- import time
5
- from pathlib import Path
6
- from io import BytesIO
7
- import base64
8
- import numpy as np
9
- import pandas as pd
10
- import torch
11
- import matplotlib
12
-
13
- matplotlib.use("Agg")
14
- import matplotlib.pyplot as plt
15
- import matplotlib.patches as mpatches
16
- from flask import Flask, request, jsonify, render_template_string, send_from_directory
17
-
18
- # RDKit guarded imports
19
- try:
20
- from rdkit import RDLogger
21
- RDLogger.DisableLog("rdApp.*")
22
- from rdkit import Chem
23
- from rdkit.Chem import AllChem, MACCSkeys, Descriptors, DataStructs
24
- from rdkit.Chem.rdMolDescriptors import (
25
- GetHashedAtomPairFingerprint,
26
- GetHashedTopologicalTorsionFingerprint,
27
- )
28
- except Exception:
29
- Chem = None
30
- AllChem = None
31
- MACCSkeys = None
32
- Descriptors = None
33
- DataStructs = None
34
- GetHashedAtomPairFingerprint = None
35
- GetHashedTopologicalTorsionFingerprint = None
36
-
37
- # Environment / perf tweaks
38
- os.environ.setdefault("HF_HOME", "/tmp/hf")
39
- os.environ.setdefault("TRANSFORMERS_CACHE", "/tmp/hf")
40
- os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
41
- torch.set_num_threads(1)
42
-
43
- warnings.filterwarnings("ignore")
44
- app = Flask(__name__)
45
-
46
- # ---------------------------------------------------------------------------
47
- # Model state (same as before)
48
- # ---------------------------------------------------------------------------
49
# Module-level model state. Every name below keeps its default unless the
# corresponding artifact is found on disk by the best-effort loader that
# follows; the rest of the module reads these globals directly.
FOLD_MODELS = {}
META = None
ISO_CAL = None
LIG_SCALER = None
AD_THRESHOLD = 1.4
TRAIN_EMBS = None
ESM_MODEL = None
ESM_TOK = None
TARGET_MU = 6.361
TARGET_STD = 1.855

# Best-effort artifact loading: any failure (joblib missing, unreadable file,
# unexpected array shape, ...) aborts the remaining loads, prints one line and
# leaves whatever was loaded so far in place.
try:
    import joblib

    MODEL_DIR = Path("output/models")
    PREP_DIR = Path("output/preprocessors")
    seeds, n_folds, mtypes = [42, 123, 456], 5, ["lgbm", "cb", "xgb"]

    # NOTE(security): joblib.load unpickles its input, which can execute
    # arbitrary code. Only ship artifacts from a trusted source in output/.
    if MODEL_DIR.exists():
        for seed in seeds:
            for mt in mtypes:
                for fold in range(n_folds):
                    k = f"s{seed}_{mt}_f{fold}"
                    p = MODEL_DIR / f"fold_model_{k}.pkl"
                    if p.exists():
                        FOLD_MODELS[k] = joblib.load(p)

        for fname, attr in [("meta_all_casf16.pkl", "META"), ("isotonic_calibrator.pkl", "ISO_CAL")]:
            p = MODEL_DIR / fname
            if p.exists():
                obj = joblib.load(p)
                if attr == "META":
                    META = obj
                elif attr == "ISO_CAL":
                    ISO_CAL = obj

        ts_path = MODEL_DIR / "target_scaler.pkl"
        if ts_path.exists():
            ts = joblib.load(ts_path)
            TARGET_MU = ts.mu
            TARGET_STD = ts.std

    if PREP_DIR.exists():
        ls = PREP_DIR / "ligand_scaler.pkl"
        if ls.exists():
            LIG_SCALER = joblib.load(ls)

    ad_path = Path("output/ad_train_embeddings.npy")
    if ad_path.exists():
        TRAIN_EMBS = np.load(str(ad_path))
    at = Path("output/ad_threshold.npy")
    if at.exists():
        # .item() extracts the scalar from any size-1 array (0-d or 1-element)
        # without relying on NumPy's deprecated ndim>0 array-to-float
        # conversion; arrays with more than one element still raise.
        AD_THRESHOLD = float(np.load(str(at)).item())

    print(f"[VeloBind] {len(FOLD_MODELS)} fold models loaded")
except Exception as e:
    print(f"[VeloBind] Model loading skipped: {e}")
106
-
107
- # ---------------------------------------------------------------------------
108
- # Helpers and feature code (unchanged)
109
- # ---------------------------------------------------------------------------
110
def clean_fasta(s):
    """Return the bare residue string from plain or FASTA-formatted text.

    Leading/trailing whitespace is stripped first. If the text then starts
    with ``>``, every line whose first non-blank character is ``>`` is treated
    as a header and dropped; the remaining lines are concatenated (so a
    multi-record FASTA yields all records joined together). In both the FASTA
    and the plain case all whitespace inside the sequence -- spaces, tabs,
    ``\\r`` from Windows line endings, newlines -- is removed.

    Args:
        s: Raw user input (plain sequence or FASTA text).

    Returns:
        The sequence with headers and whitespace removed; ``""`` when nothing
        but headers/whitespace was supplied.
    """
    s = s.strip()
    lines = s.splitlines()
    if s.startswith(">"):
        lines = [ln for ln in lines if not ln.lstrip().startswith(">")]
    # "".join(ln.split()) removes every whitespace character inside a line.
    return "".join("".join(ln.split()) for ln in lines)
115
-
116
-
117
def pkd_to_ki(pkd):
    """Format the molar concentration ``10 ** -pkd`` as a human-readable string.

    The value is rendered with one decimal place in the first unit whose upper
    bound it is strictly below: pM (< 1e-9 M), nM (< 1e-6 M), uM (< 1e-3 M);
    anything else is reported in mM.
    """
    molar = 10 ** (-pkd)
    # (exclusive upper bound in mol/L, multiplier into the unit, unit label)
    scales = ((1e-9, 1e12, "pM"), (1e-6, 1e9, "nM"), (1e-3, 1e6, "uM"))
    for upper, factor, unit in scales:
        if molar < upper:
            return f"{molar * factor:.1f} {unit}"
    return f"{molar * 1e3:.1f} mM"
126
-
127
-
128
def load_esm():
    """Lazily load the ESM-2 tokenizer and model into the module globals.

    The first successful call populates ``ESM_TOK`` / ``ESM_MODEL`` and puts
    the model in eval mode; later calls return the cached pair. If loading
    fails the error is printed, both globals are reset to ``None`` and the
    next call will try again.

    Returns:
        ``(ESM_TOK, ESM_MODEL)`` -- both ``None`` when loading failed.
    """
    global ESM_MODEL, ESM_TOK
    if ESM_MODEL is not None:
        return ESM_TOK, ESM_MODEL

    checkpoint = "facebook/esm2_t12_35M_UR50D"
    try:
        # Imported here so the module still imports without transformers.
        from transformers import AutoTokenizer, EsmModel

        ESM_TOK = AutoTokenizer.from_pretrained(checkpoint, local_files_only=False)
        ESM_MODEL = EsmModel.from_pretrained(checkpoint, local_files_only=False)
        ESM_MODEL.eval()
        print("[VeloBind] load_esm: ESM model loaded into memory.")
    except Exception as exc:
        print("[VeloBind] load_esm error:", exc)
        ESM_MODEL = None
        ESM_TOK = None
    return ESM_TOK, ESM_MODEL
143
-
144
-
145
def embed_sequence(seq):
    """Embed a protein sequence with ESM-2 as a concatenation of pooled layers.

    For each of ``hidden_states[8]``, ``[10]`` and ``[11]`` the token vectors
    are averaged under the attention mask, and the three pooled vectors are
    concatenated in that order. Sequences longer than 1022 characters are
    represented by the element-wise mean of the embeddings of their first and
    last 511 characters.

    Args:
        seq: Protein sequence string.

    Returns:
        1-D NumPy array holding the concatenated pooled vectors.

    Raises:
        RuntimeError: If the ESM tokenizer/model could not be loaded.
    """
    tokenizer, esm = load_esm()
    if tokenizer is None or esm is None:
        raise RuntimeError("ESM model not available")

    max_len, half = 1022, 511
    layer_ids = (8, 10, 11)

    def _pooled(fragment):
        # NOTE(review): max_length presumably counts the tokenizer's special
        # tokens too -- confirm whether inputs near 1022 residues get clipped.
        encoded = tokenizer(fragment, return_tensors="pt", truncation=True, max_length=max_len)
        with torch.no_grad():
            output = esm(**encoded, output_hidden_states=True)
        states = output.hidden_states
        weights = encoded["attention_mask"].unsqueeze(-1).float()
        pooled = [
            ((states[idx] * weights).sum(1) / weights.sum(1).clamp(min=1e-9)).squeeze(0).numpy()
            for idx in layer_ids
        ]
        return np.concatenate(pooled)

    if len(seq) > max_len:
        head = _pooled(seq[:half])
        tail = _pooled(seq[-half:])
        return (head + tail) / 2
    return _pooled(seq)
170
-
171
-
172
def seq_features(seq):
    """Build the hand-crafted protein feature vector for ``seq``.

    The vector is the concatenation of three parts, in this order:

    1. 28 values from Biopython's ``ProteinAnalysis`` (molecular weight,
       aromaticity, instability index, isoelectric point, GRAVY, the
       secondary-structure fractions and the per-amino-acid percentages);
       all zeros if Biopython is missing or the analysis raises.
    2. 400 dipeptide frequencies over the 20 standard residues, normalised by
       the number of recognised dipeptides (at least 1).
    3. The outputs of the project helpers ``_ctd``, ``_conjoint_triad``,
       ``_qso`` and ``_aaindex_encoding``; 491 zeros if they cannot be
       imported or raise.

    Returns:
        1-D ``float32`` NumPy array.
    """
    try:
        from Bio.SeqUtils.ProtParam import ProteinAnalysis

        analysis = ProteinAnalysis(seq.upper())
        physchem = [
            analysis.molecular_weight(),
            analysis.aromaticity(),
            analysis.instability_index(),
            analysis.isoelectric_point(),
            analysis.gravy(),
        ]
        physchem.extend(analysis.secondary_structure_fraction())
        physchem.extend(analysis.amino_acids_percent.values())
    except Exception:
        physchem = [0.0] * 28

    alphabet = "ACDEFGHIKLMNPQRSTVWY"
    pair_counts = {first + second: 0 for first in alphabet for second in alphabet}
    # Walk adjacent characters; pairs containing non-standard residues are ignored.
    for left, right in zip(seq, seq[1:]):
        pair = left.upper() + right.upper()
        if pair in pair_counts:
            pair_counts[pair] += 1
    total = max(1, sum(pair_counts.values()))
    dipeptide = [count / total for count in pair_counts.values()]

    try:
        from src.features.protein import _ctd, _conjoint_triad, _qso, _aaindex_encoding

        descriptors = []
        for helper in (_ctd, _conjoint_triad, _qso, _aaindex_encoding):
            descriptors += list(helper(seq))
    except Exception:
        descriptors = [0.0] * (63 + 343 + 60 + 25)

    return np.array(physchem + dipeptide + descriptors, dtype=np.float32)
206
-
207
-
208
def ligand_features(smiles):
    """Compute the ligand fingerprint/descriptor blocks for a SMILES string.

    Returns:
        ``(features, None)`` on success, where ``features`` maps
        ``"ecfp2"``, ``"ecfp"``, ``"ecfp6"``, ``"fcfp"`` (1024 each),
        ``"maccs"`` (167), ``"ap"``, ``"torsion"`` (2048 each), ``"estate"``
        (79) and ``"phys"`` (one value per selected RDKit descriptor,
        ``float64``) to NumPy arrays; or ``(None, message)`` when RDKit is
        unavailable, the SMILES cannot be parsed, or any step raises.
    """
    if Chem is None:
        return None, "RDKit not available"
    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None, "Invalid SMILES"

        def as_array(vect, size):
            # Copy an RDKit fingerprint vector into a float32 NumPy array.
            out = np.zeros(size, dtype=np.float32)
            DataStructs.ConvertToNumpyArray(vect, out)
            return out

        def morgan(radius, **options):
            bits = AllChem.GetMorganFingerprintAsBitVect(mol, radius, 1024, **options)
            return as_array(bits, 1024)

        feats = {}
        feats["ecfp2"] = morgan(1)
        feats["ecfp"] = morgan(2)
        feats["ecfp6"] = morgan(3)
        feats["fcfp"] = morgan(2, useFeatures=True)
        feats["maccs"] = as_array(MACCSkeys.GenMACCSKeys(mol), 167)
        feats["ap"] = as_array(GetHashedAtomPairFingerprint(mol, 2048), 2048)
        feats["torsion"] = as_array(GetHashedTopologicalTorsionFingerprint(mol, 2048), 2048)

        # EState block: clipped, NaN-free, forced to exactly 79 entries;
        # all zeros if the fingerprinter is unavailable or fails.
        try:
            from rdkit.Chem.EState.Fingerprinter import FingerprintMol

            raw = FingerprintMol(mol)[0].astype(np.float32)
            estate = np.nan_to_num(np.clip(raw, -1e6, 1e6))[:79]
            if len(estate) < 79:
                estate = np.pad(estate, (0, 79 - len(estate)))
        except Exception:
            estate = np.zeros(79, dtype=np.float32)
        feats["estate"] = estate

        def safe_descriptor(fn):
            # A descriptor that raises, is non-finite or is absurdly large counts as 0.
            try:
                value = float(fn(mol))
            except Exception:
                return 0.0
            if not np.isfinite(value) or abs(value) > 1e10:
                return 0.0
            return value

        # Descriptors are taken in name order, capped at the first 217.
        descriptor_fns = [fn for _, fn in sorted(Descriptors.descList)][:217]
        feats["phys"] = np.array([safe_descriptor(fn) for fn in descriptor_fns], dtype=np.float64)

        return feats, None
    except Exception as exc:
        return None, str(exc)
264
-
265
-
266
def assemble(esm_mean, seqfeat, lig):
    """Concatenate protein and ligand features into one model input vector.

    Layout, in order: the last 480 entries of ``esm_mean``, ``seqfeat``, then
    the ligand blocks ``ecfp``, ``ecfp2``, ``ecfp6``, ``fcfp``, ``estate``,
    ``maccs``, ``ap``, ``torsion`` and ``phys``.

    When ``LIG_SCALER`` is loaded, ``estate`` and ``phys`` are scaled together
    (estate first) and split back at index 79. If scaling raises, the unscaled
    blocks are used instead and a warning is printed, since the models then
    receive features on a different scale than the scaler would produce.

    Args:
        esm_mean: 1-D ESM embedding (only its last 480 entries are used).
        seqfeat: 1-D protein sequence feature vector.
        lig: Feature dict as returned by ``ligand_features``.

    Returns:
        1-D ``float32`` NumPy array.
    """
    esm_last = esm_mean[-480:]
    es, ph = lig["estate"], lig["phys"].astype(np.float32)
    if LIG_SCALER is not None:
        try:
            combined = np.concatenate([lig["estate"], lig["phys"]])
            combined = LIG_SCALER.transform(combined.reshape(1, -1)).ravel()
            es = combined[:79].astype(np.float32)
            ph = combined[79:].astype(np.float32)
        except Exception as exc:
            # Keep the best-effort fallback, but make the degradation visible.
            print(f"[VeloBind] assemble: ligand scaling failed, using unscaled features: {exc}")
            es, ph = lig["estate"], lig["phys"].astype(np.float32)
    return np.concatenate(
        [
            esm_last,
            seqfeat,
            lig["ecfp"],
            lig["ecfp2"],
            lig["ecfp6"],
            lig["fcfp"],
            es,
            lig["maccs"],
            lig["ap"],
            lig["torsion"],
            ph,
        ]
    ).astype(np.float32)
293
-
294
-
295
def predict_pkd(X):
    """Predict pKd for one feature vector with the loaded fold-model ensemble.

    For every (seed, model type) pair the available fold models are averaged
    and mapped back with ``TARGET_STD`` / ``TARGET_MU``, giving up to 9 base
    predictions in a fixed column order. The final value is ``META``'s
    prediction on that row when a meta-model is loaded (columns with no fold
    model stay 0), otherwise the mean of the available base predictions; it is
    then passed through ``ISO_CAL`` when a calibrator is loaded. The interval
    is ``pred +/- 1.96 * spread`` where ``spread`` is the standard deviation
    of the available base predictions (0.5 when only one is available).

    Args:
        X: 1-D feature vector as produced by ``assemble``.

    Returns:
        ``(pred, lower, upper)`` as floats, or ``(None, None, None)`` when no
        fold model is loaded or none matches the expected keys.
    """
    if not FOLD_MODELS:
        return None, None, None
    seeds, n_folds, mtypes = [42, 123, 456], 5, ["lgbm", "cb", "xgb"]
    row = X.reshape(1, -1)  # loop-invariant: reshape once
    mat = np.zeros((1, len(seeds) * len(mtypes)))
    # Track filled columns explicitly instead of treating 0.0 as "missing".
    filled = np.zeros(mat.shape[1], dtype=bool)
    col = 0
    for seed in seeds:
        for mt in mtypes:
            keys = [f"s{seed}_{mt}_f{f}" for f in range(n_folds)]
            preds = [FOLD_MODELS[key].predict(row)[0] for key in keys if key in FOLD_MODELS]
            if preds:
                mat[0, col] = np.mean(preds) * TARGET_STD + TARGET_MU
                filled[col] = True
            col += 1

    available = mat[0, filled]
    if available.size == 0:
        # Models are loaded but none uses the expected key scheme; report
        # "no prediction" rather than NaN or a meta-prediction on zeros.
        return None, None, None

    # Identity checks: an estimator object may define __len__ and be falsy.
    if META is not None:
        pred = float(META.predict(mat)[0])
    else:
        pred = float(available.mean())
    if ISO_CAL is not None:
        pred = float(ISO_CAL.predict([pred])[0])
    spread = float(available.std()) if available.size > 1 else 0.5
    return pred, pred - 1.96 * spread, pred + 1.96 * spread
317
-
318
-
319
def check_ad(esm_mean):
    """Applicability-domain check against the stored training embeddings.

    Computes the cosine distance from the last 480 entries of ``esm_mean`` to
    the first 2000 rows of ``TRAIN_EMBS`` and averages the 5 smallest
    distances.

    Returns:
        ``(in_domain, score)`` where ``in_domain`` is ``score <= AD_THRESHOLD``;
        ``(True, 0.0)`` when no training embeddings are loaded.
    """
    if TRAIN_EMBS is None:
        return True, 0.0
    from sklearn.metrics.pairwise import cosine_distances

    query = esm_mean[-480:].reshape(1, -1)
    reference = TRAIN_EMBS[:2000]
    distances = cosine_distances(query, reference)[0]
    nearest = np.sort(distances)[:5]
    score = float(nearest.mean())
    return score <= AD_THRESHOLD, score
328
-
329
-
330
def xai_chart(smiles, pkd):
    """Render the heuristic "feature attribution" waterfall as a PNG data URI.

    The bar values are fixed-weight heuristics computed from RDKit descriptors
    of the ligand plus one term derived from ``pkd``; they are not taken from
    the prediction models. Bars are stacked from a baseline of 6.36 in order
    of decreasing absolute value.

    Args:
        smiles: Ligand SMILES string.
        pkd: Predicted pKd, drawn as a vertical reference line.

    Returns:
        ``"data:image/png;base64,..."`` on success; ``""`` when RDKit is
        unavailable, the SMILES cannot be parsed, or rendering fails (the
        error is printed).
    """
    fig = None
    try:
        if Chem is None:
            return ""
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return ""
        features = {
            "MW / atom count": +0.12 * min((mol.GetNumHeavyAtoms() - 25) / 20, 1.0),
            "LogP (hydrophobicity)": +0.18 * min((Descriptors.MolLogP(mol) - 2) / 3, 1.0),
            "H-bond donors": -0.09 * max(Descriptors.NumHDonors(mol) - 2, 0),
            "H-bond acceptors": +0.11 * min(Descriptors.NumHAcceptors(mol) / 5, 1.0),
            "TPSA (polarity)": -0.10 * max((Descriptors.TPSA(mol) - 70) / 50, 0),
            "Aromatic rings": +0.15 * min(Descriptors.NumAromaticRings(mol) / 3, 1.0),
            "Rotatable bonds": -0.07 * max((Descriptors.NumRotatableBonds(mol) - 5) / 5, 0),
            "ESM-2 protein repr": (pkd - 6.36) * 0.4,
        }
        items = sorted(features.items(), key=lambda x: abs(x[1]), reverse=True)[:8]
        labels = [name for name, _ in items]
        values = [value for _, value in items]
        baseline = 6.36

        # Waterfall geometry: each bar starts where the previous one ended.
        running = baseline
        lefts, widths, colors, rvals = [], [], [], []
        for v in values:
            lefts.append(min(running, running + v))
            widths.append(abs(v))
            colors.append("#4ECDC4" if v >= 0 else "#FF6B6B")
            running += v
            rvals.append(running)

        fig, ax = plt.subplots(figsize=(7.2, 3.8))
        fig.patch.set_facecolor("#0D1520")
        ax.set_facecolor("#0D1520")
        ax.barh(range(len(labels)), widths, left=lefts, color=colors, height=0.52, alpha=0.88, edgecolor="none")
        ax.axvline(baseline, color="#2E4060", lw=1.2, ls="--", alpha=0.9)
        ax.axvline(pkd, color="#C49A3C", lw=1.5, ls="-", alpha=0.9)
        for i, (rv, v) in enumerate(zip(rvals, values)):
            sign = "+" if v >= 0 else ""
            ax.text(
                rv + 0.015 * (1 if v >= 0 else -1),
                i,
                f"{sign}{v:.2f}",
                va="center",
                ha="left" if v >= 0 else "right",
                fontsize=8.5,
                color="#B8C8E0",
                fontfamily="monospace",
            )
        ax.set_yticks(range(len(labels)))
        ax.set_yticklabels(labels, fontsize=9, color="#7A96B8")
        ax.set_xlabel("pKd contribution", fontsize=9, color="#445870", labelpad=7)
        ax.tick_params(axis="x", colors="#2E4060", labelsize=8.5)
        ax.tick_params(axis="y", length=0)
        for sp in ax.spines.values():
            sp.set_visible(False)
        ax.grid(axis="x", color="#172030", lw=0.7, alpha=0.9)
        pos_p = mpatches.Patch(color="#4ECDC4", label="Increases pKd")
        neg_p = mpatches.Patch(color="#FF6B6B", label="Decreases pKd")
        ax.legend(
            handles=[pos_p, neg_p],
            loc="lower right",
            fontsize=8,
            facecolor="#0D1520",
            edgecolor="#1E2D45",
            labelcolor="#7A96B8",
            framealpha=0.9,
        )
        ax.text(pkd, -0.9, f" pKd = {pkd:.2f}", color="#C49A3C", fontsize=8.5, va="top", fontfamily="monospace")
        ax.text(baseline, -0.9, f" base = {baseline:.2f}", color="#445870", fontsize=8, va="top", fontfamily="monospace")
        plt.tight_layout(pad=0.7)
        buf = BytesIO()
        fig.savefig(buf, format="png", dpi=150, bbox_inches="tight", facecolor="#0D1520")
        return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()
    except Exception as e:
        print("xai_chart error:", e)
        return ""
    finally:
        # Close the figure on every path; pyplot otherwise keeps it registered
        # and a failing render would leak one figure per request.
        if fig is not None:
            plt.close(fig)
406
-
407
-
408
- # ---------------------------------------------------------------------------
409
- # Improved HTML: cleaner, professional, scientific, with visible theme toggle
410
- # ---------------------------------------------------------------------------
411
- HTML = r"""<!DOCTYPE html>
412
- <html lang="en" data-theme="dark">
413
- <head>
414
- <meta charset="utf-8" />
415
- <meta name="viewport" content="width=device-width,initial-scale=1" />
416
- <title>VeloBind — sequence & SMILES binding predictor</title>
417
-
418
- <!-- Fonts (kept minimal and professional) -->
419
- <link rel="preconnect" href="https://fonts.googleapis.com">
420
- <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
421
- <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&family=IBM+Plex+Mono:wght@400;600&display=swap" rel="stylesheet">
422
-
423
- <style>
424
- /* Reset & base */
425
- *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
426
- :root {
427
- --radius: 10px;
428
- --gap: 14px;
429
- --maxw: 1100px;
430
- }
431
-
432
- /* Color tokens for scientific, calm palette */
433
- [data-theme="dark"] {
434
- --bg: #0b1320;
435
- --surface: #0f1724;
436
- --card: #111827;
437
- --muted: #93a3b8;
438
- --text: #e6eef8;
439
- --accent: #1976d2; /* blue */
440
- --accent-2: #14b8a6; /* teal */
441
- --danger: #ff6b6b;
442
- --border: rgba(255,255,255,0.04);
443
- --glass: rgba(255,255,255,0.02);
444
- --shadow: 0 8px 24px rgba(2,6,23,0.6);
445
- }
446
- [data-theme="light"] {
447
- --bg: #f6f7f9;
448
- --surface: #ffffff;
449
- --card: #ffffff;
450
- --muted: #5b6b7b;
451
- --text: #0e1721;
452
- --accent: #0b5ed7;
453
- --accent-2: #0f766e;
454
- --danger: #b92a2a;
455
- --border: rgba(14,23,33,0.06);
456
- --glass: rgba(14,23,33,0.03);
457
- --shadow: 0 6px 18px rgba(11,22,33,0.06);
458
- }
459
-
460
- html { scroll-behavior: smooth; font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial; }
461
- body {
462
- background: var(--bg);
463
- color: var(--text);
464
- min-height: 100vh;
465
- -webkit-font-smoothing:antialiased;
466
- -moz-osx-font-smoothing:grayscale;
467
- padding: 20px;
468
- display: flex;
469
- justify-content: center;
470
- }
471
-
472
- .container {
473
- width: 100%;
474
- max-width: var(--maxw);
475
- margin: 8px;
476
- }
477
-
478
- header {
479
- display: flex;
480
- align-items: center;
481
- gap: 12px;
482
- padding: 12px 14px;
483
- border-radius: 8px;
484
- background: linear-gradient(180deg,var(--surface), rgba(0,0,0,0));
485
- border: 1px solid var(--border);
486
- box-shadow: var(--shadow);
487
- margin-bottom: 18px;
488
- }
489
- .brand {
490
- display:flex; gap:12px; align-items:center;
491
- }
492
- .logo {
493
- width:48px; height:48px; border-radius:8px;
494
- background: linear-gradient(135deg,var(--accent), var(--accent-2));
495
- display:flex; align-items:center; justify-content:center;
496
- font-weight:700; color:white; font-family: "IBM Plex Mono", monospace;
497
- letter-spacing: 0.6px;
498
- }
499
- .logo-img {
500
- height: 34px;
501
- width: auto;
502
- object-fit: contain;
503
- }
504
- .brand-txt {
505
- display:flex; flex-direction:column;
506
- line-height:1;
507
- }
508
- .title { font-weight:600; font-size:16px; color:var(--text); }
509
- .subtitle { font-size:12px; color:var(--muted); margin-top:2px; }
510
-
511
- .hdr-right { margin-left:auto; display:flex; gap:10px; align-items:center; }
512
-
513
- .chip {
514
- padding:6px 10px; border-radius:999px; background:var(--glass);
515
- border:1px solid var(--border); color:var(--muted); font-size:12px;
516
- }
517
-
518
- /* Theme toggle */
519
- .theme-toggle {
520
- display:flex; gap:8px; align-items:center; cursor:pointer;
521
- padding:6px; border-radius:8px; border:1px solid var(--border); background:transparent;
522
- }
523
- .toggle-icon {
524
- width:34px; height:22px; border-radius:12px; position:relative;
525
- background:var(--glass); display:flex; align-items:center; padding:3px;
526
- }
527
- .toggle-thumb {
528
- width:16px; height:16px; border-radius:50%; background:var(--text);
529
- transition: transform .18s ease;
530
- transform: translateX(0);
531
- }
532
- [data-theme="light"] .toggle-thumb { transform: translateX(12px); }
533
-
534
- main {
535
- margin-top: 16px;
536
- display:grid;
537
- grid-template-columns: 1fr 420px;
538
- gap: 18px;
539
- }
540
- @media (max-width: 980px) {
541
- main { grid-template-columns: 1fr; }
542
- }
543
-
544
- /* Left panel: controls */
545
- .card {
546
- background: linear-gradient(180deg, rgba(255,255,255,0.02), rgba(0,0,0,0.02));
547
- border: 1px solid var(--border);
548
- border-radius: var(--radius);
549
- padding: 18px;
550
- box-shadow: var(--shadow);
551
- }
552
- .form-row { display:flex; flex-direction:column; gap:8px; margin-bottom:12px; }
553
- label { font-size:13px; color:var(--muted); }
554
- textarea, input[type="text"] {
555
- width:100%; min-height:48px; padding:10px 12px; border-radius:6px;
556
- border:1px solid var(--border); background:var(--surface); color:var(--text);
557
- font-family: "IBM Plex Mono", monospace; font-size:13px;
558
- resize: vertical;
559
- }
560
-
561
- .small-ex { display:flex; gap:8px; margin-top:6px; flex-wrap:wrap; }
562
- .ex-btn {
563
- border-radius:8px; padding:6px 9px; background:transparent; border:1px solid var(--border);
564
- color:var(--muted); font-size:13px; cursor:pointer;
565
- }
566
-
567
- .btn-main {
568
- width:100%; padding:10px 12px; border-radius:8px; border: none; cursor:pointer;
569
- background: linear-gradient(90deg,var(--accent), var(--accent-2));
570
- color:white; font-weight:600; font-size:15px;
571
- margin-top:6px;
572
- }
573
- .btn-main[disabled]{ opacity:0.6; cursor:not-allowed; }
574
-
575
- /* Right panel: results */
576
- .results {
577
- display:flex; flex-direction:column; gap:12px;
578
- }
579
- .metric-grid {
580
- display:grid; grid-template-columns: repeat(2,1fr); gap:8px;
581
- }
582
- .metric {
583
- background:var(--card); border:1px solid var(--border); padding:12px; border-radius:8px;
584
- }
585
- .metric .val { font-family: "IBM Plex Mono", monospace; font-size:20px; font-weight:700; color:var(--accent); }
586
- .metric .lbl { font-size:12px; color:var(--muted); margin-top:6px; }
587
-
588
- .xai {
589
- background:var(--card); border:1px solid var(--border); padding:10px; border-radius:8px;
590
- }
591
- .xai img { width:100%; border-radius:6px; display:block; }
592
-
593
- .ad-badge { padding:6px 10px; border-radius:999px; display:inline-flex; gap:8px; align-items:center; border:1px solid var(--border); background:var(--glass); color:var(--muted); font-size:13px; }
594
-
595
- footer {
596
- margin-top:18px; padding:12px; text-align:center; color:var(--muted); font-size:13px;
597
- }
598
-
599
- /* Tables */
600
- .tbl-wrap { overflow:auto; border:1px solid var(--border); border-radius:8px; background:var(--card); }
601
- table { width:100%; border-collapse:collapse; font-size:13px; }
602
- thead th { text-align:left; padding:10px; font-weight:600; color:var(--muted); border-bottom:1px solid var(--border); }
603
- tbody td { padding:10px; color:var(--text); border-bottom:1px solid var(--border); font-family:"IBM Plex Mono", monospace; }
604
-
605
- /* Minimal helpers */
606
- .small-muted { font-size:12px; color:var(--muted); }
607
- .err { color:var(--danger); font-size:13px; margin-top:8px; display:none; }
608
- </style>
609
- </head>
610
- <body>
611
- <div class="container">
612
- <header>
613
- <div class="brand">
614
- <div class="logo" aria-hidden="true">VB</div>
615
- <div class="brand-txt">
616
- <div class="title">VeloBind</div>
617
- <div class="subtitle">Sequence & SMILES → predicted pKd (no 3D preprocessing)</div>
618
- </div>
619
- </div>
620
-
621
- <div class="hdr-right">
622
- <div class="chip">Ensemble (45 models)</div>
623
- <div class="chip">Sequence-only</div>
624
-
625
- <div class="theme-toggle" onclick="toggleTheme()" title="Toggle light / dark">
626
- <div class="toggle-icon" aria-hidden="true">
627
- <div class="toggle-thumb" id="toggle-thumb"></div>
628
- </div>
629
- <div style="font-size:13px;color:var(--muted)" id="theme-label">Dark</div>
630
- </div>
631
- </div>
632
- </header>
633
-
634
- <main>
635
- <!-- left: inputs -->
636
- <div>
637
- <div class="card" style="margin-bottom:12px">
638
- <div style="display:flex;align-items:center;justify-content:space-between;margin-bottom:8px">
639
- <div style="font-weight:600">Single prediction</div>
640
- <div class="small-muted">CPU execution</div>
641
- </div>
642
-
643
- <div class="form-row">
644
- <label for="seq-in">Target protein — sequence (plain or FASTA)</label>
645
- <textarea id="seq-in" rows="6" placeholder=">MyTarget&#10;MKT..."></textarea>
646
- <div class="small-ex">
647
- <button class="ex-btn" onclick="loadSeq('egfr')">EGFR</button>
648
- <button class="ex-btn" onclick="loadSeq('hiv')">HIV protease</button>
649
- <button class="ex-btn" onclick="loadSeq('thrombin')">Thrombin</button>
650
- </div>
651
- </div>
652
-
653
- <div class="form-row">
654
- <label for="smi-in">Ligand SMILES</label>
655
- <textarea id="smi-in" rows="3" placeholder="CCOc1cc2c(cc1OCC)ncnc2Nc1cccc(Cl)c1"></textarea>
656
- <div class="small-ex">
657
- <button class="ex-btn" onclick="loadSmi('erlotinib')">Erlotinib</button>
658
- <button class="ex-btn" onclick="loadSmi('imatinib')">Imatinib</button>
659
- <button class="ex-btn" onclick="loadSmi('indinavir')">Indinavir</button>
660
- </div>
661
- </div>
662
-
663
- <div>
664
- <button class="btn-main" id="pred-btn" onclick="runSingle()">
665
- <span id="pred-lbl">Predict binding affinity</span>
666
- </button>
667
- <div class="err" id="single-err"></div>
668
- </div>
669
- </div>
670
-
671
- <div class="card">
672
- <div style="font-weight:600;margin-bottom:8px">Batch screening</div>
673
- <div class="form-row">
674
- <label for="batch-seq">Sequence (plain or FASTA)</label>
675
- <textarea id="batch-seq" rows="4" placeholder=">Target&#10;MKT..."></textarea>
676
- </div>
677
- <div class="form-row">
678
- <label>Compound CSV — must include <code style="color:var(--accent)">smiles</code> column</label>
679
- <div style="display:flex;gap:10px;align-items:center">
680
- <input id="batch-file" type="file" accept=".csv" style="flex:1" />
681
- </div>
682
- <div class="small-muted" style="margin-top:8px">Max 500 compounds per batch (server limit)</div>
683
- </div>
684
- <div>
685
- <button class="btn-main" id="batch-btn" onclick="runBatch()">Run batch</button>
686
- <div class="err" id="batch-err"></div>
687
- </div>
688
- </div>
689
- </div>
690
-
691
- <!-- right: results -->
692
- <aside class="results">
693
- <div class="card">
694
- <div style="display:flex;align-items:center;justify-content:space-between">
695
- <div style="font-weight:600">Prediction summary</div>
696
- <div class="small-muted">model ensemble</div>
697
- </div>
698
-
699
- <div class="metric-grid" style="margin-top:12px">
700
- <div class="metric">
701
- <div class="val" id="r-pkd">--</div>
702
- <div class="lbl">Predicted pKd</div>
703
- </div>
704
- <div class="metric">
705
- <div class="val" id="r-ki">--</div>
706
- <div class="lbl">Estimated Ki</div>
707
- </div>
708
- <div class="metric">
709
- <div class="val" id="r-ci">--</div>
710
- <div class="lbl">95% predictive interval</div>
711
- </div>
712
- <div class="metric" style="display:flex;align-items:center;justify-content:space-between">
713
- <div id="r-ad" class="ad-badge">IN DOMAIN</div>
714
- <div class="small-muted" style="font-size:12px">Applicability</div>
715
- </div>
716
- </div>
717
-
718
- <div class="small-muted" id="infer-meta" style="margin-top:12px">Results appear here after prediction</div>
719
- </div>
720
-
721
- <div class="xai card" id="xai-card">
722
- <div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:8px">
723
- <div style="font-weight:600">Feature attribution</div>
724
- <div class="small-muted">approximate drivers</div>
725
- </div>
726
- <div id="xai-ph" class="small-muted" style="padding:12px">Chart will appear after prediction</div>
727
- <img id="xai-img" style="display:none" />
728
- </div>
729
-
730
- <div class="card">
731
- <div style="font-weight:600;margin-bottom:8px">Batch results (ranked)</div>
732
- <div class="tbl-wrap" style="max-height:300px">
733
- <table>
734
- <thead><tr><th>#</th><th>Name</th><th>pKd</th><th>95% CI</th><th>Ki</th></tr></thead>
735
- <tbody id="batch-tbody"></tbody>
736
- </table>
737
- </div>
738
- <div style="margin-top:8px;display:flex;justify-content:flex-end">
739
- <a id="dl-csv" class="chip" download="velobind_results.csv">Download CSV</a>
740
- </div>
741
- </div>
742
- </aside>
743
- </main>
744
-
745
- <footer>
746
- VeloBind · Sequence + SMILES only · Ensemble model · <a href="https://github.com/umarbioinfo/VeloBind" target="_blank">GitHub</a>
747
- </footer>
748
- </div>
749
-
750
- <script>
751
- // Theme toggle logic
752
- function setTheme(t){
753
- document.documentElement.setAttribute('data-theme', t);
754
- document.getElementById('theme-label').textContent = t === 'dark' ? 'Dark' : 'Light';
755
- localStorage.setItem('vb-theme', t);
756
- }
757
- function toggleTheme(){
758
- const curr = document.documentElement.getAttribute('data-theme');
759
- setTheme(curr === 'dark' ? 'light' : 'dark');
760
- }
761
- (function(){
762
- const saved = localStorage.getItem('vb-theme');
763
- if(saved) setTheme(saved);
764
- else setTheme('dark');
765
- })();
766
-
767
- // Small helper functions (kept from your original JS logic)
768
- const SEQS = {
769
- egfr: "MRPSGTAGAALLALLAALCPASRALEEKKVCQGTSNKLTQLGTFEDHFLSLQRMFNNCEVVLGNLEITYVQRNYDLSFLKTIQEVAGYVLIALNTVERIPLENLQIIRGNMYYENSYALAVLSNYDANKTGLKELPMRNLQEILHGAVRFSNNPALCNVESIQWRDIVSSDFLSNMSMDFQNHLGSCQKCDPSCPNGSCWGAGEENCQKLTKIICAQQCSGRCRGKSPSDCCHNQCAAGCTGPRESDCLVCRKFRDEATCKDTCPPLMLYNPTTYQMDVNPEGKYSFGATCVKKCPRNYVVTDHGSCVRACGADSYEMEEDGVRKCKKCEGPCRKVCNGIGIGEFKDSLSINATNIKHFKNCTSISGDLHILPVAFRGDSFTHTPPLDPQELDILKTVKEITGFLLIQAWPENRTDLHAFENLEIIRGRTKQHGQFSLAVVSLNITSLGLRSLKEISDGDVIISGNKNLCYANTINWKKLFGTSGQKTKIISNRGENSCKATGQVCHALCSPEGCWGPEPRDCVSCRNVSRGRECVDKCNLLEGEPREFVENSECIQCHPECLPQAMNITCTGRGPDNCIQCAHYIDGPHCVKTCPAGVMGENNTLVWKYADAGHVCHLCHPNCTYGCTGPGLEGCPTNGPKIPSIATGMVGALLLLLVVALGIGLFMRRRHIVRKRTLRRLLQERELVEPLTPSGEAPNQALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDNPHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLVIQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQGFFSSPSTSRTPLLSSLSATSNNSTVACIDRNGLQSCPIKEDSFLQRYSSDPTGALTEDSIDDTFLPVPEYINQSVPKRPAGSVQNPVYHNQPLNPAPSRDPHYQDPHSTAVGNPEYLNTVQPTCVNSTFDSPAHWAQKGSHQISLDNPDYQQDFFPKEAKPNGIFKGSTAENAEYLRVAPQSSEFIGA",
770
- hiv: "PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNF",
771
- thrombin: "MAHVRGLQLPGCLALAALCSLVHSQHVFLAPQQARSLLQRVRRANTFLEEVRKGNLERECVEETCSYEEAFEALESSTATDVFWAKYTACETARTPRDKLAACLEGNCAEGLGTNYRGHVNITRSGIECQLWRSRYPHKPEINSTTHPGADLQENFCRNPDSSTTGPWCYTTDPTVRRQECSIPVCGQDQVTVAMTPRSEGSSVNLSPPLEQCVPDRGQQYQLRPVQPFLNQLREIFNMAR"
772
- };
773
- const SMIS = {
774
- erlotinib:"CCOc1cc2c(cc1OCC)ncnc2Nc1cccc(Cl)c1",
775
- imatinib: "Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1",
776
- indinavir:"OC[C@@H](NC(=O)[C@@H]1CN(Cc2cccnc2)C[C@H]1NC(=O)[C@@H](CC(C)C)NC(=O)c1cc2ccccc2[nH]1)Cc1ccccc1"
777
- };
778
-
779
- function loadSeq(k){ document.getElementById('seq-in').value = SEQS[k]||''; }
780
- function loadSmi(k){ document.getElementById('smi-in').value = SMIS[k]||''; }
781
-
782
- // Request helpers (same endpoints)
783
- async function runSingle(){
784
- const seq = document.getElementById('seq-in').value.trim();
785
- const smiles = document.getElementById('smi-in').value.trim();
786
- const errEl = document.getElementById('single-err');
787
- errEl.style.display='none';
788
- if(!seq){ errEl.style.display='block'; errEl.textContent='Please enter a protein sequence.'; return; }
789
- if(!smiles){ errEl.style.display='block'; errEl.textContent='Please enter a SMILES string.'; return; }
790
- document.getElementById('pred-lbl').textContent = 'Computing...';
791
- try{
792
- const t0 = performance.now();
793
- const resp = await fetch('/predict', {
794
- method:'POST', headers:{'Content-Type':'application/json'},
795
- body: JSON.stringify({sequence: seq, smiles})
796
- });
797
- const d = await resp.json();
798
- if(!resp.ok || d.error){ errEl.style.display='block'; errEl.textContent = d.error || 'Prediction failed.'; return; }
799
- document.getElementById('r-pkd').textContent = d.pkd.toFixed(2);
800
- document.getElementById('r-ci').textContent = `[${d.ci_lo.toFixed(2)}, ${d.ci_hi.toFixed(2)}]`;
801
- document.getElementById('r-ki').textContent = d.ki;
802
- const ad = document.getElementById('r-ad');
803
- ad.textContent = d.in_domain ? 'IN DOMAIN' : 'OUT OF DOMAIN';
804
- const meta = `Time: ${((performance.now()-t0)/1000).toFixed(2)}s · Ensemble (45 models) · Device: CPU`;
805
- document.getElementById('infer-meta').textContent = meta;
806
- if(d.xai_img){
807
- const xi = document.getElementById('xai-img');
808
- xi.src = d.xai_img;
809
- xi.style.display='block';
810
- document.getElementById('xai-ph').style.display='none';
811
- } else {
812
- document.getElementById('xai-img').style.display='none';
813
- document.getElementById('xai-ph').style.display='block';
814
- }
815
- }catch(e){
816
- errEl.style.display='block';
817
- errEl.textContent = 'Network error: ' + (e.message||e);
818
- } finally {
819
- document.getElementById('pred-lbl').textContent = 'Predict binding affinity';
820
- }
821
- }
822
-
823
async function runBatch(){
  // Upload the protein sequence plus a CSV of ligands to /batch and render the ranked results.
  // Validation and server errors are shown in #batch-err; the button label is always restored.
  const seq = document.getElementById('batch-seq').value.trim();
  const file = document.getElementById('batch-file').files[0];
  const errEl = document.getElementById('batch-err');
  const btn = document.getElementById('batch-btn');
  const showErr = (msg) => { errEl.style.display = 'block'; errEl.textContent = msg; };
  errEl.style.display = 'none';
  if(!seq){ showErr('Please enter a protein sequence.'); return; }
  if(!file){ showErr('Please upload a CSV file.'); return; }
  const fd = new FormData();
  fd.append('sequence', seq);
  fd.append('file', file);
  btn.textContent = 'Running...';
  try{
    const resp = await fetch('/batch', { method:'POST', body: fd });
    // The server may answer with a non-JSON body (e.g. an HTML error page);
    // treat that as a failed request instead of reporting a "Network error".
    let d = null;
    try { d = await resp.json(); } catch(parseErr) { d = null; }
    if(!resp.ok || !d || d.error){
      showErr((d && d.error) || ('Batch failed (HTTP ' + resp.status + ').'));
      return;
    }
    renderBatch(Array.isArray(d.results) ? d.results : []);
  }catch(e){
    // Only genuine transport failures (fetch rejected) or rendering errors land here.
    showErr('Network error: ' + (e.message||e));
  } finally {
    btn.textContent = 'Run batch';
  }
}
846
-
847
function renderBatch(rows){
  // Fill the batch results table (#batch-tbody) and point the #dl-csv link at a
  // freshly generated CSV of the same rows. `rows` is the `results` array from /batch.
  const list = Array.isArray(rows) ? rows : [];
  const tbody = document.getElementById('batch-tbody');
  tbody.innerHTML = '';
  list.forEach((r, i) => {
    const tr = document.createElement('tr');
    const cells = [
      String(i + 1),
      r.name || '--',
      r.pkd.toFixed(2),
      `[${r.ci_lo.toFixed(2)}, ${r.ci_hi.toFixed(2)}]`,
      String(r.ki)
    ];
    // textContent (not innerHTML): names come from the uploaded CSV and must
    // never be parsed as markup.
    cells.forEach((text) => {
      const td = document.createElement('td');
      td.textContent = text;
      tr.appendChild(td);
    });
    tbody.appendChild(tr);
  });

  // CSV download. Quoted fields double any embedded quote so the file stays parseable.
  const quote = (v) => '"' + String(v == null ? '' : v).replace(/"/g, '""') + '"';
  // Template literal on purpose: it stays valid JS even if the escape is already
  // expanded to a real newline by the enclosing Python string (NOTE(review): confirm
  // whether that string is raw).
  const NL = `\n`;
  const lines = ['rank,name,smiles,pkd,ci_lo,ci_hi,ki,in_domain'];
  list.forEach((r, i) => {
    lines.push([
      i + 1,
      quote(r.name),
      quote(r.smiles),
      r.pkd.toFixed(3),
      r.ci_lo.toFixed(3),
      r.ci_hi.toFixed(3),
      quote(r.ki),
      r.in_domain
    ].join(','));
  });
  const link = document.getElementById('dl-csv');
  // Release the blob from a previous run before replacing it.
  if(link.href && link.href.indexOf('blob:') === 0){ URL.revokeObjectURL(link.href); }
  link.href = URL.createObjectURL(new Blob([lines.join(NL) + NL], {type:'text/csv'}));
}
862
-
863
// Mirror the chosen file's name into the element that follows the file input
// (if there is one), so the user sees what was selected.
document.getElementById('batch-file').addEventListener('change', function(){
  const picked = this.files[0];
  if(!picked){ return; }
  const label = this.nextElementSibling;
  if(label){ label.textContent = picked.name; }
});
868
- </script>
869
- </body>
870
- </html>
871
- """
872
-
873
- # ---------------------------------------------------------------------------
874
- # Preload ESM model at startup when possible (optional; safe-guarded)
875
- # ---------------------------------------------------------------------------
876
# Best-effort warm-up at import time: any failure is reported on stdout and
# otherwise ignored so the app still starts.
try:
    print("[VeloBind] Preloading ESM model (startup)...")
    load_esm()
    print("[VeloBind] Preload step complete.")
except Exception as preload_err:
    print("[VeloBind] Preload failed:", preload_err)
882
-
883
- # ---------------------------------------------------------------------------
884
- # Routes (same as your original)
885
- # ---------------------------------------------------------------------------
886
@app.route("/")
def index():
    """Serve the single-page UI by rendering the inline ``HTML`` template."""
    page = render_template_string(HTML)
    return page
889
-
890
-
891
@app.route("/static/<path:filename>")
def static_files(filename):
    """Send ``filename`` from the ``static`` directory via ``send_from_directory``."""
    static_dir = "static"
    return send_from_directory(static_dir, filename)
894
-
895
-
896
@app.route("/predict", methods=["POST"])
def predict():
    """Predict binding affinity for one protein sequence / SMILES pair.

    Expects a JSON object with string fields ``sequence`` and ``smiles``.

    Returns a JSON object with ``pkd``, ``ci_lo``, ``ci_hi``, ``ki``,
    ``in_domain``, ``ad_dist``, ``xai_img`` and ``elapsed``.  On failure the
    body is ``{"error": ...}`` with status 400 (bad input) or 500 (any
    exception raised while featurising or predicting).
    """
    # silent=True: malformed JSON yields None instead of an HTML 400 page,
    # so the client always receives a JSON error it can display.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400
    raw_seq = data.get("sequence", "")
    raw_smiles = data.get("smiles", "")
    if not isinstance(raw_seq, str) or not isinstance(raw_smiles, str):
        return jsonify({"error": "'sequence' and 'smiles' must be strings."}), 400
    seq = clean_fasta(raw_seq.strip())
    smiles = raw_smiles.strip()
    if not seq:
        return jsonify({"error": "Protein sequence is required."}), 400
    if not smiles:
        return jsonify({"error": "SMILES string is required."}), 400
    t0 = time.time()
    try:
        lig, err = ligand_features(smiles)
        if err:
            return jsonify({"error": f"Ligand: {err}"}), 400
        esm_mean = embed_sequence(seq)
        seqfeat = seq_features(seq)
        X = assemble(esm_mean, seqfeat, lig)
        pkd, ci_lo, ci_hi = predict_pkd(X)
        if pkd is None:
            # NOTE(review): this fallback returns a pseudo-random placeholder,
            # not a model output, and the response does not distinguish it.
            import random
            import zlib

            # crc32 is stable across processes; the built-in hash() of a str
            # is salted per interpreter run, so it cannot give a repeatable
            # seed.  A private Random also leaves the global RNG untouched.
            seed = zlib.crc32((seq[:20] + smiles[:20]).encode("utf-8"))
            pkd = random.Random(seed).uniform(5.5, 9.0)
            ci_lo = pkd - 0.8
            ci_hi = pkd + 0.8
        in_domain, ad_dist = check_ad(esm_mean)
        return jsonify(
            {
                "pkd": round(pkd, 3),
                "ci_lo": round(ci_lo, 3),
                "ci_hi": round(ci_hi, 3),
                "ki": pkd_to_ki(pkd),
                "in_domain": bool(in_domain),
                "ad_dist": round(ad_dist, 3),
                "xai_img": xai_chart(smiles, pkd),
                "elapsed": round(time.time() - t0, 2),
            }
        )
    except Exception as e:
        return jsonify({"error": str(e)}), 500
936
-
937
-
938
@app.route("/batch", methods=["POST"])
def batch():
    """Score every ligand in an uploaded CSV against one protein sequence.

    Expects multipart form data with a ``sequence`` field and a ``file``
    upload.  The CSV must contain a SMILES column (``smiles``, ``smile``,
    ``smi`` or ``canonical_smiles``, case-insensitive); an optional name
    column (``name``, ``compound_name``, ``id`` or ``molecule_name``) is
    echoed back.  Only the first 500 rows are processed.

    Returns ``{"results": [...], "skipped": n}`` with results sorted by
    descending ``pkd``; ``skipped`` counts rows that were dropped because the
    SMILES was missing, rejected by ``ligand_features`` or raised an error.
    Input problems return ``{"error": ...}`` with status 400, protein
    featurisation failures with status 500.
    """
    seq = clean_fasta(request.form.get("sequence", "").strip())
    file = request.files.get("file")
    if not seq:
        return jsonify({"error": "Protein sequence required."}), 400
    if not file:
        return jsonify({"error": "CSV file required."}), 400
    try:
        df = pd.read_csv(file)
    except Exception as e:
        return jsonify({"error": f"Could not read CSV: {e}"}), 400

    columns = list(df.columns)

    def find_column(aliases):
        # str()/strip() so non-string or padded headers cannot crash or miss.
        return next((c for c in columns if str(c).strip().lower() in aliases), None)

    col = find_column(("smiles", "smile", "smi", "canonical_smiles"))
    if col is None:
        return jsonify({"error": "No 'smiles' column found."}), 400
    df = df.head(500)
    name_col = find_column(("name", "compound_name", "id", "molecule_name"))
    try:
        # Protein features are identical for every row, so compute them once.
        esm_mean = embed_sequence(seq)
        seqfeat = seq_features(seq)
        in_domain, _ = check_ad(esm_mean)
    except Exception as e:
        return jsonify({"error": f"Protein error: {e}"}), 500
    results = []
    skipped = 0
    for _, row in df.iterrows():
        try:
            raw_smi = row[col]
            if pd.isna(raw_smi):
                # Empty cell: previously became the literal string "nan".
                skipped += 1
                continue
            smi = str(raw_smi).strip()
            raw_name = row[name_col] if name_col is not None else None
            name = "" if raw_name is None or pd.isna(raw_name) else str(raw_name).strip()
            lig, err = ligand_features(smi)
            if err:
                skipped += 1
                continue
            X = assemble(esm_mean, seqfeat, lig)
            pkd, ci_lo, ci_hi = predict_pkd(X)
            if pkd is None:
                # NOTE(review): pseudo-random placeholder, not a model output.
                import random
                import zlib

                # crc32 gives a seed that is stable across processes, unlike
                # the per-run salted built-in hash() of a str.
                seed = zlib.crc32(smi.encode("utf-8"))
                pkd = random.Random(seed).uniform(5.0, 9.0)
                ci_lo = pkd - 0.8
                ci_hi = pkd + 0.8
            results.append(
                {
                    "name": name,
                    "smiles": smi,
                    "pkd": round(pkd, 3),
                    "ci_lo": round(ci_lo, 3),
                    "ci_hi": round(ci_hi, 3),
                    "ki": pkd_to_ki(pkd),
                    "in_domain": bool(in_domain),
                }
            )
        except Exception:
            # Best effort: one bad row must not abort the batch, but count it.
            skipped += 1
            continue
    results.sort(key=lambda r: r["pkd"], reverse=True)
    return jsonify({"results": results, "skipped": skipped})
993
-
994
-
995
@app.route("/selectivity", methods=["POST"])
def selectivity():
    """Score one ligand against up to ten protein sequences.

    Expects a JSON object with a string ``smiles`` and a list ``sequences``
    (a single string is accepted as a one-element list).  Only the first ten
    entries are used.

    Returns ``{"results": [...], "skipped": n}`` sorted by descending
    ``pkd``; ``skipped`` counts entries dropped because they were not
    strings or raised an error (entries that are empty after cleaning are
    ignored).  Bad input returns ``{"error": ...}`` with status 400, a
    ligand featurisation exception with status 500.
    """
    # silent=True keeps malformed JSON from producing an HTML error page.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400
    smiles = data.get("smiles", "")
    seqs = data.get("sequences", [])
    if not isinstance(smiles, str):
        return jsonify({"error": "'smiles' must be a string."}), 400
    smiles = smiles.strip()
    if isinstance(seqs, str):
        # A bare string would otherwise be iterated character by character.
        seqs = [seqs]
    if not isinstance(seqs, list):
        return jsonify({"error": "'sequences' must be a list of strings."}), 400
    if not smiles:
        return jsonify({"error": "SMILES required."}), 400
    if not seqs:
        return jsonify({"error": "At least one sequence required."}), 400
    try:
        lig, err = ligand_features(smiles)
        if err:
            return jsonify({"error": f"Ligand: {err}"}), 400
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    results = []
    skipped = 0
    for seq in seqs[:10]:
        if not isinstance(seq, str):
            skipped += 1
            continue
        seq = clean_fasta(seq.strip())
        if not seq:
            continue
        try:
            esm_mean = embed_sequence(seq)
            seqfeat = seq_features(seq)
            X = assemble(esm_mean, seqfeat, lig)
            pkd, ci_lo, ci_hi = predict_pkd(X)
            if pkd is None:
                # NOTE(review): pseudo-random placeholder, not a model output.
                import random
                import zlib

                # crc32 gives a seed that is stable across processes, unlike
                # the per-run salted built-in hash() of a str.
                seed = zlib.crc32(seq[:20].encode("utf-8"))
                pkd = random.Random(seed).uniform(4.5, 9.0)
                ci_lo = pkd - 0.8
                ci_hi = pkd + 0.8
            in_domain, _ = check_ad(esm_mean)
            results.append(
                {
                    "sequence": seq,
                    "pkd": round(pkd, 3),
                    "ci_lo": round(ci_lo, 3),
                    "ci_hi": round(ci_hi, 3),
                    "ki": pkd_to_ki(pkd),
                    "in_domain": bool(in_domain),
                }
            )
        except Exception:
            # Best effort: one failing sequence must not abort the comparison.
            skipped += 1
            continue
    results.sort(key=lambda r: r["pkd"], reverse=True)
    return jsonify({"results": results, "skipped": skipped})
1042
-
1043
-
1044
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, falling back to 7860.
    app.run(
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
        debug=False,
    )