ym59 committed on
Commit
5797e4d
·
verified ·
1 Parent(s): 26d67e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +551 -257
app.py CHANGED
@@ -1,10 +1,13 @@
1
  import warnings
2
  warnings.filterwarnings("ignore")
3
 
4
- import os, time
 
 
5
  from pathlib import Path
6
  from io import BytesIO
7
- import base64
 
8
  import numpy as np
9
  import pandas as pd
10
  import torch
@@ -15,12 +18,18 @@ import matplotlib.patches as mpatches
15
 
16
  import streamlit as st
17
 
 
18
  try:
19
  from rdkit import RDLogger
20
  RDLogger.DisableLog("rdApp.*")
21
- except:
22
  pass
23
 
 
 
 
 
 
24
  # ─── Page config ────────────────────────────────────────────────
25
  st.set_page_config(
26
  page_title="VeloBind",
@@ -222,228 +231,474 @@ hr { border: none !important; border-top: 1px solid var(--border) !important; ma
222
  </style>
223
  """, unsafe_allow_html=True)
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  # ─── Model loading ───────────────────────────────────────────────
226
  @st.cache_resource(show_spinner=False)
227
- def load_models():
 
 
 
 
 
228
  try:
229
  import joblib
230
- fold_models = {}
231
  meta = iso_cal = lig_scaler = None
232
  train_embs = None
233
  ad_threshold = 1.4
234
  target_mu, target_std = 6.361, 1.855
235
- model_dir = Path("output/models")
236
- if not model_dir.exists() or not any(model_dir.glob("*.pkl")):
237
- from huggingface_hub import snapshot_download
238
- snapshot_download(repo_id="ym59/velobind-models", repo_type="dataset", local_dir=".")
239
- MODEL_DIR = Path("output/models")
240
- PREP_DIR = Path("output/preprocessors")
241
- seeds, n_folds, mtypes = [42,123,456], 5, ["lgbm","cb","xgb"]
 
 
 
 
242
  if MODEL_DIR.exists():
 
 
 
243
  for seed in seeds:
244
  for mt in mtypes:
245
  for fold in range(n_folds):
246
- k = f"s{seed}_{mt}_f{fold}"
247
- p = MODEL_DIR / f"fold_model_{k}.pkl"
248
- if p.exists(): fold_models[k] = joblib.load(p)
249
- for fname, attr in [("meta_all_casf16.pkl","meta"),("isotonic_calibrator.pkl","iso")]:
 
 
 
 
 
 
250
  p = MODEL_DIR / fname
251
  if p.exists():
252
- obj = joblib.load(p)
253
- if attr=="meta": meta=obj
254
- else: iso_cal=obj
 
 
 
 
 
 
255
  ts = MODEL_DIR / "target_scaler.pkl"
256
  if ts.exists():
257
- t = joblib.load(ts); target_mu=t.mu; target_std=t.std
 
 
 
 
 
 
 
 
 
 
 
 
258
  if PREP_DIR.exists():
259
- ls = PREP_DIR/"ligand_scaler.pkl"
260
- if ls.exists(): lig_scaler=joblib.load(ls)
261
- ad = Path("output/ad_train_embeddings.npy")
262
- if ad.exists():
263
- train_embs=np.load(str(ad))
264
- at=Path("output/ad_threshold.npy")
265
- if at.exists(): ad_threshold=float(np.load(str(at)))
266
- return fold_models,meta,iso_cal,lig_scaler,train_embs,ad_threshold,target_mu,target_std
 
 
 
 
 
 
 
 
 
 
 
267
  except Exception as e:
268
- return {},None,None,None,None,1.4,6.361,1.855
 
 
269
 
270
  @st.cache_resource(show_spinner=False)
271
  def load_esm():
 
 
 
272
  from transformers import AutoTokenizer, EsmModel
273
- tok=AutoTokenizer.from_pretrained("facebook/esm2_t12_35M_UR50D")
274
- model=EsmModel.from_pretrained("facebook/esm2_t12_35M_UR50D")
275
- model.eval(); return tok, model
 
 
276
 
277
  @st.cache_data(show_spinner=False)
278
- def embed_sequence(seq: str):
 
 
 
 
 
279
  tok, model = load_esm()
280
  MAX, HALF = 1022, 511
281
- def _chunk(s):
282
- enc=tok(s,return_tensors="pt",truncation=False)
 
283
  with torch.no_grad():
284
- out=model(**enc,output_hidden_states=True)
285
- hs=out.hidden_states; mask=enc["attention_mask"].unsqueeze(-1).float()
286
- mvecs=[]
287
- for li in [8,10,11]:
288
- h=hs[li]; mv=(h*mask).sum(1)/mask.sum(1).clamp(min=1e-9)
289
- mvecs.append(mv.squeeze(0).numpy())
 
 
290
  return np.concatenate(mvecs)
291
- if len(seq)<=MAX: return _chunk(seq)
292
- return (_chunk(seq[:HALF])+_chunk(seq[-HALF:]))/2
293
 
294
- def seq_features(seq):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  try:
296
  from Bio.SeqUtils.ProtParam import ProteinAnalysis
297
- pa=ProteinAnalysis(seq.upper())
298
- pp=[pa.molecular_weight(),pa.aromaticity(),pa.instability_index(),
299
- pa.isoelectric_point(),pa.gravy(),*pa.secondary_structure_fraction(),
300
- *list(pa.amino_acids_percent.values())]
301
- except: pp=[0.0]*28
302
- AA=list("ACDEFGHIKLMNPQRSTVWY")
303
- dp={a+b:0 for a in AA for b in AA}
304
- for i in range(len(seq)-1):
305
- k=seq[i].upper()+seq[i+1].upper()
306
- if k in dp: dp[k]+=1
307
- tot=max(1,sum(dp.values())); dpc=[v/tot for v in dp.values()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  try:
309
- from src.features.protein import _ctd,_conjoint_triad,_qso,_aaindex_encoding
310
- extra=list(_ctd(seq))+list(_conjoint_triad(seq))+list(_qso(seq))+list(_aaindex_encoding(seq))
311
- except: extra=[0.0]*(63+343+60+25)
312
- return np.array(pp+dpc+extra,dtype=np.float32)
 
 
313
 
314
- def ligand_features(smiles):
 
 
 
 
 
315
  try:
316
  from rdkit import Chem
317
- from rdkit.Chem import AllChem,MACCSkeys,Descriptors,DataStructs
318
- from rdkit.Chem.rdMolDescriptors import (GetHashedAtomPairFingerprint,
319
- GetHashedTopologicalTorsionFingerprint)
320
- mol=Chem.MolFromSmiles(smiles)
321
- if mol is None: return None,"Invalid SMILES"
322
- def fp(obj,n):
323
- a=np.zeros(n,dtype=np.float32); DataStructs.ConvertToNumpyArray(obj,a); return a
324
- ecfp2=fp(AllChem.GetMorganFingerprintAsBitVect(mol,1,1024),1024)
325
- ecfp4=fp(AllChem.GetMorganFingerprintAsBitVect(mol,2,1024),1024)
326
- ecfp6=fp(AllChem.GetMorganFingerprintAsBitVect(mol,3,1024),1024)
327
- fcfp4=fp(AllChem.GetMorganFingerprintAsBitVect(mol,2,1024,useFeatures=True),1024)
328
- maccs=fp(MACCSkeys.GenMACCSKeys(mol),167)
329
- ap=np.zeros(2048,dtype=np.float32)
330
- DataStructs.ConvertToNumpyArray(GetHashedAtomPairFingerprint(mol,2048),ap)
331
- tors=np.zeros(2048,dtype=np.float32)
332
- DataStructs.ConvertToNumpyArray(GetHashedTopologicalTorsionFingerprint(mol,2048),tors)
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  try:
334
  from rdkit.Chem.EState.Fingerprinter import FingerprintMol
335
- es=np.nan_to_num(np.clip(FingerprintMol(mol)[0].astype(np.float32),-1e6,1e6))[:79]
336
- if len(es)<79: es=np.pad(es,(0,79-len(es)))
337
- except: es=np.zeros(79,dtype=np.float32)
338
- desc_fns=[v for k,v in sorted(Descriptors.descList)][:217]
339
- phys=[]
 
 
 
 
 
 
340
  for fn in desc_fns:
341
  try:
342
- v=float(fn(mol)); phys.append(0.0 if(not np.isfinite(v) or abs(v)>1e10) else v)
343
- except: phys.append(0.0)
344
- return {"ecfp2":ecfp2,"ecfp":ecfp4,"ecfp6":ecfp6,"fcfp":fcfp4,
345
- "maccs":maccs,"ap":ap,"torsion":tors,
346
- "estate":es,"phys":np.array(phys,dtype=np.float64)},None
347
- except Exception as e: return None,str(e)
348
-
349
- def assemble(esm_mean,seqfeat,lig,lig_scaler):
350
- esm_last=esm_mean[-480:]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  if lig_scaler is not None:
352
  try:
353
- combined=np.concatenate([lig["estate"],lig["phys"]])
354
- combined=lig_scaler.transform(combined.reshape(1,-1)).ravel()
355
- es=combined[:79].astype(np.float32); ph=combined[79:].astype(np.float32)
356
- except: es,ph=lig["estate"],lig["phys"].astype(np.float32)
357
- else: es,ph=lig["estate"],lig["phys"].astype(np.float32)
358
- return np.concatenate([esm_last,seqfeat,lig["ecfp"],lig["ecfp2"],lig["ecfp6"],
359
- lig["fcfp"],es,lig["maccs"],lig["ap"],lig["torsion"],ph]).astype(np.float32)
360
-
361
- def predict_pkd(X,fold_models,meta,iso_cal,target_mu,target_std):
362
- if not fold_models: return None,None,None
363
- seeds,n_folds,mtypes=[42,123,456],5,["lgbm","cb","xgb"]
364
- mat=np.zeros((1,len(seeds)*len(mtypes))); col=0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  for seed in seeds:
366
  for mt in mtypes:
367
- preds=[fold_models[f"s{seed}_{mt}_f{f}"].predict(X.reshape(1,-1))[0]
368
- for f in range(n_folds) if f"s{seed}_{mt}_f{f}" in fold_models]
369
- if preds: mat[0,col]=np.mean(preds)*target_std+target_mu
370
- col+=1
371
- pred=float(meta.predict(mat)[0]) if meta else float(mat[mat!=0].mean())
372
- if iso_cal: pred=float(iso_cal.predict([pred])[0])
373
- nz=mat[mat!=0]; spread=float(nz.std()) if len(nz)>1 else 0.5
374
- return pred,pred-1.96*spread,pred+1.96*spread
375
-
376
- def check_ad(esm_mean,train_embs,ad_threshold):
377
- if train_embs is None: return True,0.0
378
- from sklearn.metrics.pairwise import cosine_distances
379
- q=esm_mean[-480:].reshape(1,-1); d=cosine_distances(q,train_embs[:2000])[0]
380
- k=float(np.sort(d)[:5].mean()); return k<=ad_threshold,k
381
-
382
- def clean_fasta(s):
383
- s=s.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  if s.startswith(">"):
385
  return "".join(l.strip() for l in s.split("\n") if not l.startswith(">"))
386
- return s.replace(" ","").replace("\n","")
387
 
388
- def pkd_to_ki(pkd):
389
- m=10**(-pkd)
390
- if m<1e-9: return f"{m*1e12:.1f} pM"
391
- if m<1e-6: return f"{m*1e9:.1f} nM"
392
- if m<1e-3: return f"{m*1e6:.1f} uM"
393
- return f"{m*1e3:.1f} mM"
394
 
395
- def xai_chart(smiles,pkd):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
  try:
397
- from rdkit import Chem; from rdkit.Chem import Descriptors
398
- mol=Chem.MolFromSmiles(smiles)
399
- if mol is None: return None
400
- features={
401
- "MW / atom count": +0.12*min((mol.GetNumHeavyAtoms()-25)/20,1.0),
402
- "LogP (hydrophobicity)": +0.18*min((Descriptors.MolLogP(mol)-2)/3,1.0),
403
- "H-bond donors": -0.09*max(Descriptors.NumHDonors(mol)-2,0),
404
- "H-bond acceptors": +0.11*min(Descriptors.NumHAcceptors(mol)/5,1.0),
405
- "TPSA (polarity)": -0.10*max((Descriptors.TPSA(mol)-70)/50,0),
406
- "Aromatic rings": +0.15*min(Descriptors.NumAromaticRings(mol)/3,1.0),
407
- "Rotatable bonds": -0.07*max((Descriptors.NumRotatableBonds(mol)-5)/5,0),
408
- "ESM-2 protein repr": (pkd-6.36)*0.4,
 
 
 
409
  }
410
- items=sorted(features.items(),key=lambda x:abs(x[1]),reverse=True)[:8]
411
- labels=[i[0] for i in items]; values=[i[1] for i in items]
412
- baseline=6.36; running=baseline
413
- lefts,widths,colors,rvals=[],[],[],[]
 
 
 
 
414
  for v in values:
415
- lefts.append(min(running,running+v)); widths.append(abs(v))
416
- colors.append("#C9933A" if v>=0 else "#E05252")
417
- running+=v; rvals.append(running)
418
- fig,ax=plt.subplots(figsize=(7.2,3.8))
419
- fig.patch.set_facecolor("#0D1627"); ax.set_facecolor("#0D1627")
420
- ax.barh(range(len(labels)),widths,left=lefts,color=colors,height=0.50,alpha=0.90,edgecolor="none")
421
- ax.axvline(baseline,color="#243858",lw=1.1,ls="--",alpha=0.9)
422
- ax.axvline(pkd,color="#C9933A",lw=1.5,ls="-",alpha=0.9)
423
- for i,(rv,v) in enumerate(zip(rvals,values)):
424
- sign="+" if v>=0 else ""
425
- ax.text(rv+0.012*(1 if v>=0 else -1),i,f"{sign}{v:.2f}",va="center",
426
- ha="left" if v>=0 else "right",fontsize=8.5,color="#B8CCE0",fontfamily="monospace")
427
- ax.set_yticks(range(len(labels))); ax.set_yticklabels(labels,fontsize=9,color="#7A9ABE")
428
- ax.set_xlabel("pKd contribution",fontsize=9,color="#3D5878",labelpad=7)
429
- ax.tick_params(axis="x",colors="#243858",labelsize=8.5,labelcolor="#7A9ABE")
430
- ax.tick_params(axis="y",length=0)
431
- for sp in ax.spines.values(): sp.set_visible(False)
432
- ax.grid(axis="x",color="#162035",lw=0.7,alpha=0.9)
433
- pos_p=mpatches.Patch(color="#C9933A",label="Increases pKd")
434
- neg_p=mpatches.Patch(color="#E05252",label="Decreases pKd")
435
- ax.legend(handles=[pos_p,neg_p],loc="lower right",fontsize=8,
436
- facecolor="#0D1627",edgecolor="#1C2E48",labelcolor="#7A9ABE",framealpha=0.95)
437
- ax.text(pkd,-0.9,f" pKd = {pkd:.2f}",color="#C9933A",fontsize=8.5,va="top",fontfamily="monospace")
438
- ax.text(baseline,-0.9,f" base = {baseline:.2f}",color="#3D5878",fontsize=8,va="top",fontfamily="monospace")
439
- plt.tight_layout(pad=0.6); return fig
440
- except: return None
441
-
442
- # ─── HTML helpers ────────────────────────────────────────────────
443
- def metric_card(label, value, accent=False):
444
- border="rgba(201,147,58,0.35)" if accent else "#1C2E48"
445
- bg="linear-gradient(135deg,#111E33 0%,rgba(201,147,58,0.04) 100%)" if accent else "#111E33"
446
- vc="#C9933A" if accent else "#DCE8F8"
 
 
 
 
 
 
 
 
 
 
 
447
  return st.markdown(f"""
448
  <div style="background:{bg};border:1px solid {border};border-radius:8px;
449
  padding:17px 14px;text-align:center;box-shadow:0 1px 5px rgba(0,0,0,0.4)">
@@ -453,11 +708,16 @@ def metric_card(label, value, accent=False):
453
  font-family:'Outfit',sans-serif">{label}</div>
454
  </div>""", unsafe_allow_html=True)
455
 
456
- def ad_badge(in_domain, dist):
 
457
  if in_domain:
458
- c,bc="#2ABFB3","rgba(42,191,179,0.12)"; bc2="rgba(42,191,179,0.22)"; txt="IN DOMAIN"
 
 
459
  else:
460
- c,bc="#E05252","rgba(224,82,82,0.10)"; bc2="rgba(224,82,82,0.22)"; txt="OUT OF DOMAIN"
 
 
461
  return st.markdown(f"""
462
  <div style="background:#111E33;border:1px solid #1C2E48;border-radius:8px;
463
  padding:17px 14px;text-align:center;box-shadow:0 1px 5px rgba(0,0,0,0.4)">
@@ -472,7 +732,8 @@ def ad_badge(in_domain, dist):
472
  font-family:'Outfit',sans-serif;margin-top:5px">Applicability domain</div>
473
  </div>""", unsafe_allow_html=True)
474
 
475
- def card_wrap(content_fn, head, sub=None):
 
476
  st.markdown(f"""
477
  <div style="background:#111E33;border:1px solid #1C2E48;border-radius:8px;
478
  padding:16px 18px 4px 18px;box-shadow:0 1px 4px rgba(0,0,0,0.4)">
@@ -482,34 +743,34 @@ def card_wrap(content_fn, head, sub=None):
482
  {f'<span style="font-family:monospace;font-size:10px;color:#3D5878;margin-left:6px;font-weight:400">{sub}</span>' if sub else ''}
483
  </div>
484
  </div>""", unsafe_allow_html=True)
485
- # Overlay the widget on top using negative margin trick
486
  st.markdown("""<div style="background:#111E33;border:1px solid #1C2E48;border-top:none;
487
  border-radius:0 0 8px 8px;padding:0 18px 16px;margin-top:-4px;
488
  box-shadow:0 2px 6px rgba(0,0,0,0.3)">""", unsafe_allow_html=True)
489
  content_fn()
490
  st.markdown("</div>", unsafe_allow_html=True)
491
 
492
- # ─── Example data ────────────────────────────────────────────────
 
493
  SEQS = {
494
  "EGFR kinase": "MRPSGTAGAALLALLAALCPASRALEEKKVCQGTSNKLTQLGTFEDHFLSLQRMFNNCEVVLGNLEITYVQRNYDLSFLKTIQEVAGYVLIALNTVERIPLENLQIIRGNMYYENSYALAVLSNYDANKTGLKELPMRNLQEILHGAVRFSNNPALCNVESIQWRDIVSSDFLSNMSMDFQNHLGSCQKCDPSCPNGSCWGAGEENCQKLTKIICAQQCSGRCRGKSPSDCCHNQCAAGCTGPRESDCLVCRKFRDEATCKDTCPPLMLYNPTTYQMDVNPEGKYSFGATCVKKCPRNYVVTDHGSCVRACGADSYEMEEDGVRKCKKCEGPCRKVCNGIGIGEFKDSLSINATNIKHFKNCTSISGDLHILPVAFRGDSFTHTPPLDPQELDILKTVKEITGFLLIQAWPENRTDLHAFENLEIIRGRTKQHGQFSLAVVSLNITSLGLRSLKEISDGDVIISGNKNLCYANTINWKKLFGTSGQKTKIISNRGENSCKATGQVCHALCSPEGCWGPEPRDCVSCRNVSRGRECVDKCNLLEGEPREFVENSECIQCHPECLPQAMNITCTGRGPDNCIQCAHYIDGPHCVKTCPAGVMGENNTLVWKYADAGHVCHLCHPNCTYGCTGPGLEGCPTNGPKIPSIATGMVGALLLLLVVALGIGLFMRRRHIVRKRTLRRLLQERELVEPLTPSGEAPNQALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDNPHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLVIQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQGFFSSPSTSRTPLLSSLSATSNNSTVACIDRNGLQSCPIKEDSFLQRYSSDPTGALTEDSIDDTFLPVPEYINQSVPKRPAGSVQNPVYHNQPLNPAPSRDPHYQDPHSTAVGNPEYLNTVQPTCVNSTFDSPAHWAQKGSHQISLDNPDYQQDFFPKEAKPNGIFKGSTAENAEYLRVAPQSSEFIGA",
495
- "HIV protease": "PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNF",
496
- "Thrombin": "MAHVRGLQLPGCLALAALCSLVHSQHVFLAPQQARSLLQRVRRANTFLEEVRKGNLERECVEETCSYEEAFEALESSTATDVFWAKYTACETARTPRDKLAACLEGNCAEGLGTNYRGHVNITRSGIECQLWRSRYPHKPEINSTTHPGADLQENFCRNPDSSTTGPWCYTTDPTVRRQECSIPVCGQDQVTVAMTPRSEGSSVNLSPPLEQCVPDRGQQYQLRPVQPFLNQLREIFNMAR",
497
  }
498
  SMIS = {
499
  "Erlotinib": "CCOc1cc2c(cc1OCC)ncnc2Nc1cccc(Cl)c1",
500
- "Imatinib": "Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1",
501
  "Indinavir": "OC[C@@H](NC(=O)[C@@H]1CN(Cc2cccnc2)C[C@H]1NC(=O)[C@@H](CC(C)C)NC(=O)c1cc2ccccc2[nH]1)Cc1ccccc1",
502
  }
503
 
504
  # ─── Init session state ───────────────────────────────────────────
505
- for k,v in [("seq_val",""),("smi_val",""),("bseq_val",""),
506
- ("ssel_val",""),("sseqs_val","")]:
507
  if k not in st.session_state:
508
  st.session_state[k] = v
509
 
510
  # ─── Load models ─────────────────────────────────────────────────
511
  with st.spinner("Loading VeloBind models..."):
512
- fold_models,meta,iso_cal,lig_scaler,train_embs,ad_threshold,target_mu,target_std = load_models()
513
  n_loaded = len(fold_models)
514
 
515
  # ─── HEADER ──────────────────────────────────────────────────────
@@ -542,8 +803,10 @@ st.markdown("""
542
  # ─── PAGE TITLE ───────────────────────────────────────────────────
543
  col_logo, col_title = st.columns([1, 11], gap="small")
544
  with col_logo:
545
- try: st.image("static/logo.png", width=72)
546
- except: pass
 
 
547
  with col_title:
548
  st.markdown("""
549
  <div style="padding-top:4px">
@@ -568,7 +831,7 @@ st.markdown("""
568
  </div>
569
  """, unsafe_allow_html=True)
570
 
571
- # ─── TABS ───────────────────────────────────────────────────────
572
  tab1, tab2, tab3 = st.tabs(["Single Query", "Batch Screening", "Selectivity Profile"])
573
 
574
  # ════════════════ TAB 1: SINGLE ══════════════════════════════════
@@ -593,7 +856,7 @@ with tab1:
593
 
594
  st.markdown('<p style="font-size:10.5px;color:#3D5878;margin:8px 0 4px">Load example:</p>', unsafe_allow_html=True)
595
  ex_cols = st.columns(3)
596
- for i,(name,seq) in enumerate(SEQS.items()):
597
  with ex_cols[i]:
598
  st.markdown('<div class="pill-btn">', unsafe_allow_html=True)
599
  if st.button(name, key=f"seq_ex_{i}"):
@@ -619,7 +882,7 @@ with tab1:
619
 
620
  st.markdown('<p style="font-size:10.5px;color:#3D5878;margin:8px 0 4px">Load example:</p>', unsafe_allow_html=True)
621
  sm_cols = st.columns(3)
622
- for i,(name,smi) in enumerate(SMIS.items()):
623
  with sm_cols[i]:
624
  st.markdown('<div class="pill-btn">', unsafe_allow_html=True)
625
  if st.button(name, key=f"smi_ex_{i}"):
@@ -630,8 +893,10 @@ with tab1:
630
  if st.button("Predict Binding Affinity", key="run_single", type="primary"):
631
  seq = clean_fasta(seq_input)
632
  smi = smi_input.strip()
633
- if not seq: st.error("Please enter a protein sequence.")
634
- elif not smi: st.error("Please enter a SMILES string.")
 
 
635
  else:
636
  t0 = time.time()
637
  with st.spinner("Running ESM-2 embedding..."):
@@ -643,20 +908,27 @@ with tab1:
643
  st.error(f"Ligand error: {err}")
644
  else:
645
  with st.spinner("Running 45-model ensemble..."):
646
- X = assemble(esm_mean,seqfeat,lig,lig_scaler)
647
- pkd,ci_lo,ci_hi = predict_pkd(X,fold_models,meta,iso_cal,target_mu,target_std)
648
  if pkd is None:
649
- import random; random.seed(hash(seq[:20]+smi[:20])%2**31)
650
- pkd=random.uniform(5.5,9.0); ci_lo=pkd-0.8; ci_hi=pkd+0.8
651
- in_domain,ad_dist = check_ad(esm_mean,train_embs,ad_threshold)
652
- elapsed = round(time.time()-t0,1)
 
 
 
653
 
654
  st.markdown("<hr>", unsafe_allow_html=True)
655
- mc1,mc2,mc3,mc4 = st.columns(4)
656
- with mc1: metric_card("Predicted pKd", f"{pkd:.2f}", accent=True)
657
- with mc2: metric_card("95% model interval", f"[{ci_lo:.2f}, {ci_hi:.2f}]")
658
- with mc3: metric_card("Estimated Ki", pkd_to_ki(pkd))
659
- with mc4: ad_badge(in_domain, ad_dist)
 
 
 
 
660
 
661
  st.markdown("""
662
  <div style="background:#111E33;border:1px solid #1C2E48;border-radius:8px;
@@ -674,9 +946,10 @@ with tab1:
674
  SHAP / LightGBM</span>
675
  </div>
676
  """, unsafe_allow_html=True)
677
- fig = xai_chart(smi,pkd)
678
  if fig:
679
- st.pyplot(fig,use_container_width=True); plt.close(fig)
 
680
  st.markdown("</div>", unsafe_allow_html=True)
681
 
682
  st.markdown(f"""
@@ -731,47 +1004,61 @@ with tab2:
731
 
732
  if st.button("Run Batch Screening", key="run_batch", type="primary"):
733
  seq = clean_fasta(batch_seq)
734
- if not seq: st.error("Please enter a protein sequence.")
735
- elif uploaded is None: st.error("Please upload a CSV file.")
 
 
736
  else:
737
  try:
738
  df = pd.read_csv(uploaded)
739
- col = next((c for c in df.columns if c.lower() in
740
- ("smiles","smile","smi","canonical_smiles")), None)
741
- if col is None: st.error("No 'smiles' column found.")
742
  else:
743
  df = df.head(500)
744
- name_col = next((c for c in df.columns if c.lower() in
745
- ("name","compound_name","id","molecule_name")), None)
746
  with st.spinner("Embedding protein..."):
747
  esm_mean = embed_sequence(seq)
748
- seqfeat = seq_features(seq)
749
- in_domain,_ = check_ad(esm_mean,train_embs,ad_threshold)
 
750
  results = []
751
  prog = st.progress(0, text="Screening...")
752
- for idx,row in df.iterrows():
753
- smi = str(row[col]).strip()
 
754
  name = str(row[name_col]).strip() if name_col else ""
755
  try:
756
- lig,err = ligand_features(smi)
757
- if err: continue
758
- X = assemble(esm_mean,seqfeat,lig,lig_scaler)
759
- pkd,ci_lo,ci_hi = predict_pkd(X,fold_models,meta,iso_cal,target_mu,target_std)
 
760
  if pkd is None:
761
- import random; random.seed(hash(smi)%2**31)
762
- pkd=random.uniform(5.0,9.0); ci_lo=pkd-0.8; ci_hi=pkd+0.8
763
- results.append({"Name":name,"SMILES":smi,"pKd":round(pkd,3),
764
- "95% CI":f"[{ci_lo:.2f}, {ci_hi:.2f}]",
765
- "Ki":pkd_to_ki(pkd),"In_domain":in_domain})
766
- except: continue
767
- prog.progress(min(int(len(results)/len(df)*100),100),
768
- text=f"{len(results)}/{len(df)} compounds screened")
 
 
 
 
 
 
 
 
 
 
769
  prog.empty()
770
  if results:
771
- res_df = pd.DataFrame(results).sort_values("pKd",ascending=False)
772
- res_df.insert(0,"Rank",range(1,len(res_df)+1))
773
  st.markdown("<hr>", unsafe_allow_html=True)
774
- rh,rd = st.columns([5,1])
775
  with rh:
776
  st.markdown(f"""<div style="font-family:'Source Serif 4',serif;
777
  font-size:18px;font-weight:600;color:#DCE8F8">
@@ -780,9 +1067,9 @@ with tab2:
780
  font-family:monospace">({len(res_df)} compounds)</span>
781
  </div>""", unsafe_allow_html=True)
782
  with rd:
783
- st.download_button("Download CSV",res_df.to_csv(index=False),
784
- "velobind_results.csv","text/csv")
785
- st.dataframe(res_df,use_container_width=True,hide_index=True)
786
  else:
787
  st.warning("No valid compounds processed.")
788
  except Exception as e:
@@ -826,46 +1113,53 @@ with tab3:
826
  if st.button("Run Selectivity Profile", key="run_sel", type="primary"):
827
  smi = sel_smi.strip()
828
  seqs_raw = sel_seqs.strip()
829
- if not smi: st.error("Please enter a SMILES string.")
830
- elif not seqs_raw: st.error("Please enter at least one sequence.")
 
 
831
  else:
832
  seqs_list = [clean_fasta(s) for s in seqs_raw.split("\n")
833
  if s.strip() and not s.strip().startswith(">")][:10]
834
- lig,err = ligand_features(smi)
835
- if err: st.error(f"Ligand error: {err}")
 
836
  else:
837
  results = []
838
  for seq in seqs_list:
839
  with st.spinner(f"Processing target {len(results)+1}/{len(seqs_list)}..."):
840
  try:
841
  esm_mean = embed_sequence(seq)
842
- seqfeat = seq_features(seq)
843
- X = assemble(esm_mean,seqfeat,lig,lig_scaler)
844
- pkd,ci_lo,ci_hi = predict_pkd(X,fold_models,meta,iso_cal,target_mu,target_std)
845
  if pkd is None:
846
- import random; random.seed(hash(seq[:20])%2**31)
847
- pkd=random.uniform(4.5,9.0); ci_lo=pkd-0.8; ci_hi=pkd+0.8
848
- in_domain,_ = check_ad(esm_mean,train_embs,ad_threshold)
849
- results.append({"seq":seq,"pkd":pkd,"ci_lo":ci_lo,
850
- "ci_hi":ci_hi,"ki":pkd_to_ki(pkd),
851
- "in_domain":in_domain})
852
- except: continue
 
 
 
 
853
 
854
  if results:
855
- results.sort(key=lambda r:r["pkd"],reverse=True)
856
  st.markdown("<hr>", unsafe_allow_html=True)
857
  st.markdown("""<div style="font-family:'Source Serif 4',serif;
858
  font-size:18px;font-weight:600;color:#DCE8F8;margin-bottom:14px">
859
  Selectivity profile</div>""", unsafe_allow_html=True)
860
- palette=["#C9933A","#2ABFB3","#8B5CF6","#E05252","#34D399"]
861
  scols = st.columns(2)
862
- for i,r in enumerate(results):
863
- ca=palette[i%len(palette)]
864
- with scols[i%2]:
865
  if r["in_domain"]:
866
- ad_txt=f'<span style="background:rgba(42,191,179,0.12);color:#2ABFB3;border:1px solid rgba(42,191,179,0.22);border-radius:4px;padding:2px 7px;font-size:10px">In domain</span>'
867
  else:
868
- ad_txt=f'<span style="background:rgba(224,82,82,0.10);color:#E05252;border:1px solid rgba(224,82,82,0.22);border-radius:4px;padding:2px 7px;font-size:10px">Out of domain</span>'
869
  st.markdown(f"""
870
  <div style="background:#162540;border:1px solid #1C2E48;border-radius:8px;
871
  padding:13px 16px;display:flex;align-items:center;gap:13px;
 
1
  import warnings
2
  warnings.filterwarnings("ignore")
3
 
4
+ import os
5
+ import time
6
+ import base64
7
  from pathlib import Path
8
  from io import BytesIO
9
+ from typing import Any, Dict, Optional, Tuple, List
10
+
11
  import numpy as np
12
  import pandas as pd
13
  import torch
 
18
 
19
  import streamlit as st
20
 
21
+ # optional rdkit logging mute
22
  try:
23
  from rdkit import RDLogger
24
  RDLogger.DisableLog("rdApp.*")
25
+ except Exception:
26
  pass
27
 
28
+ # light-weight logging for debugging (doesn't print unless configured)
29
+ import logging
30
+ logger = logging.getLogger("velobind")
31
+ # logger.setLevel(logging.INFO) # enable if needed for debugging
32
+
33
  # ─── Page config ────────────────────────────────────────────────
34
  st.set_page_config(
35
  page_title="VeloBind",
 
231
  </style>
232
  """, unsafe_allow_html=True)
233
 
234
+ # ─── Constants / paths ─────────────────────────────────────────
235
+ MODEL_REPO = "ym59/velobind-models"
236
+ MODEL_DIR = Path("output/models")
237
+ PREP_DIR = Path("output/preprocessors")
238
+ AD_EMB_PATH = Path("output/ad_train_embeddings.npy")
239
+
240
+ # Attempt to load descriptor functions once to avoid repeated sorting
241
+ _DESC_FNS: Optional[List[Any]] = None
242
+ try:
243
+ from rdkit.Chem import Descriptors
244
+ _DESC_FNS = [v for k, v in sorted(Descriptors.descList)][:217]
245
+ except Exception:
246
+ _DESC_FNS = None
247
+
248
+
249
  # ─── Model loading ───────────────────────────────────────────────
250
  @st.cache_resource(show_spinner=False)
251
+ def load_models() -> Tuple[Dict[str, Any], Optional[Any], Optional[Any], Optional[Any], Optional[np.ndarray], float, float, float]:
252
+ """
253
+ Load the ensemble fold models, metadata calibrator, isotonic calibrator,
254
+ ligand scaler, AD embeddings and target scaler statistics.
255
+ Returns the same tuple structure as the original implementation.
256
+ """
257
  try:
258
  import joblib
259
+ fold_models: Dict[str, Any] = {}
260
  meta = iso_cal = lig_scaler = None
261
  train_embs = None
262
  ad_threshold = 1.4
263
  target_mu, target_std = 6.361, 1.855
264
+
265
+ # Ensure model directory exists: if not, attempt to download snapshot
266
+ if not MODEL_DIR.exists() or not any(MODEL_DIR.glob("*.pkl")):
267
+ try:
268
+ from huggingface_hub import snapshot_download
269
+ snapshot_download(repo_id=MODEL_REPO, repo_type="dataset", local_dir=".")
270
+ except Exception as e:
271
+ # fallback: continue, perhaps running a reduced local demo
272
+ logger.debug("snapshot_download failed: %s", e)
273
+
274
+ # Load models if present
275
  if MODEL_DIR.exists():
276
+ seeds = [42, 123, 456]
277
+ n_folds = 5
278
+ mtypes = ["lgbm", "cb", "xgb"]
279
  for seed in seeds:
280
  for mt in mtypes:
281
  for fold in range(n_folds):
282
+ key = f"s{seed}_{mt}_f{fold}"
283
+ p = MODEL_DIR / f"fold_model_{key}.pkl"
284
+ if p.exists():
285
+ try:
286
+ fold_models[key] = joblib.load(p)
287
+ except Exception:
288
+ logger.debug("Failed to load %s", p)
289
+
290
+ # optional meta and isotonic calibrator
291
+ for fname, attr in [("meta_all_casf16.pkl", "meta"), ("isotonic_calibrator.pkl", "iso")]:
292
  p = MODEL_DIR / fname
293
  if p.exists():
294
+ try:
295
+ obj = joblib.load(p)
296
+ if attr == "meta":
297
+ meta = obj
298
+ else:
299
+ iso_cal = obj
300
+ except Exception:
301
+ logger.debug("Failed to load %s", p)
302
+
303
  ts = MODEL_DIR / "target_scaler.pkl"
304
  if ts.exists():
305
+ try:
306
+ t = joblib.load(ts)
307
+ # many scalers store attributes differently; handle common ones
308
+ if hasattr(t, "mu") and hasattr(t, "std"):
309
+ target_mu = float(t.mu)
310
+ target_std = float(t.std)
311
+ elif hasattr(t, "mean_") and hasattr(t, "scale_"):
312
+ target_mu = float(t.mean_)
313
+ target_std = float(t.scale_)
314
+ except Exception:
315
+ logger.debug("Failed to read target scaler %s", ts)
316
+
317
+ # load ligand scaler if present
318
  if PREP_DIR.exists():
319
+ ls = PREP_DIR / "ligand_scaler.pkl"
320
+ if ls.exists():
321
+ try:
322
+ import joblib as _job
323
+ lig_scaler = _job.load(ls)
324
+ except Exception:
325
+ logger.debug("Failed to load ligand scaler %s", ls)
326
+
327
+ # applicability domain embeddings
328
+ if AD_EMB_PATH.exists():
329
+ try:
330
+ train_embs = np.load(str(AD_EMB_PATH))
331
+ at = Path("output/ad_threshold.npy")
332
+ if at.exists():
333
+ ad_threshold = float(np.load(str(at)))
334
+ except Exception:
335
+ logger.debug("Failed to load AD embeddings")
336
+
337
+ return fold_models, meta, iso_cal, lig_scaler, train_embs, ad_threshold, target_mu, target_std
338
  except Exception as e:
339
+ logger.debug("load_models top-level exception: %s", e)
340
+ return {}, None, None, None, None, 1.4, 6.361, 1.855
341
+
342
 
343
  @st.cache_resource(show_spinner=False)
344
  def load_esm():
345
+ """
346
+ Load ESM tokenizer and model. Kept identical to original but wrapped.
347
+ """
348
  from transformers import AutoTokenizer, EsmModel
349
+ tok = AutoTokenizer.from_pretrained("facebook/esm2_t12_35M_UR50D")
350
+ model = EsmModel.from_pretrained("facebook/esm2_t12_35M_UR50D")
351
+ model.eval()
352
+ return tok, model
353
+
354
 
355
  @st.cache_data(show_spinner=False)
356
+ def embed_sequence(seq: str) -> np.ndarray:
357
+ """
358
+ Embed a protein sequence using ESM-2 and return concatenated mean vectors.
359
+ This retains original chunking behavior but is slightly more robust to
360
+ tokenizer/model changes.
361
+ """
362
  tok, model = load_esm()
363
  MAX, HALF = 1022, 511
364
+
365
+ def _chunk(s: str) -> np.ndarray:
366
+ enc = tok(s, return_tensors="pt", truncation=False)
367
  with torch.no_grad():
368
+ out = model(**enc, output_hidden_states=True)
369
+ hs = out.hidden_states
370
+ mask = enc["attention_mask"].unsqueeze(-1).float()
371
+ mvecs = []
372
+ for li in [8, 10, 11]:
373
+ h = hs[li]
374
+ mv = (h * mask).sum(1) / mask.sum(1).clamp(min=1e-9)
375
+ mvecs.append(mv.squeeze(0).cpu().numpy())
376
  return np.concatenate(mvecs)
 
 
377
 
378
+ seq = seq.strip()
379
+ if len(seq) <= MAX:
380
+ return _chunk(seq)
381
+ # preserve original behavior of averaging first/last halves
382
+ return (_chunk(seq[:HALF]) + _chunk(seq[-HALF:])) / 2.0
383
+
384
+
385
def seq_features(seq: str) -> np.ndarray:
    """
    Build the hand-crafted descriptor vector for a protein sequence.

    The vector is the concatenation of three groups, in this order:

    1. global values from ``Bio.SeqUtils.ProtParam.ProteinAnalysis``
       (molecular weight, aromaticity, instability index, isoelectric point,
       GRAVY, secondary-structure fractions, amino-acid percentages);
       replaced by 28 zeros if that computation raises,
    2. 400 dipeptide frequencies over the 20 standard residues,
    3. CTD / conjoint-triad / QSO / AAindex values from
       ``src.features.protein``; replaced by 63 + 343 + 60 + 25 zeros if the
       import or any of the calls raises.

    The input is stripped and upper-cased first. Returns a float32 array.
    """
    seq = seq.strip().upper()

    # Group 1: ProtParam descriptors (zeros when Biopython is unavailable
    # or rejects the sequence).
    try:
        from Bio.SeqUtils.ProtParam import ProteinAnalysis
        analysis = ProteinAnalysis(seq)
        pp = [
            analysis.molecular_weight(),
            analysis.aromaticity(),
            analysis.instability_index(),
            analysis.isoelectric_point(),
            analysis.gravy(),
        ]
        pp.extend(analysis.secondary_structure_fraction())
        pp.extend(analysis.amino_acids_percent.values())
    except Exception:
        pp = [0.0] * 28

    # Group 2: dipeptide composition. Pairs containing a non-standard residue
    # are ignored; frequencies are relative to the number of counted pairs.
    alphabet = "ACDEFGHIKLMNPQRSTVWY"
    pair_counts = dict.fromkeys((a + b for a in alphabet for b in alphabet), 0)
    for left, right in zip(seq, seq[1:]):
        pair = left + right
        if pair in pair_counts:
            pair_counts[pair] += 1
    counted = max(1, sum(pair_counts.values()))
    dpc = [count / counted for count in pair_counts.values()]

    # Group 3: optional project-specific encodings.
    try:
        from src.features.protein import _ctd, _conjoint_triad, _qso, _aaindex_encoding
        extra = []
        for encoder in (_ctd, _conjoint_triad, _qso, _aaindex_encoding):
            extra += list(encoder(seq))
    except Exception:
        extra = [0.0] * (63 + 343 + 60 + 25)

    return np.array(pp + dpc + extra, dtype=np.float32)
427
 
428
+
429
def ligand_features(smiles: str) -> Tuple[Optional[Dict[str, np.ndarray]], Optional[str]]:
    """
    Compute RDKit fingerprints and descriptors for one SMILES string.

    Returns ``(features, None)`` on success and ``(None, message)`` on failure
    ("Invalid SMILES" when RDKit cannot parse the input, otherwise the text of
    the exception that was raised).

    Keys of the feature dictionary:
      ecfp2 / ecfp / ecfp6 -- 1024-bit Morgan fingerprints, radius 1 / 2 / 3
      fcfp                 -- 1024-bit feature-based Morgan, radius 2
      maccs                -- 167-element MACCS keys
      ap, torsion          -- 2048-element hashed atom-pair / torsion vectors
      estate               -- 79 E-state values (zeros if unavailable)
      phys                 -- one float64 per descriptor function
    """
    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem, MACCSkeys, Descriptors, DataStructs
        from rdkit.Chem.rdMolDescriptors import (
            GetHashedAtomPairFingerprint,
            GetHashedTopologicalTorsionFingerprint,
        )

        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None, "Invalid SMILES"

        def to_array(fingerprint, size):
            # Copy an RDKit fingerprint object into a float32 vector.
            arr = np.zeros(size, dtype=np.float32)
            DataStructs.ConvertToNumpyArray(fingerprint, arr)
            return arr

        def morgan(radius, **options):
            bits = AllChem.GetMorganFingerprintAsBitVect(mol, radius, 1024, **options)
            return to_array(bits, 1024)

        features = {
            "ecfp2": morgan(1),
            "ecfp": morgan(2),
            "ecfp6": morgan(3),
            "fcfp": morgan(2, useFeatures=True),
            "maccs": to_array(MACCSkeys.GenMACCSKeys(mol), 167),
            "ap": to_array(GetHashedAtomPairFingerprint(mol, 2048), 2048),
            "torsion": to_array(GetHashedTopologicalTorsionFingerprint(mol, 2048), 2048),
        }

        # E-state fingerprint: clipped, NaN-free, truncated or zero-padded to 79.
        try:
            from rdkit.Chem.EState.Fingerprinter import FingerprintMol
            raw = FingerprintMol(mol)[0].astype(np.float32)
            estate = np.nan_to_num(np.clip(raw, -1e6, 1e6))[:79]
            if len(estate) < 79:
                estate = np.pad(estate, (0, 79 - len(estate)))
        except Exception:
            estate = np.zeros(79, dtype=np.float32)
        features["estate"] = estate

        # Physico-chemical descriptors: prefer the module-level list, else the
        # first 217 RDKit descriptors in sorted order.
        desc_fns = _DESC_FNS
        if desc_fns is None:
            desc_fns = [fn for _, fn in sorted(Descriptors.descList)][:217]

        def descriptor_value(fn):
            # Failing, non-finite or absurdly large descriptors become 0.0.
            try:
                value = float(fn(mol))
            except Exception:
                return 0.0
            if np.isfinite(value) and abs(value) <= 1e10:
                return value
            return 0.0

        features["phys"] = np.array([descriptor_value(fn) for fn in desc_fns], dtype=np.float64)
        return features, None
    except Exception as e:
        logger.debug("ligand_features error: %s", e)
        return None, str(e)
502
+
503
+
504
def assemble(esm_mean: np.ndarray, seqfeat: np.ndarray, lig: Dict[str, np.ndarray], lig_scaler: Any) -> np.ndarray:
    """
    Concatenate protein and ligand features into one float32 model input.

    Order of the output: last 480 values of ``esm_mean``, ``seqfeat``, then
    the ligand groups ecfp, ecfp2, ecfp6, fcfp, estate, maccs, ap, torsion,
    phys.

    Args:
        esm_mean: protein embedding; only its last 480 values are used.
        seqfeat: sequence descriptor vector.
        lig: feature dictionary as returned by ``ligand_features``.
        lig_scaler: optional object with ``transform``; applied to the
            concatenated ``[estate, phys]`` vector. ``None`` disables scaling.

    If the scaler raises, the unscaled values are used and a warning is
    logged, because models fed unscaled inputs may predict poorly.
    """
    esm_last = esm_mean[-480:]

    # Default: unscaled E-state and descriptor values.
    es = lig["estate"]
    ph = lig["phys"].astype(np.float32)

    if lig_scaler is not None:
        try:
            n_estate = len(lig["estate"])
            combined = np.concatenate([lig["estate"], lig["phys"]])
            scaled = lig_scaler.transform(combined.reshape(1, -1)).ravel()
            # Split back at the E-state length instead of a hard-coded 79.
            es, ph = (
                scaled[:n_estate].astype(np.float32),
                scaled[n_estate:].astype(np.float32),
            )
        except Exception:
            logger.warning(
                "ligand scaler failed; using unscaled estate/phys features",
                exc_info=True,
            )

    parts = [
        esm_last,
        seqfeat,
        lig["ecfp"],
        lig["ecfp2"],
        lig["ecfp6"],
        lig["fcfp"],
        es,
        lig["maccs"],
        lig["ap"],
        lig["torsion"],
        ph,
    ]
    return np.concatenate(parts).astype(np.float32)
538
+
539
+
540
def predict_pkd(X: np.ndarray, fold_models: Dict[str, Any], meta: Any, iso_cal: Any, target_mu: float, target_std: float
                ) -> Tuple[Optional[float], Optional[float], Optional[float]]:
    """
    Predict pKd with the fold ensemble, optional meta-learner and calibrator.

    For every (seed, model type) pair the available fold predictions are
    averaged and mapped back to the target scale with
    ``mean * target_std + target_mu``; each pair fills one column of a
    1 x 9 matrix. The meta-learner (if given) maps that matrix to a single
    value; otherwise, or if it raises, the mean of the filled columns is used.
    The isotonic calibrator (if given) is applied last.

    Args:
        X: feature vector for one protein-ligand pair.
        fold_models: mapping ``"s{seed}_{type}_f{fold}"`` -> fitted model.
        meta: optional stacking model with ``predict``.
        iso_cal: optional calibrator with ``predict``.
        target_mu, target_std: statistics used to de-normalise predictions.

    Returns:
        ``(pred, lo, hi)`` where ``lo``/``hi`` are ``pred -/+ 1.96 * spread``
        and ``spread`` is the standard deviation of the filled columns
        (0.5 when only one column is filled). Returns ``(None, None, None)``
        when no model is loaded or none of them produced a prediction, so
        callers never receive a value derived from an all-zero matrix.
    """
    if not fold_models:
        return None, None, None

    seeds, n_folds, mtypes = [42, 123, 456], 5, ["lgbm", "cb", "xgb"]
    row = X.reshape(1, -1)  # loop-invariant model input
    mat = np.zeros((1, len(seeds) * len(mtypes)))
    filled = np.zeros(mat.shape[1], dtype=bool)  # columns with a real value

    col = 0
    for seed in seeds:
        for mt in mtypes:
            preds = []
            for f in range(n_folds):
                key = f"s{seed}_{mt}_f{f}"
                model = fold_models.get(key)
                if model is None:
                    continue
                try:
                    preds.append(model.predict(row)[0])
                except Exception:
                    logger.debug("predict failed for %s", key, exc_info=True)
            if preds:
                # convert to target scale
                mat[0, col] = np.mean(preds) * target_std + target_mu
                filled[col] = True
            col += 1

    available = mat[0, filled]
    if available.size == 0:
        # Nothing usable: report "no prediction" rather than a made-up value.
        return None, None, None

    ensemble_mean = float(available.mean())
    if meta is not None:
        try:
            pred = float(meta.predict(mat)[0])
        except Exception:
            logger.debug("meta model failed; using ensemble mean", exc_info=True)
            pred = ensemble_mean
    else:
        pred = ensemble_mean

    # isotonic calibrator if available
    if iso_cal is not None:
        try:
            pred = float(iso_cal.predict([pred])[0])
        except Exception:
            logger.debug("isotonic calibration failed", exc_info=True)

    spread = float(available.std()) if available.size > 1 else 0.5
    return pred, pred - 1.96 * spread, pred + 1.96 * spread
587
+
588
+
589
def check_ad(esm_mean: np.ndarray, train_embs: Optional[np.ndarray], ad_threshold: float) -> Tuple[bool, float]:
    """
    Applicability-domain check based on cosine distance to training embeddings.

    The last 480 values of ``esm_mean`` are compared with the first 2000 rows
    of ``train_embs``; the score is the mean of the five smallest cosine
    distances. Returns ``(score <= ad_threshold, score)``.

    When ``train_embs`` is ``None``, or anything in the computation raises,
    the query is reported as in-domain with distance 0.0.
    """
    if train_embs is None:
        return True, 0.0

    try:
        from sklearn.metrics.pairwise import cosine_distances

        query = esm_mean[-480:].reshape(1, -1)
        reference = train_embs[:2000]  # capped for speed
        distances = cosine_distances(query, reference)[0]
        nearest_five = np.sort(distances)[:5]
        score = float(nearest_five.mean())
        return score <= ad_threshold, score
    except Exception as e:
        logger.debug("check_ad error: %s", e)
        return True, 0.0
606
+
607
+
608
def clean_fasta(s: str) -> str:
    """
    Reduce pasted FASTA or raw sequence text to a bare residue string.

    If the text (after stripping) begins with ``>``, every header line is
    dropped and the remaining lines are concatenated. In all cases every
    whitespace character is removed, including tabs, carriage returns and
    spaces inside a line, so Windows line endings or column-formatted
    sequences do not leak into the result.
    """
    lines = s.strip().splitlines()
    if lines and lines[0].startswith(">"):
        # Headers may be indented in hand-edited input; compare after lstrip.
        lines = [ln for ln in lines if not ln.lstrip().startswith(">")]
    # str.split() with no argument splits on any whitespace run.
    return "".join("".join(lines).split())
613
 
 
 
 
 
 
 
614
 
615
def pkd_to_ki(pkd: float) -> str:
    """
    Format the molar concentration ``10 ** -pkd`` with a readable unit.

    The unit is picked from pM / nM / uM by the first threshold the value
    falls below (1e-9, 1e-6, 1e-3 M); anything at or above 1e-3 M is shown
    in mM. Values are printed with one decimal place.
    """
    molar = 10 ** (-pkd)
    # (exclusive upper bound in M, multiplier to the unit, unit label)
    scales = (
        (1e-9, 1e12, "pM"),
        (1e-6, 1e9, "nM"),
        (1e-3, 1e6, "uM"),
    )
    for upper, factor, unit in scales:
        if molar < upper:
            return f"{molar * factor:.1f} {unit}"
    return f"{molar * 1e3:.1f} mM"
627
+
628
+
629
def xai_chart(smiles: str, pkd: float):
    """
    Draw the waterfall-style contribution chart for one prediction.

    Eight contributions are computed from fixed heuristic formulas over RDKit
    descriptors (heavy-atom count, LogP, H-bond donors/acceptors, TPSA,
    aromatic rings, rotatable bonds) plus a term proportional to the distance
    of ``pkd`` from the 6.36 baseline. They are stacked from the baseline as
    horizontal bars, largest magnitude first.

    Returns the matplotlib ``Figure``, or ``None`` when the SMILES cannot be
    parsed or anything raises. A figure that was already created when the
    failure happened is closed so it does not accumulate in pyplot's registry.
    The caller is responsible for closing a returned figure.
    """
    baseline = 6.36
    fig = None
    try:
        from rdkit import Chem
        from rdkit.Chem import Descriptors
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return None

        features = {
            "MW / atom count": +0.12 * min((mol.GetNumHeavyAtoms() - 25) / 20, 1.0),
            "LogP (hydrophobicity)": +0.18 * min((Descriptors.MolLogP(mol) - 2) / 3, 1.0),
            "H-bond donors": -0.09 * max(Descriptors.NumHDonors(mol) - 2, 0),
            "H-bond acceptors": +0.11 * min(Descriptors.NumHAcceptors(mol) / 5, 1.0),
            "TPSA (polarity)": -0.10 * max((Descriptors.TPSA(mol) - 70) / 50, 0),
            "Aromatic rings": +0.15 * min(Descriptors.NumAromaticRings(mol) / 3, 1.0),
            "Rotatable bonds": -0.07 * max((Descriptors.NumRotatableBonds(mol) - 5) / 5, 0),
            "ESM-2 protein repr": (pkd - baseline) * 0.4,
        }

        # Largest absolute contribution first.
        items = sorted(features.items(), key=lambda x: abs(x[1]), reverse=True)[:8]
        labels = [i[0] for i in items]
        values = [i[1] for i in items]

        # Each bar starts where the previous one ended.
        running = baseline
        lefts, widths, colors, rvals = [], [], [], []
        for v in values:
            lefts.append(min(running, running + v))
            widths.append(abs(v))
            colors.append("#C9933A" if v >= 0 else "#E05252")
            running += v
            rvals.append(running)

        fig, ax = plt.subplots(figsize=(7.2, 3.8))
        fig.patch.set_facecolor("#0D1627")
        ax.set_facecolor("#0D1627")
        ax.barh(range(len(labels)), widths, left=lefts, color=colors, height=0.50, alpha=0.90, edgecolor="none")
        ax.axvline(baseline, color="#243858", lw=1.1, ls="--", alpha=0.9)
        ax.axvline(pkd, color="#C9933A", lw=1.5, ls="-", alpha=0.9)

        # Signed value label just past the end of each bar.
        for i, (rv, v) in enumerate(zip(rvals, values)):
            sign = "+" if v >= 0 else ""
            ax.text(rv + 0.012 * (1 if v >= 0 else -1), i, f"{sign}{v:.2f}", va="center",
                    ha="left" if v >= 0 else "right", fontsize=8.5, color="#B8CCE0", fontfamily="monospace")
        ax.set_yticks(range(len(labels)))
        ax.set_yticklabels(labels, fontsize=9, color="#7A9ABE")
        ax.set_xlabel("pKd contribution", fontsize=9, color="#3D5878", labelpad=7)
        ax.tick_params(axis="x", colors="#243858", labelsize=8.5, labelcolor="#7A9ABE")
        ax.tick_params(axis="y", length=0)
        for sp in ax.spines.values():
            sp.set_visible(False)
        ax.grid(axis="x", color="#162035", lw=0.7, alpha=0.9)
        pos_p = mpatches.Patch(color="#C9933A", label="Increases pKd")
        neg_p = mpatches.Patch(color="#E05252", label="Decreases pKd")
        ax.legend(handles=[pos_p, neg_p], loc="lower right", fontsize=8,
                  facecolor="#0D1627", edgecolor="#1C2E48", labelcolor="#7A9ABE", framealpha=0.95)
        ax.text(pkd, -0.9, f" pKd = {pkd:.2f}", color="#C9933A", fontsize=8.5, va="top", fontfamily="monospace")
        ax.text(baseline, -0.9, f" base = {baseline:.2f}", color="#3D5878", fontsize=8, va="top", fontfamily="monospace")
        plt.tight_layout(pad=0.6)
        return fig
    except Exception as e:
        logger.debug("xai_chart error: %s", e)
        if fig is not None:
            # Release the half-built figure; nobody else holds a reference.
            plt.close(fig)
        return None
695
+
696
+
697
+ # ─── HTML helpers ───────────────────────────────────────────────
698
+ def metric_card(label: str, value: str, accent: bool = False):
699
+ border = "rgba(201,147,58,0.35)" if accent else "#1C2E48"
700
+ bg = "linear-gradient(135deg,#111E33 0%,rgba(201,147,58,0.04) 100%)" if accent else "#111E33"
701
+ vc = "#C9933A" if accent else "#DCE8F8"
702
  return st.markdown(f"""
703
  <div style="background:{bg};border:1px solid {border};border-radius:8px;
704
  padding:17px 14px;text-align:center;box-shadow:0 1px 5px rgba(0,0,0,0.4)">
 
708
  font-family:'Outfit',sans-serif">{label}</div>
709
  </div>""", unsafe_allow_html=True)
710
 
711
+
712
+ def ad_badge(in_domain: bool, dist: float):
713
  if in_domain:
714
+ c, bc = "#2ABFB3", "rgba(42,191,179,0.12)"
715
+ bc2 = "rgba(42,191,179,0.22)"
716
+ txt = "IN DOMAIN"
717
  else:
718
+ c, bc = "#E05252", "rgba(224,82,82,0.10)"
719
+ bc2 = "rgba(224,82,82,0.22)"
720
+ txt = "OUT OF DOMAIN"
721
  return st.markdown(f"""
722
  <div style="background:#111E33;border:1px solid #1C2E48;border-radius:8px;
723
  padding:17px 14px;text-align:center;box-shadow:0 1px 5px rgba(0,0,0,0.4)">
 
732
  font-family:'Outfit',sans-serif;margin-top:5px">Applicability domain</div>
733
  </div>""", unsafe_allow_html=True)
734
 
735
+
736
+ def card_wrap(content_fn, head: str, sub: Optional[str] = None):
737
  st.markdown(f"""
738
  <div style="background:#111E33;border:1px solid #1C2E48;border-radius:8px;
739
  padding:16px 18px 4px 18px;box-shadow:0 1px 4px rgba(0,0,0,0.4)">
 
743
  {f'<span style="font-family:monospace;font-size:10px;color:#3D5878;margin-left:6px;font-weight:400">{sub}</span>' if sub else ''}
744
  </div>
745
  </div>""", unsafe_allow_html=True)
 
746
  st.markdown("""<div style="background:#111E33;border:1px solid #1C2E48;border-top:none;
747
  border-radius:0 0 8px 8px;padding:0 18px 16px;margin-top:-4px;
748
  box-shadow:0 2px 6px rgba(0,0,0,0.3)">""", unsafe_allow_html=True)
749
  content_fn()
750
  st.markdown("</div>", unsafe_allow_html=True)
751
 
752
+
753
+ # ─── Example data ───────────────────────────────────────────────
754
  SEQS = {
755
  "EGFR kinase": "MRPSGTAGAALLALLAALCPASRALEEKKVCQGTSNKLTQLGTFEDHFLSLQRMFNNCEVVLGNLEITYVQRNYDLSFLKTIQEVAGYVLIALNTVERIPLENLQIIRGNMYYENSYALAVLSNYDANKTGLKELPMRNLQEILHGAVRFSNNPALCNVESIQWRDIVSSDFLSNMSMDFQNHLGSCQKCDPSCPNGSCWGAGEENCQKLTKIICAQQCSGRCRGKSPSDCCHNQCAAGCTGPRESDCLVCRKFRDEATCKDTCPPLMLYNPTTYQMDVNPEGKYSFGATCVKKCPRNYVVTDHGSCVRACGADSYEMEEDGVRKCKKCEGPCRKVCNGIGIGEFKDSLSINATNIKHFKNCTSISGDLHILPVAFRGDSFTHTPPLDPQELDILKTVKEITGFLLIQAWPENRTDLHAFENLEIIRGRTKQHGQFSLAVVSLNITSLGLRSLKEISDGDVIISGNKNLCYANTINWKKLFGTSGQKTKIISNRGENSCKATGQVCHALCSPEGCWGPEPRDCVSCRNVSRGRECVDKCNLLEGEPREFVENSECIQCHPECLPQAMNITCTGRGPDNCIQCAHYIDGPHCVKTCPAGVMGENNTLVWKYADAGHVCHLCHPNCTYGCTGPGLEGCPTNGPKIPSIATGMVGALLLLLVVALGIGLFMRRRHIVRKRTLRRLLQERELVEPLTPSGEAPNQALLRILKETEFKKIKVLGSGAFGTVYKGLWIPEGEKVKIPVAIKELREATSPKANKEILDEAYVMASVDNPHVCRLLGICLTSTVQLITQLMPFGCLLDYVREHKDNIGSQYLLNWCVQIAKGMNYLEDRRLVHRDLAARNVLVKTPQHVKITDFGLAKLLGAEEKEYHAEGGKVPIKWMALESILHRIYTHQSDVWSYGVTVWELMTFGSKPYDGIPASEISSILEKGERLPQPPICTIDVYMIMVKCWMIDADSRPKFRELIIEFSKMARDPQRYLVIQGDERMHLPSPTDSNFYRALMDEEDMDDVVDADEYLIPQQGFFSSPSTSRTPLLSSLSATSNNSTVACIDRNGLQSCPIKEDSFLQRYSSDPTGALTEDSIDDTFLPVPEYINQSVPKRPAGSVQNPVYHNQPLNPAPSRDPHYQDPHSTAVGNPEYLNTVQPTCVNSTFDSPAHWAQKGSHQISLDNPDYQQDFFPKEAKPNGIFKGSTAENAEYLRVAPQSSEFIGA",
756
+ "HIV protease": "PQITLWQRPLVTIKIGGQLKEALLDTGADDTVLEEMNLPGRWKPKMIGGIGGFIKVRQYDQILIEICGHKAIGTVLVGPTPVNIIGRNLLTQIGCTLNF",
757
+ "Thrombin": "MAHVRGLQLPGCLALAALCSLVHSQHVFLAPQQARSLLQRVRRANTFLEEVRKGNLERECVEETCSYEEAFEALESSTATDVFWAKYTACETARTPRDKLAACLEGNCAEGLGTNYRGHVNITRSGIECQLWRSRYPHKPEINSTTHPGADLQENFCRNPDSSTTGPWCYTTDPTVRRQECSIPVCGQDQVTVAMTPRSEGSSVNLSPPLEQCVPDRGQQYQLRPVQPFLNQLREIFNMAR",
758
  }
759
  SMIS = {
760
  "Erlotinib": "CCOc1cc2c(cc1OCC)ncnc2Nc1cccc(Cl)c1",
761
+ "Imatinib": "Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nccc(-c2cccnc2)n1",
762
  "Indinavir": "OC[C@@H](NC(=O)[C@@H]1CN(Cc2cccnc2)C[C@H]1NC(=O)[C@@H](CC(C)C)NC(=O)c1cc2ccccc2[nH]1)Cc1ccccc1",
763
  }
764
 
765
  # ─── Init session state ───────────────────────────────────────────
766
+ for k, v in [("seq_val", ""), ("smi_val", ""), ("bseq_val", ""),
767
+ ("ssel_val", ""), ("sseqs_val", "")]:
768
  if k not in st.session_state:
769
  st.session_state[k] = v
770
 
771
  # ─── Load models ─────────────────────────────────────────────────
772
  with st.spinner("Loading VeloBind models..."):
773
+ fold_models, meta, iso_cal, lig_scaler, train_embs, ad_threshold, target_mu, target_std = load_models()
774
  n_loaded = len(fold_models)
775
 
776
  # ─── HEADER ──────────────────────────────────────────────────────
 
803
  # ─── PAGE TITLE ───────────────────────────────────────────────────
804
  col_logo, col_title = st.columns([1, 11], gap="small")
805
  with col_logo:
806
+ try:
807
+ st.image("static/logo.png", width=72)
808
+ except Exception:
809
+ pass
810
  with col_title:
811
  st.markdown("""
812
  <div style="padding-top:4px">
 
831
  </div>
832
  """, unsafe_allow_html=True)
833
 
834
+ # ─── TABS ───────────────────────────────────────────────────────
835
  tab1, tab2, tab3 = st.tabs(["Single Query", "Batch Screening", "Selectivity Profile"])
836
 
837
  # ════════════════ TAB 1: SINGLE ══════════════════════════════════
 
856
 
857
  st.markdown('<p style="font-size:10.5px;color:#3D5878;margin:8px 0 4px">Load example:</p>', unsafe_allow_html=True)
858
  ex_cols = st.columns(3)
859
+ for i, (name, seq) in enumerate(SEQS.items()):
860
  with ex_cols[i]:
861
  st.markdown('<div class="pill-btn">', unsafe_allow_html=True)
862
  if st.button(name, key=f"seq_ex_{i}"):
 
882
 
883
  st.markdown('<p style="font-size:10.5px;color:#3D5878;margin:8px 0 4px">Load example:</p>', unsafe_allow_html=True)
884
  sm_cols = st.columns(3)
885
+ for i, (name, smi) in enumerate(SMIS.items()):
886
  with sm_cols[i]:
887
  st.markdown('<div class="pill-btn">', unsafe_allow_html=True)
888
  if st.button(name, key=f"smi_ex_{i}"):
 
893
  if st.button("Predict Binding Affinity", key="run_single", type="primary"):
894
  seq = clean_fasta(seq_input)
895
  smi = smi_input.strip()
896
+ if not seq:
897
+ st.error("Please enter a protein sequence.")
898
+ elif not smi:
899
+ st.error("Please enter a SMILES string.")
900
  else:
901
  t0 = time.time()
902
  with st.spinner("Running ESM-2 embedding..."):
 
908
  st.error(f"Ligand error: {err}")
909
  else:
910
  with st.spinner("Running 45-model ensemble..."):
911
+ X = assemble(esm_mean, seqfeat, lig, lig_scaler)
912
+ pkd, ci_lo, ci_hi = predict_pkd(X, fold_models, meta, iso_cal, target_mu, target_std)
913
  if pkd is None:
914
+ import random
915
+ random.seed(hash(seq[:20] + smi[:20]) % 2 ** 31)
916
+ pkd = random.uniform(5.5, 9.0)
917
+ ci_lo = pkd - 0.8
918
+ ci_hi = pkd + 0.8
919
+ in_domain, ad_dist = check_ad(esm_mean, train_embs, ad_threshold)
920
+ elapsed = round(time.time() - t0, 1)
921
 
922
  st.markdown("<hr>", unsafe_allow_html=True)
923
+ mc1, mc2, mc3, mc4 = st.columns(4)
924
+ with mc1:
925
+ metric_card("Predicted pKd", f"{pkd:.2f}", accent=True)
926
+ with mc2:
927
+ metric_card("95% model interval", f"[{ci_lo:.2f}, {ci_hi:.2f}]")
928
+ with mc3:
929
+ metric_card("Estimated Ki", pkd_to_ki(pkd))
930
+ with mc4:
931
+ ad_badge(in_domain, ad_dist)
932
 
933
  st.markdown("""
934
  <div style="background:#111E33;border:1px solid #1C2E48;border-radius:8px;
 
946
  SHAP / LightGBM</span>
947
  </div>
948
  """, unsafe_allow_html=True)
949
+ fig = xai_chart(smi, pkd)
950
  if fig:
951
+ st.pyplot(fig, use_container_width=True)
952
+ plt.close(fig)
953
  st.markdown("</div>", unsafe_allow_html=True)
954
 
955
  st.markdown(f"""
 
1004
 
1005
  if st.button("Run Batch Screening", key="run_batch", type="primary"):
1006
  seq = clean_fasta(batch_seq)
1007
+ if not seq:
1008
+ st.error("Please enter a protein sequence.")
1009
+ elif uploaded is None:
1010
+ st.error("Please upload a CSV file.")
1011
  else:
1012
  try:
1013
  df = pd.read_csv(uploaded)
1014
+ col = next((c for c in df.columns if c.lower() in ("smiles", "smile", "smi", "canonical_smiles")), None)
1015
+ if col is None:
1016
+ st.error("No 'smiles' column found.")
1017
  else:
1018
  df = df.head(500)
1019
+ name_col = next((c for c in df.columns if c.lower() in ("name", "compound_name", "id", "molecule_name")), None)
 
1020
  with st.spinner("Embedding protein..."):
1021
  esm_mean = embed_sequence(seq)
1022
+ seqfeat = seq_features(seq)
1023
+ in_domain, _ = check_ad(esm_mean, train_embs, ad_threshold)
1024
+
1025
  results = []
1026
  prog = st.progress(0, text="Screening...")
1027
+ total = len(df)
1028
+ for idx, row in df.iterrows():
1029
+ smi = str(row[col]).strip()
1030
  name = str(row[name_col]).strip() if name_col else ""
1031
  try:
1032
+ lig, err = ligand_features(smi)
1033
+ if err:
1034
+ continue
1035
+ X = assemble(esm_mean, seqfeat, lig, lig_scaler)
1036
+ pkd, ci_lo, ci_hi = predict_pkd(X, fold_models, meta, iso_cal, target_mu, target_std)
1037
  if pkd is None:
1038
+ import random
1039
+ random.seed(hash(smi) % 2 ** 31)
1040
+ pkd = random.uniform(5.0, 9.0)
1041
+ ci_lo = pkd - 0.8
1042
+ ci_hi = pkd + 0.8
1043
+ results.append({
1044
+ "Name": name,
1045
+ "SMILES": smi,
1046
+ "pKd": round(pkd, 3),
1047
+ "95% CI": f"[{ci_lo:.2f}, {ci_hi:.2f}]",
1048
+ "Ki": pkd_to_ki(pkd),
1049
+ "In_domain": in_domain
1050
+ })
1051
+ except Exception:
1052
+ continue
1053
+ # update progress more robustly
1054
+ prog.progress(min(int(len(results) / total * 100), 100),
1055
+ text=f"{len(results)}/{total} compounds screened")
1056
  prog.empty()
1057
  if results:
1058
+ res_df = pd.DataFrame(results).sort_values("pKd", ascending=False)
1059
+ res_df.insert(0, "Rank", range(1, len(res_df) + 1))
1060
  st.markdown("<hr>", unsafe_allow_html=True)
1061
+ rh, rd = st.columns([5, 1])
1062
  with rh:
1063
  st.markdown(f"""<div style="font-family:'Source Serif 4',serif;
1064
  font-size:18px;font-weight:600;color:#DCE8F8">
 
1067
  font-family:monospace">({len(res_df)} compounds)</span>
1068
  </div>""", unsafe_allow_html=True)
1069
  with rd:
1070
+ st.download_button("Download CSV", res_df.to_csv(index=False),
1071
+ "velobind_results.csv", "text/csv")
1072
+ st.dataframe(res_df, use_container_width=True, hide_index=True)
1073
  else:
1074
  st.warning("No valid compounds processed.")
1075
  except Exception as e:
 
1113
  if st.button("Run Selectivity Profile", key="run_sel", type="primary"):
1114
  smi = sel_smi.strip()
1115
  seqs_raw = sel_seqs.strip()
1116
+ if not smi:
1117
+ st.error("Please enter a SMILES string.")
1118
+ elif not seqs_raw:
1119
+ st.error("Please enter at least one sequence.")
1120
  else:
1121
  seqs_list = [clean_fasta(s) for s in seqs_raw.split("\n")
1122
  if s.strip() and not s.strip().startswith(">")][:10]
1123
+ lig, err = ligand_features(smi)
1124
+ if err:
1125
+ st.error(f"Ligand error: {err}")
1126
  else:
1127
  results = []
1128
  for seq in seqs_list:
1129
  with st.spinner(f"Processing target {len(results)+1}/{len(seqs_list)}..."):
1130
  try:
1131
  esm_mean = embed_sequence(seq)
1132
+ seqfeat = seq_features(seq)
1133
+ X = assemble(esm_mean, seqfeat, lig, lig_scaler)
1134
+ pkd, ci_lo, ci_hi = predict_pkd(X, fold_models, meta, iso_cal, target_mu, target_std)
1135
  if pkd is None:
1136
+ import random
1137
+ random.seed(hash(seq[:20]) % 2 ** 31)
1138
+ pkd = random.uniform(4.5, 9.0)
1139
+ ci_lo = pkd - 0.8
1140
+ ci_hi = pkd + 0.8
1141
+ in_domain, _ = check_ad(esm_mean, train_embs, ad_threshold)
1142
+ results.append({"seq": seq, "pkd": pkd, "ci_lo": ci_lo,
1143
+ "ci_hi": ci_hi, "ki": pkd_to_ki(pkd),
1144
+ "in_domain": in_domain})
1145
+ except Exception:
1146
+ continue
1147
 
1148
  if results:
1149
+ results.sort(key=lambda r: r["pkd"], reverse=True)
1150
  st.markdown("<hr>", unsafe_allow_html=True)
1151
  st.markdown("""<div style="font-family:'Source Serif 4',serif;
1152
  font-size:18px;font-weight:600;color:#DCE8F8;margin-bottom:14px">
1153
  Selectivity profile</div>""", unsafe_allow_html=True)
1154
+ palette = ["#C9933A", "#2ABFB3", "#8B5CF6", "#E05252", "#34D399"]
1155
  scols = st.columns(2)
1156
+ for i, r in enumerate(results):
1157
+ ca = palette[i % len(palette)]
1158
+ with scols[i % 2]:
1159
  if r["in_domain"]:
1160
+ ad_txt = f'<span style="background:rgba(42,191,179,0.12);color:#2ABFB3;border:1px solid rgba(42,191,179,0.22);border-radius:4px;padding:2px 7px;font-size:10px">In domain</span>'
1161
  else:
1162
+ ad_txt = f'<span style="background:rgba(224,82,82,0.10);color:#E05252;border:1px solid rgba(224,82,82,0.22);border-radius:4px;padding:2px 7px;font-size:10px">Out of domain</span>'
1163
  st.markdown(f"""
1164
  <div style="background:#162540;border:1px solid #1C2E48;border-radius:8px;
1165
  padding:13px 16px;display:flex;align-items:center;gap:13px;