Spaces:
Sleeping
Sleeping
Zhaohan Meng
commited on
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,81 +1,36 @@
|
|
| 1 |
-
|
| 2 |
-
import
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
_orig_get_type = _gc_utils.get_type
|
| 6 |
-
_orig_json2py = _gc_utils._json_schema_to_python_type
|
| 7 |
-
|
| 8 |
-
def _patched_get_type(schema):
|
| 9 |
-
# treat any boolean schema as if it were an empty dict
|
| 10 |
-
if isinstance(schema, bool):
|
| 11 |
-
schema = {}
|
| 12 |
-
return _orig_get_type(schema)
|
| 13 |
-
|
| 14 |
-
def _patched_json_schema_to_python_type(schema, defs=None):
|
| 15 |
-
# treat any boolean schema as if it were an empty dict
|
| 16 |
-
if isinstance(schema, bool):
|
| 17 |
-
schema = {}
|
| 18 |
-
return _orig_json2py(schema, defs)
|
| 19 |
-
|
| 20 |
-
_gc_utils.get_type = _patched_get_type
|
| 21 |
-
_gc_utils._json_schema_to_python_type = _patched_json_schema_to_python_type
|
| 22 |
-
|
| 23 |
-
# โโโ now itโs safe to import Gradio and build your interface โโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 24 |
-
import gradio as gr
|
| 25 |
-
import os
|
| 26 |
-
import sys
|
| 27 |
-
import argparse
|
| 28 |
-
import tempfile
|
| 29 |
-
import shutil
|
| 30 |
-
import base64
|
| 31 |
-
import io
|
| 32 |
-
|
| 33 |
-
import torch
|
| 34 |
import selfies
|
| 35 |
from rdkit import Chem
|
|
|
|
|
|
|
|
|
|
| 36 |
import matplotlib
|
| 37 |
matplotlib.use("Agg")
|
| 38 |
import matplotlib.pyplot as plt
|
| 39 |
from matplotlib import cm
|
| 40 |
from typing import Optional
|
| 41 |
|
| 42 |
-
from transformers import EsmForMaskedLM, EsmTokenizer, AutoModel
|
| 43 |
-
from torch.utils.data import DataLoader
|
| 44 |
-
from Bio.PDB import PDBParser, MMCIFParser
|
| 45 |
-
from Bio.Data import IUPACData
|
| 46 |
-
|
| 47 |
from utils.drug_tokenizer import DrugTokenizer
|
|
|
|
| 48 |
from utils.metric_learning_models_att_maps import Pre_encoded, FusionDTI
|
| 49 |
from utils.foldseek_util import get_struc_seq
|
| 50 |
|
| 51 |
-
# โโโโโ
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
def simple_seq_from_structure(path: str) -> str:
|
| 56 |
-
parser = MMCIFParser(QUIET=True) if path.endswith(".cif") else PDBParser(QUIET=True)
|
| 57 |
-
structure = parser.get_structure("P", path)
|
| 58 |
-
chains = list(structure.get_chains())
|
| 59 |
-
if not chains:
|
| 60 |
-
return ""
|
| 61 |
-
chain = max(chains, key=lambda c: len(list(c.get_residues())))
|
| 62 |
-
return "".join(three2one.get(res.get_resname().upper(), "X") for res in chain)
|
| 63 |
-
|
| 64 |
-
def smiles_to_selfies(smiles: str) -> Optional[str]:
|
| 65 |
-
try:
|
| 66 |
-
mol = Chem.MolFromSmiles(smiles)
|
| 67 |
-
if mol is None:
|
| 68 |
-
return None
|
| 69 |
-
return selfies.encoder(smiles)
|
| 70 |
-
except Exception:
|
| 71 |
-
return None
|
| 72 |
|
| 73 |
def parse_config():
|
| 74 |
p = argparse.ArgumentParser()
|
|
|
|
| 75 |
p.add_argument("--prot_encoder_path", default="westlake-repl/SaProt_650M_AF2")
|
| 76 |
p.add_argument("--drug_encoder_path", default="HUBioDataLab/SELFormer")
|
| 77 |
-
p.add_argument("--agg_mode", type=str,
|
| 78 |
p.add_argument("--group_size", type=int, default=1)
|
|
|
|
| 79 |
p.add_argument("--fusion", default="CAN")
|
| 80 |
p.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
|
| 81 |
p.add_argument("--save_path_prefix", default="save_model_ckp/")
|
|
@@ -85,13 +40,16 @@ def parse_config():
|
|
| 85 |
args = parse_config()
|
| 86 |
DEVICE = args.device
|
| 87 |
|
| 88 |
-
# โโโโโ
|
| 89 |
prot_tokenizer = EsmTokenizer.from_pretrained(args.prot_encoder_path)
|
| 90 |
prot_model = EsmForMaskedLM.from_pretrained(args.prot_encoder_path)
|
| 91 |
-
|
|
|
|
| 92 |
drug_model = AutoModel.from_pretrained(args.drug_encoder_path)
|
| 93 |
-
encoding = Pre_encoded(prot_model, drug_model, args).to(DEVICE)
|
| 94 |
|
|
|
|
|
|
|
|
|
|
| 95 |
def collate_fn(batch):
|
| 96 |
query1, query2, scores = zip(*batch)
|
| 97 |
|
|
@@ -117,8 +75,20 @@ def collate_fn(batch):
|
|
| 117 |
attention_mask2 = query_encodings2["attention_mask"].bool()
|
| 118 |
|
| 119 |
return query_encodings1["input_ids"], attention_mask1, query_encodings2["input_ids"], attention_mask2, scores
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
|
|
|
|
| 122 |
def get_case_feature(model, loader):
|
| 123 |
model.eval()
|
| 124 |
with torch.no_grad():
|
|
@@ -130,12 +100,17 @@ def get_case_feature(model, loader):
|
|
| 130 |
p_ids.cpu(), d_ids.cpu(),
|
| 131 |
p_mask.cpu(), d_mask.cpu(), None)]
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
|
| 136 |
"""
|
| 137 |
Render a Protein โ Drug cross-attention heat-map and, optionally, a
|
| 138 |
-
Top-
|
| 139 |
|
| 140 |
The token index shown on the x-axis (and accepted via *drug_idx*) is **the
|
| 141 |
position of that token in the *original* drug sequence**, *after* the
|
|
@@ -234,8 +209,8 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
|
|
| 234 |
plt.close(fig)
|
| 235 |
html = f'<img src="data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}" />'
|
| 236 |
|
| 237 |
-
# โโโโโโโโโโโโโโโโโโโโโ Top-
|
| 238 |
-
table_html = ""
|
| 239 |
if drug_idx is not None:
|
| 240 |
# map original 0-based drug_idx โ current column position
|
| 241 |
if (drug_idx + 1) in d_indices:
|
|
@@ -247,7 +222,7 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
|
|
| 247 |
|
| 248 |
if col_pos is not None:
|
| 249 |
col_vec = attn[:, col_pos]
|
| 250 |
-
topk = torch.topk(col_vec, k=min(
|
| 251 |
|
| 252 |
rank_hdr = "".join(f"<th>{r+1}</th>" for r in range(len(topk)))
|
| 253 |
res_row = "".join(f"<td>{p_tokens[i]}</td>" for i in topk)
|
|
@@ -255,58 +230,24 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
|
|
| 255 |
|
| 256 |
drug_tok_text = d_tokens[col_pos]
|
| 257 |
orig_idx = d_indices[col_pos]
|
| 258 |
-
|
| 259 |
-
# 1) build the header row: leading โRankโ, then 1โฆ30
|
| 260 |
-
header_cells = (
|
| 261 |
-
"<th style='border:1px solid #ccc; padding:6px; "
|
| 262 |
-
"background:#f7f7f7; text-align:center;'>Rank</th>"
|
| 263 |
-
+ "".join(
|
| 264 |
-
f"<th style='border:1px solid #ccc; padding:6px; "
|
| 265 |
-
f"background:#f7f7f7; text-align:center'>{r+1}</th>"
|
| 266 |
-
for r in range(len(topk))
|
| 267 |
-
)
|
| 268 |
-
)
|
| 269 |
-
|
| 270 |
-
# 2) build the residue row: leading โResidueโ, then the residue tokens
|
| 271 |
-
residue_cells = (
|
| 272 |
-
"<th style='border:1px solid #ccc; padding:6px; "
|
| 273 |
-
"background:#f7f7f7; text-align:center;'>Residue</th>"
|
| 274 |
-
+ "".join(
|
| 275 |
-
f"<td style='border:1px solid #ccc; padding:6px; "
|
| 276 |
-
f"text-align:center'>{p_tokens[i]}</td>"
|
| 277 |
-
for i in topk
|
| 278 |
-
)
|
| 279 |
-
)
|
| 280 |
-
|
| 281 |
-
# 3) build the position row: leading โPositionโ, then the residue positions
|
| 282 |
-
position_cells = (
|
| 283 |
-
"<th style='border:1px solid #ccc; padding:6px; "
|
| 284 |
-
"background:#f7f7f7; text-align:center;'>Position</th>"
|
| 285 |
-
+ "".join(
|
| 286 |
-
f"<td style='border:1px solid #ccc; padding:6px; "
|
| 287 |
-
f"text-align:center'>{p_indices[i]}</td>"
|
| 288 |
-
for i in topk
|
| 289 |
-
)
|
| 290 |
-
)
|
| 291 |
-
|
| 292 |
-
# 4) assemble your table_html
|
| 293 |
-
table_html = (
|
| 294 |
-
f"<h4 style='margin-bottom:12px'>"
|
| 295 |
-
f"Drug atom #{orig_idx} <code>{drug_tok_text}</code> โ Top-30 Protein residues"
|
| 296 |
-
f"</h4>"
|
| 297 |
-
f"<table style='border-collapse:collapse; margin:0 auto 24px;'>"
|
| 298 |
-
f"<tr>{header_cells}</tr>"
|
| 299 |
-
f"<tr>{residue_cells}</tr>"
|
| 300 |
-
f"<tr>{position_cells}</tr>"
|
| 301 |
-
f"</table>"
|
| 302 |
-
)
|
| 303 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
buf_png = io.BytesIO()
|
| 305 |
-
fig.savefig(buf_png, format="png", dpi=140)
|
| 306 |
buf_png.seek(0)
|
| 307 |
|
| 308 |
buf_pdf = io.BytesIO()
|
| 309 |
-
fig.savefig(buf_pdf, format="pdf")
|
| 310 |
buf_pdf.seek(0)
|
| 311 |
plt.close(fig)
|
| 312 |
|
|
@@ -314,253 +255,228 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
|
|
| 314 |
pdf_b64 = base64.b64encode(buf_pdf.getvalue()).decode()
|
| 315 |
|
| 316 |
html_heat = (
|
| 317 |
-
f"<
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
"text-decoration: none;'>"
|
| 325 |
-
"Download PDF"
|
| 326 |
-
"</a>"
|
| 327 |
-
# the clickable heatโmap image
|
| 328 |
-
f"<a href='data:image/png;base64,{png_b64}' target='_blank' title='Click to enlarge'>"
|
| 329 |
-
f"<img src='data:image/png;base64,{png_b64}' "
|
| 330 |
-
"style='display: block; width: 100%; height: auto; cursor: zoom-in;'/>"
|
| 331 |
-
"</a>"
|
| 332 |
-
"</div>"
|
| 333 |
)
|
| 334 |
|
|
|
|
| 335 |
return table_html + html_heat
|
| 336 |
-
|
| 337 |
-
# โโโโโ Gradio Callbacks โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 338 |
-
|
| 339 |
-
ROOT = os.path.dirname(os.path.abspath(__file__))
|
| 340 |
-
FOLDSEEK_BIN = os.path.join(ROOT, "bin", "foldseek")
|
| 341 |
-
|
| 342 |
-
def extract_sequence_cb(structure_file):
|
| 343 |
-
if structure_file is None or not os.path.exists(structure_file.name):
|
| 344 |
-
return ""
|
| 345 |
-
parsed = get_struc_seq(FOLDSEEK_BIN, structure_file.name, None, plddt_mask=False)
|
| 346 |
-
first_chain = next(iter(parsed))
|
| 347 |
-
_, _, struct_seq = parsed[first_chain]
|
| 348 |
-
return struct_seq
|
| 349 |
-
|
| 350 |
-
def inference_cb(prot_seq, drug_seq, atom_idx):
|
| 351 |
-
if not prot_seq:
|
| 352 |
-
return "<p style='color:red'>Please extract or enter a protein sequence first.</p>"
|
| 353 |
-
if not drug_seq.strip():
|
| 354 |
-
return "<p style='color:red'>Please enter a drug sequence.</p>"
|
| 355 |
-
if not drug_seq.strip().startswith("["):
|
| 356 |
-
conv = smiles_to_selfies(drug_seq.strip())
|
| 357 |
-
if conv is None:
|
| 358 |
-
return "<p style='color:red'>SMILESโSELFIES conversion failed.</p>"
|
| 359 |
-
drug_seq = conv
|
| 360 |
-
loader = DataLoader([(prot_seq, drug_seq, 1)], batch_size=1, collate_fn=collate_fn)
|
| 361 |
-
feats = get_case_feature(encoding, loader)
|
| 362 |
-
model = FusionDTI(446, 768, args).to(DEVICE)
|
| 363 |
-
ckpt = os.path.join(f"{args.save_path_prefix}{args.dataset}_{args.fusion}", "best_model.ckpt")
|
| 364 |
-
if os.path.isfile(ckpt):
|
| 365 |
-
model.load_state_dict(torch.load(ckpt, map_location=DEVICE))
|
| 366 |
-
return visualize_attention(model, feats, int(atom_idx)-1 if atom_idx else None)
|
| 367 |
-
|
| 368 |
-
def clear_cb():
|
| 369 |
-
return None, "", "", None, ""
|
| 370 |
-
|
| 371 |
-
# โโโโโ Gradio Interface Definition โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 372 |
-
|
| 373 |
-
css = """
|
| 374 |
-
:root {
|
| 375 |
-
--bg: #f3f4f6;
|
| 376 |
-
--card: #ffffff;
|
| 377 |
-
--border: #e5e7eb;
|
| 378 |
-
--primary: #6366f1;
|
| 379 |
-
--primary-dark: #4f46e5;
|
| 380 |
-
--text: #111827;
|
| 381 |
-
}
|
| 382 |
-
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 383 |
-
body { background: var(--bg); color: var(--text); font-family: Inter,system-ui,Arial,sans-serif; }
|
| 384 |
-
h1 { font-family: Poppins,Inter,sans-serif; font-weight: 600; font-size: 2rem; text-align: center; margin: 24px 0; }
|
| 385 |
-
button, .gr-button { font-family: Inter,sans-serif; font-weight: 600; }
|
| 386 |
-
#project-links { text-align: center; margin-bottom: 32px; }
|
| 387 |
-
#project-links .gr-button { margin: 0 8px; min-width: 160px; }
|
| 388 |
-
#project-links .gr-button:nth-child(1) { background: #10b981; }
|
| 389 |
-
#project-links .gr-button:nth-child(2) { background: #ef4444; }
|
| 390 |
-
#project-links .gr-button:nth-child(3) { background: #3b82f6; }
|
| 391 |
-
#project-links .gr-button:hover { opacity: 0.9; }
|
| 392 |
-
.link-btn{display:inline-block;margin:0 8px;padding:10px 20px;border-radius:8px;
|
| 393 |
-
color:white;font-weight:600;text-decoration:none;box-shadow:0 2px 6px rgba(0,0,0,0.12);
|
| 394 |
-
transition:all .2s ease-in-out;}
|
| 395 |
-
.link-btn:hover{opacity:.9;}
|
| 396 |
-
.link-btn.project{background:linear-gradient(to right,#10b981,#059669);}
|
| 397 |
-
.link-btn.arxiv {background:linear-gradient(to right,#ef4444,#dc2626);}
|
| 398 |
-
.link-btn.github {background:linear-gradient(to right,#3b82f6,#2563eb);}
|
| 399 |
-
|
| 400 |
-
/* make *all* gradio buttons a bit taller */
|
| 401 |
-
.gr-button { min-height: 10px !important; }
|
| 402 |
-
|
| 403 |
-
/* now target just our two big action buttons */
|
| 404 |
-
#extract-btn, #inference-btn {
|
| 405 |
-
width: 5px !important;
|
| 406 |
-
min-height: 36px !important;
|
| 407 |
-
margin-top: 12px !important;
|
| 408 |
-
}
|
| 409 |
-
|
| 410 |
-
/* and make clear button full width but shorter */
|
| 411 |
-
#clear-btn {
|
| 412 |
-
width: 10px !important;
|
| 413 |
-
min-height: 36px !important;
|
| 414 |
-
margin-top: 12px !important;
|
| 415 |
-
}
|
| 416 |
-
|
| 417 |
-
#input-card label {
|
| 418 |
-
font-weight: 600 !important; /* make the text bold */
|
| 419 |
-
color: var(--text) !important; /* use your standard text color */
|
| 420 |
-
}
|
| 421 |
-
|
| 422 |
-
.card {
|
| 423 |
-
background: var(--card);
|
| 424 |
-
border: 1px solid var(--border);
|
| 425 |
-
border-radius: 12px;
|
| 426 |
-
padding: 24px;
|
| 427 |
-
max-width: 1000px;
|
| 428 |
-
margin: 0 auto 32px;
|
| 429 |
-
box-shadow: 0 2px 6px rgba(0,0,0,0.05);
|
| 430 |
-
}
|
| 431 |
-
|
| 432 |
-
#guidelines-card h2 {
|
| 433 |
-
font-size: 1.4rem;
|
| 434 |
-
margin-bottom: 16px;
|
| 435 |
-
text-align: center;
|
| 436 |
-
}
|
| 437 |
-
#guidelines-card ol {
|
| 438 |
-
margin-left: 20px;
|
| 439 |
-
line-height: 1.6;
|
| 440 |
-
font-size: 1rem;
|
| 441 |
-
}
|
| 442 |
-
#input-card .gr-row, #input-card .gr-cols {
|
| 443 |
-
gap: 16px;
|
| 444 |
-
}
|
| 445 |
-
#input-card .gr-button {
|
| 446 |
-
flex: 1;
|
| 447 |
-
}
|
| 448 |
-
#output-card {
|
| 449 |
-
padding-top: 0;
|
| 450 |
-
}
|
| 451 |
-
"""
|
| 452 |
-
|
| 453 |
-
with gr.Blocks(css=css) as demo:
|
| 454 |
-
# โโโโโโโโโโโโโ Title โโโโโโโโโโโโโ
|
| 455 |
-
gr.Markdown("<h1>Token-level Visualiser for Drug-Target Interaction</h1>")
|
| 456 |
-
|
| 457 |
-
# โโโโโโโโโโโโโ Project Links โโโโโโโโโโโโโ
|
| 458 |
-
gr.Markdown("""
|
| 459 |
-
<div style="text-align:center;margin-bottom:32px;">
|
| 460 |
-
<a class="link-btn project" href="https://zhaohanm.github.io/FusionDTI.github.io/" target="_blank">๐ Project Page</a>
|
| 461 |
-
<a class="link-btn arxiv" href="https://arxiv.org/abs/2406.01651" target="_blank">๐ ArXiv: 2406.01651</a>
|
| 462 |
-
<a class="link-btn github" href="https://github.com/ZhaohanM/FusionDTI" target="_blank">๐ป GitHub Repo</a>
|
| 463 |
-
</div>
|
| 464 |
-
""")
|
| 465 |
-
# โโโโโโโโโโโโโ Guidelines Card โโโโโโโโโโโโโ
|
| 466 |
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
if __name__ == "__main__":
|
| 566 |
-
|
|
|
|
| 1 |
+
import os, sys, argparse, tempfile, shutil, base64, io
|
| 2 |
+
from flask import Flask, request, render_template_string
|
| 3 |
+
from werkzeug.utils import secure_filename
|
| 4 |
+
from torch.utils.data import DataLoader
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
import selfies
|
| 6 |
from rdkit import Chem
|
| 7 |
+
import app as gr
|
| 8 |
+
|
| 9 |
+
import torch
|
| 10 |
import matplotlib
|
| 11 |
matplotlib.use("Agg")
|
| 12 |
import matplotlib.pyplot as plt
|
| 13 |
from matplotlib import cm
|
| 14 |
from typing import Optional
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
from utils.drug_tokenizer import DrugTokenizer
|
| 17 |
+
from transformers import EsmForMaskedLM, EsmTokenizer, AutoModel
|
| 18 |
from utils.metric_learning_models_att_maps import Pre_encoded, FusionDTI
|
| 19 |
from utils.foldseek_util import get_struc_seq
|
| 20 |
|
| 21 |
+
# โโโโโ global paths / args โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 22 |
+
FOLDSEEK_BIN = shutil.which("foldseek")
|
| 23 |
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
| 24 |
+
sys.path.append("..")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def parse_config():
|
| 27 |
p = argparse.ArgumentParser()
|
| 28 |
+
p.add_argument("-f")
|
| 29 |
p.add_argument("--prot_encoder_path", default="westlake-repl/SaProt_650M_AF2")
|
| 30 |
p.add_argument("--drug_encoder_path", default="HUBioDataLab/SELFormer")
|
| 31 |
+
p.add_argument("--agg_mode", default="mean_all_tok", type=str, help="{cls|mean|mean_all_tok}")
|
| 32 |
p.add_argument("--group_size", type=int, default=1)
|
| 33 |
+
p.add_argument("--lr", type=float, default=1e-4)
|
| 34 |
p.add_argument("--fusion", default="CAN")
|
| 35 |
p.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
|
| 36 |
p.add_argument("--save_path_prefix", default="save_model_ckp/")
|
|
|
|
| 40 |
args = parse_config()
|
| 41 |
DEVICE = args.device
|
| 42 |
|
| 43 |
+
# โโโโโ tokenisers & encoders โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 44 |
prot_tokenizer = EsmTokenizer.from_pretrained(args.prot_encoder_path)
|
| 45 |
prot_model = EsmForMaskedLM.from_pretrained(args.prot_encoder_path)
|
| 46 |
+
|
| 47 |
+
drug_tokenizer = DrugTokenizer() # SELFIES
|
| 48 |
drug_model = AutoModel.from_pretrained(args.drug_encoder_path)
|
|
|
|
| 49 |
|
| 50 |
+
encoding = Pre_encoded(prot_model, drug_model, args).to(DEVICE)
|
| 51 |
+
|
| 52 |
+
# โโโ collate fn โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 53 |
def collate_fn(batch):
|
| 54 |
query1, query2, scores = zip(*batch)
|
| 55 |
|
|
|
|
| 75 |
attention_mask2 = query_encodings2["attention_mask"].bool()
|
| 76 |
|
| 77 |
return query_encodings1["input_ids"], attention_mask1, query_encodings2["input_ids"], attention_mask2, scores
|
| 78 |
+
# def collate_fn_batch_encoding(batch):
|
| 79 |
+
|
| 80 |
+
def smiles_to_selfies(smiles: str) -> Optional[str]:
|
| 81 |
+
try:
|
| 82 |
+
mol = Chem.MolFromSmiles(smiles)
|
| 83 |
+
if mol is None:
|
| 84 |
+
return None
|
| 85 |
+
selfies_str = selfies.encoder(smiles)
|
| 86 |
+
return selfies_str
|
| 87 |
+
except Exception:
|
| 88 |
+
return None
|
| 89 |
|
| 90 |
|
| 91 |
+
# โโโโโ single-case embedding โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 92 |
def get_case_feature(model, loader):
|
| 93 |
model.eval()
|
| 94 |
with torch.no_grad():
|
|
|
|
| 100 |
p_ids.cpu(), d_ids.cpu(),
|
| 101 |
p_mask.cpu(), d_mask.cpu(), None)]
|
| 102 |
|
| 103 |
+
# โโโโโ helper๏ผ่ฟๆปค็นๆฎ token โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 104 |
+
def clean_tokens(ids, tokenizer):
|
| 105 |
+
toks = tokenizer.convert_ids_to_tokens(ids.tolist())
|
| 106 |
+
return [t for t in toks if t not in tokenizer.all_special_tokens]
|
| 107 |
+
|
| 108 |
+
# โโโโโ visualisation โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 109 |
+
|
| 110 |
def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
|
| 111 |
"""
|
| 112 |
Render a Protein โ Drug cross-attention heat-map and, optionally, a
|
| 113 |
+
Top-20 protein-residue table for a chosen drug-token index.
|
| 114 |
|
| 115 |
The token index shown on the x-axis (and accepted via *drug_idx*) is **the
|
| 116 |
position of that token in the *original* drug sequence**, *after* the
|
|
|
|
| 209 |
plt.close(fig)
|
| 210 |
html = f'<img src="data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}" />'
|
| 211 |
|
| 212 |
+
# โโโโโโโโโโโโโโโโโโโโโ ็ๆ Top-20 ่กจ๏ผ่ฅ้่ฆ๏ผ โโโโโโโโโโโโโโโโโโโโโ
|
| 213 |
+
table_html = "" # ๅ
่ฎพ็ฉบไธฒ๏ผๆนไพฟๅ้ข็ปไธๆผๆฅ
|
| 214 |
if drug_idx is not None:
|
| 215 |
# map original 0-based drug_idx โ current column position
|
| 216 |
if (drug_idx + 1) in d_indices:
|
|
|
|
| 222 |
|
| 223 |
if col_pos is not None:
|
| 224 |
col_vec = attn[:, col_pos]
|
| 225 |
+
topk = torch.topk(col_vec, k=min(20, len(col_vec))).indices.tolist()
|
| 226 |
|
| 227 |
rank_hdr = "".join(f"<th>{r+1}</th>" for r in range(len(topk)))
|
| 228 |
res_row = "".join(f"<td>{p_tokens[i]}</td>" for i in topk)
|
|
|
|
| 230 |
|
| 231 |
drug_tok_text = d_tokens[col_pos]
|
| 232 |
orig_idx = d_indices[col_pos]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
|
| 234 |
+
table_html = (
|
| 235 |
+
f"<h4 style='margin-bottom:6px'>"
|
| 236 |
+
f"Drug token #{orig_idx} <code>{drug_tok_text}</code> "
|
| 237 |
+
f"โ Top-20 Protein residues</h4>"
|
| 238 |
+
"<table class='tg' style='margin-bottom:8px'>"
|
| 239 |
+
f"<tr><th>Rank</th>{rank_hdr}</tr>"
|
| 240 |
+
f"<tr><td>Residue</td>{res_row}</tr>"
|
| 241 |
+
f"<tr><td>Position</td>{pos_row}</tr>"
|
| 242 |
+
"</table>")
|
| 243 |
+
|
| 244 |
+
# โโโโโโโโโโโโโโโโโโ ็ๆๅฏๆพๅคง + ๅฏไธ่ฝฝ็็ญๅพ โโโโโโโโโโโโโโโโโโโโ
|
| 245 |
buf_png = io.BytesIO()
|
| 246 |
+
fig.savefig(buf_png, format="png", dpi=140) # ้ข่ง๏ผๅ
ๆ
๏ผ
|
| 247 |
buf_png.seek(0)
|
| 248 |
|
| 249 |
buf_pdf = io.BytesIO()
|
| 250 |
+
fig.savefig(buf_pdf, format="pdf") # ้ซๆธ
ไธ่ฝฝ๏ผ็ข้๏ผ
|
| 251 |
buf_pdf.seek(0)
|
| 252 |
plt.close(fig)
|
| 253 |
|
|
|
|
| 255 |
pdf_b64 = base64.b64encode(buf_pdf.getvalue()).decode()
|
| 256 |
|
| 257 |
html_heat = (
|
| 258 |
+
f"<a href='data:image/png;base64,{png_b64}' target='_blank' "
|
| 259 |
+
f"title='Click to enlarge'>"
|
| 260 |
+
f"<img src='data:image/png;base64,{png_b64}' "
|
| 261 |
+
f"style='max-width:100%;height:auto;cursor:zoom-in' /></a>"
|
| 262 |
+
f"<div style='margin-top:6px'>"
|
| 263 |
+
f"<a href='data:application/pdf;base64,{pdf_b64}' "
|
| 264 |
+
f"download='attention_heatmap.pdf'>Download PDF</a></div>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
)
|
| 266 |
|
| 267 |
+
# โโโโโโโโโโโโโโโโโโโโโโโโโ ่ฟๅๆ็ป HTML โโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 268 |
return table_html + html_heat
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
+
def inference(protein_seq, drug_seq, drug_idx, structure_file):
|
| 271 |
+
# โโ ่ฟไธๅๆขๆ Gradio ๅๆไปถ่ทฏๅพ โโ
|
| 272 |
+
if structure_file is not None and os.path.exists(structure_file.name):
|
| 273 |
+
tmp_structure_path = structure_file.name
|
| 274 |
+
else:
|
| 275 |
+
return "<p style='color:red'>่ฏทๅ
ไธไผ ไธไธชๆๆ็ .pdb ๆ .cif ๆไปถใ</p>"
|
| 276 |
+
|
| 277 |
+
# ่ฐ็จ foldseek
|
| 278 |
+
try:
|
| 279 |
+
parsed = get_struc_seq(FOLDSEEK_BIN, tmp_structure_path, ["A"], plddt_mask=False)
|
| 280 |
+
chain = next(iter(parsed))
|
| 281 |
+
protein_seq = parsed[chain][2]
|
| 282 |
+
except Exception as e:
|
| 283 |
+
return f"<p style='color:red'>Foldseek ๆๅๅคฑ่ดฅ๏ผ{e}</p>"
|
| 284 |
+
|
| 285 |
+
# โโโโโ Flask app โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 286 |
+
app = Flask(__name__)
|
| 287 |
+
|
| 288 |
+
# Page template for the single-route UI (original UI plus the new input box).
_INDEX_TEMPLATE = """
<!doctype html>
<html lang="en"><head><meta charset="utf-8"><title>FusionDTI </title>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Poppins:wght@500;600&display=swap" rel="stylesheet">

<style>
:root{--bg:#f3f4f6;--card:#fff;--primary:#6366f1;--primary-dark:#4f46e5;--text:#111827;--border:#e5e7eb;}
*{box-sizing:border-box;margin:0;padding:0}
body{background:var(--bg);color:var(--text);font-family:Inter,system-ui,Arial,sans-serif;line-height:1.5;padding:32px 12px;}
h1{font-family:Poppins,Inter,sans-serif;font-weight:600;font-size:1.7rem;text-align:center;margin-bottom:28px;letter-spacing:-.2px;}
.card{max-width:1000px;margin:0 auto;background:var(--card);border:1px solid var(--border);
      border-radius:12px;box-shadow:0 2px 6px rgba(0,0,0,.05);padding:32px 36px;}
label{font-weight:500;margin-bottom:6px;display:block}
textarea,input[type=file]{width:100%;font-size:.9rem;font-family:monospace;padding:10px 12px;
      border:1px solid var(--border);border-radius:8px;background:#fff;resize:vertical;}
textarea{min-height:90px}
.btn{appearance:none;border:none;cursor:pointer;padding:12px 22px;border-radius:8px;font-weight:500;
     font-family:Inter,sans-serif;transition:all .18s ease;color:#fff;}
.btn-primary{background:var(--primary)}.btn-primary:hover{background:var(--primary-dark)}
.btn-neutral{background:#9ca3af;}.btn-neutral:hover{background:#6b7280}
.grid{display:grid;gap:22px}.grid-2{grid-template-columns:1fr 1fr}
.vis-box{margin-top:28px;border:1px solid var(--border);border-radius:10px;overflow:auto;max-height:72vh;}
pre{white-space:pre-wrap;word-break:break-all;font-family:monospace;margin-top:8px}

/* ── tidy table for Top-20 list ─────────────────────────────── */
table.tg{border-collapse:collapse;margin-top:4px;font-size:0.83rem}
table.tg th,table.tg td{border:1px solid var(--border);padding:6px 8px;text-align:left}
table.tg th{background:var(--bg);font-weight:600}
</style>
</head>
<body>
<h1> Token-level Visualiser for Drug-Target Interaction</h1>

<!-- ───────────── Project Links (larger + spaced) ───────────── -->
<div style="margin-top:24px; text-align:center;">
  <a href="https://zhaohanm.github.io/FusionDTI.github.io/" target="_blank"
     style="display:inline-block;margin:8px 18px;padding:10px 20px;
            background:linear-gradient(to right,#10b981,#059669);color:white;
            font-weight:600;border-radius:8px;font-size:0.9rem;
            font-family:Inter,sans-serif;text-decoration:none;
            box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
     onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
    🌐 Project Page
  </a>

  <a href="https://arxiv.org/abs/2406.01651" target="_blank"
     style="display:inline-block;margin:8px 18px;padding:10px 20px;
            background:linear-gradient(to right,#ef4444,#dc2626);color:white;
            font-weight:600;border-radius:8px;font-size:0.9rem;
            font-family:Inter,sans-serif;text-decoration:none;
            box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
     onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
    📄 ArXiv: 2406.01651
  </a>

  <a href="https://github.com/ZhaohanM/FusionDTI" target="_blank"
     style="display:inline-block;margin:8px 18px;padding:10px 20px;
            background:linear-gradient(to right,#3b82f6,#2563eb);color:white;
            font-weight:600;border-radius:8px;font-size:0.9rem;
            font-family:Inter,sans-serif;text-decoration:none;
            box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
     onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
    💻 GitHub Repo
  </a>
</div>

<!-- ───────────── Guidelines for Use ───────────── -->
<div class="card" style="margin-bottom:24px">
  <h2 style="font-size:1.2rem;margin-bottom:14px">Guidelines for Use</h2>
  <ul style="margin-left:18px;line-height:1.55;list-style:decimal;">
    <li><strong>Convert protein structure into a structure-aware sequence:</strong>
        Upload a <code>.pdb</code> or <code>.cif</code> file. A structure-aware
        sequence will be generated using
        <a href="https://github.com/steineggerlab/foldseek" target="_blank">Foldseek</a>,
        based on 3D structures from
        <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold DB</a> or the
        <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>.</li>

    <li><strong>If you only have an amino acid sequence or a UniProt ID,</strong>
        you must first visit the
        <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>
        or <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold DB</a>
        to search and download the corresponding <code>.cif</code> or <code>.pdb</code> file.</li>

    <li><strong>Drug input supports both SELFIES and SMILES:</strong><br>
        You can enter a SELFIES string directly, or paste a SMILES string.
        SMILES will be automatically converted to SELFIES using
        <a href="https://github.com/aspuru-guzik-group/selfies" target="_blank">SELFIES encoder</a>.
        If conversion fails, a red error message will be displayed.</li>

    <li>Optionally enter a <strong>1-based</strong> drug atom or substructure index
        to highlight the Top-10 interacting protein residues.</li>

    <li>After inference, you can use the
        “Download PDF” link to export a high-resolution vector version.</li>
  </ul>
</div>

<div class="card">
  <form method="POST" enctype="multipart/form-data" class="grid">

    <div><label>Protein Structure (.pdb / .cif)</label>
      <input type="file" name="structure_file">
      <input type="hidden" name="tmp_structure_path" value="{{ tmp_structure_path }}"></div>

    <div><label>Protein Sequence</label>
      <textarea name="protein_sequence" placeholder="Confirm / paste sequence…">{{ protein_seq }}</textarea></div>

    <div><label>Drug Sequence (SELFIES/SMILES)</label>
      <textarea name="drug_sequence" placeholder="[C][C][O]/cco …">{{ drug_seq }}</textarea></div>

    <label>Drug atom/substructure index (1-based) — show Top-10 related protein residue</label>
    <input type="number" name="drug_idx" min="1" style="width:120px">

    <div class="grid grid-2">
      <button class="btn btn-primary" type="submit" name="confirm_structure">Confirm Structure</button>
      <button class="btn btn-primary" type="submit" name="Inference">Inference</button>
    </div>
    <button class="btn btn-neutral" style="width:100%" type="submit" name="clear">Clear</button>
  </form>

  {% if structure_seq %}
  <div style="margin-top:18px"><strong>Structure-aware sequence:</strong><pre>{{ structure_seq }}</pre></div>
  {% endif %}
  {% if result_html %}
  <div class="vis-box" style="margin-top:26px">{{ result_html|safe }}</div>
  {% endif %}
</div></body></html>
"""


def _parse_drug_idx(raw):
    """Convert the 1-based form value to a 0-based index, or None if absent or invalid."""
    raw = (raw or "").strip()
    # isdecimal() only accepts characters that int() can parse (isdigit() also
    # accepts e.g. superscripts); values below 1 would give a negative index.
    if raw.isdecimal() and int(raw) >= 1:
        return int(raw) - 1
    return None


def _is_uploaded_structure(path):
    """Return True only for a file inside a ``foldseek_*`` directory under the system temp dir."""
    if not path:
        return False
    real = os.path.realpath(path)
    parent = os.path.dirname(real)
    tmp_root = os.path.realpath(tempfile.gettempdir())
    return (
        os.path.isfile(real)
        and os.path.dirname(parent) == tmp_root
        and os.path.basename(parent).startswith("foldseek_")
    )


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the single-page UI and handle its three form actions.

    POST actions, selected by the name of the pressed button:
      * ``clear``: reset all fields.
      * ``confirm_structure``: run Foldseek on the uploaded structure (chain
        "A") to fill the protein sequence, and convert a SMILES drug input to
        SELFIES.
      * ``Inference``: build features for the submitted protein/drug pair,
        load the FusionDTI checkpoint if it exists, and render the attention
        visualisation.

    Returns:
        The rendered HTML page.
    """
    from html import escape  # local import keeps the block self-contained

    protein_seq = drug_seq = structure_seq = ""
    result_html = None
    tmp_structure_path = ""
    drug_idx = None

    if request.method == "POST":
        drug_idx = _parse_drug_idx(request.form.get("drug_idx", ""))

        struct = request.files.get("structure_file")
        if struct and struct.filename:
            tmp_dir = tempfile.mkdtemp(prefix="foldseek_")
            # secure_filename() can return "" (e.g. for "..."); without a
            # fallback the save target would be the directory itself.
            safe_name = secure_filename(struct.filename) or "structure"
            tmp_structure_path = os.path.join(tmp_dir, safe_name)
            struct.save(tmp_structure_path)
        else:
            # The hidden field is client-controlled: accept it only when it
            # points at a file this app saved, never an arbitrary server path.
            candidate = request.form.get("tmp_structure_path", "")
            tmp_structure_path = candidate if _is_uploaded_structure(candidate) else ""

        if "clear" in request.form:
            protein_seq = drug_seq = structure_seq = ""
            tmp_structure_path = ""

        elif "confirm_structure" in request.form and tmp_structure_path:
            errors = []
            try:
                parsed_seqs = get_struc_seq(
                    FOLDSEEK_BIN, tmp_structure_path, ["A"], plddt_mask=False
                )["A"]
                # Only the third element (structure-aware sequence) is used.
                # NOTE(review): the upload directory is never removed; the
                # original comment here suggested cleaning it up after use.
                _, _, structure_seq = parsed_seqs
            except Exception as e:
                # Escaped because result_html is rendered with |safe.
                errors.append(
                    "<p style='color:red'><strong>Foldseek failed to extract sequence "
                    f"from structure: {escape(str(e))}</strong></p>")
                structure_seq = ""

            protein_seq = structure_seq
            drug_input = request.form.get("drug_sequence", "")
            stripped = drug_input.strip()
            if not stripped:
                # No drug entered yet: nothing to convert, and not an error.
                drug_seq = ""
            elif stripped.startswith("["):
                # Heuristic: SELFIES strings start with "[", so keep as-is.
                drug_seq = drug_input
            else:
                converted = smiles_to_selfies(stripped)
                if converted:
                    drug_seq = converted
                else:
                    drug_seq = ""
                    errors.append(
                        "<p style='color:red'><strong>Failed to convert SMILES to SELFIES. "
                        "Please check the input string.</strong></p>")
            if errors:
                # Show every problem rather than letting the last one win.
                result_html = "".join(errors)

        elif "Inference" in request.form:
            protein_seq = request.form.get("protein_sequence", "")
            drug_seq = request.form.get("drug_sequence", "")
            if protein_seq and drug_seq:
                loader = DataLoader([(protein_seq, drug_seq, 1)], batch_size=1,
                                    collate_fn=collate_fn)
                feats = get_case_feature(encoding, loader)
                model = FusionDTI(446, 768, args).to(DEVICE)
                ckpt = os.path.join(f"{args.save_path_prefix}{args.dataset}_{args.fusion}",
                                    "best_model.ckpt")
                ckpt_found = os.path.isfile(ckpt)
                if ckpt_found:
                    model.load_state_dict(torch.load(ckpt, map_location=DEVICE))
                result_html = visualize_attention(model, feats, drug_idx)
                if not ckpt_found:
                    # Do not present output from untrained weights silently.
                    result_html = (
                        "<p style='color:red'><strong>Warning: model checkpoint not found; "
                        "results come from untrained weights.</strong></p>"
                        + (result_html or ""))

        else:
            # No action could run (e.g. "Confirm Structure" without a usable
            # file): keep what the user typed instead of wiping the form.
            protein_seq = request.form.get("protein_sequence", "")
            drug_seq = request.form.get("drug_sequence", "")
            if "confirm_structure" in request.form:
                result_html = (
                    "<p style='color:red'><strong>Please upload a .pdb or .cif "
                    "structure file first.</strong></p>")

    return render_template_string(
        _INDEX_TEMPLATE,
        protein_seq=protein_seq, drug_seq=drug_seq, structure_seq=structure_seq,
        result_html=result_html, tmp_structure_path=tmp_structure_path)
|
| 479 |
+
|
| 480 |
+
# ───── run ─────────────────────────────────────────────────────
|
| 481 |
if __name__ == "__main__":
    import os

    # The Werkzeug debugger allows arbitrary code execution, so it must not be
    # on by default for a server bound to all interfaces. Opt in explicitly
    # with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)
|