Spaces:

Zhaohan-Meng
/

ExplainBind

Sleeping

App Files Files Community

Zhaohan-Meng committed on Feb 19

Commit

a40c3c7

verified ·

1 Parent(s): 11f3a1d

Update app.py

Browse files

Files changed (1) hide show

app.py +185 -67

app.py CHANGED Viewed

@@ -425,7 +425,7 @@ def visualize_attention_and_ranges(
     mode="residue":
         - Aggregate attention over ligand dimension
         - Rank residues by aggregated score
-        - Select Top-K residues (1–20)
         - Default K = 1 (binding pocket discovery)
     Notes
@@ -437,7 +437,7 @@ def visualize_attention_and_ranges(
     assert mode in {"pair", "residue"}
     assert topk_pairs >= 1
-    assert 1 <= topk_residues <= 20
     model.eval()
     with torch.no_grad():
@@ -451,8 +451,9 @@ def visualize_attention_and_ranges(
         # --------------------------------------------------
         # Forward
         # --------------------------------------------------
-        _, att_pd = model(p_emb, d_emb, p_mask, d_mask)
         att = att_pd.squeeze(0)
         # expected: [Ld, Lp, 8] or [8, Ld, Lp]
         # --------------------------------------------------
@@ -514,42 +515,101 @@ def visualize_attention_and_ranges(
                 d_tokens = labels
                 d_indices = list(range(1, len(labels) + 1))
         # --------------------------------------------------
-        # Top-K selection (two modes)
         # --------------------------------------------------
         if mode == "pair":
-            # --- Top-K interaction pairs ---
             flat = att2d.reshape(-1)
             k_eff = min(topk_pairs, flat.numel())
-            idx = torch.topk(flat, k=k_eff).indices
             mask_top = torch.zeros_like(flat, dtype=torch.bool)
-            mask_top[idx] = True
             mask_top = mask_top.view_as(att2d)
         else:
-            # --- Top-K residues ---
-            residue_score = att2d.sum(dim=0)          # [protein]
             k_eff = min(topk_residues, residue_score.numel())
-            topk_res_idx = torch.topk(residue_score, k=k_eff).indices
             mask_top = torch.zeros_like(att2d, dtype=torch.bool)
-            mask_top[:, topk_res_idx] = True           # keep all ligand rows
         # --------------------------------------------------
         # Connected components (visual coherence)
         # --------------------------------------------------
-        p_tokens_orig = p_tokens.copy()
-        d_tokens_orig = d_tokens.copy()
-        components = _connected_components_2d(mask_top)
-        ranges_html = _format_component_table(
-            components,
-            p_tokens_orig,
-            d_tokens_orig,
-            mode=mode,
-        )
         # --------------------------------------------------
@@ -630,8 +690,42 @@ def visualize_attention_and_ranges(
           <img src='data:image/png;base64,{png_b64}' />
         </div>
         """
-        return heat_html, ranges_html
@@ -775,11 +869,30 @@ def inference_cb(prot_seq, drug_seq, head_choice, topk_choice, mode_choice):
     else:
         drug_seq_for_tokenizer = drug_seq_in
     ltype = "selfies"
     ligand_type_flag = "selfies"
     raw_selfies = drug_seq_for_tokenizer
     folder = f"{ptype}_selfies"
     # ------------------------------
     # Load encoders
     # ------------------------------
@@ -846,7 +959,7 @@ def inference_cb(prot_seq, drug_seq, head_choice, topk_choice, mode_choice):
     if mode_choice_clean == "Top-K residues":
         mode = "residue"
         topk_pairs = 1
-        topk_residues = min(20, topk)
     elif mode_choice_clean == "Top-K residues-atom pairs":
         mode = "pair"
@@ -859,11 +972,10 @@ def inference_cb(prot_seq, drug_seq, head_choice, topk_choice, mode_choice):
         topk_pairs = topk
         topk_residues = 1
     # ------------------------------
     # Visualisation
     # ------------------------------
-    heat_html, table_html = visualize_attention_and_ranges(
         model,
         feats,
         head_idx,
@@ -875,12 +987,12 @@ def inference_cb(prot_seq, drug_seq, head_choice, topk_choice, mode_choice):
         ligand_type=ligand_type_flag,
         raw_selfies=raw_selfies,
     )
-    return table_html, heat_html
 def clear_cb():
-    return "", "", "", "", None, ""
 # protein, drug, table, heat, file, status
@@ -1014,6 +1126,12 @@ h1{
   margin-bottom:32px !important;
 }
 """
 with gr.Blocks() as demo:
@@ -1065,47 +1183,53 @@ with gr.Blocks() as demo:
         gr.HTML("""
         <ol style="font-size:1rem;line-height:1.6;margin-left:22px;">
           <li>
-            <strong>Input types:</strong>
-            The model supports protein <em> structure-aware (SA)</em> or <em>FASTA</em> sequences,
-            and ligand <em>SMILES</em> or <em>SELFIES</em>.
           </li>
           <li>
-            <strong>Extract sequence:</strong>
-            (1) Converts <em>SMILES</em> to <em>SELFIES</em>;
-            (2) Extracts an <em>SA</em> sequence from uploaded
-            <code>.pdb</code> or <code>.cif</code> files.
           </li>
           <li>
             <strong>Top-K mode:</strong>
             <ul style="margin-top:6px;">
               <li>
-                <em>Top-K residues-atom pairs</em>:
-                ranks individual protein-residue and ligand-atom pairs by attention score.
               </li>
               <li>
                 <em>Top-K residues</em>:
-                ranks protein residues by attention aggregated over all ligand tokens.
               </li>
             </ul>
           </li>
           <li>
-            <strong>Inference output:</strong>
-            Generates a token-level attention heat map
-            and a corresponding results table
-            based on the selected Top-K mode.
           </li>
         </ol>
         """)
     # ───────────────────────────────
-    # Inputs (left) + Controls (right)
     # ───────────────────────────────
     with gr.Row():
         with gr.Column(elem_classes=["card", "grid-2"]):
             with gr.Column(elem_id="left"):
                 protein_seq = gr.Textbox(
                     label="Protein structure-aware / FASTA sequence",
                     lines=3,
@@ -1135,8 +1259,8 @@ with gr.Blocks() as demo:
                     gr.Examples(
                         examples=[[
-                            "MTLSILVAHDLQRVIGFENQLPWHLPNDLKHVKKLSTGHTLVMGRKTFESIGKPLPNRRNVVLTSDTSFNVEGVDVIHSIEDIYQLPGHVFIFGGQTLFEEMIDKVDDMYITVIEGKFRGDTFFPPYTFEDWEVASSVEGKLDEKNTIPHTFLHLIRKK",
-                            "[C][O][C][=C][C][Branch1][=C][C][C][=C][N][=C][Branch1][C][N][N][=C][Ring1][#Branch1][N][=C][C][Branch1][Ring1][O][C][=C][Ring1][P][O][C]"
                         ]],
                         inputs=[protein_seq, drug_seq],
                         label="Click to load an example",
@@ -1145,16 +1269,15 @@ with gr.Blocks() as demo:
                     btn_load_example = gr.Button(
                         "Load Example",
                         elem_id="example-btn",
-                        variant="secondary"
                     )
                 protein_seq.render()
                 drug_seq.render()
                 btn_extract = gr.Button(
                     "Extract sequences",
                     elem_id="extract-btn"
                 )
-                structure_file.render()
             # ────────────────
             # RIGHT PANEL
@@ -1162,7 +1285,7 @@ with gr.Blocks() as demo:
             with gr.Column(elem_id="right", elem_classes=["right-pane"]):
                 head_dd = gr.Dropdown(
-                    label="Interaction Type/Overall",
                     choices=INTERACTION_NAMES,
                     value="Overall Interaction",
                     interactive=True,
@@ -1200,8 +1323,10 @@ with gr.Blocks() as demo:
     # ───────────────────────────────
     with gr.Column(elem_classes=["card"]):
         status_box   = gr.HTML(elem_id="status-box")
-        output_table = gr.HTML(elem_id="result-table")
-        output_heat  = gr.HTML(elem_id="result-heat")
     # ───────────────────────────────
     # Example Loader Callback
@@ -1209,7 +1334,7 @@ with gr.Blocks() as demo:
     def load_example_cb():
         return (
             "MTLSILVAHDLQRVIGFENQLPWHLPNDLKHVKKLSTGHTLVMGRKTFESIGKPLPNRRNVVLTSDTSFNVEGVDVIHSIEDIYQLPGHVFIFGGQTLFEEMIDKVDDMYITVIEGKFRGDTFFPPYTFEDWEVASSVEGKLDEKNTIPHTFLHLIRKK",
-            "[C][O][C][=C][C][Branch1][=C][C][C][=C][N][=C][Branch1][C][N][N][=C][Ring1][#Branch1][N][=C][C][Branch1][Ring1][O][C][=C][Ring1][P][O][C]"
         )
     # ───────────────────────────────
@@ -1223,22 +1348,14 @@ with gr.Blocks() as demo:
     btn_extract.click(
         fn=extract_sequence_cb,
-        inputs=[
-            structure_file,
-            drug_seq,
-            protein_seq,
-        ],
-        outputs=[
-            protein_seq,
-            drug_seq,
-            status_box,
-        ],
     )
     btn_infer.click(
         fn=inference_cb,
         inputs=[protein_seq, drug_seq, head_dd, top_k_dd, mode_dd],
-        outputs=[output_table, output_heat],
     )
     clear_btn.click(
@@ -1247,13 +1364,14 @@ with gr.Blocks() as demo:
         outputs=[
             protein_seq,
             drug_seq,
-            output_table,
-            output_heat,
             structure_file,
             status_box,
         ],
     )
 demo.launch(
     theme=gr.themes.Default(),
     css=css,

     mode="residue":
         - Aggregate attention over ligand dimension
         - Rank residues by aggregated score
+        - Select Top-K residues (1–100)
         - Default K = 1 (binding pocket discovery)
     Notes
     assert mode in {"pair", "residue"}
     assert topk_pairs >= 1
+    assert 1 <= topk_residues <= 100
     model.eval()
     with torch.no_grad():
         # --------------------------------------------------
         # Forward
         # --------------------------------------------------
+        prob, att_pd = model(p_emb, d_emb, p_mask, d_mask)
         att = att_pd.squeeze(0)
+        prob = prob.item()
         # att is expected to be [Ld, Lp, 8] or [8, Ld, Lp]
         # --------------------------------------------------
                 d_tokens = labels
                 d_indices = list(range(1, len(labels) + 1))
         # --------------------------------------------------
+        # Top-K selection (two modes, STRICT RANKING)
         # --------------------------------------------------
         if mode == "pair":
             flat = att2d.reshape(-1)
             k_eff = min(topk_pairs, flat.numel())
+            topk_vals, topk_idx = torch.topk(flat, k=k_eff)
             mask_top = torch.zeros_like(flat, dtype=torch.bool)
+            mask_top[topk_idx] = True
             mask_top = mask_top.view_as(att2d)
+            rows = []
+            n_cols = att2d.size(1)
+            for rank, (val, linear_idx) in enumerate(zip(topk_vals, topk_idx), start=1):
+                i = (linear_idx // n_cols).item()
+                j = (linear_idx % n_cols).item()
+                rows.append(
+                    f"<tr>"
+                    f"<td style='border:1px solid #ddd;padding:6px'><strong>Top {rank}</strong></td>"
+                    f"<td style='border:1px solid #ddd;padding:6px'>Protein: <strong>{j+1}:{p_tokens[j]}</strong></td>"
+                    f"<td style='border:1px solid #ddd;padding:6px'>Ligand: <strong>{i+1}:{d_tokens[i]}</strong></td>"
+                    f"<td style='border:1px solid #ddd;padding:6px'>Score: <strong>{val.item():.6f}</strong></td>"
+                    f"</tr>"
+                )
+            ranges_html = (
+                "<h4 style='margin:12px 0 6px'>Top-K Interaction Pairs (ranked by attention score)</h4>"
+                "<table style='border-collapse:collapse;margin:6px 0 16px;width:100%'>"
+                "<thead><tr style='background:#f5f5f5'>"
+                "<th style='border:1px solid #ddd;padding:6px'>Rank</th>"
+                "<th style='border:1px solid #ddd;padding:6px'>Protein</th>"
+                "<th style='border:1px solid #ddd;padding:6px'>Ligand</th>"
+                "<th style='border:1px solid #ddd;padding:6px'>Attention Score</th>"
+                "</tr></thead>"
+                f"<tbody>{''.join(rows)}</tbody></table>"
+            )
         else:
+            # --- STRICT Top-K residue ranking ---
+            residue_score = att2d.sum(dim=0)
             k_eff = min(topk_residues, residue_score.numel())
+            topk_vals, topk_res_idx = torch.topk(residue_score, k=k_eff)
             mask_top = torch.zeros_like(att2d, dtype=torch.bool)
+            mask_top[:, topk_res_idx] = True
+            rows = []
+            for rank, (val, j) in enumerate(zip(topk_vals, topk_res_idx), start=1):
+                j = j.item()
+                rows.append(
+                    f"<tr>"
+                    f"<td style='border:1px solid #ddd;padding:6px'><strong>Top {rank}</strong></td>"
+                    f"<td style='border:1px solid #ddd;padding:6px'>"
+                    f"Protein residue: <strong>{j+1}:{p_tokens[j]}</strong>"
+                    f"</td>"
+                    f"<td style='border:1px solid #ddd;padding:6px'>"
+                    f"Aggregated Score: <strong>{val.item():.6f}</strong>"
+                    f"</td>"
+                    f"</tr>"
+                )
+            ranges_html = (
+                "<h4 style='margin:12px 0 6px'>Top-K Residues (ranked by aggregated attention)</h4>"
+                "<table style='border-collapse:collapse;margin:6px 0 16px;width:100%'>"
+                "<thead><tr style='background:#f5f5f5'>"
+                "<th style='border:1px solid #ddd;padding:6px'>Rank</th>"
+                "<th style='border:1px solid #ddd;padding:6px'>Protein Residue</th>"
+                "<th style='border:1px solid #ddd;padding:6px'>Aggregated Score</th>"
+                "</tr></thead>"
+                f"<tbody>{''.join(rows)}</tbody></table>"
+            )
         # --------------------------------------------------
         # Connected components (visual coherence) -- disabled: the table is now
         # built from the strictly ranked Top-K results in the branches above
         # --------------------------------------------------
+        # p_tokens_orig = p_tokens.copy()
+        # d_tokens_orig = d_tokens.copy()
+        # components = _connected_components_2d(mask_top)
+        # ranges_html = _format_component_table(
+        #     components,
+        #     p_tokens_orig,
+        #     d_tokens_orig,
+        #     mode=mode,
+        # )
         # --------------------------------------------------
           <img src='data:image/png;base64,{png_b64}' />
         </div>
         """
+        # ------------------------------
+        # Probability display card
+        # ------------------------------
+        if prob >= 0.8:
+            bg = "#ecfdf5"
+            border = "#10b981"
+            label = "High binding confidence"
+        elif prob >= 0.4:
+            bg = "#eff6ff"
+            border = "#3b82f6"
+            label = "Moderate binding confidence"
+        else:
+            bg = "#fef2f2"
+            border = "#ef4444"
+            label = "Low binding confidence"
+        prob_html = f"""
+        <div style='margin:10px 0 18px;
+                    padding:14px 16px;
+                    border-left:5px solid {border};
+                    border-radius:12px;
+                    background:{bg};
+                    font-size:1rem'>
+            <div style='font-weight:600;margin-bottom:4px'>
+                Predicted Binding Probability
+            </div>
+            <div style='font-size:1.4rem;font-weight:700'>
+                {prob:.4f}
+            </div>
+            <div style='font-size:0.85rem;color:#64748b;margin-top:4px'>
+                {label}
+            </div>
+        </div>
+        """
+        return prob_html, ranges_html, heat_html
     else:
         drug_seq_for_tokenizer = drug_seq_in
+    # 🔒 Force a single ligand type: everything downstream is treated as SELFIES
     ltype = "selfies"
     ligand_type_flag = "selfies"
     raw_selfies = drug_seq_for_tokenizer
     folder = f"{ptype}_selfies"
+    # # Ligand normalisation: always tokenise as SELFIES
+    # if ltype == "smiles":
+    #     conv = smiles_to_selfies(drug_seq_in)
+    #     if conv is None:
+    #         return (
+    #             "<p style='color:red'>SMILES→SELFIES conversion failed. "
+    #             "The SMILES appears invalid.</p>",
+    #             "",
+    #         )
+    #     drug_seq_for_tokenizer = conv
+    #     ligand_type_flag = "selfies"
+    # else:
+    #     drug_seq_for_tokenizer = drug_seq_in
+    #     ligand_type_flag = "selfies"
+    # raw_selfies = drug_seq_for_tokenizer if ligand_type_flag == "selfies" else None
     # ------------------------------
     # Load encoders
     # ------------------------------
     if mode_choice_clean == "Top-K residues":
         mode = "residue"
         topk_pairs = 1
+        topk_residues = min(100, topk)
     elif mode_choice_clean == "Top-K residues-atom pairs":
         mode = "pair"
         topk_pairs = topk
         topk_residues = 1
     # ------------------------------
     # Visualisation
     # ------------------------------
+    prob_html, table_html, heat_html = visualize_attention_and_ranges(
         model,
         feats,
         head_idx,
         ligand_type=ligand_type_flag,
         raw_selfies=raw_selfies,
     )
+    full_html = prob_html + table_html + heat_html   # ✅ enforce top-to-bottom order: probability, table, heat map
+    return full_html
 def clear_cb():
+    return "", "", "", None, ""
 # protein, drug, full output, file, status
   margin-bottom:32px !important;
 }
+#example-btn {
+    background: #979ea8 !important;
+    color: #1e293b !important;
+}
 """
 with gr.Blocks() as demo:
         gr.HTML("""
         <ol style="font-size:1rem;line-height:1.6;margin-left:22px;">
           <li>
+            <strong>Input formats:</strong>
+            The model accepts <em>structure-aware (SA)</em> or <em>FASTA</em> protein sequences,
+            and <em>SMILES</em> or <em>SELFIES</em> representations for ligands.
+            For SA mode, <code>.pdb</code> or <code>.cif</code> files can be uploaded directly.
           </li>
           <li>
+            <strong>Interaction type selection:</strong>
+            Choose the desired non-covalent interaction type
+            (e.g., overall interaction or specific physicochemical channels)
+            to visualise token-level binding patterns.
           </li>
           <li>
             <strong>Top-K mode:</strong>
             <ul style="margin-top:6px;">
               <li>
+                <em>Top-K residue–atom pairs</em>:
+                ranks individual protein residue–ligand atom pairs
+                according to their attention scores.
               </li>
               <li>
                 <em>Top-K residues</em>:
+                ranks protein residues by aggregating attention
+                across all ligand tokens.
               </li>
             </ul>
           </li>
           <li>
+            <strong>Output:</strong>
+            The demo system reports a predicted binding probability, a ranked Top-K interaction table, and a token-level attention heat map.
           </li>
         </ol>
         """)
     # ───────────────────────────────
+    # Inputs + Controls
     # ───────────────────────────────
     with gr.Row():
         with gr.Column(elem_classes=["card", "grid-2"]):
+            # ────────────────
+            # LEFT PANEL
+            # ────────────────
             with gr.Column(elem_id="left"):
                 protein_seq = gr.Textbox(
                     label="Protein structure-aware / FASTA sequence",
                     lines=3,
                     gr.Examples(
                         examples=[[
+                            "SLALSLTADQMVSALLDAEPPILYSEYDPTRPFSEASMMGLLTNLADRELVHMINWAKRVPGFVDLTSHDQVHLLECAWLEILMIGLVWRSMEHPGKLLFAPNLLLDRNQGKCVEGMVEIFDMLLATSSRFRMMNLQGEEFVCLKSIILLNSGVYTFLSSTLKSLEEKDHIHRVLDKITDTLIHLMAKAGLTLQQQHQRLAQLLLILSHIRHMSNKGMEHLYSMKCKNVVPSYDLLLEMLDA",
+                            "[C][=C][C][=Branch2][Branch1][#C][=C][C][=C][Ring1][=Branch1][C][=C][Branch2][Ring2][#Branch2][C@H1][C@@H1][Branch1][Branch2][C][C@@H1][Ring1][=Branch1][O][Ring1][Branch1][S][=Branch1][C][=O][=Branch1][C][=O][N][Branch1][#Branch2][C][C][Branch1][C][F][Branch1][C][F][F][C][=C][C][=C][Branch1][Branch1][C][=C][Ring1][=Branch1][Cl][C][=C][C][=C][Branch1][Branch1][C][=C][Ring1][=Branch1][O][O]"
                         ]],
                         inputs=[protein_seq, drug_seq],
                         label="Click to load an example",
                     btn_load_example = gr.Button(
                         "Load Example",
                         elem_id="example-btn",
+                        # variant="secondary"
                     )
                 protein_seq.render()
                 drug_seq.render()
+                structure_file.render()
                 btn_extract = gr.Button(
                     "Extract sequences",
                     elem_id="extract-btn"
                 )
             # ────────────────
             # RIGHT PANEL
             with gr.Column(elem_id="right", elem_classes=["right-pane"]):
                 head_dd = gr.Dropdown(
+                    label="Non-covalent interaction type/Overall",
                     choices=INTERACTION_NAMES,
                     value="Overall Interaction",
                     interactive=True,
     # ───────────────────────────────
     with gr.Column(elem_classes=["card"]):
         status_box   = gr.HTML(elem_id="status-box")
+        output_full  = gr.HTML(elem_id="result-full")
     # ───────────────────────────────
     # Example Loader Callback
     def load_example_cb():
         return (
             "MTLSILVAHDLQRVIGFENQLPWHLPNDLKHVKKLSTGHTLVMGRKTFESIGKPLPNRRNVVLTSDTSFNVEGVDVIHSIEDIYQLPGHVFIFGGQTLFEEMIDKVDDMYITVIEGKFRGDTFFPPYTFEDWEVASSVEGKLDEKNTIPHTFLHLIRKK",
+            "[C][=C][C][=Branch2][Branch1][#C][=C][C][=C][Ring1][=Branch1][C][=C][Branch2][Ring2][#Branch2][C@H1][C@@H1][Branch1][Branch2][C][C@@H1][Ring1][=Branch1][O][Ring1][Branch1][S][=Branch1][C][=O][=Branch1][C][=O][N][Branch1][#Branch2][C][C][Branch1][C][F][Branch1][C][F][F][C][=C][C][=C][Branch1][Branch1][C][=C][Ring1][=Branch1][Cl][C][=C][C][=C][Branch1][Branch1][C][=C][Ring1][=Branch1][O][O]"
         )
     # ───────────────────────────────
     btn_extract.click(
         fn=extract_sequence_cb,
+        inputs=[structure_file, drug_seq, protein_seq],
+        outputs=[protein_seq, drug_seq, status_box],
     )
     btn_infer.click(
         fn=inference_cb,
         inputs=[protein_seq, drug_seq, head_dd, top_k_dd, mode_dd],
+        outputs=[output_full],
     )
     clear_btn.click(
         outputs=[
             protein_seq,
             drug_seq,
+            output_full,
             structure_file,
             status_box,
         ],
     )
 demo.launch(
     theme=gr.themes.Default(),
     css=css,