Spaces:

EZ4Fanta
/

bindana

Sleeping

App Files Files Community

EZ4Fanta committed on May 29, 2025

Commit

4d885c1

1 Parent(s): e58b2fc

update

Browse files

Files changed (4) hide show

.gitignore +3 -1
app.py +52 -25
requirements.txt +2 -2
utils.py +183 -42

.gitignore CHANGED Viewed

@@ -1,7 +1,9 @@
 static/dna.html
 static/dna.pdb
 static/test.cif
 test.py
 *.ipynb
 __pycache__
-*.pyc

 static/dna.html
 static/dna.pdb
 static/test.cif
+static/tmp
 test.py
 *.ipynb
 __pycache__
+*.pyc
+/pdbfixer

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import gradio as gr
 import py3Dmol
 import json
@@ -47,12 +48,19 @@ def render_structure(structure_str, summary, entity_color_dict):
 def render_html(view, entity_color_dict):
-    output = view._make_html().replace("'", '"').replace(
-        'height: 233px;',
-        'height: 700px; max-height: 100%;'
     ).replace(
-        'width: 233px;',
-        'width: 100%;'
     )
     # 构建图例
@@ -64,15 +72,22 @@ def render_html(view, entity_color_dict):
     ])
     legend_html = f"<div style='margin-top:20px; text-align:center;'>{legend_items}</div>".replace("'", '"')
-    html_content = f"""<!DOCTYPE html><html><body><center>{output}</center>{legend_html}</body></html>"""
     html_framework = f"""<iframe style=\"width: 100%; height: 800px;\" name=\"result\"
                         allow=\"midi; geolocation; microphone; camera; display-capture; encrypted-media;\"
                         sandbox=\"allow-modals allow-forms allow-scripts allow-same-origin allow-popups
                         allow-top-navigation-by-user-activation allow-downloads\"
                         allowfullscreen=\"\" allowpaymentrequest=\"\" frameborder=\"0\"
                         srcdoc='{html_content}'></iframe>"""
     return html_framework
@@ -80,7 +95,7 @@ def analyze_contacts(selected_str, cutoff, structure_cache):
     keys = selected_str
     if len(keys) < 2:
         debug_text = "<b style='color:red'>请至少选择两个实体进行分析</b>"
-        return debug_text, debug_text
     summary = structure_cache["summary"]
     structure_str = structure_cache["structure_str"]
     entity_color_dict = structure_cache["entity_color_dict"]
@@ -99,7 +114,7 @@ def load_structure(file_path):
     structure_str, summary, entity_color_dict, structure_dict = read_file(file_path)
     view = render_structure(structure_str, summary, entity_color_dict)
     html_out = render_html(view, entity_color_dict)
-    return html_out, gr.Dropdown(label="选择实体", choices=list(summary.keys()), interactive=True), structure_dict
 def update_selected(selected, current):
     if selected in current:
@@ -114,25 +129,37 @@ def delete_selected(selected, current):
 def clear_selected():
     return ""
 with gr.Blocks() as demo:
     gr.HTML(get_text_content("static/gr_head.html"))
     gr.Markdown(get_text_content("static/gr_head.md"))
     structure_cache = gr.State(value={"structure_str": None, "summary": None, "entity_color_dict": None})
-    output = gr.HTML()
-    with gr.Row():
-        # TODO: 增加对 CIF 文件的支持
-        file_input = gr.File(label="上传PDB文件", file_types=[".pdb"], scale=1)
-        with gr.Column(scale=2):
-            with gr.Row():
-                with gr.Column(scale=2):
-                    entity_selector = gr.Dropdown(choices=[], interactive=True, multiselect=True, label="选择实体")
-                with gr.Column(scale=1):
-                    cutoff_slider = gr.Slider(1, 10, value=4.0, step=0.5, label="Cutoff 距离 (Å)")
-            run_btn = gr.Button("分析并渲染")
     debug_text = gr.Textbox(label="调试信息", interactive=False)
@@ -143,7 +170,7 @@ with gr.Blocks() as demo:
     )
     file_input.change(
-        fn=lambda f: load_structure(f.name) if f else "<b style='color:red'>请上传文件</b>",
         inputs=file_input,
         outputs=[output, entity_selector, structure_cache]
     )

+import html
 import gradio as gr
 import py3Dmol
 import json
 def render_html(view, entity_color_dict):
+    # output = view._make_html().replace("'", '"').replace(
+    #     'height: 233px;',
+    #     'height: 700px; max-height: 100%;'
+    # ).replace(
+    #     'width: 233px;',
+    #     'width: 100%;'
+    # )
+    output = view._make_html().replace(
+        "height: 233px;",
+        "height: 700px; max-height: 100%;"
     ).replace(
+        "width: 233px;",
+        "width: 100%;"
     )
     # 构建图例
     ])
     legend_html = f"<div style='margin-top:20px; text-align:center;'>{legend_items}</div>".replace("'", '"')
+    # 对 output 和 legend_html 进行 HTML 转义
+    escaped_output = html.escape(output)
+    escaped_legend_html = html.escape(legend_html)
+    # 构建完整的 HTML 内容
+    html_content = f"""<!DOCTYPE html><html><body><center>{escaped_output}</center>{escaped_legend_html}</body></html>"""
     html_framework = f"""<iframe style=\"width: 100%; height: 800px;\" name=\"result\"
                         allow=\"midi; geolocation; microphone; camera; display-capture; encrypted-media;\"
                         sandbox=\"allow-modals allow-forms allow-scripts allow-same-origin allow-popups
                         allow-top-navigation-by-user-activation allow-downloads\"
                         allowfullscreen=\"\" allowpaymentrequest=\"\" frameborder=\"0\"
                         srcdoc='{html_content}'></iframe>"""
+    # save the HTML content to a static file
+    with open(os.path.join(TEMP_DIR, "structure_view.html"), "w") as f:
+        f.write(html_content)
     return html_framework
     keys = selected_str
     if len(keys) < 2:
         debug_text = "<b style='color:red'>请至少选择两个实体进行分析</b>"
+        return gr.update(), debug_text
     summary = structure_cache["summary"]
     structure_str = structure_cache["structure_str"]
     entity_color_dict = structure_cache["entity_color_dict"]
     structure_str, summary, entity_color_dict, structure_dict = read_file(file_path)
     view = render_structure(structure_str, summary, entity_color_dict)
     html_out = render_html(view, entity_color_dict)
+    return html_out, gr.Dropdown(label="选择实体", choices=list(summary.keys()), interactive=True, value=[]), structure_dict
 def update_selected(selected, current):
     if selected in current:
 def clear_selected():
     return ""
def handle_file_upload(file):
    """React to a change of the file-upload component.

    Args:
        file: The uploaded file object, or a falsy value when nothing is
            uploaded.

    Returns:
        Whatever ``load_structure(file.name)`` returns when a file is
        present; otherwise three ``gr.update()`` placeholders so that the
        three wired outputs keep their current state.
    """
    if not file:
        # No file: leave every output component untouched.
        return gr.update(), gr.update(), gr.update()
    return load_structure(file.name)
 with gr.Blocks() as demo:
     gr.HTML(get_text_content("static/gr_head.html"))
     gr.Markdown(get_text_content("static/gr_head.md"))
     structure_cache = gr.State(value={"structure_str": None, "summary": None, "entity_color_dict": None})
+    with gr.Tab("Single Structure"):
+        output = gr.HTML()
+        with gr.Row():
+            # TODO: 增加对 CIF 文件的支持
+            file_input = gr.File(label="上传PDB文件", file_types=[".pdb"], scale=1)
+            with gr.Column(scale=2):
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        entity_selector = gr.Dropdown(choices=[], interactive=True, multiselect=True, label="选择实体")
+                    with gr.Column(scale=1):
+                        cutoff_slider = gr.Slider(1, 10, value=3.5, step=0.5, label="Cutoff 距离 (Å)")
+                run_btn = gr.Button("分析并渲染")
+    with gr.Tab("Multi Structure"):
+        pass
     debug_text = gr.Textbox(label="调试信息", interactive=False)
     )
     file_input.change(
+        fn=handle_file_upload,
         inputs=file_input,
         outputs=[output, entity_selector, structure_cache]
     )

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 Bio==1.8.0
 biopython==1.85
-gradio==5.30.0
-py3Dmol==2.4.2
 py3Dmol==2.4.2

 Bio==1.8.0
 biopython==1.85
+gradio==5.31.0
 py3Dmol==2.4.2
+py3Dmol==2.4.2

utils.py CHANGED Viewed

@@ -1,66 +1,141 @@
-from Bio.PDB import MMCIFParser, PDBParser
 from collections import defaultdict
 import colorsys
 import hashlib
-from Bio.PDB import MMCIFParser, PDBParser
 from collections import defaultdict
 import colorsys
 import hashlib
 from Bio.PDB.NeighborSearch import NeighborSearch
 def get_text_content(file_path="static/gr_head.md"):
     with open(file_path, "r", encoding="utf-8") as f:
         return f.read()
 # 自动生成科研风格低饱和度颜色
-def generate_color(name):
     hash_digest = hashlib.md5(name.encode()).hexdigest()
-    hue = int(hash_digest[:6], 16) % 360 / 360.0
     lightness = 0.75
-    saturation = 0.4
     rgb = colorsys.hls_to_rgb(hue, lightness, saturation)
     return '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))
 # 自动生成科研风格高饱和度颜色
-def generate_high_saturation_color(name):
     hash_digest = hashlib.md5(name.encode()).hexdigest()
-    hue = int(hash_digest[:6], 16) % 360 / 360.0
-    lightness = 0.5
-    saturation = 0.9
     rgb = colorsys.hls_to_rgb(hue, lightness, saturation)
     return '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))
 # 给定实体名列表，生成颜色映射字典
 def build_entity_color_dict(entity_list):
-    return {name: generate_color(name) for name in entity_list}
 def classify_residue(residue):
     hetfield, resseq, icode = residue.id
     resname = residue.resname.strip()
-    nucleic_acids = {
-        "A", "G", "C", "U", "T",
-        "DA", "DG", "DC", "DT", "DU"
-    }
     if resname == 'HOH':
         return 'Ignore', None
-    elif hetfield.startswith("H_"):
         ions = {"NA", "CL", "K", "CA", "MG", "ZN", "FE", "MN", "CU", "CO"}
         if resname in ions:
-            return 'Ion', f"{resname} (ion)"
-        return 'Ligand', f"{resname} (ligand)"
     elif hetfield == " ":
-        if resname in nucleic_acids:
             if resname.startswith("D"):
                 return 'DNA', f"Chain {residue.get_parent().id} (DNA)"
             else:
                 return 'RNA', f"Chain {residue.get_parent().id} (RNA)"
-        return 'Protein', f"Chain {residue.get_parent().id} (protein)"
     else:
-        return 'Other', f"{resname} (other)"
 def analyze_structure_combined(file_path):
     if file_path.endswith(".cif"):
@@ -91,13 +166,12 @@ def analyze_structure_combined(file_path):
     return summary
-def extract_contact_residues(results, selected_keys, cutoff=4.0):
     entity_atoms = {key: [] for key in selected_keys}
     atom_to_residue_info = {}
-    # 从 results 中提取 atom
     for key in selected_keys:
-        for entry in results[key]:
             residue = entry['residue']
             for atom in residue:
                 entity_atoms[key].append(atom)
@@ -105,24 +179,27 @@ def extract_contact_residues(results, selected_keys, cutoff=4.0):
     all_atoms = sum(entity_atoms.values(), [])
     ns = NeighborSearch(all_atoms)
     contact_summary = defaultdict(set)
     seen = set()
-    for i, key1 in enumerate(selected_keys):
-        for key2 in selected_keys[i+1:]:
-            for atom1 in entity_atoms[key1]:
-                neighbors = ns.search(atom1.coord, cutoff)
-                for atom2 in neighbors:
-                    if atom2 in entity_atoms[key2]:
-                        res1 = atom1.get_parent()
-                        res2 = atom2.get_parent()
-                        tag1 = (key1, res1.id)
-                        tag2 = (key2, res2.id)
-                        if (tag1, tag2) not in seen and (tag2, tag1) not in seen:
-                            seen.add((tag1, tag2))
-                            contact_summary[key1].add((res1.get_parent().id, str(res1.id[1]), res1.resname.strip()))
-                            contact_summary[key2].add((res2.get_parent().id, str(res2.id[1]), res2.resname.strip()))
     contact_summary_final = {
         key: [
@@ -134,6 +211,56 @@ def extract_contact_residues(results, selected_keys, cutoff=4.0):
     return contact_summary_final
 def set_default_styles(viewer, summary, entity_color_dict,
                        add_label=False):
@@ -142,7 +269,7 @@ def set_default_styles(viewer, summary, entity_color_dict,
         label_style = {
                 'fontOpacity':1,
                 'backgroundColor': 'black',
-                'fontColor': color,
                 'fontSize': 10,
             }
         # 只处理蛋白、DNA、RNA等链，不处理ligand等小分子
@@ -152,6 +279,20 @@ def set_default_styles(viewer, summary, entity_color_dict,
             viewer.setStyle({'chain': chain_id}, {'cartoon': {'color': color}})
             if add_label:
                 viewer.addLabel(entity, label_style, {'chain':chain_id})
         else:
             # 例如 ligand，stick 显示
             for entry in summary[entity]:
@@ -178,9 +319,9 @@ def highlight_residues(viewer, residue_list, name='name',
         {'chain': 'A', 'resn': 'ASP', 'resi': '40'},
     ]
     """
-    color = generate_high_saturation_color(name)
-    label_color = color
-    background_color = generate_color(name)
     label_style = {
         'fontOpacity':1,
         'showBackground': True,

 from collections import defaultdict
 import colorsys
 import hashlib
+import numpy as np
+from tempfile import NamedTemporaryFile
+from Bio.PDB import MMCIFParser, PDBParser, PDBList
 from collections import defaultdict
 import colorsys
 import hashlib
 from Bio.PDB.NeighborSearch import NeighborSearch
+from Bio.PDB import PDBList
+from tempfile import NamedTemporaryFile
+import os
def fetch_pdb_by_id(pdb_id):
    """Download a PDB entry and return the path of a uniquely named local copy.

    The entry is fetched with ``PDBList.retrieve_pdb_file`` into the
    directory that holds the temporary files and is then moved onto a
    reserved ``*.<pdb_id>.pdb`` temporary file name.

    Args:
        pdb_id: Identifier of the PDB entry to download.

    Returns:
        The path of the saved file on success, the string ``"not found"``
        when the download produced no file, or the string ``"invalid"``
        when the downloaded file does not look like a PDB file.
    """
    pdbl = PDBList()
    # Reserve a unique destination name, then close the handle right away:
    # the file is only a placeholder, and keeping it open prevents it from
    # being replaced on Windows.
    with NamedTemporaryFile(delete=False, suffix=f'.{pdb_id}.pdb') as temp_file:
        target_path = temp_file.name
    save_dir = os.path.dirname(target_path)

    saved = False
    try:
        file_path = pdbl.retrieve_pdb_file(pdb_id, pdir=save_dir, file_format='pdb')
        # Biopython usually names the downloaded file like `pdbxxxx.ent`.
        if not os.path.exists(file_path):
            print(f"Download failed: File for {pdb_id} not found.")
            return "not found"
        # Cheap sanity check that the download is a PDB file.
        with open(file_path, 'r') as f:
            content = f.read()
        if not (content.startswith('HEADER') or 'ATOM' in content or 'HETATM' in content):
            print(f"Invalid PDB file for {pdb_id}")
            return "invalid"
        # os.replace overwrites the existing placeholder on every platform,
        # whereas os.rename raises on Windows when the target exists.
        os.replace(file_path, target_path)
        saved = True
    finally:
        # Do not leak the empty placeholder on failure paths or exceptions.
        if not saved and os.path.exists(target_path):
            os.remove(target_path)

    print(f"PDB file for {pdb_id} saved in {target_path}")
    return target_path
+NUCLEIC_ACIDS = {
+    "A", "G", "C", "U", "T",
+    "DA", "DG", "DC", "DT", "DU"
+}
+AMINO_ACIDS = {
+    "ALA", "ARG", "ASN", "ASP", "CYS",
+    "GLN", "GLU", "GLY", "HIS", "ILE",
+    "LEU", "LYS", "MET", "PHE", "PRO",
+    "SER", "THR", "TRP", "TYR", "VAL",
+    "SEC", "PYL"
+}
def find_donor_hydrogens(atom, max_bond_length=1.3):
    """Return the hydrogens of the atom's residue that are attached to ``atom``.

    Only nitrogen and oxygen atoms are treated as hydrogen-bond donors.
    Attachment is inferred from geometry: a hydrogen counts only when it
    lies within ``max_bond_length`` of ``atom``. Without this filter every
    hydrogen of the residue would be reported for every N/O atom, and the
    donor-hydrogen-acceptor angle computed from such a pair is meaningless.

    Args:
        atom: Atom exposing ``element``, ``coord`` and ``get_parent()``
            (the parent is iterated to obtain the sibling atoms).
        max_bond_length: Largest donor-hydrogen distance, in the units of
            ``coord``, that is still considered a bond.

    Returns:
        A list of hydrogen atoms, empty when ``atom`` is not N or O.
    """
    if atom.element not in {'N', 'O'}:
        return []
    return [
        neighbor
        for neighbor in atom.get_parent()
        if neighbor.element == 'H'
        and np.linalg.norm(neighbor.coord - atom.coord) <= max_bond_length
    ]
def is_acceptor(atom):
    """Tell whether ``atom`` is a nitrogen or an oxygen."""
    acceptor_elements = ('N', 'O')
    return atom.element in acceptor_elements
def calculate_angle(atom1, atom2, atom3):
    """Return the angle atom1-atom2-atom3 in degrees, with atom2 at the vertex.

    Args:
        atom1, atom2, atom3: Objects exposing a numeric ``coord`` array.

    Returns:
        The angle in degrees, or NaN when either arm has zero length
        (two of the atoms coincide with the vertex), in which case the
        angle is undefined.
    """
    v1 = atom1.coord - atom2.coord
    v2 = atom3.coord - atom2.coord
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        # Avoid the 0/0 division, which would only yield NaN plus a
        # RuntimeWarning; report the undefined angle explicitly instead.
        return float('nan')
    # Clip to guard against rounding pushing the cosine just outside [-1, 1].
    cos_theta = np.clip(np.dot(v1, v2) / norm_product, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))
 def get_text_content(file_path="static/gr_head.md"):
     """Read the UTF-8 text file at ``file_path`` and return its full contents."""
     with open(file_path, mode="r", encoding="utf-8") as source:
         text = source.read()
     return text
 # 自动生成科研风格低饱和度颜色
def generate_color_low(name):
    """Map ``name`` to a deterministic light, low-saturation ``#rrggbb`` color.

    The hue is derived from the MD5 digest of ``name``; lightness is fixed
    at 0.75 and saturation at 0.3.
    """
    digest_value = int(hashlib.md5(name.encode()).hexdigest(), 16)
    # NOTE: an alternative that was considered is to bucket the hue into
    # 24 bins (digest_value % 24 / 24) so that colors are less similar.
    hue = digest_value % 360 / 360.0
    red, green, blue = colorsys.hls_to_rgb(hue, 0.75, 0.3)
    return '#' + ''.join(
        '{:02x}'.format(int(channel * 255)) for channel in (red, green, blue)
    )
 # 自动生成科研风格高饱和度颜色
def generate_color(name, lightness=0.5, saturation=0.9):
    """Map ``name`` to a deterministic ``#rrggbb`` color.

    The hue comes from the MD5 digest of ``name``; ``lightness`` and
    ``saturation`` are passed to ``colorsys.hls_to_rgb`` unchanged.
    """
    hue_degrees = int(hashlib.md5(name.encode()).hexdigest(), 16) % 360
    channels = colorsys.hls_to_rgb(hue_degrees / 360.0, lightness, saturation)
    return '#' + ''.join('{:02x}'.format(int(channel * 255)) for channel in channels)


def generate_color_high(name):
    """Return the color for ``name`` at lightness 0.5 and saturation 0.9."""
    return generate_color(name, 0.5, 0.9)


def generate_color_dark(name):
    """Return the color for ``name`` at lightness 0.2 and saturation 0.9."""
    return generate_color(name, 0.2, 0.9)


def generate_color_bright(name):
    """Return the color for ``name`` at lightness 0.9 and saturation 0.9."""
    return generate_color(name, 0.9, 0.9)
 # 给定实体名列表，生成颜色映射字典
 def build_entity_color_dict(entity_list):
     """Return a dict mapping each entity name to ``generate_color_low(name)``."""
     color_by_entity = {}
     for entity_name in entity_list:
         color_by_entity[entity_name] = generate_color_low(entity_name)
     return color_by_entity
 def classify_residue(residue):
     """Classify a residue and build the label of the entity it belongs to.

     Args:
         residue: Object whose ``id`` unpacks to ``(hetfield, resseq, icode)``
             and which exposes ``resname`` and ``get_parent()``.

     Returns:
         A ``(category, label)`` tuple. ``category`` is one of ``'Ignore'``,
         ``'Peptide'``, ``'Ion'``, ``'Ligand'``, ``'DNA'``, ``'RNA'``,
         ``'Protein'`` or ``'Other'``; ``label`` is ``None`` for ``'Ignore'``.
     """
     hetfield, _resseq, _icode = residue.id
     resname = residue.resname.strip()

     # Water is never reported.
     if resname == 'HOH':
         return 'Ignore', None

     # Hetero residues: an amino acid here is treated as a peptide,
     # otherwise the residue is either a known ion or a generic ligand.
     if hetfield.startswith("H_"):
         if resname in AMINO_ACIDS:
             return 'Peptide', f"Chain {residue.get_parent().id} (Peptide)"
         if resname in {"NA", "CL", "K", "CA", "MG", "ZN", "FE", "MN", "CU", "CO"}:
             return 'Ion', f"{resname} (Ion)"
         return 'Ligand', f"{resname} (Ligand)"

     # Anything that is neither a hetero residue nor a standard one.
     if hetfield != " ":
         return 'Other', f"{resname} (Other)"

     # Standard residues: nucleic acids are split by the "D" prefix,
     # everything else is reported as protein.
     if resname not in NUCLEIC_ACIDS:
         return 'Protein', f"Chain {residue.get_parent().id} (Protein)"
     if resname.startswith("D"):
         return 'DNA', f"Chain {residue.get_parent().id} (DNA)"
     return 'RNA', f"Chain {residue.get_parent().id} (RNA)"
 def analyze_structure_combined(file_path):
     if file_path.endswith(".cif"):
     return summary
+def extract_contact_residues(summary, selected_keys, cutoff=4.0):
     entity_atoms = {key: [] for key in selected_keys}
     atom_to_residue_info = {}
     for key in selected_keys:
+        for entry in summary[key]:
             residue = entry['residue']
             for atom in residue:
                 entity_atoms[key].append(atom)
     all_atoms = sum(entity_atoms.values(), [])
     ns = NeighborSearch(all_atoms)
+    close_contacts = ns.search_all(cutoff, level='A')
     contact_summary = defaultdict(set)
     seen = set()
+    for atom1, atom2 in close_contacts:
+        if atom1 == atom2:
+            continue
+        key1, res1 = atom_to_residue_info.get(atom1, (None, None))
+        key2, res2 = atom_to_residue_info.get(atom2, (None, None))
+        if key1 is None or key2 is None or key1 == key2:
+            continue
+        tag1 = (key1, res1.id)
+        tag2 = (key2, res2.id)
+        if (tag1, tag2) in seen or (tag2, tag1) in seen:
+            continue
+        seen.add((tag1, tag2))
+        contact_summary[key1].add((res1.get_parent().id, str(res1.id[1]), res1.resname.strip()))
+        contact_summary[key2].add((res2.get_parent().id, str(res2.id[1]), res2.resname.strip()))
     contact_summary_final = {
         key: [
     return contact_summary_final
def extract_polar_contacts(summary, contact_summary, cutoff=3.5, angle_cutoff=120.0):
    """Select the contact residues that take part in a polar contact.

    Every contact residue of one entity is paired with every contact
    residue of each other entity. A pair is reported when some donor
    hydrogen of the first residue lies within ``cutoff`` of an acceptor
    atom of the second residue and the donor-hydrogen-acceptor angle is at
    least ``angle_cutoff`` degrees.

    Args:
        summary: Mapping of entity key to a list of entries, each holding
            a residue object under ``'residue'``.
        contact_summary: Mapping of entity key to a list of
            ``{'chain', 'resi', 'resn'}`` dicts naming the residues to test.
        cutoff: Largest hydrogen-acceptor distance, in coordinate units.
        angle_cutoff: Smallest accepted angle, in degrees.

    Returns:
        Mapping of entity key to a list of ``{'chain', 'resi', 'resn'}``
        dicts sorted by chain and numeric residue number. Entities without
        any polar contact are absent.
    """
    def residue_tag(residue):
        # (chain id, residue number as text, residue name)
        return (residue.get_parent().id, str(residue.id[1]), residue.resname.strip())

    def lookup(entity_key, entry):
        # Resolve a contact entry back to its residue object, or None.
        return residue_lookup.get(
            (entity_key, entry['chain'], entry['resi'], entry['resn'])
        )

    def has_polar_contact(donor_res, acceptor_res):
        # True as soon as one donor-H...acceptor geometry satisfies both cutoffs.
        acceptors = [atom for atom in acceptor_res if is_acceptor(atom)]
        if not acceptors:
            return False
        for donor in donor_res:
            for hydrogen in find_donor_hydrogens(donor):
                for acceptor in acceptors:
                    if np.linalg.norm(hydrogen.coord - acceptor.coord) > cutoff:
                        continue
                    if calculate_angle(donor, hydrogen, acceptor) >= angle_cutoff:
                        return True
        return False

    # Index every residue by (entity key, chain, number, name).
    residue_lookup = {}
    for key, entries in summary.items():
        for entry in entries:
            residue = entry['residue']
            residue_lookup[(key, *residue_tag(residue))] = residue

    polar_summary = defaultdict(set)
    for key1, contacts1 in contact_summary.items():
        for entry1 in contacts1:
            res1 = lookup(key1, entry1)
            if res1 is None:
                continue
            for key2, contacts2 in contact_summary.items():
                if key1 == key2:
                    continue
                for entry2 in contacts2:
                    res2 = lookup(key2, entry2)
                    if res2 is None:
                        continue
                    # The results are sets, so one hit per residue pair is
                    # enough; further hits would add the same two tags.
                    if has_polar_contact(res1, res2):
                        polar_summary[key1].add(residue_tag(res1))
                        polar_summary[key2].add(residue_tag(res2))

    return {
        key: [
            {'chain': c, 'resi': r, 'resn': n}
            for (c, r, n) in sorted(res_set, key=lambda x: (x[0], int(x[1])))
        ]
        for key, res_set in polar_summary.items()
    }
 def set_default_styles(viewer, summary, entity_color_dict,
                        add_label=False):
         label_style = {
                 'fontOpacity':1,
                 'backgroundColor': 'black',
+                'fontColor': generate_color_bright(entity),
                 'fontSize': 10,
             }
         # 只处理蛋白、DNA、RNA等链，不处理ligand等小分子
             viewer.setStyle({'chain': chain_id}, {'cartoon': {'color': color}})
             if add_label:
                 viewer.addLabel(entity, label_style, {'chain':chain_id})
+        # 处理离子 (entity 后缀有 (ion))
+        elif '(ion)' in entity.lower():
+            element = entity.split()[0].upper()  # 提取元素符号，如 MG, NA
+            for entry in summary.get(entity, []):
+                # 有的PDB离子的 chain 是空字符，需要处理
+                chain = entry.get('chain', '').strip()
+                resi = entry.get('resi', '').strip()
+                # 用 sphere 表示离子
+                sel = {'resi': resi}
+                if chain:
+                    sel['chain'] = chain
+                viewer.setStyle(sel, {'sphere': {'color': color, 'radius': 2.0}})
+                if add_label:
+                    viewer.addLabel(entity, label_style, sel)
         else:
             # 例如 ligand，stick 显示
             for entry in summary[entity]:
         {'chain': 'A', 'resn': 'ASP', 'resi': '40'},
     ]
     """
+    color = generate_color_high(name)
+    label_color = generate_color_dark(name)
+    background_color = generate_color_low(name)
     label_style = {
         'fontOpacity':1,
         'showBackground': True,