update
Browse files- .gitignore +3 -1
- app.py +52 -25
- requirements.txt +2 -2
- utils.py +183 -42
.gitignore
CHANGED
|
@@ -1,7 +1,9 @@
|
|
| 1 |
static/dna.html
|
| 2 |
static/dna.pdb
|
| 3 |
static/test.cif
|
|
|
|
| 4 |
test.py
|
| 5 |
*.ipynb
|
| 6 |
__pycache__
|
| 7 |
-
*.pyc
|
|
|
|
|
|
| 1 |
static/dna.html
|
| 2 |
static/dna.pdb
|
| 3 |
static/test.cif
|
| 4 |
+
static/tmp
|
| 5 |
test.py
|
| 6 |
*.ipynb
|
| 7 |
__pycache__
|
| 8 |
+
*.pyc
|
| 9 |
+
/pdbfixer
|
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import py3Dmol
|
| 3 |
import json
|
|
@@ -47,12 +48,19 @@ def render_structure(structure_str, summary, entity_color_dict):
|
|
| 47 |
|
| 48 |
|
| 49 |
def render_html(view, entity_color_dict):
|
| 50 |
-
output = view._make_html().replace("'", '"').replace(
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
).replace(
|
| 54 |
-
|
| 55 |
-
|
| 56 |
)
|
| 57 |
# 构建图例
|
| 58 |
|
|
@@ -64,15 +72,22 @@ def render_html(view, entity_color_dict):
|
|
| 64 |
])
|
| 65 |
legend_html = f"<div style='margin-top:20px; text-align:center;'>{legend_items}</div>".replace("'", '"')
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
html_framework = f"""<iframe style=\"width: 100%; height: 800px;\" name=\"result\"
|
| 70 |
allow=\"midi; geolocation; microphone; camera; display-capture; encrypted-media;\"
|
| 71 |
sandbox=\"allow-modals allow-forms allow-scripts allow-same-origin allow-popups
|
| 72 |
allow-top-navigation-by-user-activation allow-downloads\"
|
| 73 |
allowfullscreen=\"\" allowpaymentrequest=\"\" frameborder=\"0\"
|
| 74 |
srcdoc='{html_content}'></iframe>"""
|
| 75 |
-
|
|
|
|
|
|
|
| 76 |
return html_framework
|
| 77 |
|
| 78 |
|
|
@@ -80,7 +95,7 @@ def analyze_contacts(selected_str, cutoff, structure_cache):
|
|
| 80 |
keys = selected_str
|
| 81 |
if len(keys) < 2:
|
| 82 |
debug_text = "<b style='color:red'>请至少选择两个实体进行分析</b>"
|
| 83 |
-
return
|
| 84 |
summary = structure_cache["summary"]
|
| 85 |
structure_str = structure_cache["structure_str"]
|
| 86 |
entity_color_dict = structure_cache["entity_color_dict"]
|
|
@@ -99,7 +114,7 @@ def load_structure(file_path):
|
|
| 99 |
structure_str, summary, entity_color_dict, structure_dict = read_file(file_path)
|
| 100 |
view = render_structure(structure_str, summary, entity_color_dict)
|
| 101 |
html_out = render_html(view, entity_color_dict)
|
| 102 |
-
return html_out, gr.Dropdown(label="选择实体", choices=list(summary.keys()), interactive=True), structure_dict
|
| 103 |
|
| 104 |
def update_selected(selected, current):
|
| 105 |
if selected in current:
|
|
@@ -114,25 +129,37 @@ def delete_selected(selected, current):
|
|
| 114 |
def clear_selected():
|
| 115 |
return ""
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
with gr.Blocks() as demo:
|
| 118 |
gr.HTML(get_text_content("static/gr_head.html"))
|
| 119 |
gr.Markdown(get_text_content("static/gr_head.md"))
|
| 120 |
structure_cache = gr.State(value={"structure_str": None, "summary": None, "entity_color_dict": None})
|
| 121 |
-
|
| 122 |
-
output = gr.HTML()
|
| 123 |
|
| 124 |
-
with gr.
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
debug_text = gr.Textbox(label="调试信息", interactive=False)
|
| 138 |
|
|
@@ -143,7 +170,7 @@ with gr.Blocks() as demo:
|
|
| 143 |
)
|
| 144 |
|
| 145 |
file_input.change(
|
| 146 |
-
fn=
|
| 147 |
inputs=file_input,
|
| 148 |
outputs=[output, entity_selector, structure_cache]
|
| 149 |
)
|
|
|
|
| 1 |
+
import html
|
| 2 |
import gradio as gr
|
| 3 |
import py3Dmol
|
| 4 |
import json
|
|
|
|
| 48 |
|
| 49 |
|
| 50 |
def render_html(view, entity_color_dict):
|
| 51 |
+
# output = view._make_html().replace("'", '"').replace(
|
| 52 |
+
# 'height: 233px;',
|
| 53 |
+
# 'height: 700px; max-height: 100%;'
|
| 54 |
+
# ).replace(
|
| 55 |
+
# 'width: 233px;',
|
| 56 |
+
# 'width: 100%;'
|
| 57 |
+
# )
|
| 58 |
+
output = view._make_html().replace(
|
| 59 |
+
"height: 233px;",
|
| 60 |
+
"height: 700px; max-height: 100%;"
|
| 61 |
).replace(
|
| 62 |
+
"width: 233px;",
|
| 63 |
+
"width: 100%;"
|
| 64 |
)
|
| 65 |
# 构建图例
|
| 66 |
|
|
|
|
| 72 |
])
|
| 73 |
legend_html = f"<div style='margin-top:20px; text-align:center;'>{legend_items}</div>".replace("'", '"')
|
| 74 |
|
| 75 |
+
# 对 output 和 legend_html 进行 HTML 转义
|
| 76 |
+
escaped_output = html.escape(output)
|
| 77 |
+
escaped_legend_html = html.escape(legend_html)
|
| 78 |
+
|
| 79 |
+
# 构建完整的 HTML 内容
|
| 80 |
+
html_content = f"""<!DOCTYPE html><html><body><center>{escaped_output}</center>{escaped_legend_html}</body></html>"""
|
| 81 |
+
|
| 82 |
html_framework = f"""<iframe style=\"width: 100%; height: 800px;\" name=\"result\"
|
| 83 |
allow=\"midi; geolocation; microphone; camera; display-capture; encrypted-media;\"
|
| 84 |
sandbox=\"allow-modals allow-forms allow-scripts allow-same-origin allow-popups
|
| 85 |
allow-top-navigation-by-user-activation allow-downloads\"
|
| 86 |
allowfullscreen=\"\" allowpaymentrequest=\"\" frameborder=\"0\"
|
| 87 |
srcdoc='{html_content}'></iframe>"""
|
| 88 |
+
# save the HTML content to a static
|
| 89 |
+
with open(os.path.join(TEMP_DIR, "structure_view.html"), "w") as f:
|
| 90 |
+
f.write(html_content)
|
| 91 |
return html_framework
|
| 92 |
|
| 93 |
|
|
|
|
| 95 |
keys = selected_str
|
| 96 |
if len(keys) < 2:
|
| 97 |
debug_text = "<b style='color:red'>请至少选择两个实体进行分析</b>"
|
| 98 |
+
return gr.update(), debug_text
|
| 99 |
summary = structure_cache["summary"]
|
| 100 |
structure_str = structure_cache["structure_str"]
|
| 101 |
entity_color_dict = structure_cache["entity_color_dict"]
|
|
|
|
| 114 |
structure_str, summary, entity_color_dict, structure_dict = read_file(file_path)
|
| 115 |
view = render_structure(structure_str, summary, entity_color_dict)
|
| 116 |
html_out = render_html(view, entity_color_dict)
|
| 117 |
+
return html_out, gr.Dropdown(label="选择实体", choices=list(summary.keys()), interactive=True, value=[]), structure_dict
|
| 118 |
|
| 119 |
def update_selected(selected, current):
|
| 120 |
if selected in current:
|
|
|
|
| 129 |
def clear_selected():
    """Return an empty string.

    Presumably bound to a "clear" action that resets the selection text;
    confirm against the event wiring, which is not visible here.
    """
    cleared_value = ""
    return cleared_value
|
| 131 |
|
| 132 |
+
def handle_file_upload(file):
    """Load the uploaded structure, or leave the outputs untouched.

    Args:
        file: Value delivered by the file input; only its ``name``
            attribute (the path handed to ``load_structure``) is read.

    Returns:
        Whatever ``load_structure`` returns for the uploaded path, or three
        no-op ``gr.update()`` objects when no file is present.
    """
    if not file:
        # Empty upload: keep the current state of all three outputs.
        return gr.update(), gr.update(), gr.update()
    return load_structure(file.name)
|
| 138 |
+
|
| 139 |
+
|
| 140 |
with gr.Blocks() as demo:
|
| 141 |
gr.HTML(get_text_content("static/gr_head.html"))
|
| 142 |
gr.Markdown(get_text_content("static/gr_head.md"))
|
| 143 |
structure_cache = gr.State(value={"structure_str": None, "summary": None, "entity_color_dict": None})
|
|
|
|
|
|
|
| 144 |
|
| 145 |
+
with gr.Tab("Single Structure"):
|
| 146 |
+
output = gr.HTML()
|
| 147 |
+
with gr.Row():
|
| 148 |
+
# TODO: 增加对 CIF 文件的支持
|
| 149 |
+
file_input = gr.File(label="上传PDB文件", file_types=[".pdb"], scale=1)
|
| 150 |
+
with gr.Column(scale=2):
|
| 151 |
+
with gr.Row():
|
| 152 |
+
with gr.Column(scale=2):
|
| 153 |
+
entity_selector = gr.Dropdown(choices=[], interactive=True, multiselect=True, label="选择实体")
|
| 154 |
+
|
| 155 |
+
with gr.Column(scale=1):
|
| 156 |
+
cutoff_slider = gr.Slider(1, 10, value=3.5, step=0.5, label="Cutoff 距离 (Å)")
|
| 157 |
+
|
| 158 |
+
run_btn = gr.Button("分析并渲染")
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
with gr.Tab("Multi Structure"):
|
| 162 |
+
pass
|
| 163 |
|
| 164 |
debug_text = gr.Textbox(label="调试信息", interactive=False)
|
| 165 |
|
|
|
|
| 170 |
)
|
| 171 |
|
| 172 |
file_input.change(
|
| 173 |
+
fn=handle_file_upload,
|
| 174 |
inputs=file_input,
|
| 175 |
outputs=[output, entity_selector, structure_cache]
|
| 176 |
)
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
Bio==1.8.0
|
| 2 |
biopython==1.85
|
| 3 |
-
gradio==5.
|
| 4 |
-
py3Dmol==2.4.2
|
| 5 |
py3Dmol==2.4.2
|
|
|
|
|
|
| 1 |
Bio==1.8.0
|
| 2 |
biopython==1.85
|
| 3 |
+
gradio==5.31.0
|
|
|
|
| 4 |
py3Dmol==2.4.2
|
| 5 |
+
py3Dmol==2.4.2
|
utils.py
CHANGED
|
@@ -1,66 +1,141 @@
|
|
| 1 |
-
from Bio.PDB import MMCIFParser, PDBParser
|
| 2 |
from collections import defaultdict
|
| 3 |
import colorsys
|
| 4 |
import hashlib
|
|
|
|
| 5 |
|
| 6 |
-
from
|
|
|
|
|
|
|
| 7 |
from collections import defaultdict
|
| 8 |
import colorsys
|
| 9 |
import hashlib
|
| 10 |
from Bio.PDB.NeighborSearch import NeighborSearch
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
def get_text_content(file_path="static/gr_head.md"):
|
| 14 |
with open(file_path, "r", encoding="utf-8") as f:
|
| 15 |
return f.read()
|
| 16 |
|
| 17 |
# 自动生成科研风格低饱和度颜色
|
| 18 |
-
def
|
| 19 |
hash_digest = hashlib.md5(name.encode()).hexdigest()
|
| 20 |
-
hue = int(hash_digest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
lightness = 0.75
|
| 22 |
-
saturation = 0.
|
| 23 |
rgb = colorsys.hls_to_rgb(hue, lightness, saturation)
|
| 24 |
return '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))
|
| 25 |
|
| 26 |
# 自动生成科研风格高饱和度颜色
|
| 27 |
-
def
|
| 28 |
hash_digest = hashlib.md5(name.encode()).hexdigest()
|
| 29 |
-
hue = int(hash_digest
|
| 30 |
-
lightness = 0.5
|
| 31 |
-
saturation = 0.9
|
| 32 |
rgb = colorsys.hls_to_rgb(hue, lightness, saturation)
|
| 33 |
return '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
# 给定实体名列表,生成颜色映射字典
|
| 36 |
def build_entity_color_dict(entity_list):
|
| 37 |
-
return {name:
|
| 38 |
|
| 39 |
def classify_residue(residue):
|
| 40 |
hetfield, resseq, icode = residue.id
|
| 41 |
resname = residue.resname.strip()
|
| 42 |
|
| 43 |
-
nucleic_acids = {
|
| 44 |
-
"A", "G", "C", "U", "T",
|
| 45 |
-
"DA", "DG", "DC", "DT", "DU"
|
| 46 |
-
}
|
| 47 |
-
|
| 48 |
if resname == 'HOH':
|
| 49 |
return 'Ignore', None
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
ions = {"NA", "CL", "K", "CA", "MG", "ZN", "FE", "MN", "CU", "CO"}
|
| 52 |
if resname in ions:
|
| 53 |
-
return 'Ion', f"{resname} (
|
| 54 |
-
return 'Ligand', f"{resname} (
|
|
|
|
| 55 |
elif hetfield == " ":
|
| 56 |
-
if resname in
|
| 57 |
if resname.startswith("D"):
|
| 58 |
return 'DNA', f"Chain {residue.get_parent().id} (DNA)"
|
| 59 |
else:
|
| 60 |
return 'RNA', f"Chain {residue.get_parent().id} (RNA)"
|
| 61 |
-
return 'Protein', f"Chain {residue.get_parent().id} (
|
| 62 |
else:
|
| 63 |
-
return 'Other', f"{resname} (
|
|
|
|
| 64 |
|
| 65 |
def analyze_structure_combined(file_path):
|
| 66 |
if file_path.endswith(".cif"):
|
|
@@ -91,13 +166,12 @@ def analyze_structure_combined(file_path):
|
|
| 91 |
|
| 92 |
return summary
|
| 93 |
|
| 94 |
-
def extract_contact_residues(
|
| 95 |
entity_atoms = {key: [] for key in selected_keys}
|
| 96 |
atom_to_residue_info = {}
|
| 97 |
|
| 98 |
-
# 从 results 中提取 atom
|
| 99 |
for key in selected_keys:
|
| 100 |
-
for entry in
|
| 101 |
residue = entry['residue']
|
| 102 |
for atom in residue:
|
| 103 |
entity_atoms[key].append(atom)
|
|
@@ -105,24 +179,27 @@ def extract_contact_residues(results, selected_keys, cutoff=4.0):
|
|
| 105 |
|
| 106 |
all_atoms = sum(entity_atoms.values(), [])
|
| 107 |
ns = NeighborSearch(all_atoms)
|
|
|
|
| 108 |
|
| 109 |
contact_summary = defaultdict(set)
|
| 110 |
seen = set()
|
| 111 |
|
| 112 |
-
for
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
|
|
|
|
|
|
| 126 |
|
| 127 |
contact_summary_final = {
|
| 128 |
key: [
|
|
@@ -134,6 +211,56 @@ def extract_contact_residues(results, selected_keys, cutoff=4.0):
|
|
| 134 |
|
| 135 |
return contact_summary_final
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
def set_default_styles(viewer, summary, entity_color_dict,
|
| 139 |
add_label=False):
|
|
@@ -142,7 +269,7 @@ def set_default_styles(viewer, summary, entity_color_dict,
|
|
| 142 |
label_style = {
|
| 143 |
'fontOpacity':1,
|
| 144 |
'backgroundColor': 'black',
|
| 145 |
-
'fontColor':
|
| 146 |
'fontSize': 10,
|
| 147 |
}
|
| 148 |
# 只处理蛋白、DNA、RNA等链,不处理ligand等小分子
|
|
@@ -152,6 +279,20 @@ def set_default_styles(viewer, summary, entity_color_dict,
|
|
| 152 |
viewer.setStyle({'chain': chain_id}, {'cartoon': {'color': color}})
|
| 153 |
if add_label:
|
| 154 |
viewer.addLabel(entity, label_style, {'chain':chain_id})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
else:
|
| 156 |
# 例如 ligand,stick 显示
|
| 157 |
for entry in summary[entity]:
|
|
@@ -178,9 +319,9 @@ def highlight_residues(viewer, residue_list, name='name',
|
|
| 178 |
{'chain': 'A', 'resn': 'ASP', 'resi': '40'},
|
| 179 |
]
|
| 180 |
"""
|
| 181 |
-
color =
|
| 182 |
-
label_color =
|
| 183 |
-
background_color =
|
| 184 |
label_style = {
|
| 185 |
'fontOpacity':1,
|
| 186 |
'showBackground': True,
|
|
|
|
|
|
|
| 1 |
from collections import defaultdict
|
| 2 |
import colorsys
|
| 3 |
import hashlib
|
| 4 |
+
import numpy as np
|
| 5 |
|
| 6 |
+
from tempfile import NamedTemporaryFile
|
| 7 |
+
|
| 8 |
+
from Bio.PDB import MMCIFParser, PDBParser, PDBList
|
| 9 |
from collections import defaultdict
|
| 10 |
import colorsys
|
| 11 |
import hashlib
|
| 12 |
from Bio.PDB.NeighborSearch import NeighborSearch
|
| 13 |
|
| 14 |
+
from Bio.PDB import PDBList
|
| 15 |
+
from tempfile import NamedTemporaryFile
|
| 16 |
+
import os
|
| 17 |
+
|
| 18 |
+
def fetch_pdb_by_id(pdb_id):
    """Download a PDB entry and store it under a unique temporary file name.

    Args:
        pdb_id: Identifier handed to ``PDBList.retrieve_pdb_file``; it is
            also embedded in the temporary file's suffix.

    Returns:
        The path of the temporary ``.pdb`` file on success, the string
        ``"not found"`` when the download left no file on disk, or the
        string ``"invalid"`` when the downloaded file does not look like a
        PDB file.
    """
    pdbl = PDBList()

    # Reserve a unique destination name, then close the handle immediately:
    # moving a file onto a still-open destination fails on Windows.
    with NamedTemporaryFile(delete=False, suffix=f'.{pdb_id}.pdb') as temp_file:
        temp_path = temp_file.name

    save_dir = os.path.dirname(temp_path)
    file_path = pdbl.retrieve_pdb_file(pdb_id, pdir=save_dir, file_format='pdb')

    # Biopython usually returns a path of the form `pdbxxxx.ent`.
    if not os.path.exists(file_path):
        # The placeholder was created with delete=False, so remove it by
        # hand instead of leaking an empty file on every failed download.
        os.remove(temp_path)
        print(f"Download failed: File for {pdb_id} not found.")
        return "not found"

    # Cheap sanity check that the download is a PDB file.
    with open(file_path, 'r') as f:
        content = f.read()
    if not (content.startswith('HEADER') or 'ATOM' in content or 'HETATM' in content):
        os.remove(temp_path)
        print(f"Invalid PDB file for {pdb_id}")
        return "invalid"

    # Move the download onto the reserved name; os.replace overwrites the
    # placeholder on every platform.
    os.replace(file_path, temp_path)
    print(f"PDB file for {pdb_id} saved in {temp_path}")
    return temp_path
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Residue names treated as nucleic acids: the one-letter names plus the
# "D"-prefixed names.
NUCLEIC_ACIDS = {
    "A", "C", "G", "T", "U",
    "DA", "DC", "DG", "DT", "DU",
}

# Three-letter amino-acid residue names: the 20 standard ones plus
# selenocysteine (SEC) and pyrrolysine (PYL).
AMINO_ACIDS = {
    "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY",
    "HIS", "ILE", "LEU", "LYS", "MET", "PHE", "PRO", "SER",
    "THR", "TRP", "TYR", "VAL",
    "SEC", "PYL",
}
|
| 54 |
+
|
| 55 |
+
def find_donor_hydrogens(atom, max_bond_length=1.2):
    """Return the hydrogens bonded to a potential hydrogen-bond donor atom.

    Only N and O atoms are considered donors. A hydrogen of the same residue
    counts as bonded when it lies within ``max_bond_length`` of ``atom``;
    without this distance filter every hydrogen of the residue would be
    paired with every N/O atom, and donor-H-acceptor angles would be
    computed with unrelated hydrogens.

    Args:
        atom: Atom exposing ``element``, ``coord`` and ``get_parent()``
            (the parent must be iterable over its atoms).
        max_bond_length: Largest donor-hydrogen distance, in the same unit
            as the coordinates, still treated as a covalent bond.

    Returns:
        List of bonded hydrogen atoms, in the residue's iteration order;
        empty when ``atom`` is not N or O.
    """
    if atom.element not in {'N', 'O'}:
        return []
    return [
        neighbor
        for neighbor in atom.get_parent()
        if neighbor.element == 'H'
        and np.linalg.norm(neighbor.coord - atom.coord) <= max_bond_length
    ]
|
| 63 |
+
|
| 64 |
+
def is_acceptor(atom):
    """Tell whether ``atom`` is a nitrogen or an oxygen.

    Those are the only elements this module treats as hydrogen-bond
    acceptors.
    """
    acceptor_elements = {'N', 'O'}
    return atom.element in acceptor_elements
|
| 66 |
+
|
| 67 |
+
def calculate_angle(atom1, atom2, atom3):
    """Return the angle atom1-atom2-atom3 in degrees, with atom2 at the vertex.

    Args:
        atom1, atom2, atom3: Objects exposing a NumPy ``coord`` vector.

    Returns:
        The angle in degrees, or NaN when either arm has zero length
        (coincident atoms), in which case the angle is undefined.
    """
    v1 = atom1.coord - atom2.coord
    v2 = atom3.coord - atom2.coord
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        # Avoid the division by zero (and its RuntimeWarning); NaN compares
        # False against any cutoff, so callers simply skip this triple.
        return float('nan')
    # Clip to absorb rounding error that would push the cosine outside [-1, 1].
    cos_theta = np.clip(np.dot(v1, v2) / norm_product, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))
|
| 73 |
|
| 74 |
def get_text_content(file_path="static/gr_head.md"):
    """Read a UTF-8 text file and return its entire content as one string.

    Args:
        file_path: Path of the file to read.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        text = handle.read()
    return text
|
| 77 |
|
| 78 |
# Derive a muted (low-saturation) colour from a name.
def generate_color_low(name):
    """Return a deterministic low-saturation ``#rrggbb`` colour for ``name``.

    The hue comes from the MD5 digest of ``name`` reduced to whole degrees;
    lightness is fixed at 0.75 and saturation at 0.3.
    """
    digest = hashlib.md5(name.encode()).hexdigest()
    # NOTE: bucketing the hue into a fixed number of bins (e.g. 24) was
    # considered to keep colours further apart, but is not enabled.
    hue = int(digest, 16) % 360 / 360.0
    channels = colorsys.hls_to_rgb(hue, 0.75, 0.3)
    red, green, blue = (int(channel * 255) for channel in channels)
    return '#{:02x}{:02x}{:02x}'.format(red, green, blue)
|
| 91 |
|
| 92 |
# Derive a colour from a name; the defaults give a vivid, high-saturation one.
def generate_color(name, lightness=0.5, saturation=0.9):
    """Return a deterministic ``#rrggbb`` colour for ``name``.

    Args:
        name: Text whose MD5 digest selects the hue (whole-degree steps).
        lightness: HLS lightness passed to ``colorsys.hls_to_rgb``.
        saturation: HLS saturation passed to ``colorsys.hls_to_rgb``.
    """
    digest = hashlib.md5(name.encode()).hexdigest()
    hue = int(digest, 16) % 360 / 360.0
    channels = colorsys.hls_to_rgb(hue, lightness, saturation)
    red, green, blue = (int(channel * 255) for channel in channels)
    return '#{:02x}{:02x}{:02x}'.format(red, green, blue)
|
| 98 |
|
| 99 |
+
|
| 100 |
+
def generate_color_high(name):
    """Return the vivid variant of ``name``'s colour (lightness 0.5, saturation 0.9)."""
    vivid = {"lightness": 0.5, "saturation": 0.9}
    return generate_color(name, **vivid)
|
| 102 |
+
|
| 103 |
+
def generate_color_dark(name):
    """Return the dark variant of ``name``'s colour (lightness 0.2, saturation 0.9)."""
    dark = {"lightness": 0.2, "saturation": 0.9}
    return generate_color(name, **dark)
|
| 105 |
+
|
| 106 |
+
def generate_color_bright(name):
    """Return the bright variant of ``name``'s colour (lightness 0.9, saturation 0.9)."""
    bright = {"lightness": 0.9, "saturation": 0.9}
    return generate_color(name, **bright)
|
| 108 |
+
|
| 109 |
# Build the name -> colour mapping for a list of entity names.
def build_entity_color_dict(entity_list):
    """Map every entity name in ``entity_list`` to its low-saturation colour.

    Returns:
        Dict keyed by entity name whose values come from
        ``generate_color_low``; duplicate names collapse to one key.
    """
    color_by_entity = {}
    for entity_name in entity_list:
        color_by_entity[entity_name] = generate_color_low(entity_name)
    return color_by_entity
|
| 112 |
|
| 113 |
def classify_residue(residue):
    """Classify a residue and name the entity it belongs to.

    Args:
        residue: Object exposing ``id`` as a ``(hetfield, resseq, icode)``
            triple, ``resname`` and ``get_parent()`` (whose ``id`` is used
            as the chain identifier).

    Returns:
        A ``(category, entity_name)`` pair. ``category`` is one of
        ``'Ignore'``, ``'Peptide'``, ``'Ion'``, ``'Ligand'``, ``'DNA'``,
        ``'RNA'``, ``'Protein'`` or ``'Other'``; ``entity_name`` is ``None``
        for ignored residues.
    """
    hetfield, resseq, icode = residue.id
    resname = residue.resname.strip()

    # Waters are never shown. Besides the HOH name, honour the water
    # hetero-flag "W" so waters stored under another name (e.g. WAT) do not
    # end up as 'Other' entities.
    if resname == 'HOH' or hetfield == 'W':
        return 'Ignore', None

    if hetfield.startswith("H_"):
        # An amino acid stored as HETATM is reported as a peptide.
        if resname in AMINO_ACIDS:
            return 'Peptide', f"Chain {residue.get_parent().id} (Peptide)"
        ions = {"NA", "CL", "K", "CA", "MG", "ZN", "FE", "MN", "CU", "CO"}
        if resname in ions:
            return 'Ion', f"{resname} (Ion)"
        return 'Ligand', f"{resname} (Ligand)"

    if hetfield == " ":
        if resname in NUCLEIC_ACIDS:
            # "D"-prefixed names are DNA; the remaining names are RNA.
            if resname.startswith("D"):
                return 'DNA', f"Chain {residue.get_parent().id} (DNA)"
            return 'RNA', f"Chain {residue.get_parent().id} (RNA)"
        # Every other standard (non-hetero) residue is treated as protein.
        return 'Protein', f"Chain {residue.get_parent().id} (Protein)"

    return 'Other', f"{resname} (Other)"
|
| 138 |
+
|
| 139 |
|
| 140 |
def analyze_structure_combined(file_path):
|
| 141 |
if file_path.endswith(".cif"):
|
|
|
|
| 166 |
|
| 167 |
return summary
|
| 168 |
|
| 169 |
+
def extract_contact_residues(summary, selected_keys, cutoff=4.0):
|
| 170 |
entity_atoms = {key: [] for key in selected_keys}
|
| 171 |
atom_to_residue_info = {}
|
| 172 |
|
|
|
|
| 173 |
for key in selected_keys:
|
| 174 |
+
for entry in summary[key]:
|
| 175 |
residue = entry['residue']
|
| 176 |
for atom in residue:
|
| 177 |
entity_atoms[key].append(atom)
|
|
|
|
| 179 |
|
| 180 |
all_atoms = sum(entity_atoms.values(), [])
|
| 181 |
ns = NeighborSearch(all_atoms)
|
| 182 |
+
close_contacts = ns.search_all(cutoff, level='A')
|
| 183 |
|
| 184 |
contact_summary = defaultdict(set)
|
| 185 |
seen = set()
|
| 186 |
|
| 187 |
+
for atom1, atom2 in close_contacts:
|
| 188 |
+
if atom1 == atom2:
|
| 189 |
+
continue
|
| 190 |
+
key1, res1 = atom_to_residue_info.get(atom1, (None, None))
|
| 191 |
+
key2, res2 = atom_to_residue_info.get(atom2, (None, None))
|
| 192 |
+
if key1 is None or key2 is None or key1 == key2:
|
| 193 |
+
continue
|
| 194 |
+
|
| 195 |
+
tag1 = (key1, res1.id)
|
| 196 |
+
tag2 = (key2, res2.id)
|
| 197 |
+
if (tag1, tag2) in seen or (tag2, tag1) in seen:
|
| 198 |
+
continue
|
| 199 |
+
seen.add((tag1, tag2))
|
| 200 |
+
|
| 201 |
+
contact_summary[key1].add((res1.get_parent().id, str(res1.id[1]), res1.resname.strip()))
|
| 202 |
+
contact_summary[key2].add((res2.get_parent().id, str(res2.id[1]), res2.resname.strip()))
|
| 203 |
|
| 204 |
contact_summary_final = {
|
| 205 |
key: [
|
|
|
|
| 211 |
|
| 212 |
return contact_summary_final
|
| 213 |
|
| 214 |
+
def extract_polar_contacts(summary, contact_summary, cutoff=3.5, angle_cutoff=120.0):
    """Narrow a contact summary down to residues with polar contacts.

    For every ordered pair of contact residues from two different entities,
    each atom of the first residue is tried as donor and each atom of the
    second as acceptor. A pair qualifies when a donor hydrogen lies within
    ``cutoff`` of the acceptor and the donor-hydrogen-acceptor angle is at
    least ``angle_cutoff`` degrees.

    Args:
        summary: Mapping of entity key to entries carrying a ``'residue'``.
        contact_summary: Mapping of entity key to dicts with ``'chain'``,
            ``'resi'`` and ``'resn'`` keys.
        cutoff: Largest hydrogen-acceptor distance accepted.
        angle_cutoff: Smallest donor-hydrogen-acceptor angle accepted.

    Returns:
        Dict of entity key to a list of ``{'chain', 'resi', 'resn'}`` dicts
        sorted by chain and numeric residue number.
    """
    def residue_tag(res):
        # (chain id, residue number as text, residue name)
        return (res.get_parent().id, str(res.id[1]), res.resname.strip())

    # Index every residue by (entity key, chain, resi, resn).
    residue_lookup = {}
    for key in summary:
        for entry in summary[key]:
            residue = entry['residue']
            residue_lookup[(key,) + residue_tag(residue)] = residue

    def resolve(key, entry):
        # Map a contact entry back to its residue object, or None.
        return residue_lookup.get((key, entry['chain'], entry['resi'], entry['resn']))

    def polar_hits(donor_res, acceptor_res):
        # Yield once per donor/hydrogen/acceptor triple passing both cutoffs.
        for donor in donor_res:
            hydrogens = find_donor_hydrogens(donor)
            for candidate in acceptor_res:
                if not is_acceptor(candidate):
                    continue
                for hydrogen in hydrogens:
                    if np.linalg.norm(hydrogen.coord - candidate.coord) > cutoff:
                        continue
                    if calculate_angle(donor, hydrogen, candidate) >= angle_cutoff:
                        yield donor, hydrogen, candidate

    polar_summary = defaultdict(set)

    # Walk every cross-entity residue pair of the contact summary.
    for key1, contacts1 in contact_summary.items():
        for entry1 in contacts1:
            res1 = resolve(key1, entry1)
            if res1 is None:
                continue
            for key2, contacts2 in contact_summary.items():
                if key1 == key2:
                    continue
                for entry2 in contacts2:
                    res2 = resolve(key2, entry2)
                    if res2 is None:
                        continue
                    for _hit in polar_hits(res1, res2):
                        polar_summary[key1].add(residue_tag(res1))
                        polar_summary[key2].add(residue_tag(res2))

    def chain_then_number(tag):
        return (tag[0], int(tag[1]))

    polar_contact_summary_final = {}
    for key, res_set in polar_summary.items():
        polar_contact_summary_final[key] = [
            {'chain': c, 'resi': r, 'resn': n}
            for (c, r, n) in sorted(res_set, key=chain_then_number)
        ]
    return polar_contact_summary_final
|
| 262 |
+
|
| 263 |
+
|
| 264 |
|
| 265 |
def set_default_styles(viewer, summary, entity_color_dict,
|
| 266 |
add_label=False):
|
|
|
|
| 269 |
label_style = {
|
| 270 |
'fontOpacity':1,
|
| 271 |
'backgroundColor': 'black',
|
| 272 |
+
'fontColor': generate_color_bright(entity),
|
| 273 |
'fontSize': 10,
|
| 274 |
}
|
| 275 |
# 只处理蛋白、DNA、RNA等链,不处理ligand等小分子
|
|
|
|
| 279 |
viewer.setStyle({'chain': chain_id}, {'cartoon': {'color': color}})
|
| 280 |
if add_label:
|
| 281 |
viewer.addLabel(entity, label_style, {'chain':chain_id})
|
| 282 |
+
# 处理离子 (entity 后缀有 (ion))
|
| 283 |
+
elif '(ion)' in entity.lower():
|
| 284 |
+
element = entity.split()[0].upper() # 提取元素符号,如 MG, NA
|
| 285 |
+
for entry in summary.get(entity, []):
|
| 286 |
+
# 有的PDB离子的 chain 是空字符,需要处理
|
| 287 |
+
chain = entry.get('chain', '').strip()
|
| 288 |
+
resi = entry.get('resi', '').strip()
|
| 289 |
+
# 用 sphere 表示离子
|
| 290 |
+
sel = {'resi': resi}
|
| 291 |
+
if chain:
|
| 292 |
+
sel['chain'] = chain
|
| 293 |
+
viewer.setStyle(sel, {'sphere': {'color': color, 'radius': 2.0}})
|
| 294 |
+
if add_label:
|
| 295 |
+
viewer.addLabel(entity, label_style, sel)
|
| 296 |
else:
|
| 297 |
# 例如 ligand,stick 显示
|
| 298 |
for entry in summary[entity]:
|
|
|
|
| 319 |
{'chain': 'A', 'resn': 'ASP', 'resi': '40'},
|
| 320 |
]
|
| 321 |
"""
|
| 322 |
+
color = generate_color_high(name)
|
| 323 |
+
label_color = generate_color_dark(name)
|
| 324 |
+
background_color = generate_color_low(name)
|
| 325 |
label_style = {
|
| 326 |
'fontOpacity':1,
|
| 327 |
'showBackground': True,
|