import html
import io
import random
import zipfile
import gradio as gr
import py3Dmol
import json
import os
from tempfile import TemporaryDirectory
from Bio.PDB import PDBList
from utils import *
from multiana import *
import datetime
# Directory for generated artefacts (rendered HTML, zipped structure downloads).
TEMP_DIR = "static/tmp/"
# Structure file rendered when the app is first loaded.
default_file = "static/test.pdb"
# Make sure the artefact directory exists before any handler writes to it.
os.makedirs(TEMP_DIR, exist_ok=True)
def render_structure(structure_str, summary, entity_color_dict, add_label=True):
    """Create a py3Dmol viewer for a structure and apply the default styling.

    Args:
        structure_str: Structure file contents handed to ``addModel``.
        summary: Entity summary forwarded to ``set_default_styles``.
        entity_color_dict: Entity colour mapping forwarded to
            ``set_default_styles``.
        add_label: Forwarded to ``set_default_styles`` as ``add_label``.

    Returns:
        The configured ``py3Dmol.view`` object, zoomed to the model.
    """
    # render_html later rewrites these exact 233px dimensions in the markup.
    canvas_size = {"width": 233, "height": 233}
    viewer = py3Dmol.view(**canvas_size)
    # No format is specified; it appears to be detected automatically.
    viewer.addModel(structure_str)
    set_default_styles(viewer, summary, entity_color_dict, add_label=add_label)
    viewer.zoomTo()
    return viewer
# Turn a py3Dmol view into HTML, build a colour legend from entity_color_dict,
# write the combined (HTML-escaped) markup to TEMP_DIR/structure_view.html and
# return `html_framework`.
# NOTE(review): several string literals in this function (the legend_items
# pieces, legend_html, html_content, html_framework) look truncated in this
# copy -- the HTML markup they presumably contained is missing and some quotes
# are unbalanced. Restore them from the original source before relying on it.
def render_html(view, entity_color_dict):
# Swap the fixed 233px sizing emitted by the viewer for a 700px-high,
# full-width layout.
output = view._make_html().replace(
"height: 233px;",
"height: 700px; max-height: 100%;"
).replace(
"width: 233px;",
"width: 100%;"
)
# Build the legend: one entry per (label, color) pair.
legend_items = "".join([
f"
"
f""
f"{label}
"
for label, color in entity_color_dict.items()
])
legend_html = f"{legend_items}
".replace("'", '"')
# HTML-escape both the viewer output and the legend markup.
escaped_output = html.escape(output)
escaped_legend_html = html.escape(legend_html)
# Assemble the complete HTML content.
html_content = f"""{escaped_output}{escaped_legend_html}"""
html_framework = f""""""
# Save the HTML content to a static file under TEMP_DIR.
with open(os.path.join(TEMP_DIR, "structure_view.html"), "w") as f:
f.write(html_content)
return html_framework
def analyze_contacts(selected_str, cutoff, structure_cache):
    """Highlight contact residues between the selected entities.

    Args:
        selected_str: Selected entity keys; at least two are required.
        cutoff: Distance cutoff forwarded to ``extract_contact_residues``.
        structure_cache: Mapping with the keys ``"summary"``,
            ``"structure_str"`` and ``"entity_color_dict"``.

    Returns:
        ``(html, report)`` where ``report`` maps each result name to a list of
        ``resn`` + ``resi`` strings. When fewer than two entities are selected,
        returns ``(gr.update(), message)`` so the current view is kept.
    """
    if len(selected_str) < 2:
        return gr.update(), "请至少选择两个实体进行分析"

    summary = structure_cache["summary"]
    structure_str = structure_cache["structure_str"]
    entity_color_dict = structure_cache["entity_color_dict"]

    contacts = extract_contact_residues(summary, selected_str, cutoff)

    # Re-render without labels, then overlay the contact residues per entry.
    view = render_structure(structure_str, summary, entity_color_dict, add_label=False)
    for entry_name, residues in contacts.items():
        highlight_residues(view, residues, name=entry_name)
    flush_html = render_html(view, entity_color_dict)

    report = {}
    for entry_name, residues in contacts.items():
        report[entry_name] = [res['resn'] + str(res['resi']) for res in residues]
    return flush_html, report
def load_structure(file_path):
    """Read a structure file and produce the outputs for the single-structure tab.

    Args:
        file_path: Path handed to ``read_file``.

    Returns:
        A 3-tuple ``(html, dropdown, structure_dict)``: the rendered HTML, a
        ``gr.Dropdown`` listing the keys of the parsed summary with nothing
        selected, and the structure dictionary returned by ``read_file``.
    """
    parsed = read_file(file_path)
    structure_str, summary, entity_color_dict, structure_dict = parsed

    viewer = render_structure(structure_str, summary, entity_color_dict)
    html_out = render_html(viewer, entity_color_dict)

    entity_dropdown = gr.Dropdown(
        label="选择实体",
        choices=list(summary.keys()),
        interactive=True,
        value=[],
    )
    return html_out, entity_dropdown, structure_dict
def update_selected(selected, current):
    """Append ``selected`` to a "; "-separated selection string if it is absent.

    Membership is checked against the individual "; "-separated items (the
    same format ``delete_selected`` splits on), not as a substring of the
    whole string, so "A" is not mistaken for being present in "AB; C".

    Args:
        selected: Item to add.
        current: Existing "; "-separated selection; may be empty or ``None``.

    Returns:
        The updated selection string.
    """
    if not current:
        # Nothing selected yet: the new item is the whole selection.
        return selected
    if selected in current.split("; "):
        return current
    return current + "; " + selected
def delete_selected(selected, current):
    """Remove every occurrence of ``selected`` from a "; "-separated string.

    Args:
        selected: Item to drop.
        current: "; "-separated selection string.

    Returns:
        The selection string without the dropped item.
    """
    remaining = filter(lambda item: item != selected, current.split("; "))
    return "; ".join(remaining)
def clear_selected():
    """Return the empty selection string."""
    empty_selection = ""
    return empty_selection
def handle_file_upload(file):
    """Load the uploaded structure file, or leave the UI untouched.

    Args:
        file: Uploaded file object exposing its path as ``.name``; falsy when
            nothing is uploaded.

    Returns:
        The 3-tuple from ``load_structure``, or three ``gr.update()`` values
        when there is no file.
    """
    if not file:
        # No file: keep the current state of all three outputs.
        return gr.update(), gr.update(), gr.update()
    return load_structure(file.name)
def handle_pdb_id_input(pdb_id):
    """Download a structure by PDB ID and render it.

    Args:
        pdb_id: PDB identifier typed by the user; surrounding whitespace is
            stripped and the ID is lower-cased before use.

    Returns:
        The 3-tuple from ``load_structure`` on success. On any exception,
        ``(error_message, gr.update(), gr.update())``.
    """
    try:
        pdb_id = pdb_id.strip().lower()
        downloader = PDBList()
        # The download lives in a temporary folder that is removed as soon as
        # the structure has been loaded.
        with TemporaryDirectory() as temp_dir:
            downloader.retrieve_pdb_file(pdb_id, pdir=temp_dir, file_format='pdb')
            pdb_file_path = os.path.join(temp_dir, f"pdb{pdb_id}.ent")
            loaded = load_structure(pdb_file_path)
        html_out, dd, structure_dict = loaded
    except Exception as e:
        return f"获取PDB ID {pdb_id} 失败 {e}", gr.update(), gr.update()
    return html_out, dd, structure_dict
def render_cache(structure_cache):
    """Re-render the cached structure with its default styling.

    Args:
        structure_cache: Mapping with the keys ``"summary"``,
            ``"structure_str"`` and ``"entity_color_dict"``.

    Returns:
        The HTML produced by ``render_html`` for the cached structure.
    """
    summary = structure_cache["summary"]
    structure_str = structure_cache["structure_str"]
    palette = structure_cache["entity_color_dict"]
    viewer = render_structure(structure_str, summary, palette)
    return render_html(viewer, palette)
# Multi-structure analysis
def multi_uniprot(uniprot_id, pdb_num):
    """Download a random sample of PDB structures for a UniProt entry and zip them.

    Args:
        uniprot_id: UniProt accession typed by the user (whitespace stripped).
        pdb_num: Upper bound on the number of PDB entries to sample.

    Returns:
        ``(zip_file_path, sequence, pdb_ids)``: the path of the ZIP archive
        written under ``TEMP_DIR``, the sequence from ``get_uniprot_info``, and
        the sampled PDB IDs whose files were actually added to the archive.
    """
    uniprot_id = uniprot_id.strip()
    print(f"Fetching structures for UniProt ID: {uniprot_id} with limit {pdb_num}")
    sequence, pdb_list = get_uniprot_info(uniprot_id)

    # Randomly pick at most pdb_num PDB IDs. random.sample requires a
    # non-negative integer count, so coerce the UI value defensively.
    sample_size = max(0, min(int(pdb_num), len(pdb_list)))
    selected_pdb_ids = random.sample(pdb_list, sample_size)

    # The ID comes from a free-text box; keep only characters that are safe in
    # a file name so it cannot introduce path separators.
    safe_id = "".join(ch for ch in uniprot_id if ch.isalnum() or ch in "-_") or "uniprot"
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    zip_file_path = os.path.join(TEMP_DIR, f"{safe_id}_structures_{timestamp}.zip")

    pdbl = PDBList()
    zipped_pdb_ids = []
    with TemporaryDirectory() as group_path:
        for pdb_id in selected_pdb_ids:
            pdbl.retrieve_pdb_file(pdb_id, pdir=group_path, file_format='pdb')

        print("Zipping PDB files")
        with zipfile.ZipFile(zip_file_path, 'w') as z:
            for pdb_id in selected_pdb_ids:
                pdb_file_path = os.path.join(group_path, f"pdb{pdb_id.lower()}.ent")
                # A failed download leaves no file behind; skip it instead of
                # letting z.write abort the whole batch.
                if not os.path.isfile(pdb_file_path):
                    print(f"Skipping {pdb_id}: no PDB-format file was downloaded")
                    continue
                z.write(pdb_file_path, arcname=os.path.basename(pdb_file_path))
                zipped_pdb_ids.append(pdb_id)

    # Return the ZIP file path, the sequence and the IDs that were archived.
    return zip_file_path, sequence, zipped_pdb_ids
def multi_zip(zip_file, seq_input,
              multi_cutoff, identity_threshold,
              target_entity_keys=None,
              cnt_by_file=True):
    """Analyse a ZIP archive of structures against a target sequence.

    Args:
        zip_file: Either a path to a ZIP file (deleted after extraction) or
            the raw bytes of an uploaded ZIP file. ``None`` is rejected.
        seq_input: Target sequence text, normalised with ``sequence_fix``.
        multi_cutoff: Distance cutoff forwarded to ``analyze_group``.
        identity_threshold: Sequence identity threshold in percent; divided
            by 100 before being passed as ``match_ratio``.
        target_entity_keys: Forwarded to ``analyze_group``.
        cnt_by_file: Forwarded to ``analyze_group``.

    Returns:
        ``(svg_html, result)`` on success, or ``(gr.update(value=message),
        None)`` when the archive or the sequence is invalid.
    """
    if zip_file is None:
        return gr.update(value="无效的ZIP文件"), None

    # 1. Extract the archive into a temporary folder.
    with TemporaryDirectory() as group_path:
        try:
            if isinstance(zip_file, str):
                # A file path (e.g. produced by multi_uniprot).
                with zipfile.ZipFile(zip_file, 'r') as z:
                    z.extractall(group_path)
                # Delete the zip file after extraction.
                os.remove(zip_file)
            else:
                # Raw bytes from a user upload.
                with zipfile.ZipFile(io.BytesIO(zip_file), 'r') as z:
                    z.extractall(group_path)
        except zipfile.BadZipFile:
            # Corrupt or non-ZIP content: report it instead of crashing.
            return gr.update(value="无效的ZIP文件"), None

        # 2. Validate and normalise the target sequence.
        seq_fixed = sequence_fix(seq_input)
        if not seq_fixed:
            return gr.update(value="无效的序列格式,请输入有效的FASTA或纯氨基酸序列"), None

        # 3. Run the group analysis while the extracted files still exist.
        result = analyze_group(
            group_path,
            seq_fixed,
            cutoff=multi_cutoff,
            match_ratio=identity_threshold / 100,
            target_entity_keys=target_entity_keys,
            cnt_by_file=cnt_by_file
        )
        svg_html = logo_plot(seq_fixed, result)
    return svg_html, result
# Gradio UI definition: layout of the two analysis tabs followed by the event
# wiring that connects widgets to the handler functions above.
# NOTE(review): this copy of the block carries no indentation, so the nesting
# of the layout `with` blocks cannot be determined from here -- confirm it
# against the original file.
with gr.Blocks() as demo:
gr.HTML(get_text_content("static/gr_head.html"))
gr.HTML(get_text_content("static/gr_info.html"))
# Per-session state shared between handlers.
structure_cache = gr.State(value={"structure_str": None, "summary": None, "entity_color_dict": None})
multi_result_cache = gr.State(value=None)
zip_cache = gr.State(value=None)
# Single-structure analysis
with gr.Tab("Single Structure"):
output = gr.HTML()
with gr.Row():
with gr.Column(scale=1):
# TODO: add support for CIF files
pdb_input = gr.Textbox(
label="输入 PDB ID 获取结构",
placeholder="Input PDB ID",
interactive=True
)
pdb_btn = gr.Button("获取结构")
file_input = gr.File(label="或直接上传 PDB 文件", file_types=[".pdb", ".cif", ".ent"])
with gr.Column(scale=2):
with gr.Row():
entity_selector = gr.Dropdown(choices=[], interactive=True, multiselect=True, label="选择实体", scale=2)
cutoff_slider = gr.Slider(1, 10, value=3.5, step=0.5, label="Cutoff 距离 (Å)", scale=1)
run_btn = gr.Button("分析并渲染", variant="primary")
cln_btn = gr.Button("还原模型")
# Multi-structure analysis
with gr.Tab("Multi Structure"):
multi_logo = gr.HTML(MULTI_HTML_HOLDER)
with gr.Row():
with gr.Column():
with gr.Tab("从 UniProt 获取"):
uniprot_input = gr.Textbox(
label="输入 UniProt ID 获取结构",
placeholder="Input UniProt ID",
interactive=True,
scale=2,
)
with gr.Row():
pdb_num_slider = gr.Slider(1, 100, value=10, step=1, label="获取 PDB 数量上限(按设定数量随机采样)", interactive=True, scale=2)
uniprot_btn = gr.Button("抓取蛋白数据", variant="primary", scale=1)
with gr.Tab("手动上传结构压缩文件"):
zip_input = gr.File(
label="上传包含 .pdb/.ent/.cif 的 zip 压缩文件",
file_types=[".zip"],
type="binary",
scale=1,
)
seq_input = gr.Textbox(
label="目标蛋白质序列",
placeholder="上传文件时需手动输入 FASTA 格式序列或纯氨基酸序列...",
lines=8,
scale=3,
)
with gr.Group():
mult_target_selector = gr.Dropdown(
value=['Ligand'],
choices=['Ligand', 'Protein', 'DNA', 'RNA', 'Ion'],
label="选择互作对象类型(可多选,无选择则统计全部)",
multiselect=True,
interactive=True,
)
with gr.Row():
multi_cutoff_slider = gr.Slider(1, 10, value=3.5, step=0.5, label="Cutoff 距离 (Å)", interactive=True, scale=3)
cnt_checkbox = gr.Checkbox(label="Yes", info="单文件内不重复统计位点", value=True, interactive=True)
identity_threshold = gr.Slider(0, 100, value=80, step=5, label="序列一致性阈值 (%)", interactive=True)
multi_run_btn = gr.Button("开始分析", variant="primary")
debug_text = gr.Textbox(label="调试信息", interactive=False)
# Single-structure analysis: event wiring
# Run the contact analysis on the selected entities and show the report.
run_btn.click(
fn=analyze_contacts,
inputs=[entity_selector, cutoff_slider, structure_cache],
outputs=[output, debug_text]
)
# Re-render the cached structure with default styling.
cln_btn.click(
fn=render_cache,
inputs=[structure_cache],
outputs=[output]
)
# Load a structure whenever the uploaded file changes.
file_input.change(
fn=handle_file_upload,
inputs=file_input,
outputs=[output, entity_selector, structure_cache]
)
# Fetch a structure by PDB ID.
pdb_btn.click(
fn=handle_pdb_id_input,
inputs=pdb_input,
outputs=[output, entity_selector, structure_cache]
)
# Show the bundled default structure when the page loads.
demo.load(
fn=lambda: load_structure(default_file),
inputs=[],
outputs=[output, entity_selector, structure_cache]
)
# Multi-structure analysis: event wiring
# Analyse the ZIP archive against the target sequence.
multi_run_btn.click(
fn=multi_zip,
inputs=[zip_input, seq_input, multi_cutoff_slider, identity_threshold, mult_target_selector, cnt_checkbox],
outputs=[multi_logo, multi_result_cache]
)
# Fetch structures for a UniProt ID and fill in the ZIP and sequence inputs.
uniprot_btn.click(
fn=multi_uniprot,
inputs=[uniprot_input, pdb_num_slider],
outputs=[zip_input, seq_input, debug_text]
)
# Start the Gradio server.
demo.launch()