"""Gradio front end for single- and multi-structure contact analysis.

NOTE(review): this file was re-formatted from a whitespace-collapsed copy.
Two things could not be recovered with certainty and are flagged inline:
the HTML markup inside the string literals of ``render_html`` and the exact
nesting of the layout containers in the ``gr.Blocks`` section.
"""
import html
import io
import random
import zipfile
import gradio as gr
import py3Dmol
import json
import os
from tempfile import TemporaryDirectory
from Bio.PDB import PDBList
from utils import *
from multiana import *
# Kept after the wildcard imports on purpose: the last binding wins, so
# `datetime` is guaranteed to be the stdlib module used below.
import datetime

default_file = "static/test.pdb"
TEMP_DIR = "static/tmp/"
os.makedirs(TEMP_DIR, exist_ok=True)


def render_structure(structure_str, summary, entity_color_dict, add_label=True):
    """Build a py3Dmol view for a structure and apply the default styles.

    Args:
        structure_str: Structure file content passed to ``view.addModel``.
        summary: Entity summary forwarded to ``set_default_styles``.
        entity_color_dict: Entity colours forwarded to ``set_default_styles``.
        add_label: Forwarded to ``set_default_styles`` as ``add_label``.

    Returns:
        The configured ``py3Dmol.view`` object.
    """
    view = py3Dmol.view(width=233, height=233)
    # No format is given; it seems to be detected automatically.
    view.addModel(structure_str)
    set_default_styles(view, summary, entity_color_dict, add_label=add_label)
    view.zoomTo()
    return view


def render_html(view, entity_color_dict):
    """Turn a py3Dmol view plus a colour legend into HTML.

    The viewer HTML is resized by replacing the fixed 233px dimensions set in
    ``render_structure``. The combined content is written to
    ``TEMP_DIR/structure_view.html`` and ``html_framework`` is returned.

    NOTE(review): the string literals below contain no markup in the copy of
    the file this was re-formatted from (e.g. ``color`` is never used and
    ``html_framework`` is empty). Wrapper tags appear to have been lost;
    restore them from the canonical source before relying on this function.
    """
    output = view._make_html().replace(
        "height: 233px;", "height: 700px; max-height: 100%;"
    ).replace(
        "width: 233px;", "width: 100%;"
    )

    # Build the legend.
    legend_items = "".join([
        f"\n"
        f""
        f"{label}\n"
        for label, color in entity_color_dict.items()
    ])
    legend_html = f"\n{legend_items}\n".replace("'", '"')

    # HTML-escape both the viewer output and the legend.
    escaped_output = html.escape(output)
    escaped_legend_html = html.escape(legend_html)

    # Assemble the complete HTML content.
    html_content = f"""
{escaped_output}
{escaped_legend_html}"""

    html_framework = f""""""

    # Save the HTML content to a static file. The encoding is explicit so
    # non-ASCII labels do not depend on the platform default encoding.
    with open(os.path.join(TEMP_DIR, "structure_view.html"), "w", encoding="utf-8") as f:
        f.write(html_content)

    return html_framework


def analyze_contacts(selected_str, cutoff, structure_cache):
    """Highlight contact residues between the selected entities.

    Args:
        selected_str: Selected entity keys (wired to the multiselect dropdown).
        cutoff: Distance cutoff forwarded to ``extract_contact_residues``.
        structure_cache: Dict with ``summary``, ``structure_str`` and
            ``entity_color_dict`` entries.

    Returns:
        ``(html, report)`` where ``report`` maps each result key to a list of
        ``resn + resi`` strings; or ``(gr.update(), message)`` when fewer than
        two entities are selected.
    """
    # Treat a missing selection like an empty one instead of failing on len().
    keys = selected_str or []
    if len(keys) < 2:
        debug_text = "请至少选择两个实体进行分析"
        return gr.update(), debug_text

    summary = structure_cache["summary"]
    structure_str = structure_cache["structure_str"]
    entity_color_dict = structure_cache["entity_color_dict"]

    result = extract_contact_residues(summary, keys, cutoff)
    view = render_structure(structure_str, summary, entity_color_dict, add_label=False)
    for name, residue_list in result.items():
        highlight_residues(view, residue_list, name=name)
    flush_html = render_html(view, entity_color_dict)

    report = {k: [x['resn'] + str(x['resi']) for x in v] for k, v in result.items()}
    return flush_html, report


def load_structure(file_path):
    """Read a structure file and return the values for the three UI outputs.

    Returns:
        ``(html, dropdown, structure_dict)``: rendered HTML, a dropdown whose
        choices are the keys of the parsed summary, and the structure dict
        returned by ``read_file``.
    """
    structure_str, summary, entity_color_dict, structure_dict = read_file(file_path)
    view = render_structure(structure_str, summary, entity_color_dict)
    html_out = render_html(view, entity_color_dict)
    dropdown = gr.Dropdown(
        label="选择实体",
        choices=list(summary.keys()),
        interactive=True,
        value=[],
    )
    return html_out, dropdown, structure_dict


def update_selected(selected, current):
    """Append ``selected`` to the ``"; "``-joined string ``current``.

    Returns ``current`` unchanged when ``selected`` is already one of its
    entries.
    """
    # Compare against whole entries; a plain `selected in current` would be a
    # substring test and wrongly treat "A" as present in "AB; C".
    entries = current.split("; ") if current else []
    if selected in entries:
        return current
    current = current + "; " + selected if current else selected
    return current


def delete_selected(selected, current):
    """Remove every entry equal to ``selected`` from the ``"; "``-joined string."""
    current = "; ".join([s for s in current.split("; ") if s != selected])
    return current


def clear_selected():
    """Return an empty selection string."""
    return ""


def handle_file_upload(file):
    """Load an uploaded structure file, or leave the UI untouched if none.

    ``file`` may be a path string or an object exposing ``.name``; which one
    Gradio passes depends on the File component's ``type`` and Gradio version.
    """
    if file:
        file_path = file if isinstance(file, str) else file.name
        return load_structure(file_path)
    # No file: keep the current state of all three outputs.
    return gr.update(), gr.update(), gr.update()


def handle_pdb_id_input(pdb_id):
    """Download a PDB entry by ID and load it.

    Returns:
        The ``load_structure`` triple on success; on any failure an error
        message for the HTML output plus two no-op updates.
    """
    try:
        pdb_id = pdb_id.strip().lower()
        pdbl = PDBList()
        # The download lives in a temporary directory that is removed on exit,
        # so the structure must be loaded while the directory still exists.
        with TemporaryDirectory() as temp_dir:
            pdbl.retrieve_pdb_file(pdb_id, pdir=temp_dir, file_format='pdb')
            pdb_file_path = os.path.join(temp_dir, f"pdb{pdb_id}.ent")
            html_out, dd, structure_dict = load_structure(pdb_file_path)
        return html_out, dd, structure_dict
    except Exception as e:
        # UI boundary: report the failure instead of crashing the handler.
        error_message = f"获取PDB ID {pdb_id} 失败 {e}"
        return error_message, gr.update(), gr.update()


def render_cache(structure_cache):
    """Re-render the cached structure with default styles (labels enabled)."""
    summary = structure_cache["summary"]
    structure_str = structure_cache["structure_str"]
    entity_color_dict = structure_cache["entity_color_dict"]
    view = render_structure(structure_str, summary, entity_color_dict)
    html_out = render_html(view, entity_color_dict)
    return html_out


# Multi-structure analysis
def multi_uniprot(uniprot_id, pdb_num):
    """Download a random sample of PDB entries for a UniProt ID and zip them.

    Args:
        uniprot_id: UniProt accession; surrounding whitespace is stripped.
        pdb_num: Upper bound on the number of PDB entries to sample.

    Returns:
        ``(zip_file_path, sequence, archived_ids)`` where ``archived_ids``
        lists the sampled IDs whose file was actually added to the archive.
    """
    uniprot_id = uniprot_id.strip()
    print(f"Fetching structures for UniProt ID: {uniprot_id} with limit {pdb_num}")
    sequence, pdb_list = get_uniprot_info(uniprot_id)

    # Randomly pick at most pdb_num PDB IDs. random.sample needs an integer
    # count, and a UI slider may deliver a float.
    sample_size = min(int(pdb_num), len(pdb_list))
    selected_pdb_ids = random.sample(pdb_list, sample_size)

    pdbl = PDBList()
    print("Zipping PDB files")
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    zip_file_path = os.path.join(TEMP_DIR, f"{uniprot_id}_structures_{timestamp}.zip")

    archived_ids = []
    with TemporaryDirectory() as group_path:
        for pdb_id in selected_pdb_ids:
            pdbl.retrieve_pdb_file(pdb_id, pdir=group_path, file_format='pdb')

        # Zip all PDB files that were downloaded.
        with zipfile.ZipFile(zip_file_path, 'w') as z:
            for pdb_id in selected_pdb_ids:
                pdb_file_path = os.path.join(group_path, f"pdb{pdb_id.lower()}.ent")
                # A download can leave no file behind (e.g. the entry is not
                # available in PDB format); skip it instead of failing the
                # whole batch with FileNotFoundError.
                if not os.path.isfile(pdb_file_path):
                    print(f"Skipping {pdb_id}: no PDB file was downloaded")
                    continue
                z.write(pdb_file_path, arcname=os.path.basename(pdb_file_path))
                archived_ids.append(pdb_id)

    # Return the ZIP file path, the sequence and the archived IDs.
    return zip_file_path, sequence, archived_ids


def multi_zip(zip_file, seq_input, multi_cutoff, identity_threshold,
              target_entity_keys=None, cnt_by_file=True):
    """Analyse every structure in a ZIP archive against a target sequence.

    Args:
        zip_file: Path to a ZIP file (deleted after extraction) or the raw
            bytes of one; ``None`` is rejected.
        seq_input: Sequence text, normalised with ``sequence_fix``.
        multi_cutoff: Forwarded to ``analyze_group`` as ``cutoff``.
        identity_threshold: Percentage; divided by 100 and forwarded as
            ``match_ratio``.
        target_entity_keys: Forwarded to ``analyze_group``.
        cnt_by_file: Forwarded to ``analyze_group``.

    Returns:
        ``(svg_html, result)`` on success, or ``(gr.update(value=msg), None)``
        when the archive or the sequence is invalid.
    """
    # 1. Extract the ZIP file into a temporary directory.
    if zip_file is None:
        return gr.update(value="无效的ZIP文件"), None

    with TemporaryDirectory() as group_path:
        try:
            if isinstance(zip_file, str):
                # A file path (produced by multi_uniprot).
                with zipfile.ZipFile(zip_file, 'r') as z:
                    z.extractall(group_path)
                # Delete the zip file after extraction.
                os.remove(zip_file)
            else:
                # Raw bytes (user upload).
                zip_bytes = io.BytesIO(zip_file)
                with zipfile.ZipFile(zip_bytes, 'r') as z:
                    z.extractall(group_path)
        except zipfile.BadZipFile:
            # A malformed archive gets the same message as a missing one.
            return gr.update(value="无效的ZIP文件"), None

        seq_fixed = sequence_fix(seq_input)
        if not seq_fixed:
            return gr.update(value="无效的序列格式,请输入有效的FASTA或纯氨基酸序列"), None

        # analyze_group reads from group_path, so it must run before the
        # temporary directory is cleaned up.
        result = analyze_group(
            group_path,
            seq_fixed,
            cutoff=multi_cutoff,
            match_ratio=identity_threshold / 100,
            target_entity_keys=target_entity_keys,
            cnt_by_file=cnt_by_file
        )
        svg_html = logo_plot(seq_fixed, result)
        return svg_html, result


# NOTE(review): the nesting of the layout containers below was reconstructed
# from a whitespace-collapsed copy; component definitions and event wiring are
# unchanged, but confirm the visual layout against the canonical source.
with gr.Blocks() as demo:
    gr.HTML(get_text_content("static/gr_head.html"))
    gr.HTML(get_text_content("static/gr_info.html"))

    structure_cache = gr.State(value={"structure_str": None, "summary": None, "entity_color_dict": None})
    multi_result_cache = gr.State(value=None)
    zip_cache = gr.State(value=None)

    # Single-structure analysis
    with gr.Tab("Single Structure"):
        output = gr.HTML()
        with gr.Row():
            with gr.Column(scale=1):
                # TODO: add support for CIF files
                pdb_input = gr.Textbox(
                    label="输入 PDB ID 获取结构",
                    placeholder="Input PDB ID",
                    interactive=True
                )
                pdb_btn = gr.Button("获取结构")
                file_input = gr.File(label="或直接上传 PDB 文件", file_types=[".pdb", ".cif", ".ent"])
            with gr.Column(scale=2):
                with gr.Row():
                    entity_selector = gr.Dropdown(choices=[], interactive=True, multiselect=True, label="选择实体", scale=2)
                    cutoff_slider = gr.Slider(1, 10, value=3.5, step=0.5, label="Cutoff 距离 (Å)", scale=1)
                run_btn = gr.Button("分析并渲染", variant="primary")
                cln_btn = gr.Button("还原模型")

    # Multi-structure analysis
    with gr.Tab("Multi Structure"):
        multi_logo = gr.HTML(MULTI_HTML_HOLDER)
        with gr.Row():
            with gr.Column():
                with gr.Tab("从 UniProt 获取"):
                    uniprot_input = gr.Textbox(
                        label="输入 UniProt ID 获取结构",
                        placeholder="Input UniProt ID",
                        interactive=True,
                        scale=2,
                    )
                    with gr.Row():
                        pdb_num_slider = gr.Slider(1, 100, value=10, step=1, label="获取 PDB 数量上限(按设定数量随机采样)", interactive=True, scale=2)
                        uniprot_btn = gr.Button("抓取蛋白数据", variant="primary", scale=1)
                with gr.Tab("手动上传结构压缩文件"):
                    zip_input = gr.File(
                        label="上传包含 .pdb/.ent/.cif 的 zip 压缩文件",
                        file_types=[".zip"],
                        type="binary",
                        scale=1,
                    )
                seq_input = gr.Textbox(
                    label="目标蛋白质序列",
                    placeholder="上传文件时需手动输入 FASTA 格式序列或纯氨基酸序列...",
                    lines=8,
                    scale=3,
                )
            with gr.Group():
                mult_target_selector = gr.Dropdown(
                    value=['Ligand'],
                    choices=['Ligand', 'Protein', 'DNA', 'RNA', 'Ion'],
                    label="选择互作对象类型(可多选,无选择则统计全部)",
                    multiselect=True,
                    interactive=True,
                )
                with gr.Row():
                    multi_cutoff_slider = gr.Slider(1, 10, value=3.5, step=0.5, label="Cutoff 距离 (Å)", interactive=True, scale=3)
                    cnt_checkbox = gr.Checkbox(label="Yes", info="单文件内不重复统计位点", value=True, interactive=True)
                identity_threshold = gr.Slider(0, 100, value=80, step=5, label="序列一致性阈值 (%)", interactive=True)
                multi_run_btn = gr.Button("开始分析", variant="primary")

    debug_text = gr.Textbox(label="调试信息", interactive=False)

    # Single-structure analysis: event wiring
    run_btn.click(
        fn=analyze_contacts,
        inputs=[entity_selector, cutoff_slider, structure_cache],
        outputs=[output, debug_text]
    )
    cln_btn.click(
        fn=render_cache,
        inputs=[structure_cache],
        outputs=[output]
    )
    file_input.change(
        fn=handle_file_upload,
        inputs=file_input,
        outputs=[output, entity_selector, structure_cache]
    )
    pdb_btn.click(
        fn=handle_pdb_id_input,
        inputs=pdb_input,
        outputs=[output, entity_selector, structure_cache]
    )
    demo.load(
        fn=lambda: load_structure(default_file),
        inputs=[],
        outputs=[output, entity_selector, structure_cache]
    )

    # Multi-structure analysis: event wiring
    multi_run_btn.click(
        fn=multi_zip,
        inputs=[zip_input, seq_input, multi_cutoff_slider, identity_threshold, mult_target_selector, cnt_checkbox],
        outputs=[multi_logo, multi_result_cache]
    )
    uniprot_btn.click(
        fn=multi_uniprot,
        inputs=[uniprot_input, pdb_num_slider],
        outputs=[zip_input, seq_input, debug_text]
    )

demo.launch()