"""Structure parsing, contact analysis and 3Dmol styling helpers.

The module classifies the residues of a PDB/mmCIF structure into named
entities (protein/DNA/RNA chains, peptides, ligands, ions), derives
deterministic colours for those entities, finds inter-entity contacts and
applies default/highlight styles to a py3Dmol-like ``viewer`` object.
"""

from collections import defaultdict
import colorsys
import hashlib
from io import StringIO
from tempfile import NamedTemporaryFile  # kept: may be imported from here elsewhere

import numpy as np
from Bio.PDB import MMCIFParser, PDBParser, PDBIO
from Bio.PDB.DSSP import DSSP
from Bio.PDB.NeighborSearch import NeighborSearch

NUCLEIC_ACIDS = {
    "A", "G", "C", "U", "T",
    "DA", "DG", "DC", "DT", "DU",
}

AMINO_ACIDS = {
    "ALA", "ARG", "ASN", "ASP", "CYS",
    "GLN", "GLU", "GLY", "HIS", "ILE",
    "LEU", "LYS", "MET", "PHE", "PRO",
    "SER", "THR", "TRP", "TYR", "VAL",
    "SEC", "PYL",
}

# Residue names of HETATM records that are reported as ions.
_ION_RESNAMES = frozenset(
    {"NA", "CL", "K", "CA", "MG", "ZN", "FE", "MN", "CU", "CO"}
)


def find_donor_hydrogens(atom, max_bond_length=1.3):
    """Return the hydrogens of ``atom``'s residue that are bonded to ``atom``.

    Only N and O atoms are treated as potential donors; any other element
    yields an empty list.

    :param atom: atom exposing ``element``, ``coord`` and ``get_parent()``.
    :param max_bond_length: largest donor-hydrogen distance (same unit as the
        coordinates) still counted as a covalent bond. Pass ``None`` to
        return every hydrogen of the residue (the historical behaviour).
    :return: list of hydrogen atoms attached to ``atom``.
    """
    if atom.element not in {'N', 'O'}:
        return []

    hydrogens = [
        candidate
        for candidate in atom.get_parent()
        if candidate.element == 'H'
    ]
    if max_bond_length is None:
        return hydrogens

    # Without this filter every H of the residue would be paired with every
    # N/O of the residue, which makes the D-H...A angle test meaningless.
    return [
        hydrogen
        for hydrogen in hydrogens
        if np.linalg.norm(hydrogen.coord - atom.coord) <= max_bond_length
    ]


def is_acceptor(atom):
    """Return True when ``atom`` is an N or O (potential H-bond acceptor)."""
    return atom.element in {'N', 'O'}


def calculate_angle(atom1, atom2, atom3):
    """Return the angle atom1-atom2-atom3 in degrees (vertex at ``atom2``).

    Returns NaN when either arm has zero length (coincident atoms), because
    the angle is undefined in that case.
    """
    v1 = atom1.coord - atom2.coord
    v2 = atom3.coord - atom2.coord
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        return float("nan")
    # Clip to guard against rounding pushing the cosine outside [-1, 1].
    cos_theta = np.clip(np.dot(v1, v2) / norm_product, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))


def get_text_content(file_path="static/gr_head.md"):
    """Return the UTF-8 text content of ``file_path``."""
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()


def generate_color(name, lightness=0.5, saturation=0.9):
    """Return a deterministic ``#rrggbb`` colour derived from ``name``.

    The hue comes from the MD5 digest of ``name`` (so the same name always
    maps to the same hue); lightness and saturation are supplied by the
    caller. The default produces a high-saturation colour.
    """
    hash_digest = hashlib.md5(name.encode()).hexdigest()
    hue = int(hash_digest, 16) % 360 / 360.0
    rgb = colorsys.hls_to_rgb(hue, lightness, saturation)
    return '#{:02x}{:02x}{:02x}'.format(
        int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255)
    )


def generate_color_low(name):
    """Return a low-saturation, light colour for ``name``."""
    return generate_color(name, lightness=0.75, saturation=0.3)


def generate_color_high(name):
    """Return a high-saturation, mid-lightness colour for ``name``."""
    return generate_color(name, lightness=0.5, saturation=0.9)


def generate_color_dark(name):
    """Return a high-saturation, dark colour for ``name``."""
    return generate_color(name, lightness=0.2, saturation=0.9)


def generate_color_bright(name):
    """Return a high-saturation, very light colour for ``name``."""
    return generate_color(name, lightness=0.9, saturation=0.9)


def build_entity_color_dict(entity_list):
    """Map every entity name in ``entity_list`` to its low-saturation colour."""
    return {name: generate_color_low(name) for name in entity_list}


def classify_residue(residue):
    """Classify ``residue`` and return ``(type, entity_key)``.

    ``type`` is one of 'Ignore', 'Peptide', 'Ion', 'Ligand', 'DNA', 'RNA',
    'Protein' or 'Other'. ``entity_key`` is the label used to group residues
    (``None`` for ignored residues). Water is ignored.
    """
    hetfield = residue.id[0]
    resname = residue.resname.strip()

    # Water: Biopython flags it with hetfield "W"; the name check is kept for
    # structures whose HOH records carry a different flag.
    if resname == 'HOH' or hetfield == "W":
        return 'Ignore', None

    chain_id = residue.get_parent().id

    if hetfield.startswith("H_"):
        # An amino acid stored as HETATM is reported as a peptide.
        if resname in AMINO_ACIDS:
            return 'Peptide', f"Chain {chain_id} (Peptide)"
        if resname in _ION_RESNAMES:
            return 'Ion', f"{resname} (Ion)"
        return 'Ligand', f"{resname} (Ligand)"

    if hetfield == " ":
        if resname in NUCLEIC_ACIDS:
            if resname.startswith("D"):
                return 'DNA', f"Chain {chain_id} (DNA)"
            return 'RNA', f"Chain {chain_id} (RNA)"
        return 'Protein', f"Chain {chain_id} (Protein)"

    return 'Other', f"{resname} (Other)"


def _entity_sort_key(item):
    """Sort key: protein chains first, then other chains, then the rest."""
    key = item[0]
    return (
        not key.endswith("(Protein)"),
        not key.startswith("Chain"),
        key,
    )


def analyze_structure_combined(file_path):
    """Parse a structure file and group its residues by entity.

    :param file_path: path to a ``.cif``, ``.pdb`` or ``.ent`` file (the
        extension check is case-insensitive).
    :return: dict mapping entity key to a list of
        ``{'chain', 'resn', 'resi', 'residue'}`` entries. Keys are ordered
        protein chains first, then other ``Chain ...`` entities, then
        everything else, so that ligand/ion styles applied later are not
        overwritten by chain styles.
    :raises ValueError: for any other file extension.
    """
    lowered = file_path.lower()
    if lowered.endswith(".cif"):
        with open(file_path, 'r') as f:
            content = f.read()
        # The mmCIF parser needs a data block header; add a default one when
        # the file lacks it.
        if not content.lstrip().startswith("data_"):
            content = "data_auto\n" + content
        parser = MMCIFParser(QUIET=True)
        # Parse from memory so no temporary file is left behind on disk.
        source = StringIO(content)
    elif lowered.endswith((".pdb", ".ent")):
        parser = PDBParser(QUIET=True)
        source = file_path
    else:
        raise ValueError("Unsupported file format. Only .cif and .pdb are supported.")

    structure = parser.get_structure("structure", source)

    summary = defaultdict(list)
    # NOTE(review): residues of every model are collected; multi-model files
    # therefore contribute one entry per model for each residue.
    for model in structure:
        for chain in model:
            for residue in chain:
                rtype, key = classify_residue(residue)
                if rtype == 'Ignore':
                    continue
                summary[key].append({
                    'chain': chain.id,
                    'resn': residue.resname.strip(),
                    'resi': str(residue.id[1]),
                    'residue': residue,
                })

    return dict(sorted(summary.items(), key=_entity_sort_key))


def read_file(file_path):
    """Load a structure file and build the data used by the viewer.

    :param file_path: path of the structure file, or ``None``.
    :return: on success the tuple
        ``(structure_str, summary, entity_color_dict, structure_dict)``;
        when ``file_path`` is ``None`` or the file cannot be read, a single
        error-message string is returned instead.
    """
    if file_path is None:
        return "未提供结构文件"

    try:
        with open(file_path, "r") as f:
            structure_str = f.read()
    except Exception as e:
        # Best effort: the failure is reported as a message, not raised.
        return f"读取文件失败: {e}"

    summary = analyze_structure_combined(file_path)
    entity_color_dict = build_entity_color_dict(list(summary.keys()))

    # Cached bundle for later interaction.
    structure_dict = {
        "structure_str": structure_str,
        "summary": summary,
        "entity_color_dict": entity_color_dict
    }

    return structure_str, summary, entity_color_dict, structure_dict


def _residue_tuple(residue):
    """Return ``(chain_id, residue_number_as_str, residue_name)``."""
    return (
        residue.get_parent().id,
        str(residue.id[1]),
        residue.resname.strip(),
    )


def _format_residue_sets(residue_sets):
    """Turn ``{key: {(chain, resi, resn)}}`` into sorted lists of dicts."""
    return {
        key: [
            {'chain': c, 'resi': r, 'resn': n}
            for (c, r, n) in sorted(res_set, key=lambda x: (x[0], int(x[1])))
        ]
        for key, res_set in residue_sets.items()
    }


def extract_contact_residues(summary, selected_keys, cutoff=3.5):
    """Find residues of the selected entities that touch another entity.

    :param summary: entity mapping as returned by
        ``analyze_structure_combined``.
    :param selected_keys: entity keys to analyse; each must exist in
        ``summary``.
    :param cutoff: maximum atom-atom distance counted as a contact.
    :return: dict mapping entity key to a list of
        ``{'chain', 'resi', 'resn'}`` dicts sorted by chain and residue
        number. Entities without contacts are absent.
    """
    atom_to_residue_info = {}
    all_atoms = []
    for key in selected_keys:
        for entry in summary[key]:
            residue = entry['residue']
            for atom in residue:
                all_atoms.append(atom)
                atom_to_residue_info[atom] = (key, residue)

    # Nothing to search (and NeighborSearch cannot be built without atoms).
    if not all_atoms:
        return {}

    ns = NeighborSearch(all_atoms)

    contact_summary = defaultdict(set)
    for atom1, atom2 in ns.search_all(cutoff, level='A'):
        key1, res1 = atom_to_residue_info.get(atom1, (None, None))
        key2, res2 = atom_to_residue_info.get(atom2, (None, None))
        # Only contacts between two different entities are of interest.
        if key1 is None or key2 is None or key1 == key2:
            continue
        # The sets de-duplicate residues, so no extra "seen" bookkeeping is
        # needed (a tag without the chain id could wrongly skip residues).
        contact_summary[key1].add(_residue_tuple(res1))
        contact_summary[key2].add(_residue_tuple(res2))

    return _format_residue_sets(contact_summary)


def _has_hydrogen_bond(donor_pairs, acceptors, cutoff, angle_cutoff):
    """Return True if any (donor, H) pair forms an H-bond with an acceptor."""
    for donor, hydrogen in donor_pairs:
        for acceptor in acceptors:
            if np.linalg.norm(hydrogen.coord - acceptor.coord) > cutoff:
                continue
            if calculate_angle(donor, hydrogen, acceptor) >= angle_cutoff:
                return True
    return False


def extract_polar_contacts(summary, contact_summary, cutoff=3.5, angle_cutoff=120.0):
    """Filter contact residues down to those forming polar contacts.

    A polar contact is reported when a donor hydrogen of a residue from one
    entity lies within ``cutoff`` of an N/O atom of a residue from another
    entity and the donor-H-acceptor angle is at least ``angle_cutoff``
    degrees. Structures without explicit hydrogens therefore yield no
    polar contacts.

    :param summary: entity mapping as returned by
        ``analyze_structure_combined``.
    :param contact_summary: result of ``extract_contact_residues``.
    :param cutoff: maximum hydrogen-acceptor distance.
    :param angle_cutoff: minimum donor-H-acceptor angle in degrees.
    :return: dict with the same layout as ``contact_summary``.
    """
    # Index residues so the plain dict entries can be mapped back to objects.
    residue_lookup = {}
    for key, entries in summary.items():
        for entry in entries:
            residue = entry['residue']
            residue_lookup[(key,) + _residue_tuple(residue)] = residue

    # Resolve every contact entry once; unknown entries are skipped.
    resolved = {}
    for key, contacts in contact_summary.items():
        residues = []
        for entry in contacts:
            residue = residue_lookup.get(
                (key, entry['chain'], entry['resi'], entry['resn'])
            )
            if residue is not None:
                residues.append(residue)
        resolved[key] = residues

    # Donor/acceptor atoms depend only on the residue, so compute them once
    # instead of once per residue pair.
    donor_cache = {}
    acceptor_cache = {}

    def donors_of(residue):
        cached = donor_cache.get(id(residue))
        if cached is None:
            cached = [
                (atom, hydrogen)
                for atom in residue
                for hydrogen in find_donor_hydrogens(atom)
            ]
            donor_cache[id(residue)] = cached
        return cached

    def acceptors_of(residue):
        cached = acceptor_cache.get(id(residue))
        if cached is None:
            cached = [atom for atom in residue if is_acceptor(atom)]
            acceptor_cache[id(residue)] = cached
        return cached

    polar_summary = defaultdict(set)
    for key1, residues1 in resolved.items():
        for res1 in residues1:
            donor_pairs = donors_of(res1)
            if not donor_pairs:
                continue
            for key2, residues2 in resolved.items():
                if key1 == key2:
                    continue
                for res2 in residues2:
                    if _has_hydrogen_bond(
                        donor_pairs, acceptors_of(res2), cutoff, angle_cutoff
                    ):
                        polar_summary[key1].add(_residue_tuple(res1))
                        polar_summary[key2].add(_residue_tuple(res2))

    return _format_residue_sets(polar_summary)


def set_default_styles(viewer, summary, entity_color_dict, add_label=True):
    """Apply the default representation to every entity in the viewer.

    Protein and nucleic-acid chains are drawn as cartoons, ions as spheres
    and every other entity (ligands, peptides, ...) as sticks.

    :param viewer: object exposing ``setStyle``, ``addLabel`` and ``zoomTo``.
    :param summary: entity mapping as returned by
        ``analyze_structure_combined``.
    :param entity_color_dict: mapping of entity key to colour.
    :param add_label: when True, a label with the entity name is added.
    """
    viewer.setStyle({'hetflag': True}, {"stick": {}})

    for entity, color in entity_color_dict.items():
        label_style = {
            'fontOpacity': 1,
            'backgroundColor': 'black',
            'fontColor': generate_color_bright(entity),
            'fontSize': 10,
        }
        entity_lower = entity.lower()

        # TODO: check 1L9Z
        if "(protein)" in entity_lower:
            # Entity keys look like "Chain <id> (Protein)".
            chain_id = entity.split()[1]
            viewer.setStyle(
                {'chain': chain_id},
                {'cartoon': {'arrows': True, 'color': color, 'opacity': 0.9}},
            )
            if add_label:
                viewer.addLabel(entity, label_style, {'chain': chain_id})

        elif '(dna)' in entity_lower or '(rna)' in entity_lower:
            chain_id = entity.split()[1]
            viewer.setStyle(
                {'chain': chain_id},
                {'cartoon': {'color': color, 'nucleicAcid': True, 'opacity': 0.8}},
            )
            if add_label:
                viewer.addLabel(entity, label_style, {'chain': chain_id})

        elif '(ion)' in entity_lower:
            for entry in summary.get(entity, []):
                # TODO: one zinc ion of PDB 1C3R cannot be displayed.
                # Some files give ions an empty chain id; in that case the
                # chain is left out of the selection.
                chain = entry.get('chain', '').strip()
                resi = entry.get('resi', '').strip()
                sel = {'resi': int(resi)}
                if chain:
                    sel['chain'] = chain
                viewer.setStyle(sel, {'sphere': {'color': color, 'radius': 2.0}})
                viewer.zoomTo(sel)
                if add_label:
                    viewer.addLabel(entity, label_style, sel)

        else:
            # Everything else (e.g. ligands) is shown as sticks.
            for entry in summary[entity]:
                cur_res_dict = {'chain': entry['chain'], 'resi': int(entry['resi'])}
                viewer.setStyle(dict(cur_res_dict), {'stick': {'color': color}})
                if add_label:
                    viewer.addLabel(entity, label_style, cur_res_dict)


def highlight_residues(viewer, residue_list, name='name', style='stick', font_size=15):
    """Highlight and label the given residues.

    Colours are derived from ``name`` so that one group of residues always
    gets the same colour scheme.

    :param viewer: object exposing ``addStyle`` and ``addLabel``.
    :param residue_list: list of residue dicts, e.g.
        ``[{'chain': 'A', 'resn': 'LYS', 'resi': '25'},
        {'chain': 'A', 'resn': 'ASP', 'resi': '40'}]``.
    :param name: label used to derive the colours.
    :param style: representation added to each residue (e.g. ``'stick'``).
    :param font_size: font size of the residue labels.
    :return: the ``viewer`` that was passed in.
    """
    color = generate_color_high(name)
    label_style = {
        'fontOpacity': 1,
        'showBackground': True,
        'backgroundColor': generate_color_low(name),
        'backgroundOpacity': 0.5,
        'borderColor': 'grey',
        'fontColor': generate_color_dark(name),
        'fontSize': font_size
    }

    for res in residue_list:
        cur_res_dict = {'chain': res['chain'], 'resi': int(res['resi'])}
        viewer.addStyle(dict(cur_res_dict), {style: {'color': color}})
        viewer.addLabel(f"{res['resn']} {res['resi']}", label_style, cur_res_dict)

    return viewer