| | from collections import defaultdict |
| | import colorsys |
| | import hashlib |
| | import numpy as np |
| |
|
| | from tempfile import NamedTemporaryFile |
| | from io import StringIO |
| | from Bio.PDB import MMCIFParser, PDBParser, PDBIO |
| | from collections import defaultdict |
| | import colorsys |
| | import hashlib |
| | from Bio.PDB.NeighborSearch import NeighborSearch |
| | from Bio.PDB.DSSP import DSSP |
| |
|
# Residue names that classify_residue treats as nucleotides.
NUCLEIC_ACIDS = {
    # One-letter names (no leading "D").
    "A",
    "C",
    "G",
    "T",
    "U",
    # Two-letter names carrying the "D" prefix.
    "DA",
    "DC",
    "DG",
    "DT",
    "DU",
}
| |
|
# Three-letter residue names recognised as amino acids: the twenty standard
# ones plus SEC and PYL.
AMINO_ACIDS = {
    "ALA", "ARG", "ASN", "ASP",
    "CYS", "GLN", "GLU", "GLY",
    "HIS", "ILE", "LEU", "LYS",
    "MET", "PHE", "PRO", "SER",
    "THR", "TRP", "TYR", "VAL",
    "SEC", "PYL",
}
| |
|
def find_donor_hydrogens(atom, max_bond_length=1.3):
    """Return the hydrogens attached to a potential hydrogen-bond donor.

    Only N and O atoms are considered donors. A hydrogen from the same
    parent residue counts as attached when it lies within
    ``max_bond_length`` of ``atom``; without this filter every hydrogen of
    the residue would be reported for every N/O atom.

    :param atom: atom-like object exposing ``element``, ``coord`` and
        ``get_parent()`` (the parent must be iterable over its atoms).
    :param max_bond_length: maximum donor-hydrogen distance, in the same
        units as ``coord``, for the hydrogen to be treated as bonded.
    :return: list of hydrogen atoms bonded to ``atom`` (empty when ``atom``
        is not N/O or carries no hydrogens).
    """
    if atom.element not in {'N', 'O'}:
        return []

    # Compare squared distances to avoid a square root per candidate.
    limit_sq = max_bond_length ** 2
    hydrogens = []
    for neighbor in atom.get_parent():
        if neighbor.element != 'H':
            continue
        offset = neighbor.coord - atom.coord
        if np.dot(offset, offset) <= limit_sq:
            hydrogens.append(neighbor)
    return hydrogens
| |
|
def is_acceptor(atom):
    """Tell whether ``atom`` can accept a hydrogen bond.

    The check is purely element based: nitrogen and oxygen qualify.

    :param atom: atom-like object exposing ``element``.
    :return: True for N or O, False otherwise.
    """
    acceptor_elements = {'N', 'O'}
    return atom.element in acceptor_elements
| |
|
def calculate_angle(atom1, atom2, atom3):
    """Return the angle atom1-atom2-atom3 in degrees, with atom2 as vertex.

    :param atom1: first end atom (exposes ``coord``).
    :param atom2: vertex atom (exposes ``coord``).
    :param atom3: second end atom (exposes ``coord``).
    :return: angle in degrees.
    """
    vertex = atom2.coord
    arm_a = atom1.coord - vertex
    arm_b = atom3.coord - vertex
    length_product = np.linalg.norm(arm_a) * np.linalg.norm(arm_b)
    # Clamp so rounding error cannot push the cosine outside arccos' domain.
    cosine = np.clip(np.dot(arm_a, arm_b) / length_product, -1.0, 1.0)
    return np.degrees(np.arccos(cosine))
| |
|
def get_text_content(file_path="static/gr_head.md"):
    """Read a UTF-8 text file and return its full contents.

    :param file_path: path of the file to read.
    :return: the file contents as a string.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        text = handle.read()
    return text
| | |
| | |
def generate_color_low(name):
    """Derive a pale, low-saturation hex colour from ``name``.

    The hue comes from the MD5 digest of the name, so the same name always
    yields the same colour; lightness is 0.75 and saturation 0.3.

    :param name: string to derive the colour from.
    :return: colour as a ``#rrggbb`` string.
    """
    digest_value = int(hashlib.md5(name.encode()).hexdigest(), 16)
    hue = (digest_value % 360) / 360.0
    channels = colorsys.hls_to_rgb(hue, 0.75, 0.3)
    red, green, blue = (int(level * 255) for level in channels)
    return '#{:02x}{:02x}{:02x}'.format(red, green, blue)
| |
|
| | |
def generate_color(name, lightness=0.5, saturation=0.9):
    """Derive a deterministic hex colour from ``name``.

    The hue is taken from the MD5 digest of the name; lightness and
    saturation are supplied by the caller (HLS colour model).

    :param name: string to derive the colour from.
    :param lightness: HLS lightness passed to ``colorsys.hls_to_rgb``.
    :param saturation: HLS saturation passed to ``colorsys.hls_to_rgb``.
    :return: colour as a ``#rrggbb`` string.
    """
    digest_value = int(hashlib.md5(name.encode()).hexdigest(), 16)
    hue = (digest_value % 360) / 360.0
    channels = colorsys.hls_to_rgb(hue, lightness, saturation)
    red, green, blue = (int(level * 255) for level in channels)
    return '#{:02x}{:02x}{:02x}'.format(red, green, blue)
| |
|
| |
|
def generate_color_high(name):
    """Return the vivid colour for ``name`` (lightness 0.5, saturation 0.9)."""
    vivid = generate_color(name, saturation=0.9, lightness=0.5)
    return vivid
| |
|
def generate_color_dark(name):
    """Return the dark colour for ``name`` (lightness 0.2, saturation 0.9)."""
    dark = generate_color(name, saturation=0.9, lightness=0.2)
    return dark
| |
|
def generate_color_bright(name):
    """Return the bright colour for ``name`` (lightness 0.9, saturation 0.9)."""
    bright = generate_color(name, saturation=0.9, lightness=0.9)
    return bright
| |
|
| | |
def build_entity_color_dict(entity_list):
    """Map every entity name to its pale colour from ``generate_color_low``.

    :param entity_list: iterable of entity names.
    :return: dict of ``{entity name: '#rrggbb'}``.
    """
    color_by_entity = {}
    for entity_name in entity_list:
        color_by_entity[entity_name] = generate_color_low(entity_name)
    return color_by_entity
| |
|
def classify_residue(residue):
    """Classify a residue and name the entity it should be grouped under.

    :param residue: Biopython-style residue exposing ``id`` as
        ``(hetfield, resseq, icode)``, ``resname`` and ``get_parent()``
        (the chain, whose ``id`` is used in chain-based labels).
    :return: ``(kind, label)`` where ``kind`` is one of ``'Ignore'``,
        ``'Peptide'``, ``'Ion'``, ``'Ligand'``, ``'DNA'``, ``'RNA'``,
        ``'Protein'`` or ``'Other'``; ``label`` is ``None`` for ignored
        residues and a display string otherwise.
    """
    hetfield, _resseq, _icode = residue.id
    resname = residue.resname.strip()

    # Waters are skipped. Biopython marks them with the hetero flag "W"
    # (this covers both HOH and WAT); the name check additionally catches
    # waters that arrive without that flag, including deuterated DOD.
    if hetfield == 'W' or resname in {'HOH', 'WAT', 'DOD'}:
        return 'Ignore', None

    if hetfield.startswith("H_"):
        # Hetero residues: modified/hetero amino acids are grouped per
        # chain, everything else per residue name.
        if resname in AMINO_ACIDS:
            return 'Peptide', f"Chain {residue.get_parent().id} (Peptide)"
        if resname in {"NA", "CL", "K", "CA", "MG", "ZN", "FE", "MN", "CU", "CO"}:
            return 'Ion', f"{resname} (Ion)"
        return 'Ligand', f"{resname} (Ligand)"

    if hetfield == " ":
        # Standard polymer residues, grouped per chain.
        chain_id = residue.get_parent().id
        if resname in NUCLEIC_ACIDS:
            # Only "D"-prefixed names are labelled DNA; a bare "T" is
            # therefore reported as RNA.
            if resname.startswith("D"):
                return 'DNA', f"Chain {chain_id} (DNA)"
            return 'RNA', f"Chain {chain_id} (RNA)"
        return 'Protein', f"Chain {chain_id} (Protein)"

    # Any other hetero flag.
    return 'Other', f"{resname} (Other)"
| |
|
| |
|
def analyze_structure_combined(file_path):
    """Parse a structure file and group its residues by entity.

    mmCIF input that lacks a leading ``data_`` block header gets a
    ``data_auto`` header prepended before parsing. The (possibly patched)
    mmCIF text is parsed from memory, so no temporary file is left behind.

    :param file_path: path to a ``.cif``, ``.pdb`` or ``.ent`` file
        (extension matched case-insensitively).
    :return: dict mapping each entity label from ``classify_residue`` to a
        list of ``{'chain', 'resn', 'resi', 'residue'}`` dicts. Keys are
        ordered with ``"Chain X (Protein)"`` first, then other ``"Chain"``
        labels, then everything else, alphabetically within each group.
    :raises ValueError: if the extension is not supported.
    """
    lowered_path = file_path.lower()
    if lowered_path.endswith(".cif"):
        with open(file_path, 'r') as f:
            content = f.read()
        if not content.lstrip().startswith("data_"):
            content = "data_auto\n" + content
        parser = MMCIFParser(QUIET=True)
        # MMCIFParser accepts an open text handle as well as a filename.
        source = StringIO(content)
    elif lowered_path.endswith((".pdb", ".ent")):
        parser = PDBParser(QUIET=True)
        source = file_path
    else:
        raise ValueError("Unsupported file format. Only .cif and .pdb are supported.")

    structure = parser.get_structure("structure", source)
    summary = defaultdict(list)

    # NOTE(review): every model is traversed, so multi-model files (e.g.
    # NMR ensembles) contribute each residue once per model - confirm this
    # is intended.
    for model in structure:
        for chain in model:
            for residue in chain:
                rtype, key = classify_residue(residue)
                if rtype == 'Ignore':
                    continue
                summary[key].append({
                    'chain': chain.id,
                    'resn': residue.resname.strip(),
                    'resi': str(residue.id[1]),
                    'residue': residue,
                })

    # False sorts before True, so the first two key components pull
    # "Chain X (Protein)" and then all chain-based labels to the front.
    sorted_summary = dict(sorted(
        summary.items(),
        key=lambda item: (
            item[0] != "Chain X (Protein)",
            not item[0].startswith("Chain"),
            item[0],
        ),
    ))
    return sorted_summary
| |
|
def read_file(file_path):
    """Load a structure file and build everything the viewer needs.

    :param file_path: path of the structure file, or ``None``.
    :return: on success the tuple ``(structure_str, summary,
        entity_color_dict, structure_dict)``, where ``structure_dict``
        bundles the first three under those same names. When ``file_path``
        is ``None`` or the file cannot be read, a red HTML error string is
        returned instead (the messages say "no structure file provided" and
        "failed to read file" in Chinese).
    """
    if file_path is None:
        return "<b style='color:red'>未提供结构文件</b>"

    try:
        with open(file_path, "r") as handle:
            raw_text = handle.read()
    except Exception as err:
        return f"<b style='color:red'>读取文件失败: {err}</b>"

    entities = analyze_structure_combined(file_path)
    palette = build_entity_color_dict(list(entities.keys()))

    bundle = dict(
        structure_str=raw_text,
        summary=entities,
        entity_color_dict=palette,
    )
    return raw_text, entities, palette, bundle
| |
|
| |
|
def extract_contact_residues(summary, selected_keys, cutoff=3.5):
    """Find residues of the selected entities that touch another entity.

    Two residues are in contact when any pair of their atoms lies within
    ``cutoff`` of each other and the residues belong to different entity
    keys.

    :param summary: mapping of entity key to a list of entries, each
        holding the residue object under ``'residue'``.
    :param selected_keys: entity keys to include in the search.
    :param cutoff: maximum atom-atom distance for a contact.
    :return: dict mapping each entity key that has contacts to a list of
        ``{'chain', 'resi', 'resn'}`` dicts sorted by chain, residue number
        and residue name. Empty when fewer than two entities are selected
        or they hold no atoms.
    :raises KeyError: if a selected key is absent from a plain-dict
        ``summary``.
    """
    atom_to_residue_info = {}
    all_atoms = []
    # dict.fromkeys drops repeated keys while keeping their order, so an
    # entity's atoms are only registered once.
    distinct_keys = list(dict.fromkeys(selected_keys))
    for key in distinct_keys:
        for entry in summary[key]:
            residue = entry['residue']
            for atom in residue:
                all_atoms.append(atom)
                atom_to_residue_info[atom] = (key, residue)

    # Cross-entity contacts need at least two entities, and the neighbour
    # search should not be built over an empty atom list.
    if len(distinct_keys) < 2 or not all_atoms:
        return {}

    ns = NeighborSearch(all_atoms)
    contact_summary = defaultdict(set)
    seen = set()

    for atom1, atom2 in ns.search_all(cutoff, level='A'):
        if atom1 == atom2:
            continue
        key1, res1 = atom_to_residue_info.get(atom1, (None, None))
        key2, res2 = atom_to_residue_info.get(atom2, (None, None))
        if key1 is None or key2 is None or key1 == key2:
            continue

        out1 = (res1.get_parent().id, str(res1.id[1]), res1.resname.strip())
        out2 = (res2.get_parent().id, str(res2.id[1]), res2.resname.strip())

        # The dedup tag includes the chain ID: residues with the same number
        # in different chains (common for ligands and ions sharing one
        # entity key) must not be mistaken for an already-recorded pair.
        pair = frozenset(((key1, out1), (key2, out2)))
        if pair in seen:
            continue
        seen.add(pair)

        contact_summary[key1].add(out1)
        contact_summary[key2].add(out2)

    return {
        key: [
            {'chain': c, 'resi': r, 'resn': n}
            for (c, r, n) in sorted(res_set, key=lambda x: (x[0], int(x[1]), x[2]))
        ]
        for key, res_set in contact_summary.items()
    }
| |
|
def _has_polar_pair(donors, acceptors, cutoff, angle_cutoff):
    """Return True if any donor-H...acceptor combination passes both filters."""
    for donor, hydrogens in donors:
        for acceptor in acceptors:
            for hydrogen in hydrogens:
                if np.linalg.norm(hydrogen.coord - acceptor.coord) > cutoff:
                    continue
                if calculate_angle(donor, hydrogen, acceptor) >= angle_cutoff:
                    return True
    return False


def extract_polar_contacts(summary, contact_summary, cutoff=3.5, angle_cutoff=120.0):
    """Narrow contact residues down to those forming hydrogen-bond-like pairs.

    For every ordered pair of different entities in ``contact_summary``,
    each residue of the first is tested as donor against each residue of
    the second as acceptor. A pair qualifies when some donor hydrogen lies
    within ``cutoff`` of an acceptor atom and the donor-H-acceptor angle is
    at least ``angle_cutoff`` degrees.

    :param summary: mapping of entity key to entries holding the residue
        object under ``'residue'``; used to resolve contact entries back to
        residues.
    :param contact_summary: mapping of entity key to a list of
        ``{'chain', 'resi', 'resn'}`` dicts.
    :param cutoff: maximum hydrogen-acceptor distance.
    :param angle_cutoff: minimum donor-H-acceptor angle in degrees.
    :return: dict mapping each entity key with polar contacts to a list of
        ``{'chain', 'resi', 'resn'}`` dicts sorted by chain, residue number
        and residue name.
    """
    # Index residues by (entity key, chain, residue number, residue name).
    residue_lookup = {}
    for key in summary:
        for entry in summary[key]:
            residue = entry['residue']
            tag = (residue.get_parent().id, str(residue.id[1]), residue.resname.strip())
            residue_lookup[(key,) + tag] = residue

    # Resolve each contact entry once and precompute its donors/acceptors,
    # instead of recomputing them for every residue pair.
    prepared = {}
    for key, contacts in contact_summary.items():
        records = []
        for entry in contacts:
            residue = residue_lookup.get((key, entry['chain'], entry['resi'], entry['resn']))
            if residue is None:
                continue
            atoms = list(residue)
            donors = []
            for atom in atoms:
                hydrogens = find_donor_hydrogens(atom)
                if hydrogens:
                    donors.append((atom, hydrogens))
            acceptors = [atom for atom in atoms if is_acceptor(atom)]
            tag = (residue.get_parent().id, str(residue.id[1]), residue.resname.strip())
            records.append((tag, donors, acceptors))
        prepared[key] = records

    polar_summary = defaultdict(set)
    for donor_key, donor_records in prepared.items():
        for donor_tag, donors, _unused in donor_records:
            if not donors:
                continue
            for acceptor_key, acceptor_records in prepared.items():
                if acceptor_key == donor_key:
                    continue
                for acceptor_tag, _unused2, acceptors in acceptor_records:
                    # One qualifying atom pair is enough: the result holds
                    # residues, so further hits would add nothing.
                    if _has_polar_pair(donors, acceptors, cutoff, angle_cutoff):
                        polar_summary[donor_key].add(donor_tag)
                        polar_summary[acceptor_key].add(acceptor_tag)

    return {
        key: [
            {'chain': c, 'resi': r, 'resn': n}
            for (c, r, n) in sorted(res_set, key=lambda x: (x[0], int(x[1]), x[2]))
        ]
        for key, res_set in polar_summary.items()
    }
| |
|
| |
|
| |
|
def set_default_styles(viewer, summary, entity_color_dict,
                       add_label=True):
    """Apply the default per-entity rendering to a 3D viewer.

    All hetero atoms start as plain sticks. Then, per entity label:
    protein chains are drawn as arrowed cartoons, DNA/RNA chains as
    nucleic-acid cartoons, ions as spheres (the view is zoomed to each ion
    in turn), and anything else as coloured sticks per residue.

    :param viewer: viewer object exposing ``setStyle``, ``addLabel`` and
        ``zoomTo`` (py3Dmol-style).
    :param summary: mapping of entity label to entries with ``'chain'`` and
        ``'resi'``.
    :param entity_color_dict: mapping of entity label to colour string.
    :param add_label: when true, also attach a text label per styled
        selection.
    """
    viewer.setStyle({'hetflag': True}, {"stick": {}})

    for entity, color in entity_color_dict.items():
        label_style = {
            'fontOpacity': 1,
            'backgroundColor': 'black',
            'fontColor': generate_color_bright(entity),
            'fontSize': 10,
        }
        lowered = entity.lower()
        is_protein = "(protein)" in lowered
        is_nucleic = '(dna)' in lowered or '(rna)' in lowered

        # Polymer chains: one cartoon (and one label) for the whole chain.
        if is_protein or is_nucleic:
            # Labels look like "Chain <id> (<kind>)"; the id is the 2nd word.
            chain_id = entity.split()[1]
            if is_protein:
                cartoon = {'arrows': True, 'color': color, 'opacity': 0.9}
            else:
                cartoon = {'color': color, 'nucleicAcid': True, 'opacity': 0.8}
            viewer.setStyle({'chain': chain_id}, {'cartoon': cartoon})
            if add_label:
                viewer.addLabel(entity, label_style, {'chain': chain_id})
            continue

        # Ions: one sphere per listed residue.
        if '(ion)' in lowered:
            for entry in summary.get(entity, []):
                chain = entry.get('chain', '').strip()
                sel = {'resi': int(entry.get('resi', '').strip())}
                if chain:
                    sel['chain'] = chain
                viewer.setStyle(sel, {'sphere': {'color': color, 'radius': 2.0}})
                viewer.zoomTo(sel)
                if add_label:
                    viewer.addLabel(entity, label_style, sel)
            continue

        # Everything else (ligands, peptides, ...): sticks per residue.
        for entry in summary[entity]:
            selection = {'chain': entry['chain'], 'resi': int(entry['resi'])}
            viewer.setStyle(selection, {'stick': {'color': color}})
            if add_label:
                viewer.addLabel(entity, label_style, dict(selection))
| |
|
def highlight_residues(viewer, residue_list, name='name',
                       style='stick',
                       font_size=15):
    """Highlight and label the given residues in the viewer.

    Colours are derived from ``name``: the vivid variant for the residue
    style, the dark variant for the label text and the pale variant for the
    label background.

    :param viewer: viewer object exposing ``addStyle`` and ``addLabel``
        (py3Dmol-style).
    :param residue_list: list of residue dicts, e.g.
        residue_list = [
            {'chain': 'A', 'resn': 'LYS', 'resi': '25'},
            {'chain': 'A', 'resn': 'ASP', 'resi': '40'},
        ]
    :param name: seed string for the colours.
    :param style: style key applied to each residue (e.g. ``'stick'``).
    :param font_size: label font size.
    :return: the same ``viewer`` object.
    """
    highlight_color = generate_color_high(name)
    text_color = generate_color_dark(name)
    panel_color = generate_color_low(name)

    label_style = {
        'fontOpacity': 1,
        'showBackground': True,
        'backgroundColor': panel_color,
        'backgroundOpacity': 0.5,
        'borderColor': 'grey',
        'fontColor': text_color,
        'fontSize': font_size,
    }

    for res in residue_list:
        chain_id = res['chain']
        res_number = int(res['resi'])
        label_target = {'chain': chain_id, 'resi': res_number}

        # addStyle layers the highlight on top of whatever is already set.
        viewer.addStyle({'chain': chain_id, 'resi': res_number},
                        {style: {'color': highlight_color}})
        viewer.addLabel(f"{res['resn']} {res['resi']}", label_style, label_target)

    return viewer
| |
|
| |
|
| |
|