# Hugging Face Space: EZ4Fanta/bindana (status: Sleeping)
# File size: 13,748 bytes

from collections import defaultdict
import colorsys
import hashlib
import numpy as np

from tempfile import NamedTemporaryFile
from io import StringIO
from Bio.PDB import MMCIFParser, PDBParser, PDBIO
from collections import defaultdict
import colorsys
import hashlib
from Bio.PDB.NeighborSearch import NeighborSearch
from Bio.PDB.DSSP import DSSP

# Residue names that classify_residue treats as nucleic acids: the plain
# one-letter names first, then the "D"-prefixed names.
NUCLEIC_ACIDS = {
    "A", "C", "G", "T", "U",
    "DA", "DC", "DG", "DT", "DU",
}

# Three-letter residue names that classify_residue treats as amino acids:
# the twenty standard residues plus SEC and PYL.
AMINO_ACIDS = {
    "ALA", "ARG", "ASN", "ASP",
    "CYS", "GLN", "GLU", "GLY",
    "HIS", "ILE", "LEU", "LYS",
    "MET", "PHE", "PRO", "SER",
    "THR", "TRP", "TYR", "VAL",
    "SEC", "PYL",
}

def find_donor_hydrogens(atom, max_bond_length=1.3):
    """
    Return the hydrogens attached to a potential hydrogen-bond donor atom.

    Only nitrogen and oxygen atoms are considered donors. A hydrogen is
    reported when it belongs to the same parent residue as ``atom`` AND lies
    within ``max_bond_length`` of it. Without the distance filter every
    hydrogen of the residue would be returned, and donor-H-acceptor angles
    would be computed with hydrogens that are not bonded to the donor.

    :param atom: atom-like object exposing ``element``, ``coord`` and
        ``get_parent()`` (an iterable of the atoms of its residue).
    :param max_bond_length: largest donor-hydrogen distance, in the units of
        the coordinates, still treated as a covalent bond.
    :return: list of hydrogen atoms bonded to ``atom`` (empty when ``atom``
        is not N/O or carries no hydrogens).
    """
    if atom.element not in {'N', 'O'}:
        return []
    return [
        neighbor
        for neighbor in atom.get_parent()
        # The element test comes first so non-hydrogen atoms never need coords.
        if neighbor.element == 'H'
        and np.linalg.norm(neighbor.coord - atom.coord) <= max_bond_length
    ]

def is_acceptor(atom):
    """Return True when ``atom`` is a nitrogen or an oxygen (element 'N' or 'O')."""
    element = atom.element
    return element == 'N' or element == 'O'

def calculate_angle(atom1, atom2, atom3):
    """
    Return the angle atom1-atom2-atom3 in degrees, with ``atom2`` as the vertex.

    :param atom1: atom-like object with a numeric ``coord`` array.
    :param atom2: vertex atom (same requirements).
    :param atom3: atom-like object with a numeric ``coord`` array.
    :return: angle in degrees, in the range [0, 180].
    """
    vertex = atom2.coord
    arm_a = atom1.coord - vertex
    arm_b = atom3.coord - vertex
    norm_product = np.linalg.norm(arm_a) * np.linalg.norm(arm_b)
    cosine = np.dot(arm_a, arm_b) / norm_product
    # Rounding can push the cosine marginally outside [-1, 1]; clamp it so
    # arccos stays defined.
    return np.degrees(np.arccos(np.clip(cosine, -1.0, 1.0)))

def get_text_content(file_path="static/gr_head.md"):
    """
    Read a UTF-8 text file and return its whole content.

    :param file_path: path of the file to read (defaults to
        ``static/gr_head.md``).
    :return: the decoded text of the file.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        text = handle.read()
    return text
    
# Automatically generate a low-saturation, publication-style colour.
def generate_color_low(name):
    """
    Map ``name`` to a deterministic low-saturation hex colour.

    The hue is the MD5 digest of the name taken modulo 360 degrees; lightness
    is fixed at 0.75 and saturation at 0.3.

    :param name: string the colour is derived from.
    :return: colour formatted as ``#rrggbb``.
    """
    digest = hashlib.md5(name.encode()).hexdigest()
    hue = (int(digest, 16) % 360) / 360.0
    red, green, blue = (
        int(channel * 255)
        for channel in colorsys.hls_to_rgb(hue, 0.75, 0.3)
    )
    return f"#{red:02x}{green:02x}{blue:02x}"

# Automatically generate a high-saturation, publication-style colour.
def generate_color(name, lightness=0.5, saturation=0.9):
    """
    Map ``name`` to a deterministic hex colour with the given HLS settings.

    The hue is the MD5 digest of the name taken modulo 360 degrees.

    :param name: string the colour is derived from.
    :param lightness: HLS lightness passed to ``colorsys.hls_to_rgb``.
    :param saturation: HLS saturation passed to ``colorsys.hls_to_rgb``.
    :return: colour formatted as ``#rrggbb``.
    """
    digest = hashlib.md5(name.encode()).hexdigest()
    hue = (int(digest, 16) % 360) / 360.0
    red, green, blue = (
        int(channel * 255)
        for channel in colorsys.hls_to_rgb(hue, lightness, saturation)
    )
    return f"#{red:02x}{green:02x}{blue:02x}"


def generate_color_high(name):
    """Return the colour of ``name`` at lightness 0.5 and saturation 0.9."""
    return generate_color(name, 0.5, 0.9)

def generate_color_dark(name):
    """Return the colour of ``name`` at lightness 0.2 and saturation 0.9."""
    return generate_color(name, 0.2, 0.9)

def generate_color_bright(name):
    """Return the colour of ``name`` at lightness 0.9 and saturation 0.9."""
    return generate_color(name, 0.9, 0.9)

# Build the colour mapping for a given list of entity names.
def build_entity_color_dict(entity_list):
    """
    Assign a low-saturation colour to every entity name.

    :param entity_list: iterable of entity names.
    :return: dict mapping each name to its ``generate_color_low`` colour.
    """
    palette = {}
    for entity_name in entity_list:
        palette[entity_name] = generate_color_low(entity_name)
    return palette

def classify_residue(residue):
    """
    Classify a residue and build the summary key it is grouped under.

    :param residue: residue-like object exposing ``id`` (a
        ``(hetfield, resseq, icode)`` tuple), ``resname`` and, for chain-based
        categories, ``get_parent()`` returning an object with the chain ``id``.
    :return: ``(category, key)`` where category is one of ``'Ignore'``,
        ``'Peptide'``, ``'Ion'``, ``'Ligand'``, ``'DNA'``, ``'RNA'``,
        ``'Protein'`` or ``'Other'``; key is ``None`` for ``'Ignore'``.
    """
    het_flag, _resseq, _icode = residue.id
    name = residue.resname.strip()

    # Water (HOH) is skipped whatever its hetero flag is.
    if name == 'HOH':
        return 'Ignore', None

    if het_flag.startswith("H_"):
        # An amino acid stored as a hetero residue is reported as a peptide
        # and keyed by its chain; ions and ligands are keyed by residue name.
        if name in AMINO_ACIDS:
            return 'Peptide', f"Chain {residue.get_parent().id} (Peptide)"
        if name in {"NA", "CL", "K", "CA", "MG", "ZN", "FE", "MN", "CU", "CO"}:
            return 'Ion', f"{name} (Ion)"
        return 'Ligand', f"{name} (Ligand)"

    if het_flag != " ":
        # Any other hetero flag that is not plain "H_...".
        return 'Other', f"{name} (Other)"

    # Standard residue: nucleic acid when the name is listed, else protein.
    if name not in NUCLEIC_ACIDS:
        return 'Protein', f"Chain {residue.get_parent().id} (Protein)"
    # NOTE(review): only names starting with "D" are reported as DNA, so the
    # bare name "T" is reported as RNA - confirm this is intended.
    if name.startswith("D"):
        return 'DNA', f"Chain {residue.get_parent().id} (DNA)"
    return 'RNA', f"Chain {residue.get_parent().id} (RNA)"


def _entity_sort_key(item):
    """Sort key for ``(entity_key, entries)``: proteins, then other chains, then the rest."""
    name = item[0]
    return (
        not name.endswith("(Protein)"),  # protein chains first
        not name.startswith("Chain"),    # then every other "Chain ..." key
        name,                            # alphabetical inside each group
    )


def analyze_structure_combined(file_path):
    """
    Parse a structure file and group its residues by entity.

    mmCIF files that do not start with a ``data_`` block header are parsed
    from a temporary copy prefixed with ``data_auto``; that copy is removed
    once parsing finishes, whether it succeeded or not.

    :param file_path: path to a ``.cif``, ``.pdb`` or ``.ent`` file.
    :return: dict mapping each entity key (see ``classify_residue``) to a list
        of ``{'chain', 'resn', 'resi', 'residue'}`` entries. Residues of every
        model in the structure are appended. Keys are ordered protein chains
        first, then other ``Chain ...`` keys, then everything else, so that
        styling applied in this order does not overwrite ligand/ion styles.
    :raises ValueError: if the file extension is not supported.
    """
    import os  # local import: only needed to remove the temporary mmCIF copy

    is_cif = file_path.endswith(".cif")
    if not is_cif and not file_path.endswith((".pdb", ".ent")):
        raise ValueError("Unsupported file format. Only .cif, .pdb and .ent are supported.")

    tmp_path = None
    try:
        parse_path = file_path
        if is_cif:
            with open(file_path, 'r') as f:
                content = f.read()
            # Add a default block name when the data_ header is missing.
            if not content.lstrip().startswith("data_"):
                with NamedTemporaryFile(suffix=".cif", delete=False, mode='w') as tmp:
                    tmp_path = tmp.name
                    tmp.write("data_auto\n" + content)
                parse_path = tmp_path
            parser = MMCIFParser(QUIET=True)
        else:
            parser = PDBParser(QUIET=True)

        structure = parser.get_structure("structure", parse_path)
    finally:
        # delete=False keeps the file after the with-block; remove it here.
        if tmp_path is not None:
            os.unlink(tmp_path)

    summary = defaultdict(list)
    for model in structure:
        for chain in model:
            for residue in chain:
                rtype, key = classify_residue(residue)
                if rtype == 'Ignore':
                    continue
                summary[key].append({
                    'chain': chain.id,
                    'resn': residue.resname.strip(),
                    'resi': str(residue.id[1]),
                    'residue': residue,
                })

    return dict(sorted(summary.items(), key=_entity_sort_key))

def read_file(file_path):
    """
    Load a structure file and bundle its text, residue summary and colours.

    NOTE(review): the two failure paths return a single HTML string while the
    success path returns a 4-tuple - callers have to handle both shapes.

    :param file_path: path of the structure file, or ``None``.
    :return: ``(structure_str, summary, entity_color_dict, structure_dict)``
        on success; an HTML error string when ``file_path`` is ``None`` or the
        file cannot be read.
    """
    if file_path is None:
        return "<b style='color:red'>未提供结构文件</b>"

    try:
        with open(file_path, "r") as handle:
            raw_text = handle.read()
    except Exception as e:
        return f"<b style='color:red'>读取文件失败: {e}</b>"

    residue_summary = analyze_structure_combined(file_path)
    colors = build_entity_color_dict(list(residue_summary.keys()))

    # Bundle cached for later interactive use.
    bundle = dict(
        structure_str=raw_text,
        summary=residue_summary,
        entity_color_dict=colors,
    )
    return raw_text, residue_summary, colors, bundle


def extract_contact_residues(summary, selected_keys, cutoff=3.5):
    """
    Find the residues of the selected entities that touch another entity.

    Two residues are in contact when any pair of their atoms lies within
    ``cutoff`` of each other and the atoms belong to different entity keys.

    :param summary: dict of entity key -> list of entries holding a
        ``'residue'`` object (as produced by ``analyze_structure_combined``).
    :param selected_keys: entity keys to include in the search; every key
        must exist in ``summary``.
    :param cutoff: maximum atom-atom distance counted as a contact.
    :return: dict of entity key -> list of ``{'chain', 'resi', 'resn'}``
        dicts sorted by chain then residue number; empty when the selection
        holds no atoms.
    """
    atom_owner = {}
    all_atoms = []
    # dict.fromkeys drops repeated keys while keeping their order, so no atom
    # is handed to the neighbour search twice.
    for key in dict.fromkeys(selected_keys):
        for entry in summary[key]:
            residue = entry['residue']
            for atom in residue:
                all_atoms.append(atom)
                atom_owner[atom] = (key, residue)

    # NeighborSearch cannot be built from an empty atom list.
    if not all_atoms:
        return {}

    contact_summary = defaultdict(set)
    for atom1, atom2 in NeighborSearch(all_atoms).search_all(cutoff, level='A'):
        if atom1 == atom2:
            continue
        key1, res1 = atom_owner.get(atom1, (None, None))
        key2, res2 = atom_owner.get(atom2, (None, None))
        if key1 is None or key2 is None or key1 == key2:
            continue
        # No separate "seen" bookkeeping: the sets already de-duplicate, and a
        # tag built from (key, residue id) alone conflates copies of a ligand
        # that share a residue number in different chains.
        contact_summary[key1].add((res1.get_parent().id, str(res1.id[1]), res1.resname.strip()))
        contact_summary[key2].add((res2.get_parent().id, str(res2.id[1]), res2.resname.strip()))

    return {
        key: [
            {'chain': c, 'resi': r, 'resn': n}
            for (c, r, n) in sorted(res_set, key=lambda x: (x[0], int(x[1])))
        ]
        for key, res_set in contact_summary.items()
    }

def extract_polar_contacts(summary, contact_summary, cutoff=3.5, angle_cutoff=120.0):
    """
    Keep the contact residues that take part in a donor-H...acceptor pair.

    For every pair of contact residues from two different entity keys, a
    polar contact is recorded when a hydrogen returned by
    ``find_donor_hydrogens`` for an atom of the first residue is within
    ``cutoff`` of an acceptor atom of the second residue and the
    donor-hydrogen-acceptor angle is at least ``angle_cutoff`` degrees.

    :param summary: dict of entity key -> entries holding a ``'residue'``.
    :param contact_summary: dict of entity key -> ``{'chain', 'resi', 'resn'}``
        dicts (as produced by ``extract_contact_residues``).
    :param cutoff: maximum hydrogen-acceptor distance.
    :param angle_cutoff: minimum donor-hydrogen-acceptor angle in degrees.
    :return: dict of entity key -> sorted list of ``{'chain', 'resi', 'resn'}``.
    """
    def describe(res):
        return (res.get_parent().id, str(res.id[1]), res.resname.strip())

    # Index every residue by (entity key, chain, residue number, name).
    lookup = {}
    for key, entries in summary.items():
        for entry in entries:
            res = entry['residue']
            lookup[(key,) + describe(res)] = res

    def resolve(key, entry):
        return lookup.get((key, entry['chain'], entry['resi'], entry['resn']))

    def donates_to(donor_res, acceptor_res):
        # True as soon as one donor-H...acceptor triple passes both cutoffs.
        for donor in donor_res:
            hydrogens = find_donor_hydrogens(donor)
            for candidate in acceptor_res:
                if not is_acceptor(candidate):
                    continue
                for hydrogen in hydrogens:
                    if np.linalg.norm(hydrogen.coord - candidate.coord) > cutoff:
                        continue
                    if calculate_angle(donor, hydrogen, candidate) >= angle_cutoff:
                        return True
        return False

    polar = defaultdict(set)
    # Walk every ordered pair of contact residues from different entities.
    for key1, contacts1 in contact_summary.items():
        for entry1 in contacts1:
            res1 = resolve(key1, entry1)
            if res1 is None:
                continue
            for key2, contacts2 in contact_summary.items():
                if key1 == key2:
                    continue
                for entry2 in contacts2:
                    res2 = resolve(key2, entry2)
                    if res2 is None:
                        continue
                    if donates_to(res1, res2):
                        polar[key1].add(describe(res1))
                        polar[key2].add(describe(res2))

    result = {}
    for key, tags in polar.items():
        ordered = sorted(tags, key=lambda tag: (tag[0], int(tag[1])))
        result[key] = [{'chain': c, 'resi': r, 'resn': n} for (c, r, n) in ordered]
    return result



def set_default_styles(viewer, summary, entity_color_dict,
                       add_label=True):
    """
    Apply the default representation to every entity and optionally label it.

    Protein and DNA/RNA chains are drawn as cartoons, ions as spheres and
    every remaining entity (e.g. ligands) as sticks.

    :param viewer: viewer object providing ``setStyle``, ``addLabel`` and
        ``zoomTo``.
    :param summary: dict of entity key -> entries with ``'chain'`` and
        ``'resi'``.
    :param entity_color_dict: dict of entity key -> colour; its iteration
        order is the order in which styles are applied.
    :param add_label: when True, a label with the entity name is added.
    """
    # Baseline: every hetero atom is drawn as sticks.
    viewer.setStyle({'hetflag': True}, {"stick": {}})

    for entity, color in entity_color_dict.items():
        label_style = {
            'fontOpacity': 1,
            'backgroundColor': 'black',
            'fontColor': generate_color_bright(entity),
            'fontSize': 10,
        }
        kind = entity.lower()

        # Whole-chain entities (protein, DNA, RNA) get a cartoon.
        # TODO: check 1L9Z
        if "(protein)" in kind:
            cartoon = {'arrows': True, 'color': color, 'opacity': 0.9}
        elif '(dna)' in kind or '(rna)' in kind:
            cartoon = {'color': color, 'nucleicAcid': True, 'opacity': 0.8}
        else:
            cartoon = None

        if cartoon is not None:
            # The chain ID is the second word of "Chain <id> (<type>)".
            chain_id = entity.split()[1]
            viewer.setStyle({'chain': chain_id}, {'cartoon': cartoon})
            if add_label:
                viewer.addLabel(entity, label_style, {'chain': chain_id})
            continue

        if '(ion)' in kind:
            # Ions (entity suffix "(ion)") are shown as spheres.
            for entry in summary.get(entity, []):
                # TODO: one zinc ion of PDB 1C3R cannot be displayed.
                # Some PDB ions have an empty chain ID; select by residue
                # number only in that case.
                chain = entry.get('chain', '').strip()
                resi = entry.get('resi', '').strip()
                sel = {'resi': int(resi)}
                if chain:
                    sel['chain'] = chain
                viewer.setStyle(sel, {'sphere': {'color': color, 'radius': 2.0}})
                viewer.zoomTo(sel)
                if add_label:
                    viewer.addLabel(entity, label_style, sel)
            continue

        # Everything else (e.g. ligands) is shown as sticks, per residue.
        for entry in summary[entity]:
            chain_id = entry['chain']
            viewer.setStyle(
                {'chain': chain_id, 'resi': int(entry['resi'])},
                {'stick': {'color': color}},
            )
            target = {'chain': chain_id, 'resi': int(entry['resi'])}
            if add_label:
                viewer.addLabel(entity, label_style, target)

def highlight_residues(viewer, residue_list, name='name',
                       style='stick',
                       font_size=15):
    """
    Highlight the given residues and label each of them.

    The highlight colour, label font colour and label background are all
    derived from ``name``.

    :param viewer: py3Dmol view object (must provide ``addStyle`` and
        ``addLabel``).
    :param residue_list: list of residues, for example
        residue_list = [
            {'chain': 'A', 'resn': 'LYS', 'resi': '25'},
            {'chain': 'A', 'resn': 'ASP', 'resi': '40'},
        ]
    :param name: string used to derive the colours.
    :param style: representation added to each residue.
    :param font_size: font size of the residue labels.
    :return: the same ``viewer`` object.
    """
    accent = generate_color_high(name)
    text_color = generate_color_dark(name)
    backdrop = generate_color_low(name)
    label_style = dict(
        fontOpacity=1,
        showBackground=True,
        backgroundColor=backdrop,
        backgroundOpacity=0.5,
        borderColor='grey',
        fontColor=text_color,
        fontSize=font_size,
    )
    for res in residue_list:
        chain_id = res['chain']
        resi = int(res['resi'])
        # addStyle layers the highlight on top of the existing representation.
        viewer.addStyle({'chain': chain_id, 'resi': resi}, {style: {'color': accent}})
        viewer.addLabel(
            f"{res['resn']} {res['resi']}",
            label_style,
            {'chain': chain_id, 'resi': resi},
        )

    return viewer