File size: 13,748 Bytes
45368f2
 
 
4d885c1
45368f2
4d885c1
43c59ae
 
45368f2
 
 
 
43c59ae
4d885c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45368f2
 
 
 
 
 
4d885c1
45368f2
4d885c1
45368f2
4d885c1
45368f2
 
 
 
4d885c1
45368f2
4d885c1
45368f2
 
 
4d885c1
 
 
 
 
 
 
 
 
 
45368f2
 
4d885c1
45368f2
 
 
 
 
 
 
4d885c1
 
 
 
 
45368f2
 
4d885c1
 
 
45368f2
4d885c1
45368f2
 
 
 
4d885c1
45368f2
4d885c1
 
45368f2
 
 
43c59ae
 
 
 
 
 
 
 
 
45368f2
43c59ae
5905209
45368f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7df8c54
 
 
 
 
 
 
 
 
 
 
45368f2
43c59ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5905209
45368f2
 
 
 
4d885c1
45368f2
 
 
 
 
 
 
4d885c1
45368f2
 
 
 
4d885c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45368f2
 
 
 
 
 
 
 
 
 
 
4d885c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45368f2
 
5905209
43c59ae
45368f2
 
 
 
4d885c1
45368f2
 
 
b2f19ba
 
45368f2
 
43c59ae
 
 
 
45368f2
 
b2f19ba
 
 
 
43c59ae
 
 
b2f19ba
 
 
4d885c1
 
5905209
4d885c1
5905209
4d885c1
 
 
 
5905209
4d885c1
 
5905209
4d885c1
5905209
4d885c1
 
45368f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d885c1
 
 
45368f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
from collections import defaultdict
import colorsys
import hashlib
import numpy as np

from tempfile import NamedTemporaryFile
from io import StringIO
from Bio.PDB import MMCIFParser, PDBParser, PDBIO
from collections import defaultdict
import colorsys
import hashlib
from Bio.PDB.NeighborSearch import NeighborSearch
from Bio.PDB.DSSP import DSSP

# Residue names that classify_residue treats as nucleotides. Names starting
# with "D" are reported there as DNA; every other member is reported as RNA.
NUCLEIC_ACIDS = {
    "A", "G", "C", "U", "T",
    "DA", "DG", "DC", "DT", "DU"
}

# Three-letter amino-acid residue names: the 20 standard residues plus SEC and
# PYL. classify_residue uses this set to label hetero residues as peptides.
AMINO_ACIDS = {
    "ALA", "ARG", "ASN", "ASP", "CYS",
    "GLN", "GLU", "GLY", "HIS", "ILE",
    "LEU", "LYS", "MET", "PHE", "PRO",
    "SER", "THR", "TRP", "TYR", "VAL",
    "SEC", "PYL"
}

def find_donor_hydrogens(atom, max_bond_length=1.2):
    """Return the hydrogens attached to a potential hydrogen-bond donor.

    Only nitrogen and oxygen atoms are considered donors; any other element
    yields an empty list. A hydrogen from the atom's parent residue counts as
    attached when it lies within ``max_bond_length`` of ``atom``.

    :param atom: atom-like object exposing ``element``, ``coord`` and
        ``get_parent()`` (iterating the parent yields its atoms).
    :param max_bond_length: largest donor-hydrogen distance accepted as a
        covalent bond, in coordinate units (presumably Angstrom -- confirm
        against the structure source).
    :return: list of hydrogen atoms bonded to ``atom``.
    """
    if atom.element not in {'N', 'O'}:
        return []

    # Filter by distance: without it every hydrogen of the residue (e.g. the
    # ones on carbon atoms) would be paired with this donor and produce
    # spurious donor-H...acceptor geometries downstream.
    return [
        neighbor
        for neighbor in atom.get_parent()
        if neighbor.element == 'H'
        and np.linalg.norm(neighbor.coord - atom.coord) <= max_bond_length
    ]

def is_acceptor(atom):
    """Return True when the atom's element is nitrogen or oxygen."""
    element = atom.element
    return element == 'N' or element == 'O'

def calculate_angle(atom1, atom2, atom3):
    """Return the angle atom1-atom2-atom3 in degrees, with atom2 as the vertex.

    Each argument must expose a ``coord`` attribute supporting vector
    subtraction (NumPy arrays in practice).
    """
    arm_a = atom1.coord - atom2.coord
    arm_b = atom3.coord - atom2.coord
    length_product = np.linalg.norm(arm_a) * np.linalg.norm(arm_b)
    # Clamp so rounding error cannot push the cosine outside arccos' domain.
    cosine = np.clip(np.dot(arm_a, arm_b) / length_product, -1.0, 1.0)
    return np.degrees(np.arccos(cosine))

def get_text_content(file_path="static/gr_head.md"):
    """Read the UTF-8 text file at ``file_path`` and return its full content."""
    with open(file_path, mode="r", encoding="utf-8") as handle:
        text = handle.read()
    return text
    
# Automatically generate a muted, low-saturation colour for a name.
def generate_color_low(name):
    """Map ``name`` to a deterministic pastel hex colour (``#rrggbb``).

    The hue comes from the MD5 digest of the name; lightness is fixed at 0.75
    and saturation at 0.3.
    """
    digest_value = int(hashlib.md5(name.encode()).hexdigest(), 16)
    hue = digest_value % 360 / 360.0
    red, green, blue = colorsys.hls_to_rgb(hue, 0.75, 0.3)
    channels = (int(red * 255), int(green * 255), int(blue * 255))
    return '#{:02x}{:02x}{:02x}'.format(*channels)

# Automatically generate a colour for a name; the defaults give a vivid,
# high-saturation result.
def generate_color(name, lightness=0.5, saturation=0.9):
    """Map ``name`` to a deterministic hex colour (``#rrggbb``).

    The hue is derived from the MD5 digest of the name, while ``lightness``
    and ``saturation`` are passed to ``colorsys.hls_to_rgb`` unchanged.
    """
    digest_value = int(hashlib.md5(name.encode()).hexdigest(), 16)
    hue = digest_value % 360 / 360.0
    red, green, blue = colorsys.hls_to_rgb(hue, lightness, saturation)
    channels = (int(red * 255), int(green * 255), int(blue * 255))
    return '#{:02x}{:02x}{:02x}'.format(*channels)


def generate_color_high(name):
    """Return the vivid colour for ``name`` (lightness 0.5, saturation 0.9)."""
    vivid = generate_color(name, 0.5, 0.9)
    return vivid

def generate_color_dark(name):
    """Return the dark colour for ``name`` (lightness 0.2, saturation 0.9)."""
    dark = generate_color(name, 0.2, 0.9)
    return dark

def generate_color_bright(name):
    """Return the bright colour for ``name`` (lightness 0.9, saturation 0.9)."""
    bright = generate_color(name, 0.9, 0.9)
    return bright

# Given a list of entity names, build the name -> colour mapping.
def build_entity_color_dict(entity_list):
    """Return a dict assigning each entity name its low-saturation colour."""
    palette = {}
    for entity_name in entity_list:
        palette[entity_name] = generate_color_low(entity_name)
    return palette

def classify_residue(residue):
    """Classify a residue and return ``(category, entity_key)``.

    Categories are ``'Ignore'`` (residue name HOH, key ``None``),
    ``'Peptide'``, ``'Ion'``, ``'Ligand'``, ``'DNA'``, ``'RNA'``,
    ``'Protein'`` and ``'Other'``. Chain-like entities get a key of the form
    ``"Chain <id> (<category>)"``; the others are keyed by residue name.

    :param residue: residue-like object exposing ``id`` (its first element is
        the hetero flag), ``resname`` and ``get_parent()``.
    """
    hetfield = residue.id[0]
    resname = residue.resname.strip()

    # Water is never reported.
    if resname == 'HOH':
        return 'Ignore', None

    # Hetero residues: amino acids count as peptides, a fixed list of names
    # counts as ions, and anything else is a ligand.
    if hetfield.startswith("H_"):
        if resname in AMINO_ACIDS:
            return 'Peptide', f"Chain {residue.get_parent().id} (Peptide)"
        if resname in {"NA", "CL", "K", "CA", "MG", "ZN", "FE", "MN", "CU", "CO"}:
            return 'Ion', f"{resname} (Ion)"
        return 'Ligand', f"{resname} (Ligand)"

    # Any other non-blank hetero flag falls through to the catch-all.
    if hetfield != " ":
        return 'Other', f"{resname} (Other)"

    # Regular polymer residue: nucleotide names decide DNA vs RNA, and
    # everything else is treated as protein.
    chain_id = residue.get_parent().id
    if resname not in NUCLEIC_ACIDS:
        return 'Protein', f"Chain {chain_id} (Protein)"
    if resname.startswith("D"):
        return 'DNA', f"Chain {chain_id} (DNA)"
    return 'RNA', f"Chain {chain_id} (RNA)"


def _entity_sort_key(entity_key):
    """Rank entity keys: protein chains, then other chains, then the rest."""
    return (
        "(Protein)" not in entity_key,
        not entity_key.startswith("Chain"),
        entity_key,
    )


def analyze_structure_combined(file_path):
    """Parse a structure file and group its residues by entity.

    ``.cif`` files are parsed with ``MMCIFParser``; ``.pdb`` and ``.ent`` files
    with ``PDBParser``. The extension check is case-insensitive. Every residue
    of every model is classified with ``classify_residue``; residues
    classified as ``'Ignore'`` are dropped.

    :param file_path: path to the structure file.
    :return: dict mapping entity key to a list of
        ``{'chain', 'resn', 'resi', 'residue'}`` entries. Keys are ordered
        protein chains first, then other ``"Chain ..."`` keys, then everything
        else alphabetically, so that later per-entity styling of ligands and
        ions is not overwritten by chain-level styling.
    :raises ValueError: if the extension is not ``.cif``, ``.pdb`` or ``.ent``.
    """
    lowered_path = str(file_path).lower()

    if lowered_path.endswith(".cif"):
        with open(file_path, 'r') as f:
            content = f.read()
        # The mmCIF parser needs a data block header; add a default one when
        # the file does not start with it.
        if not content.lstrip().startswith("data_"):
            content = "data_auto\n" + content
        # Parse from an in-memory handle so no temporary file is left behind.
        source = StringIO(content)
        parser = MMCIFParser(QUIET=True)
    elif lowered_path.endswith((".pdb", ".ent")):
        source = file_path
        parser = PDBParser(QUIET=True)
    else:
        raise ValueError("Unsupported file format. Only .cif and .pdb are supported.")

    structure = parser.get_structure("structure", source)
    summary = defaultdict(list)

    for model in structure:
        for chain in model:
            for residue in chain:
                rtype, key = classify_residue(residue)
                if rtype == 'Ignore':
                    continue

                summary[key].append({
                    'chain': chain.id,
                    'resn': residue.resname.strip(),
                    'resi': str(residue.id[1]),
                    'residue': residue
                })

    # Match on the "(Protein)" suffix, not on one literal chain id, so every
    # protein chain is ordered first.
    return dict(sorted(summary.items(), key=lambda item: _entity_sort_key(item[0])))

def read_file(file_path):
    """Load a structure file and derive its entity summary and colours.

    NOTE: the return type differs by outcome. On failure (no path given, or
    the file cannot be read) a single HTML error string is returned. On
    success a 4-tuple ``(structure_str, summary, entity_color_dict,
    structure_dict)`` is returned, where ``structure_dict`` bundles the first
    three values under those names for later interaction.
    """
    if file_path is None:
        return "<b style='color:red'>未提供结构文件</b>"

    try:
        with open(file_path, "r") as handle:
            raw_text = handle.read()
    except Exception as err:
        return f"<b style='color:red'>读取文件失败: {err}</b>"

    entities = analyze_structure_combined(file_path)
    palette = build_entity_color_dict(list(entities))

    # Cached bundle used by later interactive steps.
    bundle = {
        "structure_str": raw_text,
        "summary": entities,
        "entity_color_dict": palette
    }
    return raw_text, entities, palette, bundle


def extract_contact_residues(summary, selected_keys, cutoff=3.5):
    """Find residues of different entities that lie within ``cutoff`` of each other.

    All atoms of the selected entities are put into one ``NeighborSearch``.
    Every atom pair closer than ``cutoff`` whose atoms belong to different
    entities marks both parent residues as contact residues.

    :param summary: mapping of entity key to entries carrying a ``'residue'``
        object, as built by ``analyze_structure_combined``.
    :param selected_keys: entity keys to include; keys absent from ``summary``
        contribute no atoms.
    :param cutoff: distance threshold passed to ``NeighborSearch.search_all``
        (presumably Angstrom -- confirm against the structure source).
    :return: dict mapping entity key to a list of
        ``{'chain', 'resi', 'resn'}`` dicts sorted by chain, residue number
        and residue name. Entities without contacts are omitted.
    """
    all_atoms = []
    atom_to_residue_info = {}

    # Collect atoms in a single pass; repeated list concatenation would be
    # quadratic in the number of entities.
    for key in selected_keys:
        for entry in summary.get(key, ()):
            residue = entry['residue']
            for atom in residue:
                all_atoms.append(atom)
                atom_to_residue_info[atom] = (key, residue)

    # NeighborSearch cannot be built from an empty atom list.
    if not all_atoms:
        return {}

    close_contacts = NeighborSearch(all_atoms).search_all(cutoff, level='A')

    contact_summary = defaultdict(set)
    for atom1, atom2 in close_contacts:
        key1, res1 = atom_to_residue_info.get(atom1, (None, None))
        key2, res2 = atom_to_residue_info.get(atom2, (None, None))
        if key1 is None or key2 is None or key1 == key2:
            continue

        # Set insertion is idempotent, so no residue-pair bookkeeping is
        # needed. A pair tag without the chain id would drop same-numbered
        # ligand or ion copies that sit in different chains.
        contact_summary[key1].add((res1.get_parent().id, str(res1.id[1]), res1.resname.strip()))
        contact_summary[key2].add((res2.get_parent().id, str(res2.id[1]), res2.resname.strip()))

    return {
        key: [
            {'chain': c, 'resi': r, 'resn': n}
            # Residue name is the final tie-breaker for a deterministic order.
            for (c, r, n) in sorted(res_set, key=lambda x: (x[0], int(x[1]), x[2]))
        ]
        for key, res_set in contact_summary.items()
    }

def _has_hydrogen_bond(donor_pairs, acceptors, cutoff, angle_cutoff):
    """Return True if any donor-H...acceptor triple passes both geometric tests."""
    for donor, hydrogen in donor_pairs:
        for acceptor in acceptors:
            if np.linalg.norm(hydrogen.coord - acceptor.coord) > cutoff:
                continue
            if calculate_angle(donor, hydrogen, acceptor) >= angle_cutoff:
                return True
    return False


def extract_polar_contacts(summary, contact_summary, cutoff=3.5, angle_cutoff=120.0):
    """Narrow contact residues down to those forming hydrogen-bond-like contacts.

    For every ordered pair of different entities in ``contact_summary``, each
    residue of the first is tested as donor against each residue of the second
    as acceptor. A contact is recorded when some hydrogen returned by
    ``find_donor_hydrogens`` lies within ``cutoff`` of an acceptor atom and
    the donor-hydrogen-acceptor angle is at least ``angle_cutoff`` degrees.
    Explicit hydrogens must be present in the structure for anything to be
    found.

    :param summary: mapping of entity key to entries carrying a ``'residue'``
        object.
    :param contact_summary: mapping of entity key to
        ``{'chain', 'resi', 'resn'}`` dicts, e.g. the result of
        ``extract_contact_residues``; entries not found in ``summary`` are
        skipped.
    :param cutoff: maximum hydrogen-acceptor distance (coordinate units).
    :param angle_cutoff: minimum donor-hydrogen-acceptor angle in degrees.
    :return: dict mapping entity key to a list of
        ``{'chain', 'resi', 'resn'}`` dicts sorted by chain, residue number
        and residue name.
    """
    # Index residues by (entity key, chain id, residue number, residue name).
    residue_lookup = {}
    for key, entries in summary.items():
        for entry in entries:
            residue = entry['residue']
            residue_lookup[(key, residue.get_parent().id, str(residue.id[1]), residue.resname.strip())] = residue

    # Resolve each contact residue once and precompute its donor (atom,
    # hydrogen) pairs and acceptor atoms, so the pairwise loop below does not
    # recompute them for every residue pair.
    profiles = {}
    for key, contacts in contact_summary.items():
        entity_profiles = []
        for entry in contacts:
            residue = residue_lookup.get((key, entry['chain'], entry['resi'], entry['resn']))
            if residue is None:
                continue
            label = (residue.get_parent().id, str(residue.id[1]), residue.resname.strip())
            donor_pairs = [
                (atom, hydrogen)
                for atom in residue
                for hydrogen in find_donor_hydrogens(atom)
            ]
            acceptors = [atom for atom in residue if is_acceptor(atom)]
            entity_profiles.append((label, donor_pairs, acceptors))
        profiles[key] = entity_profiles

    polar_summary = defaultdict(set)
    for key1, donors_side in profiles.items():
        for key2, acceptors_side in profiles.items():
            if key1 == key2:
                continue
            for label1, donor_pairs, _ in donors_side:
                if not donor_pairs:
                    continue
                for label2, _, acceptors in acceptors_side:
                    if _has_hydrogen_bond(donor_pairs, acceptors, cutoff, angle_cutoff):
                        polar_summary[key1].add(label1)
                        polar_summary[key2].add(label2)

    return {
        key: [
            {'chain': c, 'resi': r, 'resn': n}
            # Residue name is the final tie-breaker for a deterministic order.
            for (c, r, n) in sorted(res_set, key=lambda x: (x[0], int(x[1]), x[2]))
        ]
        for key, res_set in polar_summary.items()
    }



def set_default_styles(viewer, summary, entity_color_dict,
                       add_label=True):
    """Apply the default per-entity styling (and optional labels) to ``viewer``.

    Hetero atoms are first drawn as sticks. Then, for each entity in
    ``entity_color_dict``: protein and DNA/RNA chains are drawn as cartoons
    for the whole chain, ions as spheres (the viewer is also zoomed to each
    ion), and every other entity as coloured sticks per residue.

    :param viewer: viewer object exposing ``setStyle``, ``addLabel`` and
        ``zoomTo`` (presumably a py3Dmol view -- confirm with the caller).
    :param summary: mapping of entity key to ``{'chain', 'resi', ...}`` entries.
    :param entity_color_dict: mapping of entity key to colour string.
    :param add_label: when true, a label with the entity name is attached to
        each styled selection.
    """
    viewer.setStyle({'hetflag': True}, {"stick": {}})

    for entity, color in entity_color_dict.items():
        label_style = {
            'fontOpacity': 1,
            'backgroundColor': 'black',
            'fontColor': generate_color_bright(entity),
            'fontSize': 10,
        }
        lowered = entity.lower()

        # Chain-level entities (protein, DNA, RNA) are styled by chain id.
        # TODO: check 1L9Z
        if "(protein)" in lowered:
            cartoon = {'arrows': True, 'color': color, 'opacity': 0.9}
        elif '(dna)' in lowered or '(rna)' in lowered:
            cartoon = {'color': color, 'nucleicAcid': True, 'opacity': 0.8}
        else:
            cartoon = None

        if cartoon is not None:
            # Entity keys look like "Chain <id> (<type>)"; take the id.
            chain_id = entity.split()[1]
            viewer.setStyle({'chain': chain_id}, {'cartoon': cartoon})
            if add_label:
                viewer.addLabel(entity, label_style, {'chain': chain_id})
            continue

        # Ions (entity key carries the "(ion)" suffix) are drawn as spheres.
        if '(ion)' in lowered:
            for entry in summary.get(entity, []):
                # TODO: one zinc ion of PDB 1C3R cannot be displayed
                # Some PDB ions have an empty chain id; select by residue
                # number only in that case.
                chain = entry.get('chain', '').strip()
                resi = entry.get('resi', '').strip()
                sel = {'resi': int(resi)}
                if chain:
                    sel['chain'] = chain

                viewer.setStyle(sel, {'sphere': {'color': color, 'radius': 2.0}})
                viewer.zoomTo(sel)
                if add_label:
                    viewer.addLabel(entity, label_style, sel)
            continue

        # Everything else (e.g. ligands) is shown as sticks, residue by residue.
        for entry in summary[entity]:
            residue_sel = {'chain': entry['chain'], 'resi': int(entry['resi'])}
            viewer.setStyle(dict(residue_sel), {'stick': {'color': color}})
            if add_label:
                viewer.addLabel(entity, label_style, residue_sel)

def highlight_residues(viewer, residue_list, name='name',
                       style='stick',
                       font_size=15):
    """Highlight the given residues and label each one.

    Colours are derived from ``name``: the style colour from
    ``generate_color_high``, the label font colour from
    ``generate_color_dark`` and the label background from
    ``generate_color_low``.

    :param viewer: viewer object exposing ``addStyle`` and ``addLabel``
        (presumably a py3Dmol view -- confirm with the caller).
    :param residue_list: residues to highlight, for example
        [
            {'chain': 'A', 'resn': 'LYS', 'resi': '25'},
            {'chain': 'A', 'resn': 'ASP', 'resi': '40'},
        ]
    :param name: seed string for the colours.
    :param style: style key added for each residue (e.g. ``'stick'``).
    :param font_size: label font size.
    :return: the same ``viewer`` object.
    """
    highlight_color = generate_color_high(name)
    font_color = generate_color_dark(name)
    backdrop_color = generate_color_low(name)

    label_style = {
        'fontOpacity': 1,
        'showBackground': True,
        'backgroundColor': backdrop_color,
        'backgroundOpacity': 0.5,
        'borderColor': 'grey',
        'fontColor': font_color,
        'fontSize': font_size
    }

    for res in residue_list:
        chain_id = res['chain']
        residue_number = int(res['resi'])
        label_target = {'chain': chain_id, 'resi': residue_number}

        # Add the highlight on top of the existing style, then label it.
        viewer.addStyle({'chain': chain_id, 'resi': residue_number},
                        {style: {'color': highlight_color}})
        viewer.addLabel(f"{res['resn']} {res['resi']}", label_style, label_target)

    return viewer