File size: 13,748 Bytes
45368f2 4d885c1 45368f2 4d885c1 43c59ae 45368f2 43c59ae 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 43c59ae 45368f2 43c59ae 5905209 45368f2 7df8c54 45368f2 43c59ae 5905209 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 4d885c1 45368f2 5905209 43c59ae 45368f2 4d885c1 45368f2 b2f19ba 45368f2 43c59ae 45368f2 b2f19ba 43c59ae b2f19ba 4d885c1 5905209 4d885c1 5905209 4d885c1 5905209 4d885c1 5905209 4d885c1 5905209 4d885c1 45368f2 4d885c1 45368f2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 
373 374 | from collections import defaultdict
import colorsys
import hashlib
import numpy as np
from tempfile import NamedTemporaryFile
from io import StringIO
from Bio.PDB import MMCIFParser, PDBParser, PDBIO
from collections import defaultdict
import colorsys
import hashlib
from Bio.PDB.NeighborSearch import NeighborSearch
from Bio.PDB.DSSP import DSSP
# Residue names treated as nucleotides. The set holds the one-letter names
# (A, G, C, U, T) and the "D"-prefixed names (DA, DG, DC, DT, DU) --
# presumably ribo- and deoxyribo-nucleotides respectively; confirm against
# the code that consumes this set.
NUCLEIC_ACIDS = {
"A", "G", "C", "U", "T",
"DA", "DG", "DC", "DT", "DU"
}
# Three-letter residue names treated as amino acids: the 20 standard ones
# plus SEC (selenocysteine) and PYL (pyrrolysine).
AMINO_ACIDS = {
"ALA", "ARG", "ASN", "ASP", "CYS",
"GLN", "GLU", "GLY", "HIS", "ILE",
"LEU", "LYS", "MET", "PHE", "PRO",
"SER", "THR", "TRP", "TYR", "VAL",
"SEC", "PYL"
}
def find_donor_hydrogens(atom, max_bond_length=1.3):
    """Return the hydrogens attached to a potential hydrogen-bond donor.

    Only N and O atoms are considered donors. A hydrogen of the same parent
    residue counts as attached when it lies within ``max_bond_length`` of the
    donor. Without this distance filter every hydrogen of the residue
    (including those on carbons) would be reported, and donor-H...acceptor
    angles would be measured against unrelated hydrogens.

    :param atom: atom-like object exposing ``element``, ``coord`` and
        ``get_parent()`` (the parent must be iterable over its atoms).
    :param max_bond_length: maximum donor-hydrogen distance, in the same
        units as ``coord``, for the hydrogen to count as bonded.
    :return: list of hydrogen atoms bonded to ``atom``; empty when ``atom``
        is not N/O or carries no hydrogens.
    """
    if atom.element not in {'N', 'O'}:
        return []

    donor_coord = atom.coord
    return [
        neighbor
        for neighbor in atom.get_parent()
        if neighbor.element == 'H'
        and np.linalg.norm(neighbor.coord - donor_coord) <= max_bond_length
    ]
def is_acceptor(atom):
    """Tell whether ``atom`` can act as a hydrogen-bond acceptor.

    The check is purely element based: only nitrogen and oxygen qualify.

    :param atom: atom-like object exposing ``element``.
    :return: True when the element is ``'N'`` or ``'O'``, else False.
    """
    acceptor_elements = {'O', 'N'}
    return atom.element in acceptor_elements
def calculate_angle(atom1, atom2, atom3):
    """Return the angle atom1-atom2-atom3 in degrees, with atom2 as the vertex.

    :param atom1: atom-like object exposing a numpy ``coord`` vector.
    :param atom2: vertex atom (same interface).
    :param atom3: third atom (same interface).
    :return: angle in degrees in the range [0, 180].
    """
    arm_a = atom1.coord - atom2.coord
    arm_b = atom3.coord - atom2.coord
    length_product = np.linalg.norm(arm_a) * np.linalg.norm(arm_b)
    # Clamp so rounding noise cannot push the cosine outside arccos' domain.
    cosine = np.clip(np.dot(arm_a, arm_b) / length_product, -1.0, 1.0)
    return np.degrees(np.arccos(cosine))
def get_text_content(file_path="static/gr_head.md"):
    """Read a UTF-8 text file and return its whole content as one string.

    :param file_path: path of the file to read.
    :return: the decoded file content.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        text = handle.read()
    return text
# Derive a muted, low-saturation colour from a name.
def generate_color_low(name):
    """Map ``name`` to a deterministic pastel colour.

    The hue comes from the MD5 digest of the name (modulo 360 degrees);
    lightness is fixed at 0.75 and saturation at 0.3.

    :param name: string to derive the colour from.
    :return: colour as a ``#rrggbb`` hex string.
    """
    digest = hashlib.md5(name.encode()).hexdigest()
    hue = (int(digest, 16) % 360) / 360.0
    channels = colorsys.hls_to_rgb(hue, 0.75, 0.3)
    return '#' + ''.join('{:02x}'.format(int(value * 255)) for value in channels)
# Derive a vivid, high-saturation colour from a name.
def generate_color(name, lightness=0.5, saturation=0.9):
    """Map ``name`` to a deterministic colour with the given HLS settings.

    The hue comes from the MD5 digest of the name (modulo 360 degrees).

    :param name: string to derive the colour from.
    :param lightness: HLS lightness passed to ``colorsys.hls_to_rgb``.
    :param saturation: HLS saturation passed to ``colorsys.hls_to_rgb``.
    :return: colour as a ``#rrggbb`` hex string.
    """
    digest = hashlib.md5(name.encode()).hexdigest()
    hue = (int(digest, 16) % 360) / 360.0
    channels = colorsys.hls_to_rgb(hue, lightness, saturation)
    return '#' + ''.join('{:02x}'.format(int(value * 255)) for value in channels)
def generate_color_high(name):
    """Return the vivid colour for ``name`` (lightness 0.5, saturation 0.9)."""
    vivid = {"lightness": 0.5, "saturation": 0.9}
    return generate_color(name, **vivid)
def generate_color_dark(name):
    """Return the dark colour for ``name`` (lightness 0.2, saturation 0.9)."""
    dark = {"lightness": 0.2, "saturation": 0.9}
    return generate_color(name, **dark)
def generate_color_bright(name):
    """Return the bright colour for ``name`` (lightness 0.9, saturation 0.9)."""
    bright = {"lightness": 0.9, "saturation": 0.9}
    return generate_color(name, **bright)
# Build the entity-name -> colour mapping for a list of entity names.
def build_entity_color_dict(entity_list):
    """Assign each entity name its low-saturation colour.

    :param entity_list: iterable of entity names.
    :return: dict mapping every name to the hex colour produced by
        ``generate_color_low``.
    """
    palette = {}
    for entity_name in entity_list:
        palette[entity_name] = generate_color_low(entity_name)
    return palette
def classify_residue(residue):
    """Classify a residue and build the entity key it is grouped under.

    :param residue: residue-like object exposing ``id`` as a
        ``(hetfield, resseq, icode)`` triple, ``resname`` and ``get_parent()``.
    :return: ``(kind, key)`` where ``kind`` is one of ``'Ignore'``,
        ``'Peptide'``, ``'Ion'``, ``'Ligand'``, ``'DNA'``, ``'RNA'``,
        ``'Protein'`` or ``'Other'``. ``key`` is ``None`` for ``'Ignore'``;
        chain-based kinds use ``"Chain <id> (<kind>)"`` and the others use
        ``"<resname> (<kind>)"``.
    """
    het_flag, _resseq, _icode = residue.id
    name = residue.resname.strip()

    if name == 'HOH':
        return 'Ignore', None

    if het_flag.startswith("H_"):
        # An amino acid stored as a hetero residue is reported as a peptide.
        if name in AMINO_ACIDS:
            return 'Peptide', f"Chain {residue.get_parent().id} (Peptide)"
        if name in {"NA", "CL", "K", "CA", "MG", "ZN", "FE", "MN", "CU", "CO"}:
            return 'Ion', f"{name} (Ion)"
        return 'Ligand', f"{name} (Ligand)"

    if het_flag != " ":
        return 'Other', f"{name} (Other)"

    # Standard (non-hetero) residue: polymer type is decided per chain.
    chain_id = residue.get_parent().id
    if name not in NUCLEIC_ACIDS:
        return 'Protein', f"Chain {chain_id} (Protein)"
    polymer = 'DNA' if name.startswith("D") else 'RNA'
    return polymer, f"Chain {chain_id} ({polymer})"
def _entity_sort_key(entity_key):
    """Order entity keys: protein chains, then other "Chain ..." keys, then the rest."""
    return (
        not entity_key.endswith("(Protein)"),
        not entity_key.startswith("Chain"),
        entity_key,
    )


def analyze_structure_combined(file_path):
    """Parse a structure file and group its residues by entity.

    mmCIF files (``.cif``) and PDB files (``.pdb`` / ``.ent``) are supported;
    the extension check is case-insensitive. An mmCIF file lacking the
    leading ``data_`` block header is parsed from a patched temporary copy,
    which is removed once parsing is done.

    :param file_path: path of the structure file.
    :return: dict mapping each entity key (as produced by
        ``classify_residue``) to a list of
        ``{'chain', 'resn', 'resi', 'residue'}`` entries. Keys are ordered
        protein chains first, then other chain-based keys, then everything
        else alphabetically, so that ligand/ion styles applied later are not
        overwritten by chain styles.
    :raises ValueError: if the file extension is not supported.
    """
    import os  # local import: only needed for temporary-file cleanup

    lowered = file_path.lower()
    temp_path = None
    parse_path = file_path

    if lowered.endswith(".cif"):
        with open(file_path, 'r') as f:
            content = f.read()
        # The mmCIF parser needs a data block; add a default one when missing.
        if not content.lstrip().startswith("data_"):
            with NamedTemporaryFile(suffix=".cif", delete=False, mode='w') as tmp:
                tmp.write("data_auto\n" + content)
                temp_path = tmp.name
            parse_path = temp_path
        parser = MMCIFParser(QUIET=True)
    elif lowered.endswith((".pdb", ".ent")):
        parser = PDBParser(QUIET=True)
    else:
        raise ValueError(
            "Unsupported file format. Only .cif, .pdb and .ent are supported."
        )

    try:
        structure = parser.get_structure("structure", parse_path)
    finally:
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not mask
                # the parse result or a parse error.
                pass

    summary = defaultdict(list)
    for model in structure:
        for chain in model:
            for residue in chain:
                rtype, key = classify_residue(residue)
                if rtype == 'Ignore':
                    continue
                summary[key].append({
                    'chain': chain.id,
                    'resn': residue.resname.strip(),
                    'resi': str(residue.id[1]),
                    'residue': residue,
                })

    return dict(sorted(summary.items(), key=lambda item: _entity_sort_key(item[0])))
def read_file(file_path):
    """Load a structure file and prepare everything the viewer needs.

    :param file_path: path of the structure file, or ``None``.
    :return: on success a 4-tuple
        ``(structure_str, summary, entity_color_dict, structure_dict)`` where
        ``structure_dict`` bundles the first three under those same names.
        When ``file_path`` is ``None`` or the file cannot be read, a single
        HTML-formatted error string is returned instead.

    NOTE(review): the success and failure return shapes differ (tuple vs.
    str); callers that unpack the result must check the type first.
    """
    if file_path is None:
        return "<b style='color:red'>未提供结构文件</b>"

    try:
        with open(file_path, "r") as handle:
            raw_text = handle.read()
    except Exception as err:
        return f"<b style='color:red'>读取文件失败: {err}</b>"

    entities = analyze_structure_combined(file_path)
    palette = build_entity_color_dict(list(entities.keys()))

    # Bundle kept around for later interactive use.
    bundle = dict(
        structure_str=raw_text,
        summary=entities,
        entity_color_dict=palette,
    )
    return raw_text, entities, palette, bundle
def extract_contact_residues(summary, selected_keys, cutoff=3.5):
    """Find residues of different entities that lie within ``cutoff`` of each other.

    :param summary: dict mapping entity key to a list of entries, each
        holding the residue object under ``'residue'``.
    :param selected_keys: entity keys to include in the search; duplicates
        and keys absent from ``summary`` are ignored.
    :param cutoff: maximum atom-atom distance for two residues to count as
        being in contact.
    :return: dict mapping each entity key that has at least one contact to a
        list of ``{'chain', 'resi', 'resn'}`` dicts, sorted by chain, residue
        number and residue name. Empty dict when nothing is selected.
    """
    atom_owner = {}
    all_atoms = []
    # dict.fromkeys drops duplicate keys while keeping their order, so no
    # atom is fed to the neighbour search twice.
    for key in dict.fromkeys(selected_keys):
        for entry in summary.get(key, ()):
            residue = entry['residue']
            tag = (
                residue.get_parent().id,
                str(residue.id[1]),
                residue.resname.strip(),
            )
            for atom in residue:
                all_atoms.append(atom)
                atom_owner[atom] = (key, tag)

    # NeighborSearch cannot be built from an empty atom list.
    if not all_atoms:
        return {}

    contact_summary = defaultdict(set)
    searcher = NeighborSearch(all_atoms)
    for atom1, atom2 in searcher.search_all(cutoff, level='A'):
        owner1 = atom_owner.get(atom1)
        owner2 = atom_owner.get(atom2)
        if owner1 is None or owner2 is None:
            continue
        key1, tag1 = owner1
        key2, tag2 = owner2
        # Only contacts between different entities are of interest.
        if key1 == key2:
            continue
        # The sets de-duplicate; tags include the chain id, so equally
        # numbered residues on different chains stay distinct.
        contact_summary[key1].add(tag1)
        contact_summary[key2].add(tag2)

    return {
        key: [
            {'chain': c, 'resi': r, 'resn': n}
            for (c, r, n) in sorted(res_set, key=lambda x: (x[0], int(x[1]), x[2]))
        ]
        for key, res_set in contact_summary.items()
    }
def _forms_hydrogen_bond(donor_pairs, acceptors, cutoff, angle_cutoff):
    """Return True if any donor-H...acceptor triple meets both geometric criteria."""
    for donor, hydrogen in donor_pairs:
        for acceptor in acceptors:
            if np.linalg.norm(hydrogen.coord - acceptor.coord) > cutoff:
                continue
            if calculate_angle(donor, hydrogen, acceptor) >= angle_cutoff:
                return True
    return False


def extract_polar_contacts(summary, contact_summary, cutoff=3.5, angle_cutoff=120.0):
    """Keep only the contact residues that take part in a hydrogen bond.

    For every pair of contact residues from two different entities, the
    first is treated as donor side and the second as acceptor side (every
    ordered pair is visited, so both directions are covered). A pair
    qualifies when some hydrogen reported by ``find_donor_hydrogens`` lies
    within ``cutoff`` of an acceptor atom and the donor-H-acceptor angle is
    at least ``angle_cutoff`` degrees.

    :param summary: dict mapping entity key to entries holding the residue
        object under ``'residue'``.
    :param contact_summary: dict mapping entity key to a list of
        ``{'chain', 'resi', 'resn'}`` dicts (the shape returned by
        ``extract_contact_residues``).
    :param cutoff: maximum hydrogen-acceptor distance.
    :param angle_cutoff: minimum donor-H-acceptor angle in degrees.
    :return: dict mapping entity key to a list of
        ``{'chain', 'resi', 'resn'}`` dicts, sorted by chain, residue number
        and residue name.
    """
    # Index residues by (entity key, chain, residue number, residue name).
    residue_lookup = {}
    for key, entries in summary.items():
        for entry in entries:
            residue = entry['residue']
            residue_lookup[
                (key, residue.get_parent().id, str(residue.id[1]), residue.resname.strip())
            ] = residue

    # Resolve each contact entry to its residue once; unknown entries are dropped.
    resolved = {}
    for key, contacts in contact_summary.items():
        found = []
        for entry in contacts:
            residue = residue_lookup.get((key, entry['chain'], entry['resi'], entry['resn']))
            if residue is None:
                continue
            tag = (residue.get_parent().id, str(residue.id[1]), residue.resname.strip())
            found.append((tag, residue))
        resolved[key] = found

    # Per-residue caches so donors/acceptors are computed once, not once per
    # residue pair. Residues stay alive in residue_lookup, so id() is stable.
    donor_cache = {}
    acceptor_cache = {}

    def donors_of(residue):
        pairs = donor_cache.get(id(residue))
        if pairs is None:
            pairs = [(atom, h) for atom in residue for h in find_donor_hydrogens(atom)]
            donor_cache[id(residue)] = pairs
        return pairs

    def acceptors_of(residue):
        atoms = acceptor_cache.get(id(residue))
        if atoms is None:
            atoms = [atom for atom in residue if is_acceptor(atom)]
            acceptor_cache[id(residue)] = atoms
        return atoms

    polar_summary = defaultdict(set)
    for key1, hits1 in resolved.items():
        for tag1, res1 in hits1:
            for key2, hits2 in resolved.items():
                if key1 == key2:
                    continue
                for tag2, res2 in hits2:
                    donor_pairs = donors_of(res1)
                    if not donor_pairs:
                        # res1 has no donor hydrogens: nothing can match.
                        break
                    if _forms_hydrogen_bond(donor_pairs, acceptors_of(res2),
                                            cutoff, angle_cutoff):
                        polar_summary[key1].add(tag1)
                        polar_summary[key2].add(tag2)

    return {
        key: [
            {'chain': c, 'resi': r, 'resn': n}
            for (c, r, n) in sorted(res_set, key=lambda x: (x[0], int(x[1]), x[2]))
        ]
        for key, res_set in polar_summary.items()
    }
def set_default_styles(viewer, summary, entity_color_dict,
                       add_label=True):
    """Apply the default per-entity representation to a 3D viewer.

    Protein chains are drawn as cartoons with arrows, DNA/RNA chains as
    nucleic-acid cartoons, ions as spheres and every other entity as sticks.

    :param viewer: viewer object exposing ``setStyle``, ``addLabel`` and
        ``zoomTo`` (presumably a py3Dmol view -- confirm with the caller).
    :param summary: dict mapping entity key to entries with ``'chain'`` and
        ``'resi'`` fields.
    :param entity_color_dict: dict mapping entity key to its colour.
    :param add_label: when true, a label carrying the entity name is added
        for every styled selection.
    """
    # Start by drawing all hetero atoms as sticks.
    viewer.setStyle({'hetflag': True}, {"stick": {}})

    for entity, color in entity_color_dict.items():
        label_style = {
            'fontOpacity': 1,
            'backgroundColor': 'black',
            'fontColor': generate_color_bright(entity),
            'fontSize': 10,
        }
        tag = entity.lower()
        is_protein = "(protein)" in tag

        # Polymer chains (protein / DNA / RNA) are styled per chain.
        # TODO: check 1L9Z
        if is_protein or '(dna)' in tag or '(rna)' in tag:
            # Keys look like "Chain <id> (<kind>)": the chain id is the second token.
            chain_id = entity.split()[1]
            if is_protein:
                cartoon = {'arrows': True, 'color': color, 'opacity': 0.9}
            else:
                cartoon = {'color': color, 'nucleicAcid': True, 'opacity': 0.8}
            viewer.setStyle({'chain': chain_id}, {'cartoon': cartoon})
            if add_label:
                viewer.addLabel(entity, label_style, {'chain': chain_id})
            continue

        # Ions (entity key ends with "(Ion)") are shown as spheres.
        if '(ion)' in tag:
            for entry in summary.get(entity, []):
                # TODO: one zinc ion of PDB entry 1C3R fails to display.
                # Some PDB files give ions a blank chain id; in that case
                # the selection is made on the residue number alone.
                chain = entry.get('chain', '').strip()
                sel = {'resi': int(entry.get('resi', '').strip())}
                if chain:
                    sel['chain'] = chain
                viewer.setStyle(sel, {'sphere': {'color': color, 'radius': 2.0}})
                # NOTE(review): zooming on every ion leaves the view on the
                # last one -- confirm this is intended.
                viewer.zoomTo(sel)
                if add_label:
                    viewer.addLabel(entity, label_style, sel)
            continue

        # Everything else (e.g. ligands) is shown as sticks.
        for entry in summary[entity]:
            target = {'chain': entry['chain'], 'resi': int(entry['resi'])}
            viewer.setStyle(dict(target), {'stick': {'color': color}})
            if add_label:
                viewer.addLabel(entity, label_style, target)
def highlight_residues(viewer, residue_list, name='name',
                       style='stick',
                       font_size=15):
    """Highlight the given residues in the viewer and label each of them.

    Colours are derived from ``name``: the high-saturation colour for the
    representation, the dark colour for the label text and the
    low-saturation colour for the label background.

    :param viewer: viewer object exposing ``addStyle`` and ``addLabel``
        (presumably a py3Dmol view -- confirm with the caller).
    :param residue_list: list of residue dicts, e.g.
        residue_list = [
            {'chain': 'A', 'resn': 'LYS', 'resi': '25'},
            {'chain': 'A', 'resn': 'ASP', 'resi': '40'},
        ]
    :param name: string the colours are derived from.
    :param style: representation name used as the style key (e.g. 'stick').
    :param font_size: label font size.
    :return: the same ``viewer`` object.
    """
    fill_color = generate_color_high(name)
    label_style = {
        'fontOpacity': 1,
        'showBackground': True,
        'backgroundColor': generate_color_low(name),
        'backgroundOpacity': 0.5,
        'borderColor': 'grey',
        'fontColor': generate_color_dark(name),
        'fontSize': font_size,
    }

    for res in residue_list:
        target = {'chain': res['chain'], 'resi': int(res['resi'])}
        # addStyle layers the highlight on top of the existing representation.
        viewer.addStyle(dict(target), {style: {'color': fill_color}})
        caption = f"{res['resn']} {res['resi']}"
        viewer.addLabel(caption, label_style, target)

    return viewer
|