Spaces:

EZ4Fanta
/

bindana

Sleeping

App Files Files Community

bindana / utils.py

EZ4Fanta

add multi ana

c549a30 9 months ago

raw

history blame contribute delete

13.7 kB

	from collections import defaultdict
	import colorsys
	import hashlib
	import numpy as np

	from tempfile import NamedTemporaryFile
	from io import StringIO
	from Bio.PDB import MMCIFParser, PDBParser, PDBIO
	from collections import defaultdict
	import colorsys
	import hashlib
	from Bio.PDB.NeighborSearch import NeighborSearch
	from Bio.PDB.DSSP import DSSP

	# Residue names treated as nucleic acids: the single-letter codes plus the
	# "D"-prefixed codes (classify_residue reports the "D"-prefixed ones as DNA).
	NUCLEIC_ACIDS = set("A G C U T DA DG DC DT DU".split())

	# Three-letter residue names recognised as amino acids (22 entries,
	# including SEC and PYL).
	AMINO_ACIDS = set(
	    "ALA ARG ASN ASP CYS GLN GLU GLY HIS ILE "
	    "LEU LYS MET PHE PRO SER THR TRP TYR VAL "
	    "SEC PYL".split()
	)

	def find_donor_hydrogens(atom, max_bond_length=1.3):
	    """Return the hydrogens bonded to a potential hydrogen-bond donor atom.

	    Only nitrogen and oxygen atoms are considered donors. Candidate
	    hydrogens are the atoms of the same parent (``atom.get_parent()``) whose
	    element is ``'H'``. They are kept only when they lie within
	    ``max_bond_length`` of ``atom``, so that a hydrogen attached to a
	    different heavy atom of the same residue is not attributed to this donor.

	    :param atom: object exposing ``element``, ``coord`` (array-like
	        coordinates) and ``get_parent()`` returning an iterable of atoms.
	    :param max_bond_length: maximum donor-hydrogen distance, in the units of
	        ``coord``. Pass ``None`` to disable the distance filter and return
	        every hydrogen of the parent.
	    :return: list of hydrogen atoms (empty when ``atom`` is not N or O).
	    """
	    if atom.element not in {'N', 'O'}:
	        return []

	    hydrogens = []
	    for neighbor in atom.get_parent():
	        if neighbor.element != 'H':
	            continue
	        if max_bond_length is not None:
	            separation = np.linalg.norm(neighbor.coord - atom.coord)
	            if separation > max_bond_length:
	                # Belongs to another heavy atom of the same residue.
	                continue
	        hydrogens.append(neighbor)
	    return hydrogens

	def is_acceptor(atom):
	    """Tell whether ``atom`` can accept a hydrogen bond.

	    An atom qualifies when its ``element`` attribute is ``'N'`` or ``'O'``.
	    """
	    acceptor_elements = {'N', 'O'}
	    element = atom.element
	    return element in acceptor_elements

	def calculate_angle(atom1, atom2, atom3):
	    """Return the angle atom1-atom2-atom3 in degrees, with atom2 as the vertex.

	    Each argument must expose a ``coord`` array. The cosine is clipped to
	    [-1, 1] before ``arccos`` so rounding noise cannot push it out of range.
	    """
	    arm_a = atom1.coord - atom2.coord
	    arm_b = atom3.coord - atom2.coord
	    length_product = np.linalg.norm(arm_a) * np.linalg.norm(arm_b)
	    cosine = np.clip(np.dot(arm_a, arm_b) / length_product, -1.0, 1.0)
	    return np.degrees(np.arccos(cosine))

	def get_text_content(file_path="static/gr_head.md"):
	    """Read a UTF-8 text file and return its whole content as one string.

	    :param file_path: path of the file to read; defaults to
	        ``static/gr_head.md``.
	    """
	    with open(file_path, "r", encoding="utf-8") as handle:
	        text = handle.read()
	    return text

	# Derive a muted, low-saturation colour from a name.
	def generate_color_low(name):
	    """Return a deterministic low-saturation ``#rrggbb`` colour for ``name``.

	    The hue is the MD5 digest of ``name`` read as an integer, taken modulo
	    360 and scaled to [0, 1). Lightness is fixed at 0.75 and saturation at
	    0.3.

	    :param name: string to derive the colour from.
	    :return: lowercase hexadecimal colour string such as ``'#d2acb1'``.
	    """
	    hash_digest = hashlib.md5(name.encode()).hexdigest()
	    hue = int(hash_digest, 16) % 360 / 360.0
	    lightness = 0.75
	    saturation = 0.3
	    red, green, blue = colorsys.hls_to_rgb(hue, lightness, saturation)
	    # Scale each 0..1 channel to 0..255 (truncating) before hex formatting.
	    return '#{:02x}{:02x}{:02x}'.format(
	        int(red * 255), int(green * 255), int(blue * 255)
	    )

	# 自动生成科研风格高饱和度颜色
	def generate_color(name, lightness=0.5, saturation=0.9):
	hash_digest = hashlib.md5(name.encode()).hexdigest()
	hue = int(hash_digest, 16) % 360 / 360.0
	rgb = colorsys.hls_to_rgb(hue, lightness, saturation)
	return '#{:02x}{:02x}{:02x}'.format(int(rgb[0]255), int(rgb[1]255), int(rgb[2]*255))


	def generate_color_high(name):
	    """Return the colour for ``name`` at lightness 0.5 and saturation 0.9."""
	    return generate_color(name, 0.5, 0.9)

	def generate_color_dark(name):
	    """Return the colour for ``name`` at lightness 0.2 and saturation 0.9."""
	    return generate_color(name, 0.2, 0.9)

	def generate_color_bright(name):
	    """Return the colour for ``name`` at lightness 0.9 and saturation 0.9."""
	    return generate_color(name, 0.9, 0.9)

	# Build the entity-name -> colour mapping for a list of entity names.
	def build_entity_color_dict(entity_list):
	    """Map every name in ``entity_list`` to its low-saturation colour.

	    :param entity_list: iterable of entity names.
	    :return: dict ``{name: generate_color_low(name)}`` in input order.
	    """
	    color_by_entity = {}
	    for entity_name in entity_list:
	        color_by_entity[entity_name] = generate_color_low(entity_name)
	    return color_by_entity

	# Residue names reported as ions when they occur as hetero residues.
	_ION_RESNAMES = frozenset(
	    {"NA", "CL", "K", "CA", "MG", "ZN", "FE", "MN", "CU", "CO"}
	)


	def classify_residue(residue):
	    """Classify a residue and return ``(type, summary_key)``.

	    The decision uses the hetero field (first element of ``residue.id``) and
	    the stripped residue name:

	    * water (hetero field ``'W'`` or residue name ``HOH``) ->
	      ``('Ignore', None)``
	    * hetero field starting with ``'H_'`` -> ``'Peptide'`` when the name is
	      in ``AMINO_ACIDS``, ``'Ion'`` when it is a known ion name, otherwise
	      ``'Ligand'``
	    * blank hetero field -> ``'DNA'`` / ``'RNA'`` when the name is in
	      ``NUCLEIC_ACIDS`` (``'DNA'`` if it starts with ``D``), otherwise
	      ``'Protein'``
	    * anything else -> ``'Other'``

	    Chain-level types use the key ``"Chain <chain id> (<type>)"``; the
	    others use ``"<resname> (<type>)"``.

	    :param residue: object exposing ``id`` (3-tuple), ``resname`` and
	        ``get_parent()`` returning an object with an ``id`` attribute.
	    """
	    hetfield = residue.id[0]
	    resname = residue.resname.strip()

	    # Waters are skipped. Checking the hetero flag as well catches waters
	    # stored under a residue name other than HOH.
	    if hetfield == 'W' or resname == 'HOH':
	        return 'Ignore', None

	    # An amino acid stored as a hetero residue is treated as a peptide.
	    if hetfield.startswith("H_"):
	        if resname in AMINO_ACIDS:
	            return 'Peptide', f"Chain {residue.get_parent().id} (Peptide)"
	        if resname in _ION_RESNAMES:
	            return 'Ion', f"{resname} (Ion)"
	        return 'Ligand', f"{resname} (Ligand)"

	    if hetfield == " ":
	        chain_id = residue.get_parent().id
	        if resname in NUCLEIC_ACIDS:
	            # NOTE(review): a bare "T" is reported as RNA here - confirm
	            # this is intended for legacy DNA naming.
	            if resname.startswith("D"):
	                return 'DNA', f"Chain {chain_id} (DNA)"
	            return 'RNA', f"Chain {chain_id} (RNA)"
	        return 'Protein', f"Chain {chain_id} (Protein)"

	    return 'Other', f"{resname} (Other)"


	def _summary_sort_key(item):
	    """Rank summary entries: proteins, then other chain entities, then the rest."""
	    name = item[0]
	    return (
	        not name.endswith("(Protein)"),  # proteins first
	        not name.startswith("Chain"),    # then the other "Chain ..." entities
	        name,                            # ties broken alphabetically
	    )


	def _load_mmcif_structure(file_path):
	    """Parse an mmCIF file, supplying a ``data_`` block header if it is missing."""
	    import os  # local import: only needed to remove the temporary copy

	    with open(file_path, 'r') as f:
	        content = f.read()

	    parser = MMCIFParser(QUIET=True)
	    if content.lstrip().startswith("data_"):
	        return parser.get_structure("structure", file_path)

	    # No data block header: parse a patched temporary copy with a default
	    # block name, then remove the copy so it does not accumulate on disk.
	    with NamedTemporaryFile(suffix=".cif", delete=False, mode='w') as tmp:
	        tmp.write("data_auto\n" + content)
	    try:
	        return parser.get_structure("structure", tmp.name)
	    finally:
	        os.remove(tmp.name)


	def analyze_structure_combined(file_path):
	    """Parse a structure file and group its residues by entity.

	    Every residue of every model is classified with ``classify_residue``;
	    residues classified as ``'Ignore'`` are skipped. The others are appended
	    to the list stored under their entity key as dicts with the keys
	    ``'chain'``, ``'resn'``, ``'resi'`` (residue number as a string) and
	    ``'residue'`` (the residue object itself).

	    :param file_path: path to a ``.cif``, ``.pdb`` or ``.ent`` file (the
	        extension is matched case-insensitively).
	    :return: dict ``{entity key: [residue dict, ...]}`` ordered with protein
	        chains first, then the other ``"Chain ..."`` entities, then
	        everything else, alphabetically within each group.
	    :raises ValueError: if the extension is not supported.
	    """
	    lowered = file_path.lower()
	    if lowered.endswith(".cif"):
	        structure = _load_mmcif_structure(file_path)
	    elif lowered.endswith((".pdb", ".ent")):
	        structure = PDBParser(QUIET=True).get_structure("structure", file_path)
	    else:
	        raise ValueError(
	            "Unsupported file format. Only .cif, .pdb and .ent are supported."
	        )

	    summary = defaultdict(list)
	    for model in structure:
	        for chain in model:
	            for residue in chain:
	                rtype, key = classify_residue(residue)
	                if rtype == 'Ignore':
	                    continue
	                summary[key].append({
	                    'chain': chain.id,
	                    'resn': residue.resname.strip(),
	                    'resi': str(residue.id[1]),
	                    'residue': residue,
	                })

	    # Proteins come first so that the chain-wide style applied to them later
	    # cannot overwrite the per-residue styles of ligands, ions and the like.
	    return dict(sorted(summary.items(), key=_summary_sort_key))

	def read_file(file_path):
	    """Load a structure file and prepare the data used by the viewer.

	    :param file_path: path to the structure file, or ``None``.
	    :return: on success the 4-tuple ``(structure_str, summary,
	        entity_color_dict, structure_dict)``, where ``structure_dict``
	        bundles the first three values under those same names. When
	        ``file_path`` is ``None`` or the file cannot be read, a single HTML
	        error string is returned instead.

	    NOTE(review): the failure paths return one string while the success
	    path returns four values - confirm callers handle both shapes.
	    """
	    if file_path is None:
	        return "<b style='color:red'>未提供结构文件</b>"

	    try:
	        with open(file_path, "r") as handle:
	            raw_text = handle.read()
	    except Exception as err:
	        return f"<b style='color:red'>读取文件失败: {err}</b>"

	    entities = analyze_structure_combined(file_path)
	    colors = build_entity_color_dict(list(entities.keys()))

	    # Bundle kept for later interactive use.
	    cache = dict(
	        structure_str=raw_text,
	        summary=entities,
	        entity_color_dict=colors,
	    )
	    return raw_text, entities, colors, cache


	def extract_contact_residues(summary, selected_keys, cutoff=3.5):
	    """Find the residues of the selected entities that touch another entity.

	    All atoms of the selected entities are searched for pairs closer than
	    ``cutoff``. A pair counts only when its two atoms belong to different
	    entities; both residues are then recorded under their own entity key.

	    :param summary: mapping entity key -> list of dicts holding a
	        ``'residue'`` entry (an iterable of atoms).
	    :param selected_keys: entity keys to include; each must be in ``summary``.
	    :param cutoff: maximum atom-atom distance passed to
	        ``NeighborSearch.search_all``.
	    :return: dict ``{entity key: [{'chain', 'resi', 'resn'}, ...]}`` with each
	        list sorted by chain id and numeric residue number. Entities without
	        contacts are absent; an empty selection gives ``{}``.
	    """
	    atom_owner = {}
	    all_atoms = []
	    # dict.fromkeys drops duplicate keys while keeping their order.
	    for key in dict.fromkeys(selected_keys):
	        for entry in summary[key]:
	            residue = entry['residue']
	            for atom in residue:
	                all_atoms.append(atom)
	                atom_owner[atom] = (key, residue)

	    # The neighbour search cannot be built from an empty atom list.
	    if not all_atoms:
	        return {}

	    close_contacts = NeighborSearch(all_atoms).search_all(cutoff, level='A')

	    # Sets de-duplicate residues. Each record carries the chain id, so
	    # same-numbered residues on different chains stay distinct.
	    contact_summary = defaultdict(set)
	    for atom1, atom2 in close_contacts:
	        if atom1 == atom2:
	            continue
	        key1, res1 = atom_owner.get(atom1, (None, None))
	        key2, res2 = atom_owner.get(atom2, (None, None))
	        if key1 is None or key2 is None or key1 == key2:
	            continue
	        contact_summary[key1].add(
	            (res1.get_parent().id, str(res1.id[1]), res1.resname.strip())
	        )
	        contact_summary[key2].add(
	            (res2.get_parent().id, str(res2.id[1]), res2.resname.strip())
	        )

	    return {
	        key: [
	            {'chain': c, 'resi': r, 'resn': n}
	            for (c, r, n) in sorted(res_set, key=lambda x: (x[0], int(x[1])))
	        ]
	        for key, res_set in contact_summary.items()
	    }

	def _polar_residue_record(residue):
	    """Return the ``(chain id, residue number as str, residue name)`` tuple."""
	    return (residue.get_parent().id, str(residue.id[1]), residue.resname.strip())


	def _has_hydrogen_bond(donor_pairs, acceptors, cutoff, angle_cutoff):
	    """Tell whether any donor-hydrogen pair reaches any acceptor."""
	    for donor, hydrogen in donor_pairs:
	        for acceptor in acceptors:
	            if np.linalg.norm(hydrogen.coord - acceptor.coord) > cutoff:
	                continue
	            if calculate_angle(donor, hydrogen, acceptor) >= angle_cutoff:
	                return True
	    return False


	def extract_polar_contacts(summary, contact_summary, cutoff=3.5, angle_cutoff=120.0):
	    """Keep the contact residues that take part in a hydrogen-bond-like contact.

	    For every ordered pair of different entities in ``contact_summary``, a
	    residue of the first (donor side) and a residue of the second (acceptor
	    side) are both recorded when some donor hydrogen lies within ``cutoff``
	    of an acceptor atom and the donor-hydrogen-acceptor angle, measured at
	    the hydrogen, is at least ``angle_cutoff`` degrees.

	    :param summary: mapping entity key -> list of dicts holding a
	        ``'residue'`` entry; used to resolve the contact entries.
	    :param contact_summary: mapping entity key -> list of
	        ``{'chain', 'resi', 'resn'}`` dicts. Entries that cannot be resolved
	        in ``summary`` are skipped.
	    :param cutoff: maximum hydrogen-acceptor distance.
	    :param angle_cutoff: minimum donor-hydrogen-acceptor angle in degrees.
	    :return: dict ``{entity key: [{'chain', 'resi', 'resn'}, ...]}`` with each
	        list sorted by chain id and numeric residue number.
	    """
	    # Index residues by (entity key, chain id, residue number, residue name).
	    residue_lookup = {}
	    for key in summary:
	        for entry in summary[key]:
	            residue = entry['residue']
	            residue_lookup[(key,) + _polar_residue_record(residue)] = residue

	    # Resolve each contact residue once and precompute its donor-hydrogen
	    # pairs and acceptor atoms, so the pairwise loops below do not redo it.
	    resolved = {}
	    for key, contacts in contact_summary.items():
	        prepared = []
	        for entry in contacts:
	            residue = residue_lookup.get(
	                (key, entry['chain'], entry['resi'], entry['resn'])
	            )
	            if residue is None:
	                continue
	            donor_pairs = [
	                (atom, hydrogen)
	                for atom in residue
	                for hydrogen in find_donor_hydrogens(atom)
	            ]
	            acceptors = [atom for atom in residue if is_acceptor(atom)]
	            prepared.append(
	                (_polar_residue_record(residue), donor_pairs, acceptors)
	            )
	        resolved[key] = prepared

	    polar_summary = defaultdict(set)
	    for key1, donors_side in resolved.items():
	        for record1, donor_pairs, _ in donors_side:
	            if not donor_pairs:
	                continue
	            for key2, acceptors_side in resolved.items():
	                if key1 == key2:
	                    continue
	                for record2, _, acceptors in acceptors_side:
	                    if _has_hydrogen_bond(donor_pairs, acceptors, cutoff, angle_cutoff):
	                        polar_summary[key1].add(record1)
	                        polar_summary[key2].add(record2)

	    return {
	        key: [
	            {'chain': c, 'resi': r, 'resn': n}
	            for (c, r, n) in sorted(res_set, key=lambda x: (x[0], int(x[1])))
	        ]
	        for key, res_set in polar_summary.items()
	    }



	def set_default_styles(viewer, summary, entity_color_dict,
	                       add_label=True):
	    """Apply the default representation, and optionally labels, to each entity.

	    All hetero atoms are first drawn as sticks. Then every entity of
	    ``entity_color_dict`` is styled, in iteration order, according to the
	    suffix found in its lower-cased name:

	    * ``(protein)`` - the whole chain as a cartoon with arrows, opacity 0.9
	    * ``(dna)`` / ``(rna)`` - the whole chain as a nucleic-acid cartoon,
	      opacity 0.8
	    * ``(ion)`` - each residue as a sphere of radius 2.0; the view is also
	      zoomed to it
	    * anything else (e.g. ligands) - each residue as sticks

	    :param viewer: object exposing ``setStyle``, ``addLabel`` and ``zoomTo``
	        (presumably a py3Dmol view - confirm against the caller).
	    :param summary: mapping entity name -> list of dicts with ``'chain'``
	        and ``'resi'`` entries.
	    :param entity_color_dict: mapping entity name -> colour string.
	    :param add_label: when true, a label showing the entity name is attached
	        to every styled selection.
	    """
	    viewer.setStyle({'hetflag': True}, {"stick": {}})

	    for entity_name, entity_color in entity_color_dict.items():
	        tag = entity_name.lower()
	        label_options = dict(
	            fontOpacity=1,
	            backgroundColor='black',
	            fontColor=generate_color_bright(entity_name),
	            fontSize=10,
	        )

	        # Chain-level entities (protein, DNA, RNA) are styled per chain.
	        # TODO: check PDB entry 1L9Z
	        if "(protein)" in tag:
	            chain_id = entity_name.split()[1]
	            cartoon = {'arrows': True, 'color': entity_color, 'opacity': 0.9}
	            viewer.setStyle({'chain': chain_id}, {'cartoon': cartoon})
	            if add_label:
	                viewer.addLabel(entity_name, label_options, {'chain': chain_id})
	            continue

	        if '(dna)' in tag or '(rna)' in tag:
	            chain_id = entity_name.split()[1]
	            cartoon = {'color': entity_color, 'nucleicAcid': True, 'opacity': 0.8}
	            viewer.setStyle({'chain': chain_id}, {'cartoon': cartoon})
	            if add_label:
	                viewer.addLabel(entity_name, label_options, {'chain': chain_id})
	            continue

	        # Ions (entity name carries the "(ion)" suffix) are drawn as spheres.
	        if '(ion)' in tag:
	            for record in summary.get(entity_name, []):
	                # TODO: one zinc ion of PDB entry 1C3R cannot be displayed.
	                # Some PDB ions have an empty chain id, so the chain is only
	                # added to the selection when it is non-empty.
	                chain_id = record.get('chain', '').strip()
	                resi_text = record.get('resi', '').strip()
	                ion_sel = {'resi': int(resi_text)}
	                if chain_id:
	                    ion_sel['chain'] = chain_id

	                sphere = {'color': entity_color, 'radius': 2.0}
	                viewer.setStyle(ion_sel, {'sphere': sphere})
	                viewer.zoomTo(ion_sel)
	                if add_label:
	                    viewer.addLabel(entity_name, label_options, ion_sel)
	            continue

	        # Everything else, e.g. ligands, is drawn as sticks.
	        for record in summary[entity_name]:
	            stick_sel = {'chain': record['chain'], 'resi': int(record['resi'])}
	            viewer.setStyle(stick_sel, {'stick': {'color': entity_color}})
	            if add_label:
	                viewer.addLabel(entity_name, label_options, dict(stick_sel))

	def highlight_residues(viewer, residue_list, name='name',
	                       style='stick',
	                       font_size=15):
	    """Highlight and label the given residues on the viewer.

	    The highlight colour, the label font colour and the label background
	    colour are all derived from ``name``.

	    :param viewer: object exposing ``addStyle`` and ``addLabel`` (presumably
	        a py3Dmol view - confirm against the caller).
	    :param residue_list: list of residue dicts, for example::

	            [
	                {'chain': 'A', 'resn': 'LYS', 'resi': '25'},
	                {'chain': 'A', 'resn': 'ASP', 'resi': '40'},
	            ]
	    :param name: string the colours are generated from.
	    :param style: name of the representation added to each residue.
	    :param font_size: font size of the residue labels.
	    :return: the same ``viewer`` object.
	    """
	    highlight_color = generate_color_high(name)
	    label_options = dict(
	        fontOpacity=1,
	        showBackground=True,
	        backgroundColor=generate_color_low(name),
	        backgroundOpacity=0.5,
	        borderColor='grey',
	        fontColor=generate_color_dark(name),
	        fontSize=font_size,
	    )

	    for item in residue_list:
	        chain_id = item['chain']
	        residue_number = int(item['resi'])

	        # The style is added on top of whatever is already shown.
	        viewer.addStyle(
	            {'chain': chain_id, 'resi': residue_number},
	            {style: {'color': highlight_color}},
	        )
	        viewer.addLabel(
	            f"{item['resn']} {item['resi']}",
	            label_options,
	            {'chain': chain_id, 'resi': residue_number},
	        )

	    return viewer