Spaces:

mrzhang
/

generate_cst_tools

Build error

App Files Files Community

generate_cst_tools / extract_ligand_binding_sites.py

mrzhang

Upload extract_ligand_binding_sites.py

f63fd21 over 3 years ago

raw

history blame contribute delete

11.5 kB

	#!/usr/bin/env python3

	import sys
	import os
	import json
	import subprocess

	import pyrosetta
	from pyrosetta import rosetta

	def real_heavy_atoms(pose, residue):
	    '''Collect the heavy-atom indices of a residue, leaving out virtual atoms.

	    Args:
	        pose: Pose containing the residue.
	        residue: Sequence position of the residue within the pose.

	    Returns:
	        A list of atom indices in increasing order, counted from 1 up to
	        the residue's number of heavy atoms.
	    '''
	    res = pose.residue(residue)
	    last_heavy = res.nheavyatoms()

	    return [atom for atom in range(1, last_heavy + 1) if not res.is_virtual(atom)]

	def min_heavy_atom_b_factor(pose, residue):
	    '''Return the smallest B factor found among the real heavy atoms of a residue.

	    Raises ValueError (from min) when the residue has no real heavy atoms.
	    '''
	    atoms = real_heavy_atoms(pose, residue)

	    return min([pose.pdb_info().bfactor(residue, atom) for atom in atoms])

	def average_heavy_atom_b_factor(pose, residue):
	    '''Return the mean B factor over the real heavy atoms of a residue.

	    Raises ZeroDivisionError when the residue has no real heavy atoms.
	    '''
	    atoms = real_heavy_atoms(pose, residue)
	    total = sum(pose.pdb_info().bfactor(residue, atom) for atom in atoms)

	    return total / len(atoms)

	def minimize_pose(pose):
	    '''Minimize a pose in place.

	    Backbone, side-chain (chi) and jump degrees of freedom are all enabled
	    in the move map. The minimizer is configured through a MinimizerOptions
	    object requesting "lbfgs_armijo_nonmonotone" with a tolerance of 0.01.

	    Args:
	        pose: The pose to minimize; it is modified by the mover.
	    '''
	    mm = rosetta.core.kinematics.MoveMap()
	    mm.set_bb(True)
	    mm.set_chi(True)
	    mm.set_jump(True)

	    min_opts = rosetta.core.optimization.MinimizerOptions("lbfgs_armijo_nonmonotone", 0.01, True)

	    min_mover = rosetta.protocols.minimization_packing.MinMover()
	    min_mover.movemap(mm)
	    # Hand the options to the mover; without this call the options object
	    # is never used and the mover runs with whatever defaults it ships with.
	    min_mover.min_options(min_opts)

	    min_mover.apply(pose)

	def fast_relax(pose, fast_relax_rounds=1, use_ex_rotamers=False):
	    '''Relax a pose in place with the FastRelax mover.

	    The Rosetta options 'relax:constrain_relax_to_start_coords' and
	    'relax:ramp_constraints' are set to True before the mover is built.
	    Packing is controlled by a task factory that restricts to repacking
	    and applies LimitAromaChi2Operation.

	    Args:
	        pose: The pose to relax; it is modified by the mover.
	        fast_relax_rounds: Passed to the FastRelax constructor together
	            with the default score function.
	        use_ex_rotamers: When True, an ExtraRotamersGeneric operation with
	            ex1 and ex2 enabled and an extrachi cutoff of 18 is added to
	            the task factory.
	    '''
	    rosetta.basic.options.set_boolean_option('relax:constrain_relax_to_start_coords', True)
	    rosetta.basic.options.set_boolean_option('relax:ramp_constraints', True)

	    task_factory = rosetta.core.pack.task.TaskFactory()

	    if use_ex_rotamers:
	        ers = rosetta.core.pack.task.operation.ExtraRotamersGeneric()
	        ers.ex1(True)
	        ers.ex2(True)
	        ers.extrachi_cutoff(18)
	        task_factory.push_back(ers)

	    task_factory.push_back(rosetta.core.pack.task.operation.RestrictToRepacking())
	    lac = rosetta.protocols.task_operations.LimitAromaChi2Operation()
	    task_factory.push_back(lac)

	    sfxn = rosetta.core.scoring.get_score_function()
	    fast_relax_mover = rosetta.protocols.relax.FastRelax(sfxn, fast_relax_rounds)
	    # Attach the task factory; otherwise it is discarded and the
	    # use_ex_rotamers flag has no effect on the relax run.
	    fast_relax_mover.set_task_factory(task_factory)

	    fast_relax_mover.apply(pose)

	def find_ligand_residues(pose, size_min=0, size_max=float('inf')):
	    '''List the ligand residues of a pose whose size is within bounds.

	    A residue qualifies when is_ligand() is true and its count of real
	    heavy atoms lies in the closed interval [size_min, size_max].

	    Returns:
	        Residue numbers (pose numbering, starting at 1) in increasing order.
	    '''
	    return [
	        seqpos
	        for seqpos in range(1, pose.size() + 1)
	        if pose.residue(seqpos).is_ligand()
	        and size_min <= len(real_heavy_atoms(pose, seqpos)) <= size_max
	    ]

	def min_heavy_atom_distance(pose, residue1, residue2):
	    '''Return the shortest distance between real heavy atoms of two residues.

	    Every heavy atom of residue1 is compared with every heavy atom of
	    residue2. If either residue has no real heavy atoms the result is
	    float('inf').
	    '''
	    closest = float('inf')
	    first = pose.residue(residue1)
	    second = pose.residue(residue2)
	    second_atoms = real_heavy_atoms(pose, residue2)

	    for atom1 in real_heavy_atoms(pose, residue1):
	        xyz1 = first.xyz(atom1)
	        for atom2 in second_atoms:
	            closest = min(closest, xyz1.distance(second.xyz(atom2)))

	    return closest

	def residue_residue_energies(pose, res1, res2):
	    '''Look up the pairwise interaction energies between two residues.

	    The energies are read from the edge connecting res1 and res2 in the
	    pose's energy graph.

	    Returns:
	        A dict with the score terms fa_atr, fa_rep, fa_sol, fa_elec,
	        lk_ball_wtd, hbond_bb_sc and hbond_sc plus 'weighted_total' (the
	        edge dotted with the pose's energy weights), or None when the
	        energy graph has no edge between the two residues.
	    '''
	    edge = pose.energies().energy_graph().find_energy_edge(res1, res2)

	    if not edge:
	        return None

	    scoring = rosetta.core.scoring
	    term_names = ('fa_atr', 'fa_rep', 'fa_sol', 'fa_elec',
	                  'lk_ball_wtd', 'hbond_bb_sc', 'hbond_sc')

	    result = {name: edge[getattr(scoring, name)] for name in term_names}
	    result['weighted_total'] = edge.dot(pose.energies().weights())

	    return result

	def get_interaction_residues(pose, target_residue, distance_cutoff, a_e_h_energy_cutoff, rep_cutoff=5, residue_total_energy_cutoff=50):
	    '''Get the residues interacting with a target residue.

	    The pose should be scored before calling this function.
	    Only canonical protein residues that satisfy the distance and energy
	    cutoffs are returned. Only the attraction, electrostatic and hydrogen
	    bond energies are summed for the interaction-energy cutoff.

	    Args:
	        pose: Scored pose to search.
	        target_residue: Pose number of the residue of interest.
	        distance_cutoff: Maximum allowed minimal heavy-atom distance
	            between a residue and the target.
	        a_e_h_energy_cutoff: The sum of fa_atr, fa_elec, hbond_bb_sc and
	            hbond_sc must be below this value.
	        rep_cutoff: The pairwise fa_rep must be below this value.
	        residue_total_energy_cutoff: The residue's total energy must be
	            below this value.

	    Returns:
	        A list of [residue_rosetta_number, residue_pdb_number,
	        residue_type, interaction_energies] entries, where
	        interaction_energies also carries 'residue_total_energy'.
	    '''
	    canonical_aa = frozenset([
	        'ALA', 'PRO', 'VAL', 'LEU', 'ILE', 'MET',
	        'PHE', 'TYR', 'TRP', 'SER', 'THR', 'CYS',
	        'LYS', 'ARG', 'HIS', 'ASP', 'GLU', 'ASN',
	        'GLN', 'GLY',
	    ])
	    a_e_h_terms = ('fa_atr', 'fa_elec', 'hbond_bb_sc', 'hbond_sc')

	    interaction_residues = []

	    for i in range(1, pose.size() + 1):
	        if i == target_residue:
	            continue

	        # Only allow canonical protein residues
	        if not pose.residue(i).is_protein():
	            continue
	        if pose.residue(i).name3() not in canonical_aa:
	            continue

	        if min_heavy_atom_distance(pose, i, target_residue) > distance_cutoff:
	            continue

	        energies = residue_residue_energies(pose, i, target_residue)

	        # No energy edge between the two residues: nothing to evaluate.
	        # This check must come before any item assignment on `energies`,
	        # which would otherwise raise TypeError on None.
	        if energies is None:
	            continue

	        energies['residue_total_energy'] = pose.energies().residue_total_energy(i)

	        a_e_h_energies = sum(energies[k] for k in a_e_h_terms)

	        # NOTE(review): the b-factor > 0 condition presumably drops residues
	        # with atoms that carry no experimental B factor -- confirm.
	        passes_cutoffs = (
	            a_e_h_energies < a_e_h_energy_cutoff
	            and energies['fa_rep'] < rep_cutoff
	            and energies['residue_total_energy'] < residue_total_energy_cutoff
	            and min_heavy_atom_b_factor(pose, i) > 0
	        )

	        if passes_cutoffs:
	            ir = [i, pose.pdb_info().pose2pdb(i), pose.residue(i).name3(), energies]
	            interaction_residues.append(ir)

	    return interaction_residues

	def extract_subpose(pose, residues):
	    '''Build a new pose containing only the given residues of a pose.

	    Args:
	        pose: Source pose.
	        residues: Iterable of sequence positions to copy, in the order
	            they should be passed to pdbslice.

	    Returns:
	        The newly created pose.
	    '''
	    sub_pose = rosetta.core.pose.Pose()

	    positions = rosetta.utility.vector1_unsigned_long()
	    for position in residues:
	        positions.append(position)

	    rosetta.core.pose.pdbslice(sub_pose, pose, positions)

	    return sub_pose

	def generate_params_file_for_ligand(ligand_pdb, ligand_name, ligand_id, output_path):
	    '''Convert a ligand PDB file to mol2 inside output_path.

	    Tool locations are read from './site_settings.json' (keys 'openbabel'
	    and 'molfile_to_params_script'), resolved against the working
	    directory at call time. OpenBabel is then run from within output_path
	    and writes 'ligand_<ligand_id>.mol2' there.

	    NOTE(review): the molfile_to_params command is assembled but its
	    invocation is disabled, so no params file is actually produced.

	    Args:
	        ligand_pdb: Path to the ligand PDB file.
	        ligand_name: Ligand name passed to the params script via '-n'.
	        ligand_id: Integer id used in the output file names.
	        output_path: Directory in which the output files are written.
	    '''
	    with open('./site_settings.json', 'r') as f:
	        site_settings = json.load(f)

	    openbabel = site_settings['openbabel']
	    molfile_to_params_script = site_settings['molfile_to_params_script']

	    # Resolve the input before changing directory so a relative path stays valid.
	    abs_ligand_pdb = os.path.abspath(ligand_pdb)

	    # Go to the output path

	    cwd = os.getcwd()
	    os.chdir(output_path)

	    try:
	        # Run OpenBabel to convert the ligand PDB file to mol2 file

	        subprocess.call([openbabel, '-ipdb', abs_ligand_pdb, '-omol2', '-O', 'ligand_{0}.mol2'.format(ligand_id)])

	        # Generate params file using the molfile_to_params.py script

	        generate_params_cmd = [
	            'python2', molfile_to_params_script,
	            '--clobber',  # Overwrite existing files
	            '--no-pdb',
	            '-n', ligand_name,
	            '-p', 'ligand_{0}'.format(ligand_id),
	            '--conformers-in-one-file',
	            '--keep-names',
	            'ligand_{0}.mol2'.format(ligand_id)]

	        # subprocess.call(generate_params_cmd)
	    finally:
	        # Go back to the original working directory even if launching
	        # the external tool raised (e.g. the executable is missing).
	        os.chdir(cwd)


	def get_binding_sites_for_a_structure(pdb_file, output_path, b_factor_cutoff=60, min_binding_site_size=2, minimize=False, relax=False):
	    '''Extract the ligand binding sites of one structure and write them out.

	    The structure is loaded and scored with the default score function,
	    optionally minimized and/or relaxed, and every ligand residue with
	    1 to 100 real heavy atoms is examined. A ligand is skipped when

	    * its average heavy-atom B factor exceeds b_factor_cutoff,
	    * a ligand with the same three-letter name was already written,
	    * its residue total energy is above 10, or
	    * fewer than min_binding_site_size interacting residues are found
	      (distance cutoff 5, attraction/electrostatic/H-bond cutoff -1).

	    For each accepted ligand, numbered from 0, these files are written
	    into output_path (created on demand):
	    'binding_site_info_<id>.json', 'binding_site_<id>.pdb.gz' and
	    'ligand_<id>.pdb.gz'; generate_params_file_for_ligand is then called
	    for the ligand.

	    Args:
	        pdb_file: Path of the structure file to load.
	        output_path: Directory receiving the output files.
	        b_factor_cutoff: Maximum allowed average ligand B factor.
	        min_binding_site_size: Minimum number of interacting residues.
	        minimize: Whether to run minimize_pose on the loaded pose.
	        relax: Whether to run fast_relax on the loaded pose.
	    '''

	    # Load and score the structure

	    pose = pyrosetta.pose_from_file(pdb_file)
	    sfxn = rosetta.core.scoring.get_score_function()
	    sfxn(pose)

	    # Minimize and/or relax the pose if required

	    if minimize:
	        minimize_pose(pose)
	    if relax:
	        fast_relax(pose)

	    # Find the ligand residues that have between 1 and 100 heavy atoms

	    ligand_residues = find_ligand_residues(pose, size_min=1, size_max=100)

	    ligand_id = 0
	    ligand_names = set()

	    for ligand in ligand_residues:

	        # Skip ligands that have high b factors

	        if average_heavy_atom_b_factor(pose, ligand) > b_factor_cutoff:
	            continue

	        # Skip the ligand if the same type of ligand has already been extracted
	        # This is to avoid over counting for ligands in structures that have multiple chains

	        ligand_name = pose.residue(ligand).name3()
	        print(ligand_name)
	        if ligand_name in ligand_names:
	            continue

	        # Skip the ligand if the ligand has bad energy (probably due to clashing)

	        ligand_total_energy = pose.energies().residue_total_energy(ligand)
	        if ligand_total_energy > 10:
	            continue

	        interaction_residues = get_interaction_residues(pose, ligand, distance_cutoff=5, a_e_h_energy_cutoff=-1)

	        # Skip the ligand if there are too few binding site residues.
	        # Checked before the sub-poses are built so rejected ligands
	        # do not pay for the extraction.

	        if len(interaction_residues) < min_binding_site_size:
	            continue

	        binding_pose = extract_subpose(pose, [ligand] + [ir[0] for ir in interaction_residues])
	        ligand_pose = extract_subpose(pose, [ligand])

	        # Get the binding site information

	        binding_site_info = {
	            'pdb_file': pdb_file,
	            'ligand_name': ligand_name,
	            'ligand_n_heavy_atoms': len(real_heavy_atoms(pose, ligand)),
	            'ligand_number': pose.pdb_info().pose2pdb(ligand),
	            'ligand_total_energy': ligand_total_energy,
	            'binding_site_energies': interaction_residues,
	        }

	        # Make the output dir

	        os.makedirs(output_path, exist_ok=True)

	        # Write the outputs

	        ligand_names.add(ligand_name)  # record the ligand name

	        with open(os.path.join(output_path, 'binding_site_info_{0}.json'.format(ligand_id)), 'w') as f:
	            json.dump(binding_site_info, f, indent=' ')

	        binding_pose.dump_pdb(os.path.join(output_path, 'binding_site_{0}.pdb.gz'.format(ligand_id)))
	        ligand_pdb = os.path.join(output_path, 'ligand_{0}.pdb.gz'.format(ligand_id))
	        ligand_pose.dump_pdb(ligand_pdb)

	        # Generate params files for ligands

	        generate_params_file_for_ligand(ligand_pdb, ligand_pose.residue(1).name3(), ligand_id, output_path)

	        ligand_id += 1

	import os
	import numpy
	def extract_ligand_binding_sites(input_path):
	    '''Generate the binding_site files and the other output file types.

	    (The original author's note mentions four kinds of output files.)

	    The working directory is switched to "/home/bio/workshop/zhang/".
	    The first four characters of the base name of input_path.name select
	    a sub-directory of './input/input_pdb'; every file in it is handed to
	    get_binding_sites_for_a_structure, with results going to the matching
	    location under './output/out_binding_site'.

	    Args:
	        input_path: Object whose `name` attribute is a '/'-separated path.

	    Raises:
	        ValueError: if the four-character name is not an entry of
	            './input/input_pdb'.
	    '''
	    # Previously run as a script:
	    # if __name__ == '__main__':
	    #     job_id = int(sys.argv[1])
	    os.chdir("/home/bio/workshop/zhang/")
	    pdb_id = input_path.name.split('/')[-1][0:4]

	    source_root = './input/input_pdb'
	    result_root = './output/out_binding_site'

	    entries = sorted(os.listdir(source_root))
	    # Find the position of the entry in the sorted input listing
	    position = entries.index(pdb_id)
	    entry = entries[position]
	    print(pdb_id)
	    print(position)
	    pyrosetta.init()

	    entry_dir = os.path.join(source_root, entry)
	    for count, structure_name in enumerate(os.listdir(entry_dir), start=1):
	        print(structure_name)
	        print(count)

	        get_binding_sites_for_a_structure(
	            os.path.join(source_root, entry, structure_name),
	            os.path.join(result_root, entry, structure_name),
	        )