Spaces:
Sleeping
Sleeping
| from openbabel import pybel | |
| from meeko import MoleculePreparation | |
| from meeko import obutils | |
| from vina import Vina | |
| import subprocess | |
| import rdkit.Chem as Chem | |
| from rdkit.Chem import AllChem | |
| import tempfile | |
| import AutoDockTools | |
| import os | |
| import contextlib | |
| from utils.reconstruct import reconstruct_from_generated | |
| from utils.evaluation.docking_qvina import get_random_id, BaseDockingTask | |
def supress_stdout(func):
    """Decorate ``func`` so that whatever it prints to ``sys.stdout`` is discarded.

    While the wrapped callable runs, ``sys.stdout`` is redirected to
    ``os.devnull``. Arguments and the return value are passed through
    unchanged, and the wrapper keeps the wrapped callable's metadata
    (``__name__``, ``__doc__``, ...).

    Note: ``contextlib.redirect_stdout`` only swaps ``sys.stdout``; output
    written straight to the OS-level file descriptor is not silenced.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(func)
    def wrapper(*a, **ka):
        with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
            return func(*a, **ka)

    return wrapper
class PrepLig(object):
    """Ligand holder: loads a molecule through pybel and can export it as PDBQT."""

    def __init__(self, input_mol, mol_format):
        """Load the ligand into ``self.ob_mol``.

        input_mol: passed to ``pybel.readstring`` when ``mol_format`` is
            ``'smi'``, or to ``pybel.readfile`` (first record only) when it
            is ``'sdf'``.
        mol_format: ``'smi'`` or ``'sdf'``; anything else raises ValueError.
        """
        if mol_format not in ('smi', 'sdf'):
            raise ValueError(f'mol_format {mol_format} not supported')
        if mol_format == 'smi':
            self.ob_mol = pybel.readstring('smi', input_mol)
        else:
            # Only the first molecule yielded by the reader is kept.
            self.ob_mol = next(pybel.readfile(mol_format, input_mol))

    def addH(self, polaronly=False, correctforph=True, PH=7):
        """Add hydrogens in place and dump the result to ``tmp_h.sdf``.

        The three arguments are forwarded positionally to
        ``OBMol.AddHydrogens``. The SDF is written to a relative path, i.e.
        into the current working directory.
        """
        ob_molecule = self.ob_mol.OBMol
        ob_molecule.AddHydrogens(polaronly, correctforph, PH)
        obutils.writeMolecule(ob_molecule, 'tmp_h.sdf')

    def gen_conf(self):
        """Embed the molecule with RDKit (ETKDGv3 parameters) and reload it.

        The current molecule is round-tripped through an SDF block into
        RDKit (hydrogens kept), embedded, converted back to a pybel molecule
        that replaces ``self.ob_mol``, and written to ``conf_h.sdf``.
        """
        embedded = Chem.MolFromMolBlock(self.ob_mol.write('sdf'), removeHs=False)
        embed_params = Chem.rdDistGeom.ETKDGv3()
        AllChem.EmbedMolecule(embedded, embed_params)
        mol_block = Chem.MolToMolBlock(embedded)
        self.ob_mol = pybel.readstring('sdf', mol_block)
        obutils.writeMolecule(self.ob_mol.OBMol, 'conf_h.sdf')

    def get_pdbqt(self, lig_pdbqt=None):
        """Prepare the ligand with meeko and emit PDBQT.

        lig_pdbqt: when given, the PDBQT is written to this path and None is
            returned; when omitted, the PDBQT text is returned instead.
        """
        preparator = MoleculePreparation()
        preparator.prepare(self.ob_mol.OBMol)
        if lig_pdbqt is None:
            return preparator.write_pdbqt_string()
        preparator.write_pdbqt_file(lig_pdbqt)
        return None
class PrepProt(object):
    """Receptor holder driving external tools to go from PDB to PQR to PDBQT."""

    def __init__(self, pdb_file):
        """Remember the path of the receptor PDB file in ``self.prot``."""
        self.prot = pdb_file

    def del_water(self, dry_pdb_file):  # optional
        """Write a copy of the receptor without water and switch to it.

        Only lines starting with ``ATOM`` or ``HETATM`` are kept, and of
        those every line containing the substring ``HOH`` is dropped.
        Afterwards ``self.prot`` points at ``dry_pdb_file``.
        """
        kept = []
        with open(self.prot) as source:
            for record in source.readlines():
                if not record.startswith(('ATOM', 'HETATM')):
                    continue
                if 'HOH' in record:
                    continue
                kept.append(record)
        with open(dry_pdb_file, 'w') as target:
            target.write(''.join(kept))
        self.prot = dry_pdb_file

    def addH(self, prot_pqr):  # call pdb2pqr
        """Run ``pdb2pqr30 --ff=AMBER`` on the receptor, writing ``prot_pqr``.

        The output path is stored in ``self.prot_pqr``. The tool's stdout and
        stderr are discarded and its exit status is not inspected.
        """
        self.prot_pqr = prot_pqr
        command = ['pdb2pqr30', '--ff=AMBER', self.prot, self.prot_pqr]
        process = subprocess.Popen(command,
                                   stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
        process.communicate()

    def get_pdbqt(self, prot_pdbqt):
        """Convert ``self.prot_pqr`` to PDBQT with AutoDockTools.

        Runs ``prepare_receptor4.py`` from the installed AutoDockTools
        package under ``python3``. Reads ``self.prot_pqr``, which is only set
        by ``addH``. Output streams are discarded and the exit status is not
        inspected.
        """
        script = os.path.join(AutoDockTools.__path__[0], 'Utilities24/prepare_receptor4.py')
        command = ['python3', script, '-r', self.prot_pqr, '-o', prot_pdbqt]
        process = subprocess.Popen(command,
                                   stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
        process.communicate()
class VinaDock(object):
    """Run AutoDock Vina for one ligand PDBQT against one receptor PDBQT.

    ``pocket_center`` and ``box_size`` must be set (via ``get_box`` or by
    assigning them directly) before ``dock`` is called.
    """

    # Values accepted by the ``mode`` argument of ``dock``.
    _MODES = ('score_only', 'minimize', 'dock')

    def __init__(self, lig_pdbqt, prot_pdbqt):
        """Store the ligand and receptor PDBQT file paths."""
        self.lig_pdbqt = lig_pdbqt
        self.prot_pdbqt = prot_pdbqt

    def _max_min_pdb(self, pdb, buffer):
        """Return ``(center, size)`` of the bounding box of all atoms in ``pdb``.

        Both ATOM and HETATM records are used. Coordinates are read from the
        fixed PDB columns 31-38 / 39-46 / 47-54 (1-based), so values that
        fill the whole 8-character field (e.g. ``-123.456``) keep their sign.
        ``buffer`` is added to the extent along each axis.

        Raises ValueError when the file contains no ATOM/HETATM record.
        """
        with open(pdb, 'r') as f:
            lines = [l for l in f if l.startswith(('ATOM', 'HETATM'))]
        if not lines:
            raise ValueError(f'no ATOM/HETATM records found in {pdb}')
        coords = [(float(l[30:38]), float(l[38:46]), float(l[46:54])) for l in lines]
        highs = [max(axis) for axis in zip(*coords)]
        lows = [min(axis) for axis in zip(*coords)]
        for hi, lo in zip(highs, lows):
            print(hi, lo)
        pocket_center = [(hi + lo) / 2 for hi, lo in zip(highs, lows)]
        box_size = [(hi - lo) + buffer for hi, lo in zip(highs, lows)]
        return pocket_center, box_size

    def get_box(self, ref=None, buffer=0):
        '''
        ref: reference pdb to define pocket.
        buffer: buffer size to add
        if ref is not None:
            get the max and min on x, y, z axis in ref pdb and add buffer to each dimension
        else:
            use the entire protein to define pocket
        '''
        if ref is None:
            ref = self.prot_pdbqt
        self.pocket_center, self.box_size = self._max_min_pdb(ref, buffer)
        print(self.pocket_center, self.box_size)

    @staticmethod
    def _read_minimized_pose(v):
        """Write the current pose of ``v`` to a temporary file and return its text."""
        # The context manager guarantees the temporary file is closed and
        # removed even if writing or reading the pose fails.
        with tempfile.NamedTemporaryFile() as tmp:
            v.write_pose(tmp.name, overwrite=True)
            with open(tmp.name, 'r') as f:
                return f.read()

    def dock(self, score_func='vina', seed=0, mode='dock', exhaustiveness=8, save_pose=False, **kwargs):  # seed=0 mean random seed
        """Score, minimize or dock the ligand and return the resulting score.

        score_func: forwarded to ``Vina`` as ``sf_name``.
        seed: forwarded to ``Vina`` (0 means random seed, per the note above).
        mode: ``'score_only'``, ``'minimize'`` or ``'dock'``.
        exhaustiveness: forwarded to ``Vina.dock`` in ``'dock'`` mode.
        save_pose: when true, return ``(score, pose)`` instead of ``score``;
            the pose is None for ``'score_only'``.
        kwargs: extra keyword arguments for the ``Vina`` constructor.

        Raises ValueError for an unknown ``mode``.
        """
        # Reject a bad mode before paying for receptor loading and map computation.
        if mode not in self._MODES:
            raise ValueError(f'mode must be one of {self._MODES}, got {mode!r}')

        v = Vina(sf_name=score_func, seed=seed, verbosity=0, **kwargs)
        v.set_receptor(self.prot_pdbqt)
        v.set_ligand_from_file(self.lig_pdbqt)
        v.compute_vina_maps(center=self.pocket_center, box_size=self.box_size)

        pose = None
        if mode == 'score_only':
            score = v.score()[0]
        elif mode == 'minimize':
            score = v.optimize()[0]
            if save_pose:
                pose = self._read_minimized_pose(v)
        else:  # mode == 'dock'
            v.dock(exhaustiveness=exhaustiveness, n_poses=1)
            score = v.energies(n_poses=1)[0][0]
            if save_pose:
                pose = v.poses(n_poses=1)

        if save_pose:
            return score, pose
        return score
class VinaDockingTask(BaseDockingTask):
    """Docking task that prepares a ligand/receptor pair and runs AutoDock Vina.

    Use one of the ``from_*`` alternate constructors to build a task from a
    data record or a molecule; ``run`` performs the preparation and docking.
    """

    @staticmethod
    def _protein_path(ligand_filename, protein_root):
        """Return the receptor PDB path that belongs to ``ligand_filename``.

        The receptor sits in the same sub-directory as the ligand and is
        named after the first 10 characters of the ligand file name
        (PDBId_Chain_rec.pdb), below ``protein_root``.
        """
        protein_fn = os.path.join(
            os.path.dirname(ligand_filename),
            os.path.basename(ligand_filename)[:10] + '.pdb'  # PDBId_Chain_rec.pdb
        )
        return os.path.join(protein_root, protein_fn)

    @classmethod
    def from_generated_data(cls, data, protein_root='./data/crossdocked', **kwargs):
        """Build a task from a generated sample.

        The ligand is rebuilt with ``reconstruct_from_generated`` from a
        clone of ``data``; the receptor is located from
        ``data.ligand_filename``. Extra keyword arguments go to ``__init__``.
        """
        protein_path = cls._protein_path(data.ligand_filename, protein_root)
        ligand_rdmol = reconstruct_from_generated(data.clone())
        return cls(protein_path, ligand_rdmol, **kwargs)

    @classmethod
    def from_original_data(cls, data, ligand_root='./data/crossdocked_pocket10', protein_root='./data/crossdocked',
                           **kwargs):
        """Build a task from a reference record.

        The ligand is the first molecule of the SDF at
        ``ligand_root/data.ligand_filename``. Extra keyword arguments go to
        ``__init__``.
        """
        protein_path = cls._protein_path(data.ligand_filename, protein_root)
        ligand_path = os.path.join(ligand_root, data.ligand_filename)
        ligand_rdmol = next(iter(Chem.SDMolSupplier(ligand_path)))
        return cls(protein_path, ligand_rdmol, **kwargs)

    @classmethod
    def from_generated_mol(cls, ligand_rdmol, ligand_filename, protein_root='./data/crossdocked', **kwargs):
        """Build a task from an already constructed RDKit molecule.

        ``ligand_filename`` is only used to locate the matching receptor.
        Extra keyword arguments go to ``__init__``.
        """
        protein_path = cls._protein_path(ligand_filename, protein_root)
        return cls(protein_path, ligand_rdmol, **kwargs)

    def __init__(self, protein_path, ligand_rdmol, tmp_dir='./tmp', center=None,
                 size_factor=1., buffer=5.0):
        """Write the ligand to ``tmp_dir`` and derive the docking box.

        protein_path: receptor PDB file.
        ligand_rdmol: RDKit molecule with at least one conformer.
        tmp_dir: directory (created if missing) for the ligand SDF.
        center: box center; defaults to the midpoint of the ligand's
            bounding box.
        size_factor: scales the ligand extent per axis; None selects a fixed
            20 x 20 x 20 box.
        buffer: added to each scaled extent (ignored when ``size_factor`` is
            None).
        """
        super().__init__(protein_path, ligand_rdmol)
        # self.conda_env = conda_env
        self.tmp_dir = os.path.realpath(tmp_dir)
        # Create the directory the ligand file is actually written to.
        os.makedirs(self.tmp_dir, exist_ok=True)

        self.task_id = get_random_id()
        self.receptor_id = self.task_id + '_receptor'
        self.ligand_id = self.task_id + '_ligand'

        self.receptor_path = protein_path
        self.ligand_path = os.path.join(self.tmp_dir, self.ligand_id + '.sdf')

        # Keep the molecule as passed in, before hydrogens are added.
        self.recon_ligand_mol = ligand_rdmol
        ligand_rdmol = Chem.AddHs(ligand_rdmol, addCoords=True)

        sdf_writer = Chem.SDWriter(self.ligand_path)
        try:
            sdf_writer.write(ligand_rdmol)
        finally:
            # Close the writer even when writing fails.
            sdf_writer.close()
        self.ligand_rdmol = ligand_rdmol

        pos = ligand_rdmol.GetConformer(0).GetPositions()
        if center is None:
            self.center = (pos.max(0) + pos.min(0)) / 2
        else:
            self.center = center

        if size_factor is None:
            self.size_x, self.size_y, self.size_z = 20, 20, 20
        else:
            self.size_x, self.size_y, self.size_z = (pos.max(0) - pos.min(0)) * size_factor + buffer

        # Initialised to None here; not assigned again in the code visible in this class.
        self.proc = None
        self.results = None
        self.output = None
        self.error_output = None
        self.docked_sdf_path = None

    def run(self, mode='dock', exhaustiveness=8, **kwargs):
        """Prepare both molecules and run Vina.

        The ligand PDBQT is regenerated on every call. The receptor PQR and
        PDBQT are written next to the receptor file and reused when they
        already exist.

        Returns a one-element list ``[{'affinity': score, 'pose': pose}]``.
        """
        ligand_pdbqt = os.path.splitext(self.ligand_path)[0] + '.pdbqt'
        receptor_stem = os.path.splitext(self.receptor_path)[0]
        protein_pqr = receptor_stem + '.pqr'
        protein_pdbqt = receptor_stem + '.pdbqt'

        lig = PrepLig(self.ligand_path, 'sdf')
        lig.get_pdbqt(ligand_pdbqt)

        prot = PrepProt(self.receptor_path)
        if not os.path.exists(protein_pqr):
            prot.addH(protein_pqr)
        if not os.path.exists(protein_pdbqt):
            prot.get_pdbqt(protein_pdbqt)

        dock = VinaDock(ligand_pdbqt, protein_pdbqt)
        dock.pocket_center, dock.box_size = self.center, [self.size_x, self.size_y, self.size_z]
        score, pose = dock.dock(score_func='vina', mode=mode, exhaustiveness=exhaustiveness, save_pose=True, **kwargs)
        return [{'affinity': score, 'pose': pose}]
| # if __name__ == '__main__': | |
| # lig_pdbqt = 'data/lig.pdbqt' | |
| # mol_file = 'data/1a4k_ligand.sdf' | |
| # a = PrepLig(mol_file, 'sdf') | |
| # # mol_file = 'CC(=C)C(=O)OCCN(C)C' | |
| # # a = PrepLig(mol_file, 'smi') | |
| # a.addH() | |
| # a.gen_conf() | |
| # a.get_pdbqt(lig_pdbqt) | |
| # | |
| # prot_file = 'data/1a4k_protein_chainAB.pdb' | |
| # prot_dry = 'data/protein_dry.pdb' | |
| # prot_pqr = 'data/protein.pqr' | |
| # prot_pdbqt = 'data/protein.pdbqt' | |
| # b = PrepProt(prot_file) | |
| # b.del_water(prot_dry) | |
| # b.addH(prot_pqr) | |
| # b.get_pdbqt(prot_pdbqt) | |
| # | |
| # dock = VinaDock(lig_pdbqt, prot_pdbqt) | |
| # dock.get_box() | |
| # dock.dock() | |