Spaces:

hemantn
/

AmberMDFlow

Running

AmberMDFlow / ambermdflow /docking_utils.py

naagrh

AmberMDFlow for HF Space (no binaries)

b8a800e about 2 months ago

21.6 kB

	#!/usr/bin/env python3
	"""
	Docking Utilities for AmberMDFlow

	This module contains all the Python functions needed for the docking workflow:
	1. Compute ligand center
	2. Prepare receptor (tleap + pdb4amber + meeko)
	3. Prepare ligand (obabel + meeko)
	4. Run Vina docking
	5. Split docked poses (vina_split)
	6. Convert poses to PDB (obabel)
	7. Sanitize docked poses for use in MD workflow

	Usage:
	from docking_utils import (
	compute_ligand_center,
	prepare_receptor,
	prepare_ligand,
	run_vina_docking,
	split_docked_poses,
	convert_pdbqt_to_pdb,
	sanitize_docked_pose
	)
	"""

	import subprocess
	from pathlib import Path
	import logging

	logger = logging.getLogger(__name__)


	def compute_ligand_center(pdb_path: str) -> tuple:
	"""
	Compute the geometric center of all atoms in a ligand PDB file.

	Args:
	pdb_path: Path to the ligand PDB file

	Returns:
	Tuple of (x, y, z) center coordinates
	"""
	try:
	import MDAnalysis as mda
	import numpy as np
	except ImportError as e:
	raise RuntimeError(
	"MDAnalysis and NumPy are required. Install with: "
	"conda install -c conda-forge mdanalysis numpy"
	) from e

	pdb_path = Path(pdb_path)
	if not pdb_path.exists():
	raise FileNotFoundError(f"Ligand file not found: {pdb_path}")

	u = mda.Universe(str(pdb_path))
	if u.atoms.n_atoms == 0:
	raise ValueError(f"No atoms found in ligand file {pdb_path}")

	coords = u.atoms.positions.astype(float)
	center = coords.mean(axis=0)

	logger.info(f"Ligand center for {pdb_path.name}: ({center[0]:.3f}, {center[1]:.3f}, {center[2]:.3f})")
	return float(center[0]), float(center[1]), float(center[2])


	def prepare_receptor(protein_pdb: str, output_dir: str) -> tuple:
	"""
	Prepare receptor for docking:
	1. Run tleap to add hydrogens
	2. Run pdb4amber to fix element names
	3. Run mk_prepare_receptor.py to create PDBQT

	Args:
	protein_pdb: Path to protein PDB file (typically 1_protein_no_hydrogens.pdb)
	output_dir: Directory to store output files

	Returns:
	Tuple of (receptor_fixed_pdb_path, receptor_pdbqt_path)
	"""
	protein_pdb = Path(protein_pdb).resolve()
	output_dir = Path(output_dir)
	output_dir.mkdir(parents=True, exist_ok=True)

	if not protein_pdb.exists():
	raise FileNotFoundError(f"Protein PDB not found: {protein_pdb}")

	# Step 1: tleap - add hydrogens
	tleap_in = output_dir / "prepare_receptor.in"
	receptor_pdb = output_dir / "receptor.pdb"

	if not receptor_pdb.exists():
	logger.info("Step 1: Running tleap to add hydrogens to protein...")
	with open(tleap_in, "w") as f:
	f.write("source leaprc.protein.ff14SB\n")
	f.write(f"protein = loadpdb {protein_pdb}\n")
	f.write("savepdb protein receptor.pdb\n")
	f.write("quit\n")

	result = subprocess.run(
	["tleap", "-f", tleap_in.name],
	cwd=output_dir,
	capture_output=True,
	text=True,
	)
	if result.returncode != 0 or not receptor_pdb.exists():
	raise RuntimeError(
	f"tleap failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
	)
	logger.info(f" Created: {receptor_pdb}")

	# Step 2: pdb4amber - fix element names
	receptor_fixed = output_dir / "receptor_fixed.pdb"

	if not receptor_fixed.exists():
	logger.info("Step 2: Running pdb4amber to add element names...")
	result = subprocess.run(
	["pdb4amber", "-i", str(receptor_pdb), "-o", str(receptor_fixed)],
	capture_output=True,
	text=True,
	)
	if result.returncode != 0 or not receptor_fixed.exists():
	raise RuntimeError(
	f"pdb4amber failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
	)
	logger.info(f" Created: {receptor_fixed}")

	# Step 3: Meeko receptor preparation
	receptor_pdbqt = output_dir / "receptor.pdbqt"

	if not receptor_pdbqt.exists():
	logger.info("Step 3: Running mk_prepare_receptor.py to create PDBQT...")
	result = subprocess.run(
	["mk_prepare_receptor.py", "-i", str(receptor_fixed), "-o", "receptor", "-p"],
	cwd=output_dir,
	capture_output=True,
	text=True,
	)
	if result.returncode != 0 or not receptor_pdbqt.exists():
	raise RuntimeError(
	f"mk_prepare_receptor.py failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
	)
	logger.info(f" Created: {receptor_pdbqt}")

	return str(receptor_fixed), str(receptor_pdbqt)


	def prepare_ligand(ligand_pdb: str, output_dir: str, ligand_index: int = 1) -> str:
	"""
	Prepare ligand for docking:
	1. Convert PDB to SDF using obabel
	2. Convert SDF to PDBQT using mk_prepare_ligand.py

	Args:
	ligand_pdb: Path to ligand PDB file
	output_dir: Directory to store output files
	ligand_index: Index number for naming output files

	Returns:
	Path to ligand PDBQT file
	"""
	ligand_pdb = Path(ligand_pdb)
	output_dir = Path(output_dir)
	output_dir.mkdir(parents=True, exist_ok=True)

	if not ligand_pdb.exists():
	raise FileNotFoundError(f"Ligand PDB not found: {ligand_pdb}")

	# Step 1: obabel PDB -> SDF
	sdf_path = output_dir / f"ligand_{ligand_index}.sdf"

	logger.info(f"Step 1: Converting ligand {ligand_index} PDB to SDF...")
	result = subprocess.run(
	["obabel", "-i", "pdb", str(ligand_pdb), "-o", "sdf", "-O", str(sdf_path)],
	capture_output=True,
	text=True,
	)
	if result.returncode != 0 or not sdf_path.exists():
	raise RuntimeError(
	f"obabel failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
	)
	logger.info(f" Created: {sdf_path}")

	# Step 2: Meeko ligand preparation -> PDBQT
	pdbqt_path = output_dir / f"ligand_{ligand_index}.pdbqt"

	logger.info(f"Step 2: Converting ligand {ligand_index} SDF to PDBQT...")
	result = subprocess.run(
	["mk_prepare_ligand.py", "-i", str(sdf_path), "-o", str(pdbqt_path)],
	capture_output=True,
	text=True,
	)
	if result.returncode != 0 or not pdbqt_path.exists():
	raise RuntimeError(
	f"mk_prepare_ligand.py failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
	)
	logger.info(f" Created: {pdbqt_path}")

	return str(pdbqt_path)


	def run_vina_docking(
	receptor_pdbqt: str,
	ligand_pdbqt: str,
	center_x: float,
	center_y: float,
	center_z: float,
	size_x: float = 18.0,
	size_y: float = 18.0,
	size_z: float = 18.0,
	output_dir: str = None,
	ligand_index: int = 1,
	exhaustiveness: int = 8,
	num_modes: int = 9,
	) -> tuple:
	"""
	Run AutoDock Vina docking.

	Args:
	receptor_pdbqt: Path to receptor PDBQT file
	ligand_pdbqt: Path to ligand PDBQT file
	center_x, center_y, center_z: Box center coordinates (Angstroms)
	size_x, size_y, size_z: Box dimensions (Angstroms)
	output_dir: Directory for output files (default: same as ligand)
	ligand_index: Index for naming output files
	exhaustiveness: Search exhaustiveness (default: 8)
	num_modes: Maximum number of binding modes (default: 9)

	Returns:
	Tuple of (docked_pdbqt_path, log_file_path)
	"""
	ligand_pdbqt = Path(ligand_pdbqt)
	output_dir = Path(output_dir) if output_dir else ligand_pdbqt.parent

	docked_pdbqt = output_dir / f"ligand_{ligand_index}_docked.pdbqt"
	log_file = output_dir / f"ligand_{ligand_index}_docked.log"

	logger.info(f"Running Vina docking for ligand {ligand_index}...")
	logger.info(f" Center: ({center_x:.3f}, {center_y:.3f}, {center_z:.3f})")
	logger.info(f" Size: ({size_x:.1f}, {size_y:.1f}, {size_z:.1f})")

	cmd = [
	"vina",
	"--receptor", str(receptor_pdbqt),
	"--ligand", str(ligand_pdbqt),
	"--center_x", str(center_x),
	"--center_y", str(center_y),
	"--center_z", str(center_z),
	"--size_x", str(size_x),
	"--size_y", str(size_y),
	"--size_z", str(size_z),
	"--out", str(docked_pdbqt),
	"--log", str(log_file),
	"--exhaustiveness", str(exhaustiveness),
	"--num_modes", str(num_modes),
	]

	result = subprocess.run(cmd, capture_output=True, text=True)

	if result.returncode != 0 or not docked_pdbqt.exists():
	raise RuntimeError(
	f"Vina docking failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
	)

	logger.info(f" Created: {docked_pdbqt}")
	logger.info(f" Log: {log_file}")

	return str(docked_pdbqt), str(log_file)


	def parse_vina_log(log_path: str) -> list:
	"""
	Parse Vina log file to extract binding energies for each mode.

	Args:
	log_path: Path to Vina log file

	Returns:
	List of dicts with 'mode', 'affinity', 'rmsd_lb', 'rmsd_ub' for each pose
	"""
	log_path = Path(log_path)
	if not log_path.exists():
	return []

	energies = []
	in_results = False

	with open(log_path, "r") as f:
	for line in f:
	line = line.strip()
	if "-----+------------+----------+----------" in line:
	in_results = True
	continue
	if in_results and line and line[0].isdigit():
	parts = line.split()
	if len(parts) >= 4:
	try:
	energies.append({
	'mode': int(parts[0]),
	'affinity': float(parts[1]),
	'rmsd_lb': float(parts[2]),
	'rmsd_ub': float(parts[3]),
	})
	except (ValueError, IndexError):
	continue
	elif in_results and not line:
	break

	return energies


	def split_docked_poses(docked_pdbqt: str, output_prefix: str = None) -> list:
	"""
	Split docked PDBQT into individual pose files using vina_split.

	Args:
	docked_pdbqt: Path to docked PDBQT file with multiple poses
	output_prefix: Prefix for output files (default: derived from input)

	Returns:
	List of paths to individual pose PDBQT files
	"""
	docked_pdbqt = Path(docked_pdbqt)
	if not docked_pdbqt.exists():
	raise FileNotFoundError(f"Docked PDBQT not found: {docked_pdbqt}")

	output_dir = docked_pdbqt.parent
	if output_prefix is None:
	output_prefix = docked_pdbqt.stem.replace("_docked", "_mode")

	logger.info(f"Splitting docked poses from {docked_pdbqt.name}...")

	result = subprocess.run(
	["vina_split", "--input", str(docked_pdbqt), "--ligand", output_prefix],
	cwd=output_dir,
	capture_output=True,
	text=True,
	)

	if result.returncode != 0:
	raise RuntimeError(
	f"vina_split failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
	)

	# Find all generated mode files
	pose_files = sorted(output_dir.glob(f"{output_prefix}*.pdbqt"))
	logger.info(f" Split into {len(pose_files)} pose files")

	return [str(f) for f in pose_files]


	def convert_pdbqt_to_pdb(pdbqt_path: str, ph: float = 7.4) -> str:
	"""
	Convert PDBQT file to PDB using obabel.

	Args:
	pdbqt_path: Path to PDBQT file
	ph: pH for protonation (default: 7.4)

	Returns:
	Path to output PDB file
	"""
	pdbqt_path = Path(pdbqt_path)
	if not pdbqt_path.exists():
	raise FileNotFoundError(f"PDBQT file not found: {pdbqt_path}")

	pdb_path = pdbqt_path.with_suffix(".pdb")

	logger.info(f"Converting {pdbqt_path.name} to PDB...")

	result = subprocess.run(
	["obabel", str(pdbqt_path), "-O", str(pdb_path), "-p", str(ph)],
	capture_output=True,
	text=True,
	)

	if result.returncode != 0 or not pdb_path.exists():
	raise RuntimeError(
	f"obabel conversion failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
	)

	logger.info(f" Created: {pdb_path}")
	return str(pdb_path)


	def sanitize_docked_pose(original_ligand: str, pose_pdb: str) -> str:
	"""
	Sanitize a docked pose PDB to match the original ligand format:
	- Restore residue name, chain ID, and residue number from original
	- Convert ATOM to HETATM
	- Rename atoms to match original format (C1, N1, etc.)
	- Remove CONECT/MASTER records

	Args:
	original_ligand: Path to original ligand PDB file
	pose_pdb: Path to docked pose PDB file

	Returns:
	Path to sanitized pose PDB (same as pose_pdb, modified in place)
	"""
	original_ligand = Path(original_ligand)
	pose_pdb = Path(pose_pdb)

	if not original_ligand.exists():
	raise FileNotFoundError(f"Original ligand not found: {original_ligand}")
	if not pose_pdb.exists():
	raise FileNotFoundError(f"Pose PDB not found: {pose_pdb}")

	# Extract residue info from original ligand
	resname = "LIG"
	chain = "X"
	resnum = 1

	with open(original_ligand, "r") as f:
	for line in f:
	if line.startswith(("ATOM", "HETATM")):
	resname = line[17:20].strip() or "LIG"
	chain = line[21] if len(line) > 21 and line[21].strip() else "X"
	try:
	resnum = int(line[22:26].strip())
	except ValueError:
	resnum = 1
	break

	logger.info(f"Sanitizing pose with resname={resname}, chain={chain}, resnum={resnum}")

	# Process pose PDB
	new_lines = []
	atom_counter = 0
	element_counts = {}

	with open(pose_pdb, "r") as f:
	for line in f:
	if line.startswith(("CONECT", "MASTER")):
	continue
	if line.startswith(("ATOM", "HETATM")):
	atom_counter += 1

	# Extract element from line or atom name
	element = line[76:78].strip() if len(line) > 77 else ""
	if not element:
	# Try to get from atom name
	atom_name = line[12:16].strip()
	element = ''.join(c for c in atom_name if c.isalpha())[:2]
	if len(element) > 1:
	element = element[0].upper() + element[1].lower()

	if not element:
	element = "C" # Default fallback

	# Generate new atom name (C1, C2, N1, etc.)
	element_counts[element] = element_counts.get(element, 0) + 1
	new_atom_name = f"{element}{element_counts[element]}"
	new_atom_name = f"{new_atom_name:<4}" # Left-justified, 4 chars

	# Build new line as HETATM
	new_line = (
	f"HETATM{atom_counter:5d} {new_atom_name}"
	f"{resname:>3s} {chain}{resnum:4d} "
	f"{line[30:54]}" # Coordinates
	f"{line[54:66] if len(line) > 54 else ' 1.00 0.00'}" # Occupancy, B-factor
	f" {element:>2s}\n"
	)
	new_lines.append(new_line)
	elif line.startswith("END"):
	new_lines.append("END\n")

	# Write sanitized file
	with open(pose_pdb, "w") as f:
	f.writelines(new_lines)

	logger.info(f" Sanitized: {pose_pdb}")
	return str(pose_pdb)


	def run_full_docking_workflow(
	protein_pdb: str,
	ligand_pdbs: list,
	output_dir: str,
	box_configs: dict = None,
	) -> dict:
	"""
	Run the complete docking workflow for multiple ligands.

	Args:
	protein_pdb: Path to protein PDB file (1_protein_no_hydrogens.pdb)
	ligand_pdbs: List of paths to ligand PDB files
	output_dir: Base output directory for docking results
	box_configs: Optional dict of {ligand_index: {'center': (x,y,z), 'size': (sx,sy,sz)}}

	Returns:
	Dict with results for each ligand including poses and energies
	"""
	output_dir = Path(output_dir)
	output_dir.mkdir(parents=True, exist_ok=True)
	box_configs = box_configs or {}

	results = {
	'success': True,
	'ligands': [],
	'warnings': [],
	'errors': [],
	}

	# Step 1: Prepare receptor (only once for all ligands)
	logger.info("=" * 60)
	logger.info("STEP 1: Preparing receptor for docking")
	logger.info("=" * 60)

	try:
	receptor_fixed, receptor_pdbqt = prepare_receptor(protein_pdb, str(output_dir))
	except Exception as e:
	results['success'] = False
	results['errors'].append(f"Receptor preparation failed: {str(e)}")
	return results

	# Step 2: Process each ligand
	for idx, ligand_pdb in enumerate(ligand_pdbs, start=1):
	ligand_pdb = Path(ligand_pdb)
	logger.info("")
	logger.info("=" * 60)
	logger.info(f"STEP 2.{idx}: Processing ligand {idx}: {ligand_pdb.name}")
	logger.info("=" * 60)

	lig_dir = output_dir / f"ligand_{idx}"
	lig_dir.mkdir(parents=True, exist_ok=True)

	ligand_result = {
	'index': idx,
	'original_file': str(ligand_pdb),
	'poses': [],
	'energies': [],
	'success': True,
	}

	try:
	# Copy original ligand for reference
	original_copy = lig_dir / "original_ligand.pdb"
	if not original_copy.exists():
	original_copy.write_text(ligand_pdb.read_text())

	# Prepare ligand PDBQT
	ligand_pdbqt = prepare_ligand(str(ligand_pdb), str(lig_dir), idx)

	# Get box configuration
	cfg = box_configs.get(idx, {})
	center = cfg.get('center')
	size = cfg.get('size', (18.0, 18.0, 18.0))

	if center is None:
	# Compute center from ligand
	cx, cy, cz = compute_ligand_center(str(ligand_pdb))
	else:
	cx, cy, cz = center

	sx, sy, sz = size

	# Run Vina docking
	docked_pdbqt, log_file = run_vina_docking(
	receptor_pdbqt, ligand_pdbqt,
	cx, cy, cz, sx, sy, sz,
	str(lig_dir), idx
	)

	# Parse binding energies
	energies = parse_vina_log(log_file)
	ligand_result['energies'] = energies

	# Split poses
	pose_pdbqts = split_docked_poses(docked_pdbqt)

	# Convert each pose to PDB and sanitize
	for pose_pdbqt in pose_pdbqts:
	pose_pdb = convert_pdbqt_to_pdb(pose_pdbqt)
	sanitize_docked_pose(str(original_copy), pose_pdb)
	ligand_result['poses'].append(pose_pdb)

	except Exception as e:
	ligand_result['success'] = False
	ligand_result['error'] = str(e)
	results['errors'].append(f"Ligand {idx}: {str(e)}")
	logger.error(f"Error processing ligand {idx}: {e}")

	results['ligands'].append(ligand_result)

	# Check overall success
	results['success'] = all(lig['success'] for lig in results['ligands'])

	logger.info("")
	logger.info("=" * 60)
	logger.info("DOCKING WORKFLOW COMPLETE")
	logger.info("=" * 60)

	return results


	# Example usage / CLI interface
	if __name__ == "__main__":
	import argparse

	logging.basicConfig(level=logging.INFO, format='%(message)s')

	parser = argparse.ArgumentParser(description="Run AutoDock Vina docking workflow")
	parser.add_argument("--protein", required=True, help="Path to protein PDB file")
	parser.add_argument("--ligands", nargs="+", required=True, help="Paths to ligand PDB files")
	parser.add_argument("--output", required=True, help="Output directory")
	parser.add_argument("--center", nargs=3, type=float, help="Box center (x y z)")
	parser.add_argument("--size", nargs=3, type=float, default=[18, 18, 18], help="Box size (x y z)")

	args = parser.parse_args()

	box_configs = {}
	if args.center:
	for i in range(1, len(args.ligands) + 1):
	box_configs[i] = {
	'center': tuple(args.center),
	'size': tuple(args.size),
	}

	results = run_full_docking_workflow(
	args.protein,
	args.ligands,
	args.output,
	box_configs
	)

	print("\n" + "=" * 60)
	print("RESULTS SUMMARY")
	print("=" * 60)
	print(f"Overall success: {results['success']}")
	for lig in results['ligands']:
	print(f"\nLigand {lig['index']}:")
	print(f" Success: {lig['success']}")
	if lig['success']:
	print(f" Poses generated: {len(lig['poses'])}")
	if lig['energies']:
	print(f" Best binding energy: {lig['energies'][0]['affinity']} kcal/mol")
	else:
	print(f" Error: {lig.get('error', 'Unknown')}")