AmberPrep / amberprep /docking_utils.py
hemantn's picture
Rename project from AmberFlow to AmberPrep
fa25632
#!/usr/bin/env python3
"""
Docking Utilities for AmberPrep
This module contains all the Python functions needed for the docking workflow:
1. Compute ligand center
2. Prepare receptor (tleap + pdb4amber + meeko)
3. Prepare ligand (obabel + meeko)
4. Run Vina docking
5. Split docked poses (vina_split)
6. Convert poses to PDB (obabel)
7. Sanitize docked poses for use in MD workflow
Usage:
from docking_utils import (
compute_ligand_center,
prepare_receptor,
prepare_ligand,
run_vina_docking,
split_docked_poses,
convert_pdbqt_to_pdb,
sanitize_docked_pose
)
"""
import subprocess
from pathlib import Path
import logging
logger = logging.getLogger(__name__)
def compute_ligand_center(pdb_path: str) -> tuple:
"""
Compute the geometric center of all atoms in a ligand PDB file.
Args:
pdb_path: Path to the ligand PDB file
Returns:
Tuple of (x, y, z) center coordinates
"""
try:
import MDAnalysis as mda
import numpy as np
except ImportError as e:
raise RuntimeError(
"MDAnalysis and NumPy are required. Install with: "
"conda install -c conda-forge mdanalysis numpy"
) from e
pdb_path = Path(pdb_path)
if not pdb_path.exists():
raise FileNotFoundError(f"Ligand file not found: {pdb_path}")
u = mda.Universe(str(pdb_path))
if u.atoms.n_atoms == 0:
raise ValueError(f"No atoms found in ligand file {pdb_path}")
coords = u.atoms.positions.astype(float)
center = coords.mean(axis=0)
logger.info(f"Ligand center for {pdb_path.name}: ({center[0]:.3f}, {center[1]:.3f}, {center[2]:.3f})")
return float(center[0]), float(center[1]), float(center[2])
def prepare_receptor(protein_pdb: str, output_dir: str) -> tuple:
"""
Prepare receptor for docking:
1. Run tleap to add hydrogens
2. Run pdb4amber to fix element names
3. Run mk_prepare_receptor.py to create PDBQT
Args:
protein_pdb: Path to protein PDB file (typically 1_protein_no_hydrogens.pdb)
output_dir: Directory to store output files
Returns:
Tuple of (receptor_fixed_pdb_path, receptor_pdbqt_path)
"""
protein_pdb = Path(protein_pdb).resolve()
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
if not protein_pdb.exists():
raise FileNotFoundError(f"Protein PDB not found: {protein_pdb}")
# Step 1: tleap - add hydrogens
tleap_in = output_dir / "prepare_receptor.in"
receptor_pdb = output_dir / "receptor.pdb"
if not receptor_pdb.exists():
logger.info("Step 1: Running tleap to add hydrogens to protein...")
with open(tleap_in, "w") as f:
f.write("source leaprc.protein.ff14SB\n")
f.write(f"protein = loadpdb {protein_pdb}\n")
f.write("savepdb protein receptor.pdb\n")
f.write("quit\n")
result = subprocess.run(
["tleap", "-f", tleap_in.name],
cwd=output_dir,
capture_output=True,
text=True,
)
if result.returncode != 0 or not receptor_pdb.exists():
raise RuntimeError(
f"tleap failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
)
logger.info(f" Created: {receptor_pdb}")
# Step 2: pdb4amber - fix element names
receptor_fixed = output_dir / "receptor_fixed.pdb"
if not receptor_fixed.exists():
logger.info("Step 2: Running pdb4amber to add element names...")
result = subprocess.run(
["pdb4amber", "-i", str(receptor_pdb), "-o", str(receptor_fixed)],
capture_output=True,
text=True,
)
if result.returncode != 0 or not receptor_fixed.exists():
raise RuntimeError(
f"pdb4amber failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
)
logger.info(f" Created: {receptor_fixed}")
# Step 3: Meeko receptor preparation
receptor_pdbqt = output_dir / "receptor.pdbqt"
if not receptor_pdbqt.exists():
logger.info("Step 3: Running mk_prepare_receptor.py to create PDBQT...")
result = subprocess.run(
["mk_prepare_receptor.py", "-i", str(receptor_fixed), "-o", "receptor", "-p"],
cwd=output_dir,
capture_output=True,
text=True,
)
if result.returncode != 0 or not receptor_pdbqt.exists():
raise RuntimeError(
f"mk_prepare_receptor.py failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
)
logger.info(f" Created: {receptor_pdbqt}")
return str(receptor_fixed), str(receptor_pdbqt)
def prepare_ligand(ligand_pdb: str, output_dir: str, ligand_index: int = 1) -> str:
"""
Prepare ligand for docking:
1. Convert PDB to SDF using obabel
2. Convert SDF to PDBQT using mk_prepare_ligand.py
Args:
ligand_pdb: Path to ligand PDB file
output_dir: Directory to store output files
ligand_index: Index number for naming output files
Returns:
Path to ligand PDBQT file
"""
ligand_pdb = Path(ligand_pdb)
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
if not ligand_pdb.exists():
raise FileNotFoundError(f"Ligand PDB not found: {ligand_pdb}")
# Step 1: obabel PDB -> SDF
sdf_path = output_dir / f"ligand_{ligand_index}.sdf"
logger.info(f"Step 1: Converting ligand {ligand_index} PDB to SDF...")
result = subprocess.run(
["obabel", "-i", "pdb", str(ligand_pdb), "-o", "sdf", "-O", str(sdf_path)],
capture_output=True,
text=True,
)
if result.returncode != 0 or not sdf_path.exists():
raise RuntimeError(
f"obabel failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
)
logger.info(f" Created: {sdf_path}")
# Step 2: Meeko ligand preparation -> PDBQT
pdbqt_path = output_dir / f"ligand_{ligand_index}.pdbqt"
logger.info(f"Step 2: Converting ligand {ligand_index} SDF to PDBQT...")
result = subprocess.run(
["mk_prepare_ligand.py", "-i", str(sdf_path), "-o", str(pdbqt_path)],
capture_output=True,
text=True,
)
if result.returncode != 0 or not pdbqt_path.exists():
raise RuntimeError(
f"mk_prepare_ligand.py failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
)
logger.info(f" Created: {pdbqt_path}")
return str(pdbqt_path)
def run_vina_docking(
receptor_pdbqt: str,
ligand_pdbqt: str,
center_x: float,
center_y: float,
center_z: float,
size_x: float = 18.0,
size_y: float = 18.0,
size_z: float = 18.0,
output_dir: str = None,
ligand_index: int = 1,
exhaustiveness: int = 8,
num_modes: int = 9,
) -> tuple:
"""
Run AutoDock Vina docking.
Args:
receptor_pdbqt: Path to receptor PDBQT file
ligand_pdbqt: Path to ligand PDBQT file
center_x, center_y, center_z: Box center coordinates (Angstroms)
size_x, size_y, size_z: Box dimensions (Angstroms)
output_dir: Directory for output files (default: same as ligand)
ligand_index: Index for naming output files
exhaustiveness: Search exhaustiveness (default: 8)
num_modes: Maximum number of binding modes (default: 9)
Returns:
Tuple of (docked_pdbqt_path, log_file_path)
"""
ligand_pdbqt = Path(ligand_pdbqt)
output_dir = Path(output_dir) if output_dir else ligand_pdbqt.parent
docked_pdbqt = output_dir / f"ligand_{ligand_index}_docked.pdbqt"
log_file = output_dir / f"ligand_{ligand_index}_docked.log"
logger.info(f"Running Vina docking for ligand {ligand_index}...")
logger.info(f" Center: ({center_x:.3f}, {center_y:.3f}, {center_z:.3f})")
logger.info(f" Size: ({size_x:.1f}, {size_y:.1f}, {size_z:.1f})")
cmd = [
"vina",
"--receptor", str(receptor_pdbqt),
"--ligand", str(ligand_pdbqt),
"--center_x", str(center_x),
"--center_y", str(center_y),
"--center_z", str(center_z),
"--size_x", str(size_x),
"--size_y", str(size_y),
"--size_z", str(size_z),
"--out", str(docked_pdbqt),
"--log", str(log_file),
"--exhaustiveness", str(exhaustiveness),
"--num_modes", str(num_modes),
]
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0 or not docked_pdbqt.exists():
raise RuntimeError(
f"Vina docking failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
)
logger.info(f" Created: {docked_pdbqt}")
logger.info(f" Log: {log_file}")
return str(docked_pdbqt), str(log_file)
def parse_vina_log(log_path: str) -> list:
"""
Parse Vina log file to extract binding energies for each mode.
Args:
log_path: Path to Vina log file
Returns:
List of dicts with 'mode', 'affinity', 'rmsd_lb', 'rmsd_ub' for each pose
"""
log_path = Path(log_path)
if not log_path.exists():
return []
energies = []
in_results = False
with open(log_path, "r") as f:
for line in f:
line = line.strip()
if "-----+------------+----------+----------" in line:
in_results = True
continue
if in_results and line and line[0].isdigit():
parts = line.split()
if len(parts) >= 4:
try:
energies.append({
'mode': int(parts[0]),
'affinity': float(parts[1]),
'rmsd_lb': float(parts[2]),
'rmsd_ub': float(parts[3]),
})
except (ValueError, IndexError):
continue
elif in_results and not line:
break
return energies
def split_docked_poses(docked_pdbqt: str, output_prefix: str = None) -> list:
"""
Split docked PDBQT into individual pose files using vina_split.
Args:
docked_pdbqt: Path to docked PDBQT file with multiple poses
output_prefix: Prefix for output files (default: derived from input)
Returns:
List of paths to individual pose PDBQT files
"""
docked_pdbqt = Path(docked_pdbqt)
if not docked_pdbqt.exists():
raise FileNotFoundError(f"Docked PDBQT not found: {docked_pdbqt}")
output_dir = docked_pdbqt.parent
if output_prefix is None:
output_prefix = docked_pdbqt.stem.replace("_docked", "_mode")
logger.info(f"Splitting docked poses from {docked_pdbqt.name}...")
result = subprocess.run(
["vina_split", "--input", str(docked_pdbqt), "--ligand", output_prefix],
cwd=output_dir,
capture_output=True,
text=True,
)
if result.returncode != 0:
raise RuntimeError(
f"vina_split failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
)
# Find all generated mode files
pose_files = sorted(output_dir.glob(f"{output_prefix}*.pdbqt"))
logger.info(f" Split into {len(pose_files)} pose files")
return [str(f) for f in pose_files]
def convert_pdbqt_to_pdb(pdbqt_path: str, ph: float = 7.4) -> str:
"""
Convert PDBQT file to PDB using obabel.
Args:
pdbqt_path: Path to PDBQT file
ph: pH for protonation (default: 7.4)
Returns:
Path to output PDB file
"""
pdbqt_path = Path(pdbqt_path)
if not pdbqt_path.exists():
raise FileNotFoundError(f"PDBQT file not found: {pdbqt_path}")
pdb_path = pdbqt_path.with_suffix(".pdb")
logger.info(f"Converting {pdbqt_path.name} to PDB...")
result = subprocess.run(
["obabel", str(pdbqt_path), "-O", str(pdb_path), "-p", str(ph)],
capture_output=True,
text=True,
)
if result.returncode != 0 or not pdb_path.exists():
raise RuntimeError(
f"obabel conversion failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}"
)
logger.info(f" Created: {pdb_path}")
return str(pdb_path)
def sanitize_docked_pose(original_ligand: str, pose_pdb: str) -> str:
"""
Sanitize a docked pose PDB to match the original ligand format:
- Restore residue name, chain ID, and residue number from original
- Convert ATOM to HETATM
- Rename atoms to match original format (C1, N1, etc.)
- Remove CONECT/MASTER records
Args:
original_ligand: Path to original ligand PDB file
pose_pdb: Path to docked pose PDB file
Returns:
Path to sanitized pose PDB (same as pose_pdb, modified in place)
"""
original_ligand = Path(original_ligand)
pose_pdb = Path(pose_pdb)
if not original_ligand.exists():
raise FileNotFoundError(f"Original ligand not found: {original_ligand}")
if not pose_pdb.exists():
raise FileNotFoundError(f"Pose PDB not found: {pose_pdb}")
# Extract residue info from original ligand
resname = "LIG"
chain = "X"
resnum = 1
with open(original_ligand, "r") as f:
for line in f:
if line.startswith(("ATOM", "HETATM")):
resname = line[17:20].strip() or "LIG"
chain = line[21] if len(line) > 21 and line[21].strip() else "X"
try:
resnum = int(line[22:26].strip())
except ValueError:
resnum = 1
break
logger.info(f"Sanitizing pose with resname={resname}, chain={chain}, resnum={resnum}")
# Process pose PDB
new_lines = []
atom_counter = 0
element_counts = {}
with open(pose_pdb, "r") as f:
for line in f:
if line.startswith(("CONECT", "MASTER")):
continue
if line.startswith(("ATOM", "HETATM")):
atom_counter += 1
# Extract element from line or atom name
element = line[76:78].strip() if len(line) > 77 else ""
if not element:
# Try to get from atom name
atom_name = line[12:16].strip()
element = ''.join(c for c in atom_name if c.isalpha())[:2]
if len(element) > 1:
element = element[0].upper() + element[1].lower()
if not element:
element = "C" # Default fallback
# Generate new atom name (C1, C2, N1, etc.)
element_counts[element] = element_counts.get(element, 0) + 1
new_atom_name = f"{element}{element_counts[element]}"
new_atom_name = f"{new_atom_name:<4}" # Left-justified, 4 chars
# Build new line as HETATM
new_line = (
f"HETATM{atom_counter:5d} {new_atom_name}"
f"{resname:>3s} {chain}{resnum:4d} "
f"{line[30:54]}" # Coordinates
f"{line[54:66] if len(line) > 54 else ' 1.00 0.00'}" # Occupancy, B-factor
f" {element:>2s}\n"
)
new_lines.append(new_line)
elif line.startswith("END"):
new_lines.append("END\n")
# Write sanitized file
with open(pose_pdb, "w") as f:
f.writelines(new_lines)
logger.info(f" Sanitized: {pose_pdb}")
return str(pose_pdb)
def run_full_docking_workflow(
protein_pdb: str,
ligand_pdbs: list,
output_dir: str,
box_configs: dict = None,
) -> dict:
"""
Run the complete docking workflow for multiple ligands.
Args:
protein_pdb: Path to protein PDB file (1_protein_no_hydrogens.pdb)
ligand_pdbs: List of paths to ligand PDB files
output_dir: Base output directory for docking results
box_configs: Optional dict of {ligand_index: {'center': (x,y,z), 'size': (sx,sy,sz)}}
Returns:
Dict with results for each ligand including poses and energies
"""
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
box_configs = box_configs or {}
results = {
'success': True,
'ligands': [],
'warnings': [],
'errors': [],
}
# Step 1: Prepare receptor (only once for all ligands)
logger.info("=" * 60)
logger.info("STEP 1: Preparing receptor for docking")
logger.info("=" * 60)
try:
receptor_fixed, receptor_pdbqt = prepare_receptor(protein_pdb, str(output_dir))
except Exception as e:
results['success'] = False
results['errors'].append(f"Receptor preparation failed: {str(e)}")
return results
# Step 2: Process each ligand
for idx, ligand_pdb in enumerate(ligand_pdbs, start=1):
ligand_pdb = Path(ligand_pdb)
logger.info("")
logger.info("=" * 60)
logger.info(f"STEP 2.{idx}: Processing ligand {idx}: {ligand_pdb.name}")
logger.info("=" * 60)
lig_dir = output_dir / f"ligand_{idx}"
lig_dir.mkdir(parents=True, exist_ok=True)
ligand_result = {
'index': idx,
'original_file': str(ligand_pdb),
'poses': [],
'energies': [],
'success': True,
}
try:
# Copy original ligand for reference
original_copy = lig_dir / "original_ligand.pdb"
if not original_copy.exists():
original_copy.write_text(ligand_pdb.read_text())
# Prepare ligand PDBQT
ligand_pdbqt = prepare_ligand(str(ligand_pdb), str(lig_dir), idx)
# Get box configuration
cfg = box_configs.get(idx, {})
center = cfg.get('center')
size = cfg.get('size', (18.0, 18.0, 18.0))
if center is None:
# Compute center from ligand
cx, cy, cz = compute_ligand_center(str(ligand_pdb))
else:
cx, cy, cz = center
sx, sy, sz = size
# Run Vina docking
docked_pdbqt, log_file = run_vina_docking(
receptor_pdbqt, ligand_pdbqt,
cx, cy, cz, sx, sy, sz,
str(lig_dir), idx
)
# Parse binding energies
energies = parse_vina_log(log_file)
ligand_result['energies'] = energies
# Split poses
pose_pdbqts = split_docked_poses(docked_pdbqt)
# Convert each pose to PDB and sanitize
for pose_pdbqt in pose_pdbqts:
pose_pdb = convert_pdbqt_to_pdb(pose_pdbqt)
sanitize_docked_pose(str(original_copy), pose_pdb)
ligand_result['poses'].append(pose_pdb)
except Exception as e:
ligand_result['success'] = False
ligand_result['error'] = str(e)
results['errors'].append(f"Ligand {idx}: {str(e)}")
logger.error(f"Error processing ligand {idx}: {e}")
results['ligands'].append(ligand_result)
# Check overall success
results['success'] = all(lig['success'] for lig in results['ligands'])
logger.info("")
logger.info("=" * 60)
logger.info("DOCKING WORKFLOW COMPLETE")
logger.info("=" * 60)
return results
# Example usage / CLI interface
if __name__ == "__main__":
import argparse
logging.basicConfig(level=logging.INFO, format='%(message)s')
parser = argparse.ArgumentParser(description="Run AutoDock Vina docking workflow")
parser.add_argument("--protein", required=True, help="Path to protein PDB file")
parser.add_argument("--ligands", nargs="+", required=True, help="Paths to ligand PDB files")
parser.add_argument("--output", required=True, help="Output directory")
parser.add_argument("--center", nargs=3, type=float, help="Box center (x y z)")
parser.add_argument("--size", nargs=3, type=float, default=[18, 18, 18], help="Box size (x y z)")
args = parser.parse_args()
box_configs = {}
if args.center:
for i in range(1, len(args.ligands) + 1):
box_configs[i] = {
'center': tuple(args.center),
'size': tuple(args.size),
}
results = run_full_docking_workflow(
args.protein,
args.ligands,
args.output,
box_configs
)
print("\n" + "=" * 60)
print("RESULTS SUMMARY")
print("=" * 60)
print(f"Overall success: {results['success']}")
for lig in results['ligands']:
print(f"\nLigand {lig['index']}:")
print(f" Success: {lig['success']}")
if lig['success']:
print(f" Poses generated: {len(lig['poses'])}")
if lig['energies']:
print(f" Best binding energy: {lig['energies'][0]['affinity']} kcal/mol")
else:
print(f" Error: {lig.get('error', 'Unknown')}")