AmberFlow / python /structure_preparation.py
hemantn's picture
Deploy AmberFlow to Hugging Face Spaces
cc7c981
#!/usr/bin/env python3
"""
AMBER structure preparation script using MDAnalysis and PyMOL.
Complete pipeline: extract protein, add caps, handle ligands.
"""
import os
import subprocess
import sys
import shutil
def run_command(cmd, description="", timeout=120):
    """Run a shell command and report whether it succeeded.

    The command is executed through the shell (``shell=True``), so ``cmd``
    must already be safely quoted by the caller. The return code, stdout and
    stderr are echoed to stdout for diagnostics.

    Args:
        cmd: Shell command line to execute.
        description: Human-readable label used in the log messages.
        timeout: Seconds to wait before giving up. Defaults to 120, the value
            that used to be hard-coded.

    Returns:
        True if the command exited with status 0; False on a non-zero exit
        status, on timeout, or if the process could not be started.
    """
    try:
        print(f"Running: {description}")
        print(f"Command: {cmd}")
        result = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, timeout=timeout
        )
        print(f"Return code: {result.returncode}")
        if result.stdout:
            print(f"STDOUT: {result.stdout}")
        if result.stderr:
            print(f"STDERR: {result.stderr}")
        if result.returncode != 0:
            print(f"Error: {result.stderr}")
            return False
        return True
    except subprocess.TimeoutExpired:
        print(f"Timeout: {description}")
        return False
    except Exception as e:
        print(f"Error running {description}: {str(e)}")
        return False
def extract_protein_only(pdb_content, output_file, selected_chains=None):
    """Write the hydrogen-free protein atoms of a PDB structure to a file.

    ``pdb_content`` is first written to ``output_file``; a child Python
    process then loads that file with MDAnalysis and overwrites it with the
    selected atoms.

    Args:
        pdb_content: PDB text to filter.
        output_file: Path that receives the filtered structure.
        selected_chains: Optional iterable of chain IDs; when given, only
            protein atoms of those chains are kept.

    Returns:
        True on success, False if the MDAnalysis step fails or times out.
    """
    # Write input content to the output file first; it doubles as the input.
    with open(output_file, 'w') as f:
        f.write(pdb_content)

    selection = "protein"
    if selected_chains:
        # MDAnalysis selects chains with the `chainID` keyword; `chain` is not
        # part of its selection language.
        chain_filters = ' or '.join(f'chainID {c}' for c in selected_chains)
        selection += f' and ({chain_filters})'
    selection += " and not name H* 1H* 2H* 3H*"

    # The path and selection are embedded as Python literals (repr) and the
    # child is started without a shell, so neither can break out of the
    # command. sys.executable keeps the child in the interpreter that runs
    # this script.
    script = "\n".join([
        "import MDAnalysis as mda",
        f"path = {output_file!r}",
        f"mda.Universe(path).select_atoms({selection!r}).write(path)",
    ])
    try:
        result = subprocess.run(
            [sys.executable, '-c', script],
            capture_output=True, text=True, timeout=60,
        )
        if result.returncode != 0:
            raise RuntimeError(f"MDAnalysis error: {result.stderr}")
        return True
    except Exception as e:
        print(f"Error in extract_protein_only: {e}")
        return False
def add_capping_groups(input_file, output_file):
    """Add ACE and NME capping groups using add_caps.py, then insert TER cards.

    ``add_caps.py`` (resolved relative to the current working directory) writes
    a temporary capped file; an awk pass then copies it to ``output_file``,
    printing a ``TER`` line before the first line matching ``ACE`` that follows
    a line matching ``NME``.

    Args:
        input_file: Path of the uncapped protein PDB.
        output_file: Path that receives the capped PDB with TER cards.

    Returns:
        True if both commands succeeded, False otherwise.
    """
    import shlex  # local import: only needed to quote paths for the shell

    # Derive the temp name from the extension only, so a ".pdb" elsewhere in
    # the path (or a missing extension) cannot produce a wrong/colliding name.
    root, ext = os.path.splitext(output_file)
    temp_capped = f"{root}_temp{ext or '.pdb'}"

    try:
        # First add caps
        cmd = (
            f"python add_caps.py -i {shlex.quote(input_file)} "
            f"-o {shlex.quote(temp_capped)}"
        )
        if not run_command(cmd, f"Adding capping groups to {input_file}"):
            return False

        # Then add TER cards using awk
        awk_program = (
            '/NME/{nme=NR} /ACE/ && nme && NR > nme {print "TER"; nme=0} {print}'
        )
        cmd = (
            f"awk {shlex.quote(awk_program)} {shlex.quote(temp_capped)} "
            f"> {shlex.quote(output_file)}"
        )
        if not run_command(cmd, f"Adding TER cards to {temp_capped}"):
            return False
        return True
    finally:
        # Clean up the temp file on success and on every failure path.
        if os.path.exists(temp_capped):
            os.remove(temp_capped)
def extract_selected_chains(pdb_content, output_file, selected_chains):
    """Extract the protein atoms of the selected chains using PyMOL.

    The PDB text is written to a temporary file next to ``output_file``; a
    child Python process loads it with PyMOL and saves the selection
    ``(chain X or chain Y ...) and polymer.protein`` to ``output_file``.

    Args:
        pdb_content: PDB text to filter.
        output_file: Path that receives the extracted chains.
        selected_chains: Iterable of chain IDs to keep.

    Returns:
        True on success, False if PyMOL fails, times out, or any error occurs.
    """
    temp_input = None
    try:
        root, ext = os.path.splitext(output_file)
        temp_input = f"{root}_temp_input{ext or '.pdb'}"
        with open(temp_input, 'w') as f:
            f.write(pdb_content)

        # Build chain selection string
        chain_filters = ' or '.join(f'chain {c}' for c in selected_chains)
        selection = f"({chain_filters}) and polymer.protein"

        # Chain IDs come from the caller; embed every value as a Python
        # literal (repr) and run without a shell so they cannot inject code.
        script = "\n".join([
            "import pymol",
            "pymol.finish_launching(['pymol', '-c'])",
            f"pymol.cmd.load({temp_input!r})",
            f"pymol.cmd.save({output_file!r}, {selection!r})",
            "pymol.cmd.quit()",
        ])
        result = subprocess.run(
            [sys.executable, '-c', script],
            capture_output=True, text=True, timeout=60,
        )
        if result.returncode != 0:
            print(f"PyMOL chain extraction error: {result.stderr}")
            return False
        return True
    except Exception as e:
        print(f"Error extracting selected chains: {e}")
        return False
    finally:
        # Clean up the temp file even when PyMOL raised or timed out.
        if temp_input and os.path.exists(temp_input):
            os.remove(temp_input)
def extract_selected_ligands(pdb_content, output_file, selected_ligands):
    """Extract the selected ligands using PyMOL.

    Each entry of ``selected_ligands`` is a dict read for the keys ``resn``
    and ``chain``. Entries with a residue name contribute
    ``(resn R and chain C)`` (or ``resn R`` when no chain is given) to the
    PyMOL selection. When no entry yields a selection, ``output_file`` is
    written with a single newline and no PyMOL process is started.

    Args:
        pdb_content: PDB text to extract from.
        output_file: Path that receives the extracted ligands.
        selected_ligands: Iterable of ligand dicts (may be empty or None).

    Returns:
        True on success, False if PyMOL fails, times out, or any error occurs.
    """
    try:
        # Build the ligand selection first so that the "nothing selected"
        # path never creates (and therefore never leaks) a temp file.
        parts = []
        for lig in selected_ligands or []:
            # `or ''` tolerates keys that are present but set to None.
            resn = (lig.get('resn') or '').strip()
            chain = (lig.get('chain') or '').strip()
            if resn and chain:
                parts.append(f"(resn {resn} and chain {chain})")
            elif resn:
                parts.append(f"resn {resn}")

        if not parts:
            # No ligands to extract
            with open(output_file, 'w') as f:
                f.write('\n')
            return True

        selection = ' or '.join(parts)
        root, ext = os.path.splitext(output_file)
        temp_input = f"{root}_temp_input{ext or '.pdb'}"
        try:
            with open(temp_input, 'w') as f:
                f.write(pdb_content)
            # Ligand names come from the caller; embed every value as a
            # Python literal (repr) and run without a shell so they cannot
            # inject code.
            script = "\n".join([
                "import pymol",
                "pymol.finish_launching(['pymol', '-c'])",
                f"pymol.cmd.load({temp_input!r})",
                f"pymol.cmd.save({output_file!r}, {selection!r})",
                "pymol.cmd.quit()",
            ])
            result = subprocess.run(
                [sys.executable, '-c', script],
                capture_output=True, text=True, timeout=60,
            )
        finally:
            if os.path.exists(temp_input):
                os.remove(temp_input)

        if result.returncode != 0:
            print(f"PyMOL ligand extraction error: {result.stderr}")
            return False
        return True
    except Exception as e:
        print(f"Error extracting selected ligands: {e}")
        return False
def extract_ligands(pdb_content, output_file, ligand_residue_name=None, selected_ligands=None):
    """Extract ligand atoms from a PDB structure using MDAnalysis.

    ``pdb_content`` is first written to ``output_file``; a child Python
    process then loads that file and overwrites it with the ligand atoms.
    The selection is chosen in this order:

    1. ``selected_ligands`` - dicts read for ``resn`` and ``chain``; each
       becomes ``(resname R and segid C)`` or ``resname R``. If no entry has
       a residue name, the output is a single newline.
    2. ``ligand_residue_name`` - every atom with that residue name.
    3. Otherwise every HETATM residue name that is not in the built-in
       water/ion exclusion list; a single newline if none remain.

    Whenever ``ligand_residue_name`` is set, ATOM records in the result are
    rewritten as HETATM afterwards.

    Args:
        pdb_content: PDB text to extract from.
        output_file: Path that receives the ligand atoms.
        ligand_residue_name: Optional residue name of the ligand.
        selected_ligands: Optional list of ligand dicts.

    Returns:
        True on success, False if the MDAnalysis step fails or times out.
    """
    # Write input content to the output file first; it doubles as the input.
    with open(output_file, 'w') as f:
        f.write(pdb_content)
    try:
        selection = None
        script = None
        if selected_ligands:
            # Build selection from provided ligand list (RESN-CHAIN groups)
            parts = []
            for lig in selected_ligands:
                resn = (lig.get('resn') or '').strip()
                chain = (lig.get('chain') or '').strip()
                if resn and chain:
                    parts.append(f"(resname {resn} and segid {chain})")
                elif resn:
                    parts.append(f"resname {resn}")
            if parts:
                selection = ' or '.join(parts)
        elif ligand_residue_name:
            # Matches the residue in both ATOM and HETATM records.
            selection = f"resname {ligand_residue_name}"
        else:
            # Auto-detect: HETATM residue names minus water and ions.
            excluded = {
                'HOH', 'WAT', 'TIP3', 'TIP4', 'SPC', 'SPCE', 'NA', 'CL', 'K',
                'MG', 'CA', 'ZN', 'FE', 'MN', 'CU', 'NI', 'CO', 'CD', 'HG',
                'PB', 'SR', 'BA', 'RB', 'CS', 'LI', 'F', 'BR', 'I', 'PO4',
                'PO3', 'H2PO4', 'HPO4', 'H3PO4', 'SO4',
            }
            script = "\n".join([
                "import MDAnalysis as mda",
                f"path = {output_file!r}",
                f"excluded = {sorted(excluded)!r}",
                "u = mda.Universe(path)",
                "hetatm_residues = {a.resname for a in u.atoms if a.record_type == 'HETATM'}",
                "ligand_residues = sorted(hetatm_residues - set(excluded))",
                "if ligand_residues:",
                "    sel = ' or '.join('resname ' + res for res in ligand_residues)",
                "    u.select_atoms(sel).write(path)",
                "else:",
                "    with open(path, 'w') as f:",
                "        f.write('\\n')",
            ])

        if selection is not None:
            # Values are embedded as Python literals (repr) and the child is
            # started without a shell, so names supplied by the caller cannot
            # inject shell or Python code.
            script = "\n".join([
                "import MDAnalysis as mda",
                f"path = {output_file!r}",
                f"mda.Universe(path).select_atoms({selection!r}).write(path)",
            ])

        if script is None:
            # A ligand list was given but named no residue: empty result,
            # no need to start a child process.
            with open(output_file, 'w') as f:
                f.write('\n')
        else:
            result = subprocess.run(
                [sys.executable, '-c', script],
                capture_output=True, text=True, timeout=60,
            )
            if result.returncode != 0:
                raise RuntimeError(f"MDAnalysis error: {result.stderr}")

        # If specific ligand residue name was provided, convert ATOM to HETATM
        if ligand_residue_name:
            convert_atom_to_hetatm(output_file)
        return True
    except Exception as e:
        print(f"Error in extract_ligands: {e}")
        return False
def convert_atom_to_hetatm(pdb_file):
    """Rewrite a PDB file in place, turning every ATOM record into HETATM.

    Lines that start with ``ATOM`` get their first six characters replaced by
    ``HETATM``; all other lines are written back unchanged.

    Returns:
        True when the file was rewritten, False if reading or writing failed.
    """
    try:
        with open(pdb_file, 'r') as handle:
            records = handle.readlines()

        # The record name occupies the first six characters of the line.
        rewritten = [
            'HETATM' + record[6:] if record.startswith('ATOM') else record
            for record in records
        ]

        with open(pdb_file, 'w') as handle:
            handle.writelines(rewritten)
        print(f"Converted ATOM records to HETATM in {pdb_file}")
        return True
    except Exception as e:
        print(f"Error converting ATOM to HETATM: {e}")
        return False
def correct_ligand_with_pymol(ligand_file, corrected_file):
    """Add hydrogens to a ligand with the ``pymol`` command-line tool.

    Runs ``pymol -cq <ligand> -d "h_add; save <corrected>; quit"`` on the
    absolute paths of the two files.

    Args:
        ligand_file: Path of the extracted ligand PDB.
        corrected_file: Path that receives the ligand with hydrogens added.

    Returns:
        False if the ligand file is missing or empty, otherwise the success
        status of the PyMOL command.
    """
    import shlex  # local import: only needed to quote arguments for the shell

    ligand_path = os.path.abspath(ligand_file)
    corrected_path = os.path.abspath(corrected_file)
    if not os.path.isfile(ligand_path) or os.path.getsize(ligand_path) == 0:
        print("Ligand file missing or empty:", ligand_path)
        return False

    # Quote both the input path and the PyMOL command string so paths with
    # spaces or shell metacharacters reach pymol intact.
    pymol_commands = f"h_add; save {corrected_path}; quit"
    cmd = f"pymol -cq {shlex.quote(ligand_path)} -d {shlex.quote(pymol_commands)}"
    return run_command(cmd, "Correcting ligand with PyMOL")
def remove_connect_records(pdb_file):
    """Strip connectivity records from a PDB file in place.

    Every line that starts with ``CONECT`` is dropped; all other lines are
    written back in their original order.

    Returns:
        True when the file was rewritten, False if reading or writing failed.
    """
    try:
        with open(pdb_file, 'r') as handle:
            kept = []
            for record in handle.readlines():
                if record.startswith('CONECT'):
                    continue
                kept.append(record)

        with open(pdb_file, 'w') as handle:
            handle.writelines(kept)
        print(f"Removed CONNECT records from {pdb_file}")
        return True
    except Exception as e:
        print(f"Error removing CONNECT records: {e}")
        return False
def merge_protein_and_ligand(protein_file, ligand_file, output_file):
    """Merge a protein PDB and a ligand PDB into one file.

    The output is: the protein records (``END`` lines removed), exactly one
    ``TER`` card closing the protein, the ligand's ATOM/HETATM records, and a
    final ``END``. The TER card is written in PDB fixed columns using the
    serial number, residue name, chain ID and residue number of the last
    protein ATOM record; a bare ``TER`` is used if the protein part contains
    no ATOM record.

    Args:
        protein_file: Path of the (capped) protein PDB.
        ligand_file: Path of the corrected ligand PDB.
        output_file: Path that receives the merged structure.

    Returns:
        True on success, False if any file could not be read or written.
    """
    def _with_newline(text):
        # A final line without a newline would otherwise fuse with the
        # record appended after it.
        return text if text.endswith('\n') else text + '\n'

    try:
        with open(protein_file, 'r') as f:
            protein_lines = f.readlines()
        with open(ligand_file, 'r') as f:
            ligand_lines = f.readlines()

        # Protein part: drop END (re-added after the ligand) and remember the
        # last ATOM record for the TER card.
        protein_processed = []
        last_atom_line = None
        for line in protein_lines:
            if line.strip() == 'END':
                continue
            protein_processed.append(_with_newline(line))
            if line.startswith('ATOM'):
                last_atom_line = line

        # Trailing blank lines would end up between the protein and its TER.
        while protein_processed and not protein_processed[-1].strip():
            protein_processed.pop()

        # Close the protein with a TER card whether or not the input had an
        # END line, but do not duplicate a TER that is already there.
        if protein_processed and not protein_processed[-1].startswith('TER'):
            if last_atom_line is not None:
                atom_num = last_atom_line[6:11].strip()
                res_name = last_atom_line[17:20].strip()
                chain_id = last_atom_line[21:22].strip()
                res_num = last_atom_line[22:26].strip()
                # PDB columns: 1-6 "TER", 7-11 serial, 18-20 residue name,
                # 22 chain ID, 23-26 residue number.
                ter_line = (
                    f"TER   {atom_num:>5}      {res_name:>3} "
                    f"{chain_id:>1}{res_num:>4}\n"
                )
            else:
                ter_line = 'TER\n'
            protein_processed.append(ter_line)

        # Ligand part: keep only coordinate records (drops CRYST1, REMARK,
        # CONECT, END, ...).
        ligand_processed = [
            _with_newline(line)
            for line in ligand_lines
            if line.startswith(('ATOM', 'HETATM'))
        ]

        merged_content = ''.join(protein_processed) + ''.join(ligand_processed) + 'END\n'
        with open(output_file, 'w') as f:
            f.write(merged_content)
        return True
    except Exception as e:
        print(f"Error merging files: {str(e)}")
        return False
def _preparation_error(message):
    """Build the result dict that prepare_structure returns on failure."""
    return {
        'error': message,
        'prepared_structure': '',
        'original_atoms': 0,
        'prepared_atoms': 0,
        'removed_components': {},
        'added_capping': {},
        'preserved_ligands': 0,
        'ligand_present': False
    }


def _count_hetatm_resnames(pdb_path):
    """Count the distinct residue names among a PDB file's HETATM records."""
    with open(pdb_path, 'r') as f:
        return len({line[17:20].strip() for line in f if line.startswith('HETATM')})


def prepare_structure(pdb_content, options, output_dir="output"):
    """Prepare a PDB structure for AMBER/tleap and collect statistics.

    Pipeline (all intermediate files are written to ``output_dir``):

    0.  Save the input, extract the user-selected chains and ligands.
    1.  Strip hydrogens / non-protein atoms from the selected chains.
    2.  Add ACE/NME caps (skipped only when both ``add_ace`` and ``add_nme``
        are False).
    3.  If ``preserve_ligands`` is set and ligands were extracted, add
        hydrogens to them with PyMOL and merge them with the protein.
    4.  Remove CONECT records and compute statistics.

    Args:
        pdb_content: PDB text of the input structure.
        options: Dict read for the keys ``selected_chains``,
            ``selected_ligands``, ``add_ace``, ``add_nme``,
            ``preserve_ligands`` and ``separate_ligands``.
        output_dir: Directory for intermediate and final files.

    Returns:
        A dict with ``prepared_structure``, ``original_atoms``,
        ``prepared_atoms``, ``removed_components``, ``added_capping``,
        ``preserved_ligands``, ``ligand_present`` and ``separate_ligands``
        (plus ``ligand_content`` when ligands are present and
        ``separate_ligands`` is set). On failure the dict carries an
        ``error`` message and zeroed statistics instead.
    """
    try:
        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # Define all file paths in output directory
        input_file = os.path.join(output_dir, "0_original_input.pdb")
        user_chain_file = os.path.join(output_dir, "0_user_chain_selected.pdb")
        protein_file = os.path.join(output_dir, "1_protein_no_hydrogens.pdb")
        protein_capped_file = os.path.join(output_dir, "2_protein_with_caps.pdb")
        ligand_file = os.path.join(output_dir, "3_ligands_extracted.pdb")
        ligand_corrected_file = os.path.join(output_dir, "4_ligands_corrected.pdb")
        tleap_ready_file = os.path.join(output_dir, "tleap_ready.pdb")

        # Step 0: Save original input for reference
        print("Step 0: Saving original input...")
        with open(input_file, 'w') as f:
            f.write(pdb_content)

        # Step 0.5: Extract user-selected chains and ligands
        selected_chains = options.get('selected_chains', [])
        selected_ligands = options.get('selected_ligands', [])
        if selected_chains:
            print(f"Step 0.5a: Extracting selected chains: {', '.join(selected_chains)}")
            if not extract_selected_chains(pdb_content, user_chain_file, selected_chains):
                raise RuntimeError("Failed to extract selected chains")
        else:
            print("Step 0.5a: No chains selected, using original structure")
            shutil.copy2(input_file, user_chain_file)

        if selected_ligands:
            ligand_names = [f"{l.get('resn', '')}-{l.get('chain', '')}" for l in selected_ligands]
            print(f"Step 0.5b: Extracting selected ligands: {ligand_names}")
            if not extract_selected_ligands(pdb_content, ligand_file, selected_ligands):
                raise RuntimeError("Failed to extract selected ligands")
        else:
            print("Step 0.5b: No ligands selected, creating empty ligand file")
            with open(ligand_file, 'w') as f:
                f.write('\n')

        # Step 1: Extract protein only (remove hydrogens) from user-selected chains
        print("Step 1: Extracting protein without hydrogens from selected chains...")
        with open(user_chain_file, 'r') as f:
            chain_content = f.read()
        if not extract_protein_only(chain_content, protein_file):
            raise RuntimeError("Failed to extract protein")

        # Step 2: Add capping groups (only if add_ace or add_nme is True)
        add_ace = options.get('add_ace', True)
        add_nme = options.get('add_nme', True)
        if add_ace or add_nme:
            print("Step 2: Adding ACE and NME capping groups...")
            if not add_capping_groups(protein_file, protein_capped_file):
                raise RuntimeError("Failed to add capping groups")
        else:
            print("Step 2: Skipping capping groups (add_ace=False, add_nme=False)")
            print("Using protein without capping - copying to capped file")
            shutil.copy2(protein_file, protein_capped_file)

        # Step 3: Handle ligands (use pre-extracted ligand file)
        preserve_ligands = options.get('preserve_ligands', True)
        ligand_present = False
        if preserve_ligands:
            print("Step 3: Processing pre-extracted ligands...")
            # Check if ligand file has content (not just empty or newline)
            with open(ligand_file, 'r') as f:
                ligand_content = f.read().strip()
            if ligand_content and len(ligand_content) > 1:
                ligand_present = True
                print("Found pre-extracted ligands")
                # Correct ligand with PyMOL
                if not correct_ligand_with_pymol(ligand_file, ligand_corrected_file):
                    print("Error: Failed to process ligand")
                    return _preparation_error('Failed to process ligand with PyMOL')
                # Merge protein and ligand
                if not merge_protein_and_ligand(protein_capped_file, ligand_corrected_file, tleap_ready_file):
                    raise RuntimeError("Failed to merge protein and ligand")
            else:
                print("No ligands found in pre-extracted file, using protein only")
                shutil.copy2(protein_capped_file, tleap_ready_file)
        else:
            print("Step 3: Skipping ligand processing (preserve_ligands=False)")
            print("Using protein only - copying capped protein to tleap_ready")
            shutil.copy2(protein_capped_file, tleap_ready_file)

        # Remove CONNECT records from tleap_ready.pdb (PyMOL adds them)
        print("Removing CONNECT records from tleap_ready.pdb...")
        remove_connect_records(tleap_ready_file)

        # Read the final prepared structure
        with open(tleap_ready_file, 'r') as f:
            prepared_content = f.read()

        # Calculate statistics; split each text once instead of per counter.
        original_lines = pdb_content.split('\n')
        prepared_lines = prepared_content.split('\n')
        original_atoms = sum(1 for line in original_lines if line.startswith('ATOM'))
        prepared_atoms = sum(1 for line in prepared_lines if line.startswith('ATOM'))

        # Calculate removed components (counts are HETATM/ATOM records).
        water_names = {'HOH', 'WAT', 'TIP3', 'TIP4', 'TIP5', 'SPC', 'SPCE'}
        ion_names = {
            'NA', 'CL', 'K', 'MG', 'CA', 'ZN', 'FE', 'MN', 'CU', 'NI', 'CO',
            'CD', 'HG', 'PB', 'SR', 'BA', 'RB', 'CS', 'LI', 'F', 'BR', 'I',
            'PO4', 'PO3', 'H2PO4', 'HPO4', 'H3PO4',
        }
        water_count = sum(
            1 for line in original_lines
            if line.startswith('HETATM') and line[17:20].strip() in water_names
        )
        ion_count = sum(
            1 for line in original_lines
            if line.startswith('HETATM') and line[17:20].strip() in ion_names
        )
        hydrogen_count = sum(
            1 for line in original_lines
            if line.startswith('ATOM') and line[76:78].strip() == 'H'
        )

        # If not preserving ligands, the pre-extracted ligands count as
        # removed. `ligand_present` is only ever set when preserve_ligands is
        # true, so it must not gate this count (that made it always 0).
        ligand_count = 0
        if not preserve_ligands:
            ligand_count = _count_hetatm_resnames(ligand_file)

        removed_components = {
            'water': water_count,
            'ions': ion_count,
            'hydrogens': hydrogen_count,
            'ligands': ligand_count
        }

        # Calculate added capping groups (only if capping was performed).
        # Caps are keyed by (chain, residue number) so that caps carrying the
        # same residue number in different chains are counted separately.
        ace_residues = set()
        nme_residues = set()
        if add_ace or add_nme:
            for line in prepared_lines:
                if not line.startswith('ATOM'):
                    continue
                cap_key = (line[21:22], line[22:26].strip())
                if 'ACE' in line:
                    ace_residues.add(cap_key)
                elif 'NME' in line:
                    nme_residues.add(cap_key)
        added_capping = {
            'ace_groups': len(ace_residues),
            'nme_groups': len(nme_residues)
        }

        # Count preserved ligands from the pre-extracted file
        preserved_ligands = 0
        if ligand_present and preserve_ligands:
            preserved_ligands = _count_hetatm_resnames(ligand_file)

        separate_ligands = options.get('separate_ligands', False)
        result = {
            'prepared_structure': prepared_content,
            'original_atoms': original_atoms,
            'prepared_atoms': prepared_atoms,
            'removed_components': removed_components,
            'added_capping': added_capping,
            'preserved_ligands': preserved_ligands,
            'ligand_present': ligand_present,
            'separate_ligands': separate_ligands
        }

        # If separate ligands is enabled and ligands are present, include ligand content
        if ligand_present and separate_ligands:
            with open(ligand_corrected_file, 'r') as f:
                result['ligand_content'] = f.read()
        return result
    except Exception as e:
        return _preparation_error(str(e))
def parse_structure_info(pdb_content):
    """Summarise a PDB structure for display.

    Residue names, chain IDs and residue numbers are read from the PDB fixed
    columns 18-20, 22 and 23-26 of each ATOM/HETATM record.

    Args:
        pdb_content: PDB text (an empty string or None yields all-zero counts).

    Returns:
        A dict with:
          - ``atom_count``: number of ATOM records.
          - ``chains``: sorted list of chain IDs seen on ATOM records.
          - ``residue_count``: distinct (chain, residue name, residue number)
            combinations on ATOM records.
          - ``water_molecules``: distinct (chain, residue name, residue
            number) combinations among water HETATM records.
          - ``ions``: number of HETATM records whose residue name is a known
            ion name.
          - ``ligands``: sorted list of known ligand residue names found.
          - ``hetatoms``: number of HETATM records.
    """
    # Common water molecule names
    water_names = {'HOH', 'WAT', 'TIP3', 'TIP4', 'SPC', 'SPCE'}
    # Common ion names
    ion_names = {
        'NA', 'CL', 'K', 'MG', 'CA', 'ZN', 'FE', 'MN', 'CU', 'NI', 'CO', 'CD',
        'HG', 'PB', 'SR', 'BA', 'RB', 'CS', 'LI', 'F', 'BR', 'I', 'PO4',
        'PO3', 'H2PO4', 'HPO4', 'H3PO4', 'SO4',
    }
    # Common ligand indicators
    ligand_indicators = {
        'ATP', 'ADP', 'AMP', 'GDP', 'GTP', 'NAD', 'FAD', 'HEM', 'HEME', 'COA',
        'SAM', 'PLP', 'THF', 'FMN', 'NADP', 'UDP', 'CDP', 'TDP',
    }

    atom_count = 0
    hetatoms = 0
    ions = 0
    chains = set()
    residues = set()
    waters = set()
    ligands = set()

    for line in (pdb_content or '').split('\n'):
        if line.startswith('ATOM'):
            atom_count += 1
            chain_id = line[21:22].strip()
            if chain_id:
                chains.add(chain_id)
            # Include the chain: residue numbering restarts per chain, so
            # name+number alone merges residues of different chains.
            residues.add((chain_id, line[17:20].strip(), line[22:26].strip()))
        elif line.startswith('HETATM'):
            hetatoms += 1
            res_name = line[17:20].strip()
            if res_name in water_names:
                waters.add((line[21:22].strip(), res_name, line[22:26].strip()))
            elif res_name in ion_names:
                ions += 1
            elif res_name in ligand_indicators:
                ligands.add(res_name)

    return {
        'atom_count': atom_count,
        # Sorted so the output does not depend on set iteration order.
        'chains': sorted(chains),
        'residue_count': len(residues),
        'water_molecules': len(waters),
        'ions': ions,
        'ligands': sorted(ligands),
        'hetatoms': hetatoms
    }
def _format_pdb_record(line):
    """Re-emit a whitespace-delimited ATOM/HETATM record in PDB fixed columns.

    Expects the fields record, serial, atom name, residue name, chain,
    residue number, x, y, z, occupancy, B-factor, [segment ID], element.
    Any other line is returned unchanged.
    """
    fields = line.split()
    if len(fields) < 12 or fields[0] not in ('ATOM', 'HETATM'):
        return line
    record, serial, name, res_name, chain, res_seq, x, y, z, occ, bfac = fields[:11]
    segid = fields[11] if len(fields) > 12 else ''
    element = fields[-1]
    # Four-character names and names of two-letter elements start in column
    # 13; all other atom names start in column 14.
    if len(name) == 4 or len(element) == 2:
        name_field = f"{name:<4}"
    else:
        name_field = f" {name:<3}"
    return (
        f"{record:<6}{int(serial):>5} {name_field} {res_name:>3} {chain}{int(res_seq):>4}    "
        f"{float(x):8.3f}{float(y):8.3f}{float(z):8.3f}{float(occ):6.2f}{float(bfac):6.2f}"
        f"      {segid:<4}{element:>2}"
    )


def test_structure_preparation():
    """Smoke-test prepare_structure on a small built-in structure.

    The sample contains a three-residue peptide, one water, two ions and a
    GTP ligand. Results are written to the ``output`` folder and the
    statistics (or the error, if preparation failed) are printed.
    """
    # Sample records, one whitespace-delimited record per line; they are
    # converted to PDB fixed-column layout below because both the column
    # slicing in this module and PDB parsers rely on exact columns.
    raw_pdb = """HEADER TEST PROTEIN
ATOM 1 N MET A 1 16.347 37.019 21.335 1.00 50.73 N
ATOM 2 CA MET A 1 15.737 37.120 20.027 1.00 45.30 C
ATOM 3 C MET A 1 15.955 35.698 19.546 1.00 41.78 C
ATOM 4 O MET A 1 16.847 35.123 20.123 1.00 40.15 O
ATOM 5 CB MET A 1 14.234 37.456 19.789 1.00 44.12 C
ATOM 6 CG MET A 1 13.456 36.123 19.234 1.00 43.45 C
ATOM 7 SD MET A 1 12.123 35.456 18.123 1.00 42.78 S
ATOM 8 CE MET A 1 11.456 34.123 17.456 1.00 42.11 C
ATOM 9 N ALA A 2 15.123 35.456 18.789 1.00 40.44 N
ATOM 10 CA ALA A 2 14.456 34.123 18.123 1.00 39.77 C
ATOM 11 C ALA A 2 13.123 33.456 17.456 1.00 39.10 C
ATOM 12 O ALA A 2 12.456 32.123 16.789 1.00 38.43 O
ATOM 13 CB ALA A 2 13.789 33.123 17.123 1.00 38.76 C
ATOM 14 N ALA A 3 12.789 32.456 16.123 1.00 38.09 N
ATOM 15 CA ALA A 3 11.456 31.789 15.456 1.00 37.42 C
ATOM 16 C ALA A 3 10.123 30.456 14.789 1.00 36.75 C
ATOM 17 O ALA A 3 9.456 29.123 14.123 1.00 36.08 O
ATOM 18 CB ALA A 3 9.789 29.456 13.456 1.00 35.41 C
ATOM 19 OXT ALA A 3 8.123 28.789 13.456 1.00 35.74 O
HETATM 20 O HOH A 4 20.000 20.000 20.000 1.00 20.00 O
HETATM 21 H1 HOH A 4 20.500 20.500 20.500 1.00 20.00 H
HETATM 22 H2 HOH A 4 19.500 19.500 19.500 1.00 20.00 H
HETATM 23 NA NA A 5 25.000 25.000 25.000 1.00 25.00 NA
HETATM 24 CL CL A 6 30.000 30.000 30.000 1.00 30.00 CL
HETATM 1 PG GTP A 180 29.710 30.132 -5.989 1.00 52.48 A P
HETATM 2 O1G GTP A 180 29.197 28.937 -5.265 1.00 43.51 A O
HETATM 3 O2G GTP A 180 30.881 29.816 -6.827 1.00 63.11 A O
HETATM 4 O3G GTP A 180 30.013 31.278 -5.117 1.00 29.97 A O
HETATM 5 O3B GTP A 180 28.517 30.631 -6.995 1.00 23.23 A O
HETATM 6 PB GTP A 180 27.017 31.171 -6.766 1.00 29.58 A P
HETATM 7 O1B GTP A 180 26.072 30.050 -6.958 1.00 17.62 A O
HETATM 8 O2B GTP A 180 26.960 31.913 -5.483 1.00 38.76 A O
HETATM 9 O3A GTP A 180 26.807 32.212 -7.961 1.00 13.12 A O
HETATM 10 PA GTP A 180 26.277 33.726 -8.045 1.00 25.06 A P
HETATM 11 O1A GTP A 180 25.089 33.867 -7.187 1.00 44.06 A O
HETATM 12 O2A GTP A 180 27.427 34.635 -7.843 1.00 23.47 A O
HETATM 13 O5' GTP A 180 25.804 33.834 -9.555 1.00 42.05 A O
HETATM 14 C5' GTP A 180 26.615 33.475 -10.679 1.00 19.97 A C
HETATM 15 C4' GTP A 180 26.219 34.288 -11.894 1.00 14.90 A C
HETATM 16 O4' GTP A 180 24.826 34.017 -12.143 1.00 19.00 A O
HETATM 17 C3' GTP A 180 26.372 35.802 -11.724 1.00 4.96 A C
HETATM 18 O3' GTP A 180 26.880 36.347 -12.936 1.00 44.49 A O
HETATM 19 C2' GTP A 180 24.932 36.243 -11.481 1.00 17.12 A C
HETATM 20 O2' GTP A 180 24.719 37.581 -11.901 1.00 32.45 A O
HETATM 21 C1' GTP A 180 24.069 35.240 -12.240 1.00 16.17 A C
HETATM 22 N9 GTP A 180 22.724 35.005 -11.630 1.00 28.10 A N
HETATM 23 C8 GTP A 180 22.443 34.655 -10.325 1.00 27.05 A C
HETATM 24 N7 GTP A 180 21.168 34.483 -10.079 1.00 33.25 A N
HETATM 25 C5 GTP A 180 20.554 34.737 -11.307 1.00 26.23 A C
HETATM 26 C6 GTP A 180 19.183 34.712 -11.659 1.00 29.31 A C
HETATM 27 O6 GTP A 180 18.205 34.448 -10.957 1.00 40.80 A O
HETATM 28 N1 GTP A 180 19.000 35.036 -13.013 1.00 26.85 A N
HETATM 29 C2 GTP A 180 20.022 35.339 -13.903 1.00 28.70 A C
HETATM 30 N2 GTP A 180 19.627 35.619 -15.147 1.00 44.24 A N
HETATM 31 N3 GTP A 180 21.301 35.367 -13.569 1.00 21.67 A N
HETATM 32 C4 GTP A 180 21.489 35.054 -12.257 1.00 41.91 A C
END
"""
    test_pdb = "\n".join(_format_pdb_record(line) for line in raw_pdb.splitlines()) + "\n"

    options = {
        'remove_water': True,
        'remove_ions': True,
        'remove_hydrogens': True,
        'add_ace': True,
        'add_nme': True,
        'preserve_ligands': True,
        'separate_ligands': False,
        'fix_missing_atoms': False,
        'standardize_residues': False
    }
    print("Testing structure preparation...")
    result = prepare_structure(test_pdb, options, "output")

    # prepare_structure reports failures through an 'error' key with zeroed
    # statistics; surface it instead of printing zeros without explanation.
    if result.get('error'):
        print(f"Preparation failed: {result['error']}")

    print("\n=== STATISTICS ===")
    print(f"Original atoms: {result['original_atoms']}")
    print(f"Prepared atoms: {result['prepared_atoms']}")
    print(f"Removed: {result['removed_components']}")
    print(f"Added: {result['added_capping']}")
    print(f"Ligands: {result['preserved_ligands']}")
    print(f"Ligand present: {result['ligand_present']}")
    print("\nTest completed! Check 'output' folder for results:")
    print("- 1_protein_no_hydrogens.pdb (protein without hydrogens)")
    print("- 2_protein_with_caps.pdb (protein with ACE/NME caps)")
    print("- 3_ligands_extracted.pdb (extracted ligands, if any)")
    print("- 4_ligands_corrected.pdb (corrected ligands, if any)")
    print("- tleap_ready.pdb (final structure ready for tleap)")
# Run the built-in smoke test when this file is executed as a script.
if __name__ == "__main__":
    test_structure_preparation()