Spaces:

hemantn
/

AmberFlow

Sleeping

File size: 30,799 Bytes

cc7c981

#!/usr/bin/env python3
"""
AMBER Structure Preparation Script using MDAnalysis
Complete pipeline: extract protein, add caps, handle ligands
"""

import os
import shlex
import shutil
import subprocess
import sys

def run_command(cmd, description=""):
    """Execute a shell command, echo what it printed, and report success.

    Args:
        cmd: Command line handed to the shell as one string.
        description: Human-readable label used in the progress messages.

    Returns:
        True if the command exits with status 0. False if it exits with a
        non-zero status, exceeds the 120-second timeout, or cannot be run
        for any other reason (the problem is printed, never raised).
    """
    try:
        print(f"Running: {description}")
        print(f"Command: {cmd}")
        proc = subprocess.run(
            cmd,
            shell=True,
            capture_output=True,
            text=True,
            timeout=120,
        )
        print(f"Return code: {proc.returncode}")
        if proc.stdout:
            print(f"STDOUT: {proc.stdout}")
        if proc.stderr:
            print(f"STDERR: {proc.stderr}")
        if proc.returncode == 0:
            return True
        # Non-zero exit status: surface stderr once more as the error text.
        print(f"Error: {proc.stderr}")
        return False
    except subprocess.TimeoutExpired:
        print(f"Timeout: {description}")
        return False
    except Exception as exc:
        print(f"Error running {description}: {str(exc)}")
        return False

def extract_protein_only(pdb_content, output_file, selected_chains=None):
    """Write the hydrogen-free protein atoms of a PDB structure to a file.

    The raw input is first staged at ``output_file``; a child ``python``
    process then loads that file with MDAnalysis and overwrites it with the
    selected atoms. If the child process fails, ``output_file`` is left
    holding the unfiltered input.

    Args:
        pdb_content: Full text of the input PDB structure.
        output_file: Path that receives the filtered structure.
        selected_chains: Optional iterable of chain identifiers; when given,
            only protein atoms of those chains are kept.

    Returns:
        True when the MDAnalysis process exits with status 0, otherwise False
        (the error is printed, never raised).
    """
    # Stage the input where the MDAnalysis helper expects to read it.
    with open(output_file, 'w') as f:
        f.write(pdb_content)

    try:
        path = os.fspath(output_file)

        chain_sel = ''
        if selected_chains:
            # MDAnalysis spells the chain filter "chainID"; "chain" is PyMOL
            # syntax and is rejected by the MDAnalysis selection parser.
            chain_filters = ' or '.join(f'chainID {c}' for c in selected_chains)
            chain_sel = f' and ({chain_filters})'
        selection = f"protein{chain_sel} and not name H* 1H* 2H* 3H*"

        # Values are embedded as Python literals (repr) and the script is
        # passed as a single argv entry, so neither the path nor the chain
        # identifiers are ever interpreted by a shell.
        script = (
            "import MDAnalysis as mda\n"
            f"u = mda.Universe({path!r})\n"
            f"u.select_atoms({selection!r}).write({path!r})\n"
        )
        result = subprocess.run(
            ["python", "-c", script],
            capture_output=True,
            text=True,
            timeout=60,
        )

        if result.returncode != 0:
            raise Exception(f"MDAnalysis error: {result.stderr}")

        return True
    except Exception as e:
        print(f"Error in extract_protein_only: {e}")
        return False

def add_capping_groups(input_file, output_file):
    """Add ACE and NME capping groups, then insert TER cards between chains.

    Runs ``add_caps.py`` (resolved relative to the current working directory)
    into a temporary file, then uses ``awk`` to print a ``TER`` line before
    the first line containing ``ACE`` that follows a line containing ``NME``.

    Args:
        input_file: Path of the uncapped protein PDB file.
        output_file: Path that receives the capped structure with TER cards.

    Returns:
        True when both external commands succeed, otherwise False.
    """
    input_path = os.fspath(input_file)
    output_path = os.fspath(output_file)

    # splitext only touches the final extension, so the temp name can never
    # coincide with output_file (str.replace would, for a name without
    # ".pdb") and directory names containing ".pdb" are left alone.
    root, ext = os.path.splitext(output_path)
    temp_capped = f"{root}_temp{ext or '.pdb'}"

    try:
        # First add caps
        cmd = (
            f"python add_caps.py -i {shlex.quote(input_path)} "
            f"-o {shlex.quote(temp_capped)}"
        )
        if not run_command(cmd, f"Adding capping groups to {input_file}"):
            return False

        # Then add TER cards using awk
        cmd = (
            "awk '/NME/{nme=NR} /ACE/ && nme && NR > nme "
            "{print \"TER\"; nme=0} {print}' "
            f"{shlex.quote(temp_capped)} > {shlex.quote(output_path)}"
        )
        return run_command(cmd, f"Adding TER cards to {temp_capped}")
    finally:
        # Remove the intermediate file on every exit path, including failures.
        if os.path.exists(temp_capped):
            os.remove(temp_capped)

def extract_selected_chains(pdb_content, output_file, selected_chains):
    """Save only the protein atoms of the selected chains, using PyMOL.

    The input text is staged in a temporary PDB file next to ``output_file``
    and loaded by a headless PyMOL session in a child ``python`` process,
    which saves the selection
    ``(chain X or chain Y ...) and polymer.protein`` to ``output_file``.

    Args:
        pdb_content: Full text of the input PDB structure.
        output_file: Path the extracted chains are written to.
        selected_chains: Iterable of chain identifiers to keep.

    Returns:
        True when the PyMOL process exits with status 0, otherwise False
        (timeouts and other exceptions are printed, never raised).
    """
    temp_input = None
    try:
        output_path = os.fspath(output_file)
        # splitext only touches the final extension, so the temp name can
        # never coincide with output_file itself.
        root, ext = os.path.splitext(output_path)
        temp_input = f"{root}_temp_input{ext or '.pdb'}"
        with open(temp_input, 'w') as f:
            f.write(pdb_content)

        # Build chain selection string (PyMOL selection syntax)
        chain_filters = ' or '.join([f'chain {c}' for c in selected_chains])
        selection = f"({chain_filters}) and polymer.protein"

        # Paths and selection are embedded as Python literals (repr) and the
        # script travels as one argv entry, so no shell ever parses them.
        script = "\n".join([
            "import pymol",
            "pymol.finish_launching(['pymol', '-c'])",
            f"pymol.cmd.load({temp_input!r})",
            f"pymol.cmd.save({output_path!r}, {selection!r})",
            "pymol.cmd.quit()",
            "",
        ])
        result = subprocess.run(
            ["python", "-c", script],
            capture_output=True,
            text=True,
            timeout=60,
        )

        if result.returncode != 0:
            print(f"PyMOL chain extraction error: {result.stderr}")
            return False

        return True
    except Exception as e:
        print(f"Error extracting selected chains: {e}")
        return False
    finally:
        # Clean up the staged input on every exit path, including timeouts.
        if temp_input and os.path.exists(temp_input):
            os.remove(temp_input)

def extract_selected_ligands(pdb_content, output_file, selected_ligands):
    """Save the selected ligands to a PDB file, using PyMOL.

    Each ligand is described by a dict with a ``resn`` (residue name) and an
    optional ``chain``; entries without a residue name are ignored. When no
    usable entry remains, ``output_file`` is written with a single newline
    and no external process is started.

    Args:
        pdb_content: Full text of the input PDB structure.
        output_file: Path the extracted ligands are written to.
        selected_ligands: Iterable of dicts with ``resn`` and ``chain`` keys.

    Returns:
        True on success (including the "nothing to extract" case), False when
        the PyMOL process fails or any exception occurs (printed, not raised).
    """
    temp_input = None
    try:
        # Build ligand selection string (PyMOL selection syntax). A missing
        # or None value is treated like an empty string.
        parts = []
        for lig in selected_ligands:
            resn = (lig.get('resn') or '').strip()
            chain = (lig.get('chain') or '').strip()
            if resn and chain:
                parts.append(f"(resn {resn} and chain {chain})")
            elif resn:
                parts.append(f"resn {resn}")

        if not parts:
            # No ligands to extract; decided before staging any temp file so
            # nothing is left behind.
            with open(output_file, 'w') as f:
                f.write('\n')
            return True

        selection = ' or '.join(parts)

        output_path = os.fspath(output_file)
        # splitext only touches the final extension, so the temp name can
        # never coincide with output_file itself.
        root, ext = os.path.splitext(output_path)
        temp_input = f"{root}_temp_input{ext or '.pdb'}"
        with open(temp_input, 'w') as f:
            f.write(pdb_content)

        # Paths and selection are embedded as Python literals (repr) and the
        # script travels as one argv entry, so no shell ever parses them.
        script = "\n".join([
            "import pymol",
            "pymol.finish_launching(['pymol', '-c'])",
            f"pymol.cmd.load({temp_input!r})",
            f"pymol.cmd.save({output_path!r}, {selection!r})",
            "pymol.cmd.quit()",
            "",
        ])
        result = subprocess.run(
            ["python", "-c", script],
            capture_output=True,
            text=True,
            timeout=60,
        )

        if result.returncode != 0:
            print(f"PyMOL ligand extraction error: {result.stderr}")
            return False

        return True
    except Exception as e:
        print(f"Error extracting selected ligands: {e}")
        return False
    finally:
        # Clean up the staged input on every exit path, including timeouts.
        if temp_input and os.path.exists(temp_input):
            os.remove(temp_input)

def extract_ligands(pdb_content, output_file, ligand_residue_name=None, selected_ligands=None):
    """Extract ligand atoms from a PDB structure using MDAnalysis.

    The raw input is first staged at ``output_file``; a child ``python``
    process then loads it with MDAnalysis and overwrites it with the ligand
    atoms. Which atoms are kept depends on the arguments, in this order:

    1. ``selected_ligands`` (list of dicts with ``resn`` and optional
       ``chain``): atoms matching ``resname``, restricted by ``segid`` when a
       chain is given. If no entry has a residue name, the output is a single
       newline.
    2. ``ligand_residue_name``: every atom with that residue name.
    3. Neither: every residue name seen on a HETATM record that is not in the
       built-in water/ion list; a single newline is written if none remain.

    When ``ligand_residue_name`` is given, ATOM records in the result are
    rewritten as HETATM afterwards.

    Args:
        pdb_content: Full text of the input PDB structure.
        output_file: Path that receives the extracted ligands.
        ligand_residue_name: Optional residue name of the ligand.
        selected_ligands: Optional list of ligand descriptors.

    Returns:
        True on success, False when the MDAnalysis process fails or any other
        exception occurs (printed, not raised).
    """
    # Stage the input where the MDAnalysis helper expects to read it.
    with open(output_file, 'w') as f:
        f.write(pdb_content)

    try:
        path = os.fspath(output_file)

        def selection_script(selection):
            # Values are embedded as Python literals so quotes or backslashes
            # in them cannot break out of the generated source.
            return (
                "import MDAnalysis as mda\n"
                f"u = mda.Universe({path!r})\n"
                f"u.select_atoms({selection!r}).write({path!r})\n"
            )

        script = None
        if selected_ligands:
            # Build selection from provided ligand list (RESN-CHAIN groups).
            # A missing or None value is treated like an empty string.
            parts = []
            for lig in selected_ligands:
                resn = (lig.get('resn') or '').strip()
                chain = (lig.get('chain') or '').strip()
                if resn and chain:
                    parts.append(f"(resname {resn} and segid {chain})")
                elif resn:
                    parts.append(f"resname {resn}")
            if parts:
                script = selection_script(' or '.join(parts))
        elif ligand_residue_name:
            # Matches the residue name on both ATOM and HETATM records.
            script = selection_script(f'resname {ligand_residue_name}')
        else:
            # Auto-detect: HETATM residue names minus water and ions.
            excluded = [
                'HOH', 'WAT', 'TIP3', 'TIP4', 'SPC', 'SPCE', 'NA', 'CL', 'K',
                'MG', 'CA', 'ZN', 'FE', 'MN', 'CU', 'NI', 'CO', 'CD', 'HG',
                'PB', 'SR', 'BA', 'RB', 'CS', 'LI', 'F', 'BR', 'I', 'PO4',
                'PO3', 'H2PO4', 'HPO4', 'H3PO4', 'SO4',
            ]
            script = (
                "import MDAnalysis as mda\n"
                f"path = {path!r}\n"
                f"excluded = set({excluded!r})\n"
                "u = mda.Universe(path)\n"
                "hetatm_residues = set()\n"
                "for atom in u.atoms:\n"
                "    if atom.record_type == 'HETATM':\n"
                "        hetatm_residues.add(atom.resname)\n"
                "ligand_residues = hetatm_residues - excluded\n"
                "if ligand_residues:\n"
                "    resname_sel = ' or '.join("
                "'resname ' + res for res in sorted(ligand_residues))\n"
                "    u.select_atoms(resname_sel).write(path)\n"
                "else:\n"
                "    with open(path, 'w') as f:\n"
                "        f.write('\\n')\n"
            )

        if script is None:
            # Nothing selectable: write the empty marker directly instead of
            # spawning a process just to write one newline.
            with open(output_file, 'w') as f:
                f.write('\n')
        else:
            result = subprocess.run(
                ["python", "-c", script],
                capture_output=True,
                text=True,
                timeout=60,
            )
            if result.returncode != 0:
                raise Exception(f"MDAnalysis error: {result.stderr}")

        # If specific ligand residue name was provided, convert ATOM to HETATM
        if ligand_residue_name:
            convert_atom_to_hetatm(output_file)

        return True
    except Exception as e:
        print(f"Error in extract_ligands: {e}")
        return False

def convert_atom_to_hetatm(pdb_file):
    """Rewrite a PDB file in place so every ATOM record becomes HETATM.

    Args:
        pdb_file: Path of the PDB file to modify.

    Returns:
        True after the file has been rewritten, False if reading or writing
        it raised an exception (the error is printed).
    """
    try:
        with open(pdb_file, 'r') as src:
            records = src.readlines()

        # The first six characters hold the record name, so replacing them
        # with 'HETATM' leaves every other column where it was.
        rewritten = [
            'HETATM' + record[6:] if record.startswith('ATOM') else record
            for record in records
        ]

        with open(pdb_file, 'w') as dst:
            dst.writelines(rewritten)

        print(f"Converted ATOM records to HETATM in {pdb_file}")
        return True
    except Exception as exc:
        print(f"Error converting ATOM to HETATM: {exc}")
        return False

def correct_ligand_with_pymol(ligand_file, corrected_file):
    """Add hydrogens to a ligand with PyMOL and save the result.

    Args:
        ligand_file: Path of the extracted ligand PDB file.
        corrected_file: Path that receives the hydrogen-added ligand.

    Returns:
        False when ``ligand_file`` is missing or zero bytes long, otherwise
        the success status of the ``pymol`` command.
    """
    ligand_path = os.path.abspath(ligand_file)
    corrected_path = os.path.abspath(corrected_file)
    if not os.path.isfile(ligand_path) or os.path.getsize(ligand_path) == 0:
        print("Ligand file missing or empty:", ligand_path)
        return False

    # Use PyMOL to add hydrogens and save corrected ligand. Both the input
    # path and the -d command string are shell-quoted because absolute paths
    # may contain spaces or other shell metacharacters.
    pymol_commands = f"h_add; save {corrected_path}; quit"
    cmd = f"pymol -cq {shlex.quote(ligand_path)} -d {shlex.quote(pymol_commands)}"
    return run_command(cmd, "Correcting ligand with PyMOL")

def remove_connect_records(pdb_file):
    """Strip every CONECT record from a PDB file, rewriting it in place.

    Args:
        pdb_file: Path of the PDB file to clean.

    Returns:
        True after the file has been rewritten, False if reading or writing
        it raised an exception (the error is printed).
    """
    try:
        with open(pdb_file, 'r') as src:
            # Keep everything except lines that begin with the CONECT tag.
            kept = [record for record in src if not record.startswith('CONECT')]

        with open(pdb_file, 'w') as dst:
            dst.writelines(kept)

        print(f"Removed CONNECT records from {pdb_file}")
        return True
    except Exception as exc:
        print(f"Error removing CONNECT records: {exc}")
        return False

def _format_ter_record(last_atom_line):
    """Build a TER line for the chain that ends at ``last_atom_line``."""
    if not last_atom_line or not last_atom_line.startswith('ATOM'):
        return 'TER\n'

    atom_num = last_atom_line[6:11].strip()
    res_name = last_atom_line[17:20].strip()
    chain_id = last_atom_line[21:22].strip()
    res_num = last_atom_line[22:26].strip()
    # PDB columns: record name 1-6, serial 7-11, resName 18-20, chainID 22,
    # resSeq 23-26. The serial reuses the last atom's number as read from the
    # protein file.
    return f"TER   {atom_num:>5}      {res_name:>3} {chain_id:1}{res_num:>4}\n"


def merge_protein_and_ligand(protein_file, ligand_file, output_file):
    """Merge a protein PDB and a ligand PDB into one file.

    The protein file is copied with each ``END`` line replaced by a ``TER``
    record describing the last ATOM seen. If the protein file has no ``END``
    line and does not already finish with ``TER``, a ``TER`` record is
    appended so the ligand never continues the protein chain. Only the
    ATOM/HETATM lines of the ligand file follow, and the output is closed
    with a single ``END``.

    Args:
        protein_file: Path of the (capped) protein PDB file.
        ligand_file: Path of the corrected ligand PDB file.
        output_file: Path that receives the merged structure.

    Returns:
        True on success, False when any file operation raises (the error is
        printed, not raised).
    """
    def with_newline(record):
        # A final line without a newline would otherwise be glued to the
        # record that follows it in the merged file.
        return record if record.endswith('\n') else record + '\n'

    try:
        # Read protein file
        with open(protein_file, 'r') as f:
            protein_lines = f.readlines()

        # Read ligand file
        with open(ligand_file, 'r') as f:
            ligand_lines = f.readlines()

        # Process protein file: replace 'END' with a properly formatted 'TER'
        protein_processed = []
        last_atom_line = None
        saw_end = False
        for line in protein_lines:
            if line.strip() == 'END':
                saw_end = True
                protein_processed.append(_format_ter_record(last_atom_line))
            else:
                protein_processed.append(with_newline(line))
                if line.startswith('ATOM'):
                    last_atom_line = line

        if not saw_end:
            # No END line to convert: terminate the protein explicitly unless
            # its last non-blank record is already a TER.
            last_record = next(
                (rec for rec in reversed(protein_processed) if rec.strip()), ''
            )
            if last_record and not last_record.startswith('TER'):
                protein_processed.append(_format_ter_record(last_atom_line))

        # Process ligand file: drop header info (CRYST, REMARK, etc.) and keep
        # only ATOM/HETATM records
        ligand_processed = [
            with_newline(line)
            for line in ligand_lines
            if line.startswith(('ATOM', 'HETATM'))
        ]

        # Combine: protein + TER + ligand + END
        merged_content = ''.join(protein_processed) + ''.join(ligand_processed) + 'END\n'

        with open(output_file, 'w') as f:
            f.write(merged_content)

        return True
    except Exception as e:
        print(f"Error merging files: {str(e)}")
        return False

def _failed_preparation(message):
    """Build the result dictionary returned when structure preparation fails."""
    return {
        'error': message,
        'prepared_structure': '',
        'original_atoms': 0,
        'prepared_atoms': 0,
        'removed_components': {},
        'added_capping': {},
        'preserved_ligands': 0,
        'ligand_present': False
    }


def _hetatm_resnames(pdb_path):
    """Return the distinct residue names found on HETATM records of a PDB file."""
    with open(pdb_path, 'r') as f:
        return {line[17:20].strip() for line in f if line.startswith('HETATM')}


def _count_cap_residues(pdb_lines, cap_name):
    """Count distinct (chain, residue number) pairs of ATOM records named cap_name."""
    return len({
        (line[21:22], line[22:26].strip())
        for line in pdb_lines
        if line.startswith('ATOM') and line[17:20].strip() == cap_name
    })


def prepare_structure(pdb_content, options, output_dir="output"):
    """Main function to prepare structure for AMBER simulation.

    Runs the pipeline step by step, writing each intermediate file into
    ``output_dir``: save the input, extract the selected chains and ligands,
    strip hydrogens, add capping groups, hydrogenate the ligands with PyMOL,
    merge protein and ligand into ``tleap_ready.pdb`` and remove CONECT
    records from it.

    Args:
        pdb_content: Full text of the input PDB structure.
        options: Dict of settings. Keys read here: ``selected_chains``,
            ``selected_ligands``, ``add_ace``, ``add_nme``,
            ``preserve_ligands`` and ``separate_ligands``.
        output_dir: Directory for all intermediate and final files; created
            if it does not exist.

    Returns:
        A dict with ``prepared_structure`` (text of tleap_ready.pdb),
        ``original_atoms``, ``prepared_atoms``, ``removed_components``,
        ``added_capping``, ``preserved_ligands``, ``ligand_present`` and
        ``separate_ligands``, plus ``ligand_content`` when separate ligands
        were requested and a ligand is present. On any failure the dict
        instead carries an ``error`` message with zeroed statistics; no
        exception is propagated to the caller.
    """
    try:
        # Create output directory if it doesn't exist
        os.makedirs(output_dir, exist_ok=True)

        # Define all file paths in output directory
        input_file = os.path.join(output_dir, "0_original_input.pdb")
        user_chain_file = os.path.join(output_dir, "0_user_chain_selected.pdb")
        protein_file = os.path.join(output_dir, "1_protein_no_hydrogens.pdb")
        protein_capped_file = os.path.join(output_dir, "2_protein_with_caps.pdb")
        ligand_file = os.path.join(output_dir, "3_ligands_extracted.pdb")
        ligand_corrected_file = os.path.join(output_dir, "4_ligands_corrected.pdb")
        tleap_ready_file = os.path.join(output_dir, "tleap_ready.pdb")

        # Step 0: Save original input for reference
        print("Step 0: Saving original input...")
        with open(input_file, 'w') as f:
            f.write(pdb_content)

        # Step 0.5: Extract user-selected chains and ligands
        selected_chains = options.get('selected_chains', [])
        selected_ligands = options.get('selected_ligands', [])

        if selected_chains:
            print(f"Step 0.5a: Extracting selected chains: {', '.join(selected_chains)}")
            if not extract_selected_chains(pdb_content, user_chain_file, selected_chains):
                raise Exception("Failed to extract selected chains")
        else:
            print("Step 0.5a: No chains selected, using original structure")
            shutil.copy2(input_file, user_chain_file)

        if selected_ligands:
            ligand_names = [f"{l.get('resn', '')}-{l.get('chain', '')}" for l in selected_ligands]
            print(f"Step 0.5b: Extracting selected ligands: {ligand_names}")
            if not extract_selected_ligands(pdb_content, ligand_file, selected_ligands):
                raise Exception("Failed to extract selected ligands")
        else:
            print("Step 0.5b: No ligands selected, creating empty ligand file")
            with open(ligand_file, 'w') as f:
                f.write('\n')

        # Step 1: Extract protein only (remove hydrogens) from user-selected chains
        print("Step 1: Extracting protein without hydrogens from selected chains...")
        with open(user_chain_file, 'r') as f:
            chain_content = f.read()

        if not extract_protein_only(chain_content, protein_file):
            raise Exception("Failed to extract protein")

        # Step 2: Add capping groups (only if add_ace or add_nme is True)
        add_ace = options.get('add_ace', True)
        add_nme = options.get('add_nme', True)

        if add_ace or add_nme:
            print("Step 2: Adding ACE and NME capping groups...")
            if not add_capping_groups(protein_file, protein_capped_file):
                raise Exception("Failed to add capping groups")
        else:
            print("Step 2: Skipping capping groups (add_ace=False, add_nme=False)")
            print("Using protein without capping - copying to capped file")
            # Copy protein file to capped file (no capping)
            shutil.copy2(protein_file, protein_capped_file)

        # Step 3: Handle ligands (use pre-extracted ligand file)
        preserve_ligands = options.get('preserve_ligands', True)
        ligand_present = False

        if preserve_ligands:
            print("Step 3: Processing pre-extracted ligands...")

            # Check if ligand file has content (not just empty or newline)
            with open(ligand_file, 'r') as f:
                ligand_content = f.read().strip()

            if ligand_content and len(ligand_content) > 1:
                ligand_present = True
                print("Found pre-extracted ligands")

                # Correct ligand with PyMOL
                if not correct_ligand_with_pymol(ligand_file, ligand_corrected_file):
                    print("Error: Failed to process ligand")
                    return _failed_preparation('Failed to process ligand with PyMOL')

                # Merge protein and ligand
                if not merge_protein_and_ligand(protein_capped_file, ligand_corrected_file, tleap_ready_file):
                    raise Exception("Failed to merge protein and ligand")
            else:
                print("No ligands found in pre-extracted file, using protein only")
                # Copy protein file to tleap_ready
                shutil.copy2(protein_capped_file, tleap_ready_file)
        else:
            print("Step 3: Skipping ligand processing (preserve_ligands=False)")
            print("Using protein only - copying capped protein to tleap_ready")
            # Copy protein file to tleap_ready (protein only, no ligands)
            shutil.copy2(protein_capped_file, tleap_ready_file)

        # Remove CONNECT records from tleap_ready.pdb (PyMOL adds them)
        print("Removing CONNECT records from tleap_ready.pdb...")
        remove_connect_records(tleap_ready_file)

        # Read the final prepared structure; the handle is only needed for
        # this read, so the statistics below run after it is closed.
        with open(tleap_ready_file, 'r') as f:
            prepared_content = f.read()

        original_lines = pdb_content.split('\n')
        prepared_lines = prepared_content.split('\n')

        # Calculate statistics
        original_atoms = sum(1 for line in original_lines if line.startswith('ATOM'))
        prepared_atoms = sum(1 for line in prepared_lines if line.startswith('ATOM'))

        # Calculate removed components
        water_names = {'HOH', 'WAT', 'TIP3', 'TIP4', 'TIP5', 'SPC', 'SPCE'}
        ion_names = {'NA', 'CL', 'K', 'MG', 'CA', 'ZN', 'FE', 'MN', 'CU', 'NI', 'CO', 'CD', 'HG', 'PB', 'SR', 'BA', 'RB', 'CS', 'LI', 'F', 'BR', 'I', 'PO4', 'PO3', 'H2PO4', 'HPO4', 'H3PO4'}
        water_count = sum(
            1 for line in original_lines
            if line.startswith('HETATM') and line[17:20].strip() in water_names
        )
        ion_count = sum(
            1 for line in original_lines
            if line.startswith('HETATM') and line[17:20].strip() in ion_names
        )
        hydrogen_count = sum(
            1 for line in original_lines
            if line.startswith('ATOM') and line[76:78].strip() == 'H'
        )

        # If not preserving ligands, count the extracted ligand residue names
        # as removed. This no longer depends on ligand_present, which is only
        # ever set when ligands ARE preserved and therefore made the count
        # permanently zero.
        ligand_count = 0
        if not preserve_ligands:
            ligand_count = len(_hetatm_resnames(ligand_file))

        removed_components = {
            'water': water_count,
            'ions': ion_count,
            'hydrogens': hydrogen_count,
            'ligands': ligand_count
        }

        # Calculate added capping groups (only if capping was performed).
        # Caps are matched on the residue-name columns and keyed by chain and
        # residue number, so equally numbered caps on different chains are
        # counted separately.
        if add_ace or add_nme:
            added_capping = {
                'ace_groups': _count_cap_residues(prepared_lines, 'ACE'),
                'nme_groups': _count_cap_residues(prepared_lines, 'NME')
            }
        else:
            added_capping = {
                'ace_groups': 0,
                'nme_groups': 0
            }

        # Count preserved ligands from the pre-extracted file
        preserved_ligands = 0
        if ligand_present and preserve_ligands:
            preserved_ligands = len(_hetatm_resnames(ligand_file))

        result = {
            'prepared_structure': prepared_content,
            'original_atoms': original_atoms,
            'prepared_atoms': prepared_atoms,
            'removed_components': removed_components,
            'added_capping': added_capping,
            'preserved_ligands': preserved_ligands,
            'ligand_present': ligand_present,
            'separate_ligands': options.get('separate_ligands', False)
        }

        # If separate ligands is enabled and ligands are present, include ligand content
        if ligand_present and options.get('separate_ligands', False):
            with open(ligand_corrected_file, 'r') as f:
                result['ligand_content'] = f.read()

        return result

    except Exception as e:
        return _failed_preparation(str(e))

def parse_structure_info(pdb_content):
    """Parse structure information for display.

    Scans the ATOM and HETATM records of a PDB text in a single pass.

    Args:
        pdb_content: Full text of a PDB structure.

    Returns:
        A dict with:
            atom_count: number of ATOM records.
            chains: sorted list of non-blank chain IDs seen on ATOM records.
            residue_count: number of distinct residues on ATOM records,
                identified by chain, residue name and residue number
                (including the insertion code).
            water_molecules: number of distinct water residues on HETATM
                records, identified the same way.
            ions: number of HETATM records whose residue name is a known ion.
            ligands: sorted list of recognised ligand residue names found on
                HETATM records.
            hetatoms: number of HETATM records.
    """
    # Common water molecule names
    water_names = {'HOH', 'WAT', 'TIP3', 'TIP4', 'SPC', 'SPCE'}

    # Common ion names
    ion_names = {'NA', 'CL', 'K', 'MG', 'CA', 'ZN', 'FE', 'MN', 'CU', 'NI', 'CO', 'CD', 'HG', 'PB', 'SR', 'BA', 'RB', 'CS', 'LI', 'F', 'BR', 'I', 'PO4', 'PO3', 'H2PO4', 'HPO4', 'H3PO4', 'SO4'}

    # Common ligand indicators. Residue names are read from a three-column
    # field, so the four-letter entries can never match; they are kept so the
    # recognised set is unchanged.
    ligand_indicators = {'ATP', 'ADP', 'AMP', 'GDP', 'GTP', 'NAD', 'FAD', 'HEM', 'HEME', 'COA', 'SAM', 'PLP', 'THF', 'FMN', 'NADP', 'UDP', 'CDP', 'TDP'}

    atom_count = 0
    hetatoms = 0
    ions = 0
    chains = set()
    residues = set()
    waters = set()
    ligands = set()

    for line in pdb_content.splitlines():
        if line.startswith('ATOM'):
            atom_count += 1
            chain_id = line[21:22].strip()
            if chain_id:
                chains.add(chain_id)
            # The chain ID is part of the key: residue numbering restarts in
            # every chain, so name+number alone would merge distinct residues.
            residues.add((chain_id, line[17:20].strip(), line[22:27].strip()))
        elif line.startswith('HETATM'):
            hetatoms += 1
            res_name = line[17:20].strip()

            if res_name in water_names:
                waters.add((line[21:22].strip(), res_name, line[22:27].strip()))
            elif res_name in ion_names:
                ions += 1
            elif res_name in ligand_indicators:
                ligands.add(res_name)

    return {
        'atom_count': atom_count,
        # Sorted so the result is deterministic across runs.
        'chains': sorted(chains),
        'residue_count': len(residues),
        'water_molecules': len(waters),
        'ions': ions,
        'ligands': sorted(ligands),
        'hetatoms': hetatoms
    }

def test_structure_preparation():
    """Smoke-test prepare_structure on a small built-in PDB structure.

    Runs the full pipeline on an embedded three-residue peptide with one
    water, two ions and a GTP molecule, writing into the "output" directory,
    then prints the returned statistics. Nothing is asserted; the result is
    meant to be inspected by hand.
    """
    # Create a simple test PDB content
    test_pdb = """HEADER    TEST PROTEIN
ATOM      1  N   MET A   1      16.347  37.019  21.335  1.00 50.73           N  
ATOM      2  CA  MET A   1      15.737  37.120  20.027  1.00 45.30           C  
ATOM      3  C   MET A   1      15.955  35.698  19.546  1.00 41.78           C  
ATOM      4  O   MET A   1      16.847  35.123  20.123  1.00 40.15           O  
ATOM      5  CB  MET A   1      14.234  37.456  19.789  1.00 44.12           C  
ATOM      6  CG  MET A   1      13.456  36.123  19.234  1.00 43.45           C  
ATOM      7  SD  MET A   1      12.123  35.456  18.123  1.00 42.78           S  
ATOM      8  CE  MET A   1      11.456  34.123  17.456  1.00 42.11           C  
ATOM      9  N   ALA A   2      15.123  35.456  18.789  1.00 40.44           N  
ATOM     10  CA  ALA A   2      14.456  34.123  18.123  1.00 39.77           C  
ATOM     11  C   ALA A   2      13.123  33.456  17.456  1.00 39.10           C  
ATOM     12  O   ALA A   2      12.456  32.123  16.789  1.00 38.43           O  
ATOM     13  CB  ALA A   2      13.789  33.123  17.123  1.00 38.76           C  
ATOM     14  N   ALA A   3      12.789  32.456  16.123  1.00 38.09           N  
ATOM     15  CA  ALA A   3      11.456  31.789  15.456  1.00 37.42           C  
ATOM     16  C   ALA A   3      10.123  30.456  14.789  1.00 36.75           C  
ATOM     17  O   ALA A   3       9.456  29.123  14.123  1.00 36.08           O  
ATOM     18  CB  ALA A   3       9.789  29.456  13.456  1.00 35.41           C  
ATOM     19  OXT ALA A   3       8.123  28.789  13.456  1.00 35.74           O  
HETATM   20  O   HOH A   4      20.000  20.000  20.000  1.00 20.00           O  
HETATM   21  H1  HOH A   4      20.500  20.500  20.500  1.00 20.00           H  
HETATM   22  H2  HOH A   4      19.500  19.500  19.500  1.00 20.00           H  
HETATM   23  NA  NA  A   5      25.000  25.000  25.000  1.00 25.00          NA  
HETATM   24  CL  CL  A   6      30.000  30.000  30.000  1.00 30.00          CL  
HETATM    1  PG  GTP A 180      29.710  30.132  -5.989  1.00 52.48      A    P  
HETATM    2  O1G GTP A 180      29.197  28.937  -5.265  1.00 43.51      A    O  
HETATM    3  O2G GTP A 180      30.881  29.816  -6.827  1.00 63.11      A    O  
HETATM    4  O3G GTP A 180      30.013  31.278  -5.117  1.00 29.97      A    O  
HETATM    5  O3B GTP A 180      28.517  30.631  -6.995  1.00 23.23      A    O  
HETATM    6  PB  GTP A 180      27.017  31.171  -6.766  1.00 29.58      A    P  
HETATM    7  O1B GTP A 180      26.072  30.050  -6.958  1.00 17.62      A    O  
HETATM    8  O2B GTP A 180      26.960  31.913  -5.483  1.00 38.76      A    O  
HETATM    9  O3A GTP A 180      26.807  32.212  -7.961  1.00 13.12      A    O  
HETATM   10  PA  GTP A 180      26.277  33.726  -8.045  1.00 25.06      A    P  
HETATM   11  O1A GTP A 180      25.089  33.867  -7.187  1.00 44.06      A    O  
HETATM   12  O2A GTP A 180      27.427  34.635  -7.843  1.00 23.47      A    O  
HETATM   13  O5' GTP A 180      25.804  33.834  -9.555  1.00 42.05      A    O  
HETATM   14  C5' GTP A 180      26.615  33.475 -10.679  1.00 19.97      A    C  
HETATM   15  C4' GTP A 180      26.219  34.288 -11.894  1.00 14.90      A    C  
HETATM   16  O4' GTP A 180      24.826  34.017 -12.143  1.00 19.00      A    O  
HETATM   17  C3' GTP A 180      26.372  35.802 -11.724  1.00  4.96      A    C  
HETATM   18  O3' GTP A 180      26.880  36.347 -12.936  1.00 44.49      A    O  
HETATM   19  C2' GTP A 180      24.932  36.243 -11.481  1.00 17.12      A    C  
HETATM   20  O2' GTP A 180      24.719  37.581 -11.901  1.00 32.45      A    O  
HETATM   21  C1' GTP A 180      24.069  35.240 -12.240  1.00 16.17      A    C  
HETATM   22  N9  GTP A 180      22.724  35.005 -11.630  1.00 28.10      A    N  
HETATM   23  C8  GTP A 180      22.443  34.655 -10.325  1.00 27.05      A    C  
HETATM   24  N7  GTP A 180      21.168  34.483 -10.079  1.00 33.25      A    N  
HETATM   25  C5  GTP A 180      20.554  34.737 -11.307  1.00 26.23      A    C  
HETATM   26  C6  GTP A 180      19.183  34.712 -11.659  1.00 29.31      A    C  
HETATM   27  O6  GTP A 180      18.205  34.448 -10.957  1.00 40.80      A    O  
HETATM   28  N1  GTP A 180      19.000  35.036 -13.013  1.00 26.85      A    N  
HETATM   29  C2  GTP A 180      20.022  35.339 -13.903  1.00 28.70      A    C  
HETATM   30  N2  GTP A 180      19.627  35.619 -15.147  1.00 44.24      A    N  
HETATM   31  N3  GTP A 180      21.301  35.367 -13.569  1.00 21.67      A    N  
HETATM   32  C4  GTP A 180      21.489  35.054 -12.257  1.00 41.91      A    C  
END
"""
    
    # prepare_structure only reads selected_chains, selected_ligands, add_ace,
    # add_nme, preserve_ligands and separate_ligands; the other keys below are
    # not consulted by it. Because 'selected_ligands' is absent, it writes an
    # empty ligand file, so the GTP above is not carried into the result.
    options = {
        'remove_water': True,
        'remove_ions': True,
        'remove_hydrogens': True,
        'add_ace': True,
        'add_nme': True,
        'preserve_ligands': True,
        'separate_ligands': False,
        'fix_missing_atoms': False,
        'standardize_residues': False
    }
    
    print("Testing structure preparation...")
    result = prepare_structure(test_pdb, options, "output")
    
    # Every key printed here is present in both the success and the error
    # result dictionaries returned by prepare_structure.
    print("\n=== STATISTICS ===")
    print(f"Original atoms: {result['original_atoms']}")
    print(f"Prepared atoms: {result['prepared_atoms']}")
    print(f"Removed: {result['removed_components']}")
    print(f"Added: {result['added_capping']}")
    print(f"Ligands: {result['preserved_ligands']}")
    print(f"Ligand present: {result['ligand_present']}")
    
    print(f"\nTest completed! Check 'output' folder for results:")
    print("- 1_protein_no_hydrogens.pdb (protein without hydrogens)")
    print("- 2_protein_with_caps.pdb (protein with ACE/NME caps)")
    print("- 3_ligands_extracted.pdb (extracted ligands, if any)")
    print("- 4_ligands_corrected.pdb (corrected ligands, if any)")
    print("- tleap_ready.pdb (final structure ready for tleap)")

# Running this file as a script executes the built-in smoke test.
if __name__ == "__main__":
    test_structure_preparation()