|
|
|
|
|
""" |
|
|
AMBER Structure Preparation Script using MDAnalysis and PyMOL

Complete pipeline: extract protein, add caps, handle ligands
|
|
""" |
|
|
|
|
|
import os
import shlex
import shutil
import subprocess
import sys
|
|
|
|
|
def run_command(cmd, description="", timeout=120):
    """Run an external command and report whether it succeeded.

    Args:
        cmd: Either a shell command line (``str``), which is run through the
            shell exactly as before, or a sequence of arguments, which is
            executed directly without a shell so no quoting is required.
        description: Human-readable label used in the progress output.
        timeout: Seconds to wait before the command is abandoned.

    Returns:
        True when the command exits with status 0; False on a non-zero exit
        status, on timeout, or when the command cannot be started.
    """
    # A plain string keeps the historical shell semantics; an argv sequence
    # bypasses the shell entirely.
    use_shell = isinstance(cmd, str)
    try:
        print(f"Running: {description}")
        print(f"Command: {cmd}")
        result = subprocess.run(
            cmd,
            shell=use_shell,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        print(f"Return code: {result.returncode}")
        if result.stdout:
            print(f"STDOUT: {result.stdout}")
        if result.stderr:
            print(f"STDERR: {result.stderr}")
        if result.returncode != 0:
            print(f"Error: {result.stderr}")
            return False
        return True
    except subprocess.TimeoutExpired:
        print(f"Timeout: {description}")
        return False
    except Exception as e:
        # Boundary handler: callers rely on a boolean, never an exception
        # (e.g. the executable does not exist).
        print(f"Error running {description}: {str(e)}")
        return False
|
|
|
|
|
def extract_protein_only(pdb_content, output_file, selected_chains=None):
    """Write the hydrogen-free protein atoms of a structure to ``output_file``.

    The raw PDB text is first staged at ``output_file``; a helper Python
    process then loads it with MDAnalysis and overwrites the same path with
    the selected atoms.

    Args:
        pdb_content: PDB text of the input structure.
        output_file: Path that receives the filtered structure.
        selected_chains: Optional iterable of chain identifiers; when given,
            only those chains are kept.

    Returns:
        True when the helper process exits with status 0, otherwise False
        (the reason is printed).
    """
    with open(output_file, 'w') as f:
        f.write(pdb_content)

    selection = "protein"
    if selected_chains:
        # ``chain`` is not an MDAnalysis selection keyword; ``segid`` is what
        # the other MDAnalysis selections in this file use for chains.
        # NOTE(review): assumes the PDB reader maps chain IDs onto segids —
        # confirm for inputs that carry their own segid column.
        chain_filters = ' or '.join(f'segid {c}' for c in selected_chains)
        selection += f" and ({chain_filters})"
    selection += " and not name H* 1H* 2H* 3H*"

    # The path and selection travel as argv entries instead of being spliced
    # into a shell command line and Python source, so quotes or spaces in
    # either cannot break (or alter) the helper script.
    helper_script = "\n".join([
        "import sys",
        "import MDAnalysis as mda",
        "path, selection = sys.argv[1], sys.argv[2]",
        "mda.Universe(path).select_atoms(selection).write(path)",
    ])

    try:
        # sys.executable keeps the helper in the interpreter (and therefore
        # the environment) that is running this module.
        result = subprocess.run(
            [sys.executable, "-c", helper_script, output_file, selection],
            capture_output=True,
            text=True,
            timeout=60,
        )
        if result.returncode != 0:
            raise Exception(f"MDAnalysis error: {result.stderr}")
        return True
    except Exception as e:
        print(f"Error in extract_protein_only: {e}")
        return False
|
|
|
|
|
def _insert_ter_between_capped_chains(lines):
    """Return ``lines`` with a TER line before each ACE that follows an NME."""
    separated = []
    last_nme = 0  # 1-based number of the most recent line mentioning NME
    for number, line in enumerate(lines, start=1):
        if 'NME' in line:
            last_nme = number
        # An ACE cap appearing after an NME cap marks the start of a new chain.
        if 'ACE' in line and last_nme and number > last_nme:
            separated.append('TER\n')
            last_nme = 0
        separated.append(line if line.endswith('\n') else line + '\n')
    return separated


def add_capping_groups(input_file, output_file):
    """Add ACE and NME capping groups using add_caps.py.

    ``add_caps.py`` (resolved relative to the working directory) writes the
    capped structure to a temporary file next to ``output_file``; a ``TER``
    line is then inserted between consecutive capped chains and the result is
    written to ``output_file``.

    Args:
        input_file: Path of the uncapped protein PDB.
        output_file: Path that receives the capped protein PDB.

    Returns:
        True on success, False when capping or the TER pass fails.
    """
    # splitext only touches the real extension, so the temporary path can
    # never collide with output_file or rewrite a ".pdb" inside a directory
    # name.
    root, ext = os.path.splitext(output_file)
    temp_capped = f"{root}_temp{ext}"

    try:
        cmd = (
            f"python add_caps.py -i {shlex.quote(input_file)} "
            f"-o {shlex.quote(temp_capped)}"
        )
        if not run_command(cmd, f"Adding capping groups to {input_file}"):
            return False

        print(f"Running: Adding TER cards to {temp_capped}")
        try:
            with open(temp_capped, 'r') as src:
                capped_lines = src.readlines()
            with open(output_file, 'w') as dst:
                dst.writelines(_insert_ter_between_capped_chains(capped_lines))
        except OSError as e:
            print(f"Error: {e}")
            return False

        return True
    finally:
        # Remove the intermediate file on every exit path, not only success.
        if os.path.exists(temp_capped):
            os.remove(temp_capped)
|
|
|
|
|
def extract_selected_chains(pdb_content, output_file, selected_chains):
    """Extract the protein atoms of the selected chains using PyMOL.

    The PDB text is written to a temporary file, loaded by PyMOL in a helper
    Python process, and the selection
    ``(chain A or chain B ...) and polymer.protein`` is saved to
    ``output_file``.

    Args:
        pdb_content: PDB text of the input structure.
        output_file: Path that receives the extracted chains.
        selected_chains: Iterable of chain identifiers to keep.

    Returns:
        True when PyMOL exits with status 0, otherwise False (the reason is
        printed).
    """
    # Derive the temporary name from the real extension only, so it can never
    # equal output_file (which the cleanup below would then delete). The
    # fallback extension lets PyMOL recognise the file format.
    root, ext = os.path.splitext(output_file)
    temp_input = f"{root}_temp_input{ext or '.pdb'}"

    chain_filters = ' or '.join(f'chain {c}' for c in selected_chains)
    selection = f"({chain_filters}) and polymer.protein"

    # File names and the selection are passed as argv entries rather than
    # interpolated into shell and Python source.
    helper_script = "\n".join([
        "import sys",
        "source, target, selection = sys.argv[1:4]",
        "sys.argv = sys.argv[:1]  # keep our arguments away from PyMOL",
        "import pymol",
        "pymol.finish_launching(['pymol', '-c'])",
        "pymol.cmd.load(source)",
        "pymol.cmd.save(target, selection)",
        "pymol.cmd.quit()",
    ])

    try:
        with open(temp_input, 'w') as f:
            f.write(pdb_content)

        result = subprocess.run(
            [sys.executable, "-c", helper_script, temp_input, output_file, selection],
            capture_output=True,
            text=True,
            timeout=60,
        )
        if result.returncode != 0:
            print(f"PyMOL chain extraction error: {result.stderr}")
            return False
        return True
    except Exception as e:
        print(f"Error extracting selected chains: {e}")
        return False
    finally:
        # Clean up on every path, including timeouts.
        try:
            if os.path.exists(temp_input):
                os.remove(temp_input)
        except OSError as e:
            print(f"Error extracting selected chains: {e}")
|
|
|
|
|
def extract_selected_ligands(pdb_content, output_file, selected_ligands):
    """Extract the selected ligands using PyMOL.

    Each entry of ``selected_ligands`` is a dict; its ``resn`` value (residue
    name) is required and its ``chain`` value is optional. Entries without a
    residue name are ignored. When no usable entry remains, ``output_file``
    is written with a single newline and no PyMOL process is started.

    Args:
        pdb_content: PDB text of the input structure.
        output_file: Path that receives the extracted ligands.
        selected_ligands: Iterable of dicts with ``resn`` / ``chain`` keys.

    Returns:
        True on success (including the "nothing to extract" case), otherwise
        False (the reason is printed).
    """
    temp_input = None
    try:
        parts = []
        for lig in selected_ligands:
            # ``or ''`` tolerates keys that are present but set to None.
            resn = (lig.get('resn') or '').strip()
            chain = (lig.get('chain') or '').strip()
            if resn and chain:
                parts.append(f"(resn {resn} and chain {chain})")
            elif resn:
                parts.append(f"resn {resn}")

        if not parts:
            # Decided before any temporary file exists, so nothing is left
            # behind on this early exit.
            with open(output_file, 'w') as f:
                f.write('\n')
            return True

        selection = ' or '.join(parts)

        # Use the real extension only, so the temporary path cannot equal
        # output_file; the fallback lets PyMOL recognise the format.
        root, ext = os.path.splitext(output_file)
        temp_input = f"{root}_temp_input{ext or '.pdb'}"
        with open(temp_input, 'w') as f:
            f.write(pdb_content)

        # File names and the selection are passed as argv entries rather than
        # interpolated into shell and Python source.
        helper_script = "\n".join([
            "import sys",
            "source, target, selection = sys.argv[1:4]",
            "sys.argv = sys.argv[:1]  # keep our arguments away from PyMOL",
            "import pymol",
            "pymol.finish_launching(['pymol', '-c'])",
            "pymol.cmd.load(source)",
            "pymol.cmd.save(target, selection)",
            "pymol.cmd.quit()",
        ])
        result = subprocess.run(
            [sys.executable, "-c", helper_script, temp_input, output_file, selection],
            capture_output=True,
            text=True,
            timeout=60,
        )
        if result.returncode != 0:
            print(f"PyMOL ligand extraction error: {result.stderr}")
            return False
        return True
    except Exception as e:
        print(f"Error extracting selected ligands: {e}")
        return False
    finally:
        # Clean up on every path, including timeouts.
        try:
            if temp_input and os.path.exists(temp_input):
                os.remove(temp_input)
        except OSError as e:
            print(f"Error extracting selected ligands: {e}")
|
|
|
|
|
def extract_ligands(pdb_content, output_file, ligand_residue_name=None, selected_ligands=None):
    """Extract ligands using MDAnalysis.

    The PDB text is staged at ``output_file`` and then overwritten with the
    ligand atoms. Three modes, in priority order:

    1. ``selected_ligands`` (list of dicts; ``resn`` and optional ``chain``
       are read): only those ligands are kept. If no entry has a residue
       name, the output is a single newline.
    2. ``ligand_residue_name``: every atom with that residue name is kept.
    3. Neither: every HETATM residue name that is not a known water/ion name
       is kept; if none is found, the output is a single newline.

    Whenever ``ligand_residue_name`` is given, ATOM records in the result are
    converted to HETATM afterwards.

    Returns:
        True on success, otherwise False (the reason is printed).
    """
    # Residue names treated as solvent/ions by the auto-detection mode.
    non_ligand_resnames = [
        'HOH', 'WAT', 'TIP3', 'TIP4', 'SPC', 'SPCE', 'NA', 'CL', 'K', 'MG',
        'CA', 'ZN', 'FE', 'MN', 'CU', 'NI', 'CO', 'CD', 'HG', 'PB', 'SR',
        'BA', 'RB', 'CS', 'LI', 'F', 'BR', 'I', 'PO4', 'PO3', 'H2PO4',
        'HPO4', 'H3PO4', 'SO4',
    ]

    # Helper programs run in a child interpreter. Paths, selections and the
    # exclusion list are passed as argv entries instead of being interpolated
    # into shell and Python source.
    select_script = "\n".join([
        "import sys",
        "import MDAnalysis as mda",
        "path, selection = sys.argv[1], sys.argv[2]",
        "mda.Universe(path).select_atoms(selection).write(path)",
    ])
    autodetect_script = "\n".join([
        "import sys",
        "import MDAnalysis as mda",
        "path = sys.argv[1]",
        "excluded = set(sys.argv[2:])",
        "u = mda.Universe(path)",
        "names = {a.resname for a in u.atoms if a.record_type == 'HETATM'}",
        "names -= excluded",
        "if names:",
        "    selection = ' or '.join('resname ' + n for n in sorted(names))",
        "    u.select_atoms(selection).write(path)",
        "else:",
        "    with open(path, 'w') as fh:",
        "        fh.write('\\n')",
    ])

    with open(output_file, 'w') as f:
        f.write(pdb_content)

    try:
        helper_args = None  # None means "nothing to select"
        if selected_ligands:
            parts = []
            for lig in selected_ligands:
                # ``or ''`` tolerates keys that are present but set to None.
                resn = (lig.get('resn') or '').strip()
                chain = (lig.get('chain') or '').strip()
                if resn and chain:
                    parts.append(f"(resname {resn} and segid {chain})")
                elif resn:
                    parts.append(f"resname {resn}")
            if parts:
                helper_args = [select_script, output_file, ' or '.join(parts)]
        elif ligand_residue_name:
            helper_args = [select_script, output_file, f"resname {ligand_residue_name}"]
        else:
            helper_args = [autodetect_script, output_file, *non_ligand_resnames]

        if helper_args is None:
            # No usable selection: produce the empty-ligand marker directly
            # instead of spawning an interpreter to write one newline.
            with open(output_file, 'w') as f:
                f.write('\n')
        else:
            result = subprocess.run(
                [sys.executable, "-c", *helper_args],
                capture_output=True,
                text=True,
                timeout=60,
            )
            if result.returncode != 0:
                raise Exception(f"MDAnalysis error: {result.stderr}")

        if ligand_residue_name:
            convert_atom_to_hetatm(output_file)

        return True
    except Exception as e:
        print(f"Error in extract_ligands: {e}")
        return False
|
|
|
|
|
def convert_atom_to_hetatm(pdb_file):
    """Convert ATOM records to HETATM in a PDB file, rewriting it in place.

    Only the six-column record name is replaced; everything after it is kept
    unchanged. Lines that do not start with ``ATOM`` are copied verbatim.

    Args:
        pdb_file: Path of the PDB file to rewrite.

    Returns:
        True on success, False if the file cannot be read or written (the
        reason is printed).
    """
    try:
        with open(pdb_file, 'r') as f:
            lines = f.readlines()

        converted_lines = []
        for line in lines:
            if line.startswith('ATOM'):
                # Split off the line terminator first so that a record
                # shorter than six columns keeps its newline instead of being
                # merged with the following line.
                record = line.rstrip('\r\n')
                terminator = line[len(record):]
                converted_lines.append('HETATM' + record[6:] + terminator)
            else:
                converted_lines.append(line)

        with open(pdb_file, 'w') as f:
            f.writelines(converted_lines)

        print(f"Converted ATOM records to HETATM in {pdb_file}")
        return True
    except Exception as e:
        print(f"Error converting ATOM to HETATM: {e}")
        return False
|
|
|
|
|
def correct_ligand_with_pymol(ligand_file, corrected_file):
    """Add hydrogens to a ligand with PyMOL and save the result.

    Runs ``pymol -cq <ligand> -d "h_add; save <corrected>; quit"`` through
    ``run_command``.

    Args:
        ligand_file: Path of the extracted ligand PDB.
        corrected_file: Path that receives the hydrogen-completed ligand.

    Returns:
        False when the ligand file is missing or holds nothing but
        whitespace; otherwise the success status reported by
        ``run_command``.
    """
    ligand_path = os.path.abspath(ligand_file)
    corrected_path = os.path.abspath(corrected_file)

    # Elsewhere in this file "no ligand" is written as a file holding a lone
    # newline, so a size check alone is not enough to detect an empty ligand.
    has_content = False
    if os.path.isfile(ligand_path) and os.path.getsize(ligand_path) > 0:
        with open(ligand_path, 'r') as f:
            has_content = bool(f.read().strip())
    if not has_content:
        print("Ligand file missing or empty:", ligand_path)
        return False

    # Quote both the input path and the PyMOL command string so that spaces
    # or shell metacharacters in the paths cannot split or alter the command.
    pymol_commands = f"h_add; save {corrected_path}; quit"
    cmd = f"pymol -cq {shlex.quote(ligand_path)} -d {shlex.quote(pymol_commands)}"
    return run_command(cmd, "Correcting ligand with PyMOL")
|
|
|
|
|
def remove_connect_records(pdb_file):
    """Strip every CONECT record from a PDB file, rewriting it in place.

    Args:
        pdb_file: Path of the PDB file to filter.

    Returns:
        True once the file has been rewritten, False if reading or writing
        fails (the reason is printed).
    """
    try:
        kept = []
        with open(pdb_file, 'r') as source:
            for record in source.readlines():
                if record.startswith('CONECT'):
                    continue
                kept.append(record)

        with open(pdb_file, 'w') as target:
            target.writelines(kept)
    except Exception as e:
        print(f"Error removing CONNECT records: {e}")
        return False

    print(f"Removed CONNECT records from {pdb_file}")
    return True
|
|
|
|
|
def _format_ter_record(last_atom_line):
    """Build a column-formatted TER record from the last ATOM line seen."""
    if not last_atom_line:
        return 'TER\n'
    res_name = last_atom_line[17:20].strip()
    chain_id = last_atom_line[21:22].strip() or ' '
    res_num = last_atom_line[22:26].strip()
    try:
        # The TER serial is one more than the serial of the last atom.
        next_serial = int(last_atom_line[6:11]) + 1
    except ValueError:
        next_serial = None
    # Leave the field blank when the serial is unparsable or would overflow
    # its five columns.
    if next_serial is not None and next_serial <= 99999:
        serial = f"{next_serial:>5}"
    else:
        serial = ' ' * 5
    # Columns: 1-6 "TER", 7-11 serial, 18-20 residue name, 22 chain,
    # 23-26 residue number.
    return f"TER   {serial}      {res_name:>3} {chain_id}{res_num:>4}\n"


def merge_protein_and_ligand(protein_file, ligand_file, output_file):
    """Merge capped protein and corrected ligand with proper PDB formatting.

    Protein lines are copied in order, with every ``END`` line replaced by a
    ``TER`` record. The protein section is always closed with exactly one
    ``TER`` before the ligand, whether or not the protein file contained
    ``END`` or already ended in ``TER``. Only ``ATOM``/``HETATM`` lines are
    taken from the ligand file, and a single ``END`` closes the output.

    Args:
        protein_file: Path of the (capped) protein PDB.
        ligand_file: Path of the corrected ligand PDB.
        output_file: Path that receives the merged PDB.

    Returns:
        True on success, False if any file cannot be read or written (the
        reason is printed).
    """
    try:
        with open(protein_file, 'r') as f:
            protein_lines = f.readlines()
        with open(ligand_file, 'r') as f:
            ligand_lines = f.readlines()

        protein_processed = []
        last_atom_line = None
        for line in protein_lines:
            if line.strip() == 'END':
                # Replace END with TER unless the chain is already closed.
                if not (protein_processed and protein_processed[-1].startswith('TER')):
                    protein_processed.append(_format_ter_record(last_atom_line))
                continue
            # Guarantee a terminator so the next record never shares a line.
            protein_processed.append(line if line.endswith('\n') else line + '\n')
            if line.startswith('ATOM'):
                last_atom_line = line

        # A protein file without END still needs a TER before the ligand.
        if protein_processed and not protein_processed[-1].startswith('TER'):
            protein_processed.append(_format_ter_record(last_atom_line))

        ligand_processed = [
            line if line.endswith('\n') else line + '\n'
            for line in ligand_lines
            if line.startswith(('ATOM', 'HETATM'))
        ]

        merged_content = ''.join(protein_processed) + ''.join(ligand_processed) + 'END\n'
        with open(output_file, 'w') as f:
            f.write(merged_content)

        return True
    except Exception as e:
        print(f"Error merging files: {str(e)}")
        return False
|
|
|
|
|
# Residue names counted as removed water / ions in the statistics.
_PREP_WATER_RESNAMES = frozenset({'HOH', 'WAT', 'TIP3', 'TIP4', 'TIP5', 'SPC', 'SPCE'})
# SO4 is included so this list agrees with the ion lists used by the other
# functions in this file.
_PREP_ION_RESNAMES = frozenset({
    'NA', 'CL', 'K', 'MG', 'CA', 'ZN', 'FE', 'MN', 'CU', 'NI', 'CO', 'CD',
    'HG', 'PB', 'SR', 'BA', 'RB', 'CS', 'LI', 'F', 'BR', 'I', 'PO4', 'PO3',
    'H2PO4', 'HPO4', 'H3PO4', 'SO4',
})


def _failed_preparation(message):
    """Return the result dict used to report a failed preparation."""
    return {
        'error': message,
        'prepared_structure': '',
        'original_atoms': 0,
        'prepared_atoms': 0,
        'removed_components': {},
        'added_capping': {},
        'preserved_ligands': 0,
        'ligand_present': False
    }


def _hetatm_residue_names(pdb_path):
    """Return the set of residue names found on HETATM lines of a PDB file."""
    with open(pdb_path, 'r') as f:
        return {line[17:20].strip() for line in f if line.startswith('HETATM')}


def _count_capping_groups(pdb_text):
    """Return ``(ace, nme)``: the number of distinct ACE and NME cap residues."""
    caps = {'ACE': set(), 'NME': set()}
    for line in pdb_text.split('\n'):
        if not line.startswith('ATOM'):
            continue
        res_name = line[17:20].strip()
        if res_name in caps:
            # Key on chain as well as residue number so caps that share a
            # residue number in different chains are counted separately.
            caps[res_name].add((line[21:22], line[22:26].strip()))
    return len(caps['ACE']), len(caps['NME'])


def prepare_structure(pdb_content, options, output_dir="output"):
    """Main function to prepare structure for AMBER simulation.

    Runs the pipeline and leaves every intermediate file in ``output_dir``:
    save input, optional chain selection, optional ligand selection, protein
    extraction without hydrogens, optional ACE/NME capping, optional ligand
    correction and merge, CONECT removal, then statistics.

    Args:
        pdb_content: PDB text of the input structure.
        options: Dict of settings. Keys read here: ``selected_chains``
            (list, default ``[]``), ``selected_ligands`` (list of dicts,
            default ``[]``), ``add_ace`` / ``add_nme`` (default True),
            ``preserve_ligands`` (default True), ``separate_ligands``
            (default False).
        output_dir: Directory for all generated files; created if missing.

    Returns:
        A dict with ``prepared_structure``, ``original_atoms``,
        ``prepared_atoms``, ``removed_components``, ``added_capping``,
        ``preserved_ligands``, ``ligand_present`` and ``separate_ligands``
        (plus ``ligand_content`` when ligands are present and
        ``separate_ligands`` is set). On failure the dict carries an
        ``error`` message and empty/zero values instead; no exception is
        raised.
    """
    try:
        os.makedirs(output_dir, exist_ok=True)

        input_file = os.path.join(output_dir, "0_original_input.pdb")
        user_chain_file = os.path.join(output_dir, "0_user_chain_selected.pdb")
        protein_file = os.path.join(output_dir, "1_protein_no_hydrogens.pdb")
        protein_capped_file = os.path.join(output_dir, "2_protein_with_caps.pdb")
        ligand_file = os.path.join(output_dir, "3_ligands_extracted.pdb")
        ligand_corrected_file = os.path.join(output_dir, "4_ligands_corrected.pdb")
        tleap_ready_file = os.path.join(output_dir, "tleap_ready.pdb")

        print("Step 0: Saving original input...")
        with open(input_file, 'w') as f:
            f.write(pdb_content)

        selected_chains = options.get('selected_chains', [])
        selected_ligands = options.get('selected_ligands', [])

        if selected_chains:
            print(f"Step 0.5a: Extracting selected chains: {', '.join(selected_chains)}")
            if not extract_selected_chains(pdb_content, user_chain_file, selected_chains):
                return _failed_preparation("Failed to extract selected chains")
        else:
            print("Step 0.5a: No chains selected, using original structure")
            shutil.copy2(input_file, user_chain_file)

        if selected_ligands:
            ligand_names = [f"{l.get('resn', '')}-{l.get('chain', '')}" for l in selected_ligands]
            print(f"Step 0.5b: Extracting selected ligands: {ligand_names}")
            if not extract_selected_ligands(pdb_content, ligand_file, selected_ligands):
                return _failed_preparation("Failed to extract selected ligands")
        else:
            print("Step 0.5b: No ligands selected, creating empty ligand file")
            with open(ligand_file, 'w') as f:
                f.write('\n')

        print("Step 1: Extracting protein without hydrogens from selected chains...")
        with open(user_chain_file, 'r') as f:
            chain_content = f.read()
        if not extract_protein_only(chain_content, protein_file):
            return _failed_preparation("Failed to extract protein")

        add_ace = options.get('add_ace', True)
        add_nme = options.get('add_nme', True)
        capping_requested = bool(add_ace or add_nme)

        if capping_requested:
            print("Step 2: Adding ACE and NME capping groups...")
            if not add_capping_groups(protein_file, protein_capped_file):
                return _failed_preparation("Failed to add capping groups")
        else:
            print("Step 2: Skipping capping groups (add_ace=False, add_nme=False)")
            print("Using protein without capping - copying to capped file")
            shutil.copy2(protein_file, protein_capped_file)

        preserve_ligands = options.get('preserve_ligands', True)
        ligand_present = False

        if preserve_ligands:
            print("Step 3: Processing pre-extracted ligands...")
            with open(ligand_file, 'r') as f:
                ligand_content = f.read().strip()

            if len(ligand_content) > 1:
                ligand_present = True
                print("Found pre-extracted ligands")

                if not correct_ligand_with_pymol(ligand_file, ligand_corrected_file):
                    print("Error: Failed to process ligand")
                    return _failed_preparation('Failed to process ligand with PyMOL')

                if not merge_protein_and_ligand(protein_capped_file, ligand_corrected_file, tleap_ready_file):
                    return _failed_preparation("Failed to merge protein and ligand")
            else:
                print("No ligands found in pre-extracted file, using protein only")
                shutil.copy2(protein_capped_file, tleap_ready_file)
        else:
            print("Step 3: Skipping ligand processing (preserve_ligands=False)")
            print("Using protein only - copying capped protein to tleap_ready")
            shutil.copy2(protein_capped_file, tleap_ready_file)

        print("Removing CONNECT records from tleap_ready.pdb...")
        remove_connect_records(tleap_ready_file)

        with open(tleap_ready_file, 'r') as f:
            prepared_content = f.read()

        # ---- statistics -------------------------------------------------
        source_lines = pdb_content.split('\n')
        original_atoms = sum(1 for line in source_lines if line.startswith('ATOM'))
        prepared_atoms = sum(
            1 for line in prepared_content.split('\n') if line.startswith('ATOM')
        )

        water_count = sum(
            1 for line in source_lines
            if line.startswith('HETATM') and line[17:20].strip() in _PREP_WATER_RESNAMES
        )
        ion_count = sum(
            1 for line in source_lines
            if line.startswith('HETATM') and line[17:20].strip() in _PREP_ION_RESNAMES
        )
        hydrogen_count = sum(
            1 for line in source_lines
            if line.startswith('ATOM') and line[76:78].strip() == 'H'
        )

        extracted_ligand_names = _hetatm_residue_names(ligand_file)

        # Ligands count as removed when they were extracted but the caller
        # asked not to preserve them. This must not depend on
        # ligand_present, which is only ever set on the preserve path.
        ligand_count = 0 if preserve_ligands else len(extracted_ligand_names)

        removed_components = {
            'water': water_count,
            'ions': ion_count,
            'hydrogens': hydrogen_count,
            'ligands': ligand_count
        }

        if capping_requested:
            ace_groups, nme_groups = _count_capping_groups(prepared_content)
        else:
            ace_groups, nme_groups = 0, 0
        added_capping = {
            'ace_groups': ace_groups,
            'nme_groups': nme_groups
        }

        preserved_ligands = 0
        if ligand_present and preserve_ligands:
            preserved_ligands = len(extracted_ligand_names)

        result = {
            'prepared_structure': prepared_content,
            'original_atoms': original_atoms,
            'prepared_atoms': prepared_atoms,
            'removed_components': removed_components,
            'added_capping': added_capping,
            'preserved_ligands': preserved_ligands,
            'ligand_present': ligand_present,
            'separate_ligands': options.get('separate_ligands', False)
        }

        if ligand_present and options.get('separate_ligands', False):
            with open(ligand_corrected_file, 'r') as f:
                result['ligand_content'] = f.read()

        return result

    except Exception as e:
        # Boundary handler: callers receive an error dict, never an exception.
        return _failed_preparation(str(e))
|
|
|
|
|
def parse_structure_info(pdb_content):
    """Parse structure information for display.

    Scans the ATOM and HETATM lines of a PDB text in a single pass.

    Args:
        pdb_content: PDB text.

    Returns:
        A dict with:
            ``atom_count``: number of ATOM lines.
            ``chains``: sorted list of chain IDs seen on ATOM lines.
            ``residue_count``: number of distinct ATOM residues, identified
                by chain, residue number (with insertion code) and name.
            ``water_molecules``: number of distinct water residues.
            ``ions``: number of distinct ion residues.
            ``ligands``: sorted list of HETATM residue names that appear in
                the known-cofactor list.
            ``hetatoms``: number of HETATM lines.
    """
    water_names = {'HOH', 'WAT', 'TIP3', 'TIP4', 'SPC', 'SPCE'}
    ion_names = {
        'NA', 'CL', 'K', 'MG', 'CA', 'ZN', 'FE', 'MN', 'CU', 'NI', 'CO',
        'CD', 'HG', 'PB', 'SR', 'BA', 'RB', 'CS', 'LI', 'F', 'BR', 'I',
        'PO4', 'PO3', 'H2PO4', 'HPO4', 'H3PO4', 'SO4',
    }
    ligand_indicators = {
        'ATP', 'ADP', 'AMP', 'GDP', 'GTP', 'NAD', 'FAD', 'HEM', 'HEME',
        'COA', 'SAM', 'PLP', 'THF', 'FMN', 'NADP', 'UDP', 'CDP', 'TDP',
    }

    atom_count = 0
    hetatoms = 0
    chains = set()
    residues = set()
    water_residues = set()
    ion_residues = set()
    ligands = set()

    for line in pdb_content.split('\n'):
        is_atom = line.startswith('ATOM')
        if not is_atom and not line.startswith('HETATM'):
            continue

        # Columns 18-21: the standard three-letter name plus the normally
        # blank column 21 that some programs use for four-letter names such
        # as TIP3 or HEME.
        res_name = line[17:21].strip()
        chain_id = line[21:22].strip()
        # Chain + residue number + insertion code identifies a residue;
        # without the chain, equal numbering in two chains would collapse.
        residue_key = (chain_id, line[22:27].strip(), res_name)

        if is_atom:
            atom_count += 1
            if chain_id:
                chains.add(chain_id)
            residues.add(residue_key)
            continue

        hetatoms += 1
        if res_name in water_names:
            water_residues.add(residue_key)
        elif res_name in ion_names:
            # Counted per residue so a polyatomic ion (PO4, SO4) is one ion.
            ion_residues.add(residue_key)
        elif res_name in ligand_indicators:
            ligands.add(res_name)

    return {
        'atom_count': atom_count,
        'chains': sorted(chains),
        'residue_count': len(residues),
        'water_molecules': len(water_residues),
        'ions': len(ion_residues),
        'ligands': sorted(ligands),
        'hetatoms': hetatoms
    }
|
|
|
|
|
def test_structure_preparation():
    """Test function to verify structure preparation works correctly.

    Builds a small PDB (a three-residue peptide, one water, two ions and a
    GTP ligand), runs ``prepare_structure`` on it with output in the
    ``output`` directory, and prints the resulting statistics.
    """

    def pdb_atom(record, serial, name, resname, resseq, x, y, z, bfactor, element, segid=""):
        """Format one fixed-column PDB coordinate record (chain A, occupancy 1.00)."""
        # Names of one-letter elements start in column 14; two-letter
        # elements (NA, CL) and four-character names start in column 13.
        atom_field = name if len(name) == 4 or len(element) == 2 else f" {name}"
        return (
            f"{record:<6}{serial:>5} {atom_field:<4} {resname:>3} A{resseq:>4}    "
            f"{x:8.3f}{y:8.3f}{z:8.3f}{1.0:6.2f}{bfactor:6.2f}      "
            f"{segid:<4}{element:>2}"
        )

    # (serial, atom name, residue name, residue number, x, y, z, B-factor, element)
    protein_atoms = [
        (1, "N", "MET", 1, 16.347, 37.019, 21.335, 50.73, "N"),
        (2, "CA", "MET", 1, 15.737, 37.120, 20.027, 45.30, "C"),
        (3, "C", "MET", 1, 15.955, 35.698, 19.546, 41.78, "C"),
        (4, "O", "MET", 1, 16.847, 35.123, 20.123, 40.15, "O"),
        (5, "CB", "MET", 1, 14.234, 37.456, 19.789, 44.12, "C"),
        (6, "CG", "MET", 1, 13.456, 36.123, 19.234, 43.45, "C"),
        (7, "SD", "MET", 1, 12.123, 35.456, 18.123, 42.78, "S"),
        (8, "CE", "MET", 1, 11.456, 34.123, 17.456, 42.11, "C"),
        (9, "N", "ALA", 2, 15.123, 35.456, 18.789, 40.44, "N"),
        (10, "CA", "ALA", 2, 14.456, 34.123, 18.123, 39.77, "C"),
        (11, "C", "ALA", 2, 13.123, 33.456, 17.456, 39.10, "C"),
        (12, "O", "ALA", 2, 12.456, 32.123, 16.789, 38.43, "O"),
        (13, "CB", "ALA", 2, 13.789, 33.123, 17.123, 38.76, "C"),
        (14, "N", "ALA", 3, 12.789, 32.456, 16.123, 38.09, "N"),
        (15, "CA", "ALA", 3, 11.456, 31.789, 15.456, 37.42, "C"),
        (16, "C", "ALA", 3, 10.123, 30.456, 14.789, 36.75, "C"),
        (17, "O", "ALA", 3, 9.456, 29.123, 14.123, 36.08, "O"),
        (18, "CB", "ALA", 3, 9.789, 29.456, 13.456, 35.41, "C"),
        (19, "OXT", "ALA", 3, 8.123, 28.789, 13.456, 35.74, "O"),
    ]
    solvent_and_ions = [
        (20, "O", "HOH", 4, 20.000, 20.000, 20.000, 20.00, "O"),
        (21, "H1", "HOH", 4, 20.500, 20.500, 20.500, 20.00, "H"),
        (22, "H2", "HOH", 4, 19.500, 19.500, 19.500, 20.00, "H"),
        (23, "NA", "NA", 5, 25.000, 25.000, 25.000, 25.00, "NA"),
        (24, "CL", "CL", 6, 30.000, 30.000, 30.000, 30.00, "CL"),
    ]
    # (serial, atom name, x, y, z, B-factor, element) for GTP A 180, segid "A"
    gtp_atoms = [
        (1, "PG", 29.710, 30.132, -5.989, 52.48, "P"),
        (2, "O1G", 29.197, 28.937, -5.265, 43.51, "O"),
        (3, "O2G", 30.881, 29.816, -6.827, 63.11, "O"),
        (4, "O3G", 30.013, 31.278, -5.117, 29.97, "O"),
        (5, "O3B", 28.517, 30.631, -6.995, 23.23, "O"),
        (6, "PB", 27.017, 31.171, -6.766, 29.58, "P"),
        (7, "O1B", 26.072, 30.050, -6.958, 17.62, "O"),
        (8, "O2B", 26.960, 31.913, -5.483, 38.76, "O"),
        (9, "O3A", 26.807, 32.212, -7.961, 13.12, "O"),
        (10, "PA", 26.277, 33.726, -8.045, 25.06, "P"),
        (11, "O1A", 25.089, 33.867, -7.187, 44.06, "O"),
        (12, "O2A", 27.427, 34.635, -7.843, 23.47, "O"),
        (13, "O5'", 25.804, 33.834, -9.555, 42.05, "O"),
        (14, "C5'", 26.615, 33.475, -10.679, 19.97, "C"),
        (15, "C4'", 26.219, 34.288, -11.894, 14.90, "C"),
        (16, "O4'", 24.826, 34.017, -12.143, 19.00, "O"),
        (17, "C3'", 26.372, 35.802, -11.724, 4.96, "C"),
        (18, "O3'", 26.880, 36.347, -12.936, 44.49, "O"),
        (19, "C2'", 24.932, 36.243, -11.481, 17.12, "C"),
        (20, "O2'", 24.719, 37.581, -11.901, 32.45, "O"),
        (21, "C1'", 24.069, 35.240, -12.240, 16.17, "C"),
        (22, "N9", 22.724, 35.005, -11.630, 28.10, "N"),
        (23, "C8", 22.443, 34.655, -10.325, 27.05, "C"),
        (24, "N7", 21.168, 34.483, -10.079, 33.25, "N"),
        (25, "C5", 20.554, 34.737, -11.307, 26.23, "C"),
        (26, "C6", 19.183, 34.712, -11.659, 29.31, "C"),
        (27, "O6", 18.205, 34.448, -10.957, 40.80, "O"),
        (28, "N1", 19.000, 35.036, -13.013, 26.85, "N"),
        (29, "C2", 20.022, 35.339, -13.903, 28.70, "C"),
        (30, "N2", 19.627, 35.619, -15.147, 44.24, "N"),
        (31, "N3", 21.301, 35.367, -13.569, 21.67, "N"),
        (32, "C4", 21.489, 35.054, -12.257, 41.91, "C"),
    ]

    # Generating the records guarantees the fixed-column layout that the
    # column slices used throughout this module depend on.
    pdb_lines = ["HEADER    TEST PROTEIN"]
    pdb_lines += [pdb_atom("ATOM", *atom) for atom in protein_atoms]
    pdb_lines += [pdb_atom("HETATM", *atom) for atom in solvent_and_ions]
    pdb_lines += [
        pdb_atom("HETATM", serial, name, "GTP", 180, x, y, z, bfactor, element, segid="A")
        for serial, name, x, y, z, bfactor, element in gtp_atoms
    ]
    pdb_lines.append("END")
    test_pdb = "\n".join(pdb_lines) + "\n"

    options = {
        'remove_water': True,
        'remove_ions': True,
        'remove_hydrogens': True,
        'add_ace': True,
        'add_nme': True,
        'preserve_ligands': True,
        'separate_ligands': False,
        'fix_missing_atoms': False,
        'standardize_residues': False
    }

    print("Testing structure preparation...")
    result = prepare_structure(test_pdb, options, "output")

    # A failed run returns zeroed statistics; surface the reason instead of
    # letting the zeros pass for a real result.
    if result.get('error'):
        print(f"\nStructure preparation failed: {result['error']}")

    print("\n=== STATISTICS ===")
    print(f"Original atoms: {result['original_atoms']}")
    print(f"Prepared atoms: {result['prepared_atoms']}")
    print(f"Removed: {result['removed_components']}")
    print(f"Added: {result['added_capping']}")
    print(f"Ligands: {result['preserved_ligands']}")
    print(f"Ligand present: {result['ligand_present']}")

    print(f"\nTest completed! Check 'output' folder for results:")
    print("- 1_protein_no_hydrogens.pdb (protein without hydrogens)")
    print("- 2_protein_with_caps.pdb (protein with ACE/NME caps)")
    print("- 3_ligands_extracted.pdb (extracted ligands, if any)")
    print("- 4_ligands_corrected.pdb (corrected ligands, if any)")
    print("- tleap_ready.pdb (final structure ready for tleap)")
|
|
|
|
|
# Script entry point: run the built-in smoke test when executed directly.
if __name__ == "__main__":
    test_structure_preparation()