File size: 4,146 Bytes
ad9572d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 | """
Convert PXDesign CIF outputs to PDB format for evaluation pipeline.
PXDesign outputs .cif files with:
- Chain IDs like A0/B0 (multi-char, not PDB-compatible)
- Non-standard residue name 'xpb' for designed binder residues
This script converts them to PDB format with:
- Single-char chain IDs (A, B)
- Standard residue names ('xpb' binder residues are renamed to 'GLY')
Usage:
python code/scripts/pxdesign_guidance/convert_cif_to_pdb.py
"""
import os
import sys
from glob import glob
from Bio.PDB import MMCIFParser, PDBIO, Select
# Absolute path of the directory that contains this script.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Directory three levels above the script directory; main() joins its
# 'results/...' input folders onto this path.
_PROJECT_DIR = os.path.abspath(os.path.join(_SCRIPT_DIR, '../../..'))
class ChainRenamer(Select):
    """Accept-everything ``Select`` filter that carries a chain-ID mapping.

    The instance only stores ``chain_map``; every ``accept_*`` hook returns 1,
    so no chain, residue or atom is filtered out when this selector is used.
    """

    def __init__(self, chain_map):
        # Kept as an attribute only; none of the accept_* hooks reads it.
        self.chain_map = chain_map

    def accept_atom(self, atom):
        """Keep every atom."""
        return 1

    def accept_residue(self, residue):
        """Keep every residue."""
        return 1

    def accept_chain(self, chain):
        """Keep every chain."""
        return 1
def _assign_chain_ids(old_ids):
    """Return a dict mapping each ID in ``old_ids`` to a unique 1-char ID."""
    # Characters tried, in order, when the preferred character is taken.
    alphabet = (
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        'abcdefghijklmnopqrstuvwxyz'
        '0123456789'
    )
    chain_map = {}
    used_ids = set()
    for old_id in old_ids:
        # Prefer the first character of the original ID (A0 -> A, B0 -> B).
        new_id = old_id[0] if old_id else 'A'
        if new_id in used_ids:
            # Search forward from the preferred character and wrap around, so
            # the replacement stays alphanumeric instead of running past 'Z'
            # into punctuation. find() == -1 simply starts at the beginning.
            start = alphabet.find(new_id) + 1
            candidates = alphabet[start:] + alphabet[:start]
            new_id = next((c for c in candidates if c not in used_ids), None)
            if new_id is None:
                raise ValueError(
                    'More chains than available single-character chain IDs'
                )
        used_ids.add(new_id)
        chain_map[old_id] = new_id
    return chain_map


def _rename_chains(model, chain_map):
    """Apply ``chain_map`` to the chains of ``model`` without ID clashes."""
    chains = list(model.get_chains())
    pending = [
        (chain, chain_map[chain.id])
        for chain in chains
        if chain_map[chain.id] != chain.id
    ]
    # Assigning an ID that a sibling chain still holds is rejected by
    # Biopython (e.g. 'A0' -> 'A' while a not-yet-renamed 'A' exists), so
    # every chain that changes is first parked on a unique placeholder.
    reserved = {chain.id for chain in chains}
    for index, (chain, _new_id) in enumerate(pending):
        placeholder = f'__rename_{index}__'
        while placeholder in reserved:
            placeholder += '_'
        reserved.add(placeholder)
        chain.id = placeholder
    # The mapping is one-to-one, so the final IDs cannot clash any more.
    for chain, new_id in pending:
        chain.id = new_id


def convert_cif_to_pdb(cif_path, pdb_path):
    """Convert a single CIF file to PDB format.

    Chain IDs are shortened to one character (A0 -> A, B0 -> B, ...) and
    residues named 'xpb' are renamed to 'GLY' before the structure is written.

    Args:
        cif_path: Path of the mmCIF file to read.
        pdb_path: Path of the PDB file to write.

    Returns:
        True once the PDB file has been written.

    Raises:
        ValueError: If a model has more chains than there are
            single-character chain IDs to hand out.
        Exception: Errors from the mmCIF parser or the PDB writer propagate
            unchanged.
    """
    parser = MMCIFParser(QUIET=True)
    structure = parser.get_structure('s', cif_path)

    # The whole structure is written below, so every model is processed,
    # not only the first one.
    for model in structure:
        chain_map = _assign_chain_ids(
            [chain.id for chain in model.get_chains()]
        )
        _rename_chains(model, chain_map)

        # Rename 'xpb' residues to 'GLY' (backbone-only binder residues)
        for residue in model.get_residues():
            if residue.resname.strip().lower() == 'xpb':
                residue.resname = 'GLY'

    # Write PDB
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_path)
    return True
def convert_directory(src_dir, method_name):
    """Convert all sample CIF files in a directory tree to PDB.

    Every ``*.cif`` file below ``src_dir`` whose file name contains 'sample'
    (case-insensitive) is converted into ``<src_dir>/converted_pdbs``. Files
    located under a ``round_*`` directory get that directory name as a prefix.
    A file that fails to convert is reported and skipped.

    Args:
        src_dir: Root directory that is searched recursively.
        method_name: Name of the method being processed; accepted for the
            caller's convenience and not used by the conversion itself.

    Returns:
        The number of files that were converted successfully.
    """
    pattern = os.path.join(src_dir, '**/*.cif')
    cif_files = sorted(
        path for path in glob(pattern, recursive=True)
        if 'sample' in os.path.basename(path).lower()
    )
    if not cif_files:
        print(f" No CIF files found in {src_dir}")
        return 0

    # Create converted_pdbs directory
    converted_dir = os.path.join(src_dir, 'converted_pdbs')
    os.makedirs(converted_dir, exist_ok=True)

    n_converted = 0
    used_names = set()
    for cif_path in cif_files:
        rel_path = os.path.relpath(cif_path, src_dir)
        *dir_parts, filename = rel_path.split(os.sep)

        # Swap only the extension; a '.cif' elsewhere in the name is kept.
        stem = os.path.splitext(filename)[0]
        pdb_name = f"{stem}.pdb"

        # For TDS/SMC with round subdirs, include round info. Only directory
        # components count, never the file name itself.
        round_part = next(
            (part for part in dir_parts if part.startswith('round_')), None
        )
        if round_part is not None:
            pdb_name = f"{round_part}_{pdb_name}"

        # Two inputs from different sub-directories can map to the same
        # output name; fall back to the flattened relative path so the later
        # file does not silently overwrite the earlier one.
        if pdb_name in used_names:
            flat_stem = '_'.join([*dir_parts, stem])
            pdb_name = f"{flat_stem}.pdb"
            counter = 1
            while pdb_name in used_names:
                pdb_name = f"{flat_stem}_{counter}.pdb"
                counter += 1
        used_names.add(pdb_name)

        pdb_path = os.path.join(converted_dir, pdb_name)
        try:
            convert_cif_to_pdb(cif_path, pdb_path)
        except Exception as e:
            # Best-effort batch run: report the failure and keep going.
            print(f" Failed {cif_path}: {e}")
        else:
            n_converted += 1

    print(f" Converted {n_converted}/{len(cif_files)} CIF -> PDB in {converted_dir}")
    return n_converted
def main():
    """Convert the CIF outputs of each PXDesign method to PDB.

    Walks the per-method result directories under the project directory,
    converts whatever is found and prints a per-method and an overall count.
    """
    # Langevin outputs are already PDB (post-hoc refinement)
    sources = (
        ('pxdesign_guided', os.path.join(_PROJECT_DIR, 'results/pxdesign_guided')),
        ('pxdesign_tds', os.path.join(_PROJECT_DIR, 'results/pxdesign_tds')),
        ('pxdesign_smc', os.path.join(_PROJECT_DIR, 'results/pxdesign_smc')),
    )

    total = 0
    for name, src_dir in sources:
        print(f"\n{name}:")
        if not os.path.exists(src_dir):
            print(f" Directory not found: {src_dir}")
            continue
        total += convert_directory(src_dir, name)
    print(f"\nTotal converted: {total}")
# Run the batch conversion only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|