# AlloGen/code/scripts/pxdesign_guidance/convert_cif_to_pdb.py
# Committer: chq1155
# AlloGen public release: Q_theta scorer + PXDesign guidance + Colab demo
# Commit: ad9572d
"""
Convert PXDesign CIF outputs to PDB format for evaluation pipeline.
PXDesign outputs .cif files with:
- Chain IDs like A0/B0 (multi-char, not PDB-compatible)
- Non-standard residue name 'xpb' for designed binder residues
This script converts them to PDB format with:
- Single-char chain IDs (A, B)
- 'xpb' residue names rewritten to 'GLY' (backbone-only binder residues)
Usage:
python code/scripts/pxdesign_guidance/convert_cif_to_pdb.py
"""
import os
import sys
from glob import glob
from Bio.PDB import MMCIFParser, PDBIO, Select
# Absolute path of the directory that contains this script.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Directory three levels above the script directory; main() resolves its
# 'results/...' paths against it.
_PROJECT_DIR = os.path.abspath(os.path.join(_SCRIPT_DIR, '../../..'))
class ChainRenamer(Select):
    """Pass-through ``Select`` filter that carries a chain-ID mapping.

    Every ``accept_*`` hook returns 1, so nothing is filtered out when an
    instance is handed to a writer. The mapping is only stored on the
    instance; none of the hooks below consult it.
    """

    def __init__(self, chain_map):
        # Kept for callers that want to read the mapping back later.
        self.chain_map = chain_map

    def accept_chain(self, chain):
        """Keep every chain."""
        return 1

    def accept_residue(self, residue):
        """Keep every residue."""
        return 1

    def accept_atom(self, atom):
        """Keep every atom."""
        return 1
# Characters accepted as single-character PDB chain IDs, in fallback order.
_PDB_CHAIN_ID_POOL = (
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abcdefghijklmnopqrstuvwxyz'
    '0123456789'
)


def _assign_chain_ids(old_ids):
    """Map each original chain ID to a unique single-character ID.

    The first character of the original ID is used when it is still free
    (``A0 -> A``, ``B0 -> B``); an empty ID defaults to ``'A'``. On a clash
    the next free character after it in ``_PDB_CHAIN_ID_POOL`` is taken,
    wrapping around, so the result is always a letter or digit.

    Args:
        old_ids: Iterable of original chain IDs, in chain order.

    Returns:
        Dict mapping every original ID to its single-character replacement.

    Raises:
        ValueError: If a clash occurs and every pool character is taken.
    """
    chain_map = {}
    used_ids = set()
    pool_size = len(_PDB_CHAIN_ID_POOL)
    for old_id in old_ids:
        new_id = old_id[0] if old_id else 'A'
        if new_id in used_ids:
            # find() returns -1 for characters outside the pool, so the
            # search then simply starts at the beginning of the pool.
            start = _PDB_CHAIN_ID_POOL.find(new_id) + 1
            for offset in range(pool_size):
                candidate = _PDB_CHAIN_ID_POOL[(start + offset) % pool_size]
                if candidate not in used_ids:
                    new_id = candidate
                    break
            else:
                raise ValueError(
                    f"Cannot assign a unique single-character chain ID to "
                    f"{old_id!r}: all {pool_size} candidates are in use"
                )
        used_ids.add(new_id)
        chain_map[old_id] = new_id
    return chain_map


def convert_cif_to_pdb(cif_path, pdb_path):
    """Convert a single CIF file to PDB format.

    Chains of the first model get single-character IDs and residues named
    'xpb' (case-insensitive) are renamed to 'GLY' before the structure is
    written with ``PDBIO``.

    Args:
        cif_path: Path of the mmCIF file to read.
        pdb_path: Path of the PDB file to write.

    Returns:
        True once the file has been written.

    Raises:
        ValueError: If no unique single-character chain ID can be assigned.
        Exception: Parser and writer errors are propagated unchanged.
    """
    parser = MMCIFParser(QUIET=True)
    structure = parser.get_structure('s', cif_path)
    model = structure[0]

    # Build chain ID mapping (A0->A, B0->B, etc.)
    chains = list(model.get_chains())
    chain_map = _assign_chain_ids(chain.id for chain in chains)

    if any(old_id != new_id for old_id, new_id in chain_map.items()):
        # Renaming in place fails when the target ID still belongs to a
        # sibling that has not been renamed yet (e.g. 'AB' -> 'A' while a
        # chain 'A' exists). Detach everything, rename, then re-attach in
        # the original order.
        for chain in chains:
            model.detach_child(chain.id)
        for chain in chains:
            chain.id = chain_map[chain.id]
            model.add(chain)

    # Rename 'xpb' residues to 'GLY' (backbone-only binder residues)
    for chain in chains:
        for residue in chain.get_residues():
            if residue.resname.strip().lower() == 'xpb':
                residue.resname = 'GLY'

    # Write PDB
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_path)
    return True
def convert_directory(src_dir, method_name):
    """Convert all CIF files in a directory tree to PDB.

    Searches ``src_dir`` recursively for ``*.cif`` files whose file name
    contains 'sample' (case-insensitive) and writes each one as a PDB file
    into ``<src_dir>/converted_pdbs``. Files below a directory whose name
    starts with 'round_' get that directory name as a prefix. When two
    inputs would produce the same output name, the later one is named after
    its flattened relative path instead of overwriting the earlier one.

    Args:
        src_dir: Root directory to search.
        method_name: Label of the method being converted; accepted for
            interface compatibility and not used here.

    Returns:
        Number of files converted successfully.
    """
    pattern = os.path.join(src_dir, '**/*.cif')
    cif_files = sorted(
        path for path in glob(pattern, recursive=True)
        if 'sample' in os.path.basename(path).lower()
    )
    if not cif_files:
        print(f" No CIF files found in {src_dir}")
        return 0

    # Create converted_pdbs directory
    converted_dir = os.path.join(src_dir, 'converted_pdbs')
    os.makedirs(converted_dir, exist_ok=True)

    used_names = set()
    n_converted = 0
    for cif_path in cif_files:
        rel_parts = os.path.relpath(cif_path, src_dir).split(os.sep)
        dir_parts = rel_parts[:-1]
        # splitext swaps only the real extension, whatever its case.
        file_stem = os.path.splitext(rel_parts[-1])[0]

        # For TDS/SMC with round subdirs, include round info. Only directory
        # components count: a file merely named 'round_*' is not a round dir.
        round_part = next(
            (part for part in dir_parts if part.startswith('round_')), None
        )
        if round_part is None:
            pdb_name = f"{file_stem}.pdb"
        else:
            pdb_name = f"{round_part}_{file_stem}.pdb"

        if pdb_name in used_names:
            # Same output name from another subdirectory: fall back to the
            # flattened relative path so earlier output is not overwritten.
            flat_stem = '_'.join(dir_parts + [file_stem])
            pdb_name = f"{flat_stem}.pdb"
            suffix = 1
            while pdb_name in used_names:
                suffix += 1
                pdb_name = f"{flat_stem}_{suffix}.pdb"
            print(f" Name clash: writing {cif_path} as {pdb_name}")
        used_names.add(pdb_name)

        pdb_path = os.path.join(converted_dir, pdb_name)
        try:
            convert_cif_to_pdb(cif_path, pdb_path)
            n_converted += 1
        except Exception as e:
            # Best effort: report the failure and carry on with the rest.
            print(f" Failed {cif_path}: {e}")

    print(f" Converted {n_converted}/{len(cif_files)} CIF -> PDB in {converted_dir}")
    return n_converted
def main():
    """Convert the CIF outputs of each PXDesign method and print a summary.

    Each method's results directory is resolved against ``_PROJECT_DIR``.
    Directories that do not exist are reported and skipped.
    """
    # Langevin outputs are already PDB (post-hoc refinement)
    result_dirs = (
        ('pxdesign_guided', os.path.join(_PROJECT_DIR, 'results/pxdesign_guided')),
        ('pxdesign_tds', os.path.join(_PROJECT_DIR, 'results/pxdesign_tds')),
        ('pxdesign_smc', os.path.join(_PROJECT_DIR, 'results/pxdesign_smc')),
    )

    grand_total = 0
    for label, directory in result_dirs:
        print(f"\n{label}:")
        if not os.path.exists(directory):
            print(f" Directory not found: {directory}")
            continue
        grand_total += convert_directory(directory, label)

    print(f"\nTotal converted: {grand_total}")
# Run the conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()