"""
Convert PXDesign CIF outputs to PDB format for evaluation pipeline.

PXDesign outputs .cif files with:
  - Chain IDs like A0/B0 (multi-char, not PDB-compatible)
  - Non-standard residue name 'xpb' for designed binder residues

This script converts them to PDB format with:
  - Single-char chain IDs (A, B)
  - 'xpb' residues renamed to 'GLY'

Converted files are written to a 'converted_pdbs' directory inside each
method's results directory.

Usage:
    python code/scripts/pxdesign_guidance/convert_cif_to_pdb.py
"""
import os
import string
import sys
from glob import glob

from Bio.PDB import MMCIFParser, PDBIO, Select

_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
_PROJECT_DIR = os.path.abspath(os.path.join(_SCRIPT_DIR, '../../..'))

# Candidate single-character chain IDs, in the order they are tried when the
# first character of an original chain ID is already taken.
_PDB_CHAIN_IDS = string.ascii_uppercase + string.ascii_lowercase + string.digits


class ChainRenamer(Select):
    """Pass-through ``Select`` that accepts every chain, residue and atom.

    The ``chain_map`` given to the constructor is stored on the instance but
    is not consulted by any of the ``accept_*`` hooks; the actual chain
    renaming is done in ``convert_cif_to_pdb``.
    """

    def __init__(self, chain_map):
        self.chain_map = chain_map

    def accept_chain(self, chain):
        return 1

    def accept_residue(self, residue):
        return 1

    def accept_atom(self, atom):
        return 1


def _build_chain_map(chain_ids):
    """Map each original chain ID to a unique single-character ID.

    Each chain gets the first character of its original ID ('A' when the ID
    is empty). If that character is already taken, the next free character
    after it in ``_PDB_CHAIN_IDS`` is used instead (wrapping around), so the
    result is always alphanumeric for colliding chains.

    Args:
        chain_ids: Original chain IDs, in model order.

    Returns:
        Dict mapping every original ID to its new single-character ID.

    Raises:
        ValueError: If no unused ID is left in ``_PDB_CHAIN_IDS``.
    """
    chain_map = {}
    used_ids = set()
    pool_size = len(_PDB_CHAIN_IDS)
    for old_id in chain_ids:
        new_id = old_id[0] if old_id else 'A'
        if new_id in used_ids:
            # find() returns -1 for a character outside the pool, which makes
            # the scan below start at the first pool entry.
            start = _PDB_CHAIN_IDS.find(new_id)
            for offset in range(1, pool_size + 1):
                candidate = _PDB_CHAIN_IDS[(start + offset) % pool_size]
                if candidate not in used_ids:
                    new_id = candidate
                    break
            else:
                raise ValueError(
                    f"No free single-character chain ID left for chain {old_id!r}"
                )
        used_ids.add(new_id)
        chain_map[old_id] = new_id
    return chain_map


def _pdb_basename(rel_path):
    """Return the output .pdb file name for a CIF path relative to the source dir.

    The extension is swapped for '.pdb'. If any *directory* component of
    ``rel_path`` starts with 'round_', the first such component is prepended
    to the name (separated by '_').
    """
    *dir_parts, filename = rel_path.split(os.sep)
    # splitext touches only the trailing extension, unlike str.replace which
    # would also rewrite '.cif' occurring earlier in the name.
    stem, _ext = os.path.splitext(filename)
    basename = stem + '.pdb'
    round_dirs = [p for p in dir_parts if p.startswith('round_')]
    if round_dirs:
        basename = f"{round_dirs[0]}_{basename}"
    return basename


def convert_cif_to_pdb(cif_path, pdb_path):
    """Convert a single CIF file to PDB format.

    Chains of the first model are renamed to single-character IDs and
    residues named 'xpb' (case-insensitive) are renamed to 'GLY'; the whole
    structure is then written to ``pdb_path``.

    Args:
        cif_path: Path of the input mmCIF file.
        pdb_path: Path of the PDB file to write.

    Returns:
        True on success. Parsing, renaming and writing errors propagate as
        exceptions.
    """
    parser = MMCIFParser(QUIET=True)
    structure = parser.get_structure('s', cif_path)
    model = structure[0]

    # Build chain ID mapping (A0->A, B0->B, etc.)
    chains = list(model.get_chains())
    chain_map = _build_chain_map([chain.id for chain in chains])

    # Rename in two phases. Biopython refuses (ValueError) to assign an ID
    # that a sibling chain currently holds, which a direct rename can hit
    # (e.g. chains 'A0' and 'A': 'A0' -> 'A' while 'A' still exists). Parking
    # every chain that changes under an unused placeholder first avoids that.
    original_ids = {chain.id for chain in chains}
    pending = []
    for index, chain in enumerate(chains):
        new_id = chain_map[chain.id]
        if new_id == chain.id:
            continue
        placeholder = f"__tmp{index}__"
        while placeholder in original_ids:
            placeholder += '_'
        chain.id = placeholder
        pending.append((chain, new_id))
    for chain, new_id in pending:
        chain.id = new_id

    # Rename 'xpb' residues to 'GLY' (backbone-only binder residues)
    for chain in chains:
        for residue in chain.get_residues():
            if residue.resname.strip().lower() == 'xpb':
                residue.resname = 'GLY'

    # Write PDB
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_path)
    return True


def convert_directory(src_dir, method_name):
    """Convert all CIF files in a directory tree to PDB.

    Only files whose base name contains 'sample' (case-insensitive) are
    converted. Outputs go to ``<src_dir>/converted_pdbs``. A file that fails
    to convert is reported on stdout and skipped.

    Args:
        src_dir: Root directory searched recursively for '*.cif' files.
        method_name: Name of the method; currently unused, kept so callers
            do not need to change.

    Returns:
        Number of files converted successfully.
    """
    cif_files = sorted(glob(os.path.join(src_dir, '**/*.cif'), recursive=True))
    cif_files = [f for f in cif_files if 'sample' in os.path.basename(f).lower()]

    if not cif_files:
        print(f"  No CIF files found in {src_dir}")
        return 0

    # Create converted_pdbs directory
    converted_dir = os.path.join(src_dir, 'converted_pdbs')
    os.makedirs(converted_dir, exist_ok=True)

    n_converted = 0
    for cif_path in cif_files:
        # For TDS/SMC with round subdirs, include round info
        rel_path = os.path.relpath(cif_path, src_dir)
        pdb_path = os.path.join(converted_dir, _pdb_basename(rel_path))
        try:
            convert_cif_to_pdb(cif_path, pdb_path)
        except Exception as e:
            # Best-effort batch: one bad file must not stop the rest.
            print(f"  Failed {cif_path}: {e}")
        else:
            n_converted += 1

    print(f"  Converted {n_converted}/{len(cif_files)} CIF -> PDB in {converted_dir}")
    return n_converted


def main():
    """Convert the CIF outputs of every PXDesign method under the project's results dir."""
    methods = {
        'pxdesign_guided': os.path.join(_PROJECT_DIR, 'results/pxdesign_guided'),
        'pxdesign_tds': os.path.join(_PROJECT_DIR, 'results/pxdesign_tds'),
        'pxdesign_smc': os.path.join(_PROJECT_DIR, 'results/pxdesign_smc'),
    }
    # Langevin outputs are already PDB (post-hoc refinement)

    total = 0
    for name, src_dir in methods.items():
        print(f"\n{name}:")
        if os.path.exists(src_dir):
            total += convert_directory(src_dir, name)
        else:
            print(f"  Directory not found: {src_dir}")

    print(f"\nTotal converted: {total}")


if __name__ == '__main__':
    main()