"""
Convert PXDesign CIF outputs to PDB format for the evaluation pipeline.

PXDesign outputs .cif files with:
- Chain IDs like A0/B0 (multi-char, not PDB-compatible)
- Non-standard residue name 'xpb' for designed binder residues

This script converts them to PDB format with:
- Single-char chain IDs (A, B)
- Residue name 'xpb' rewritten to 'GLY' (all other residue names are kept)

Usage:
    python code/scripts/pxdesign_guidance/convert_cif_to_pdb.py
"""
| import os |
| import sys |
| from glob import glob |
|
|
| from Bio.PDB import MMCIFParser, PDBIO, Select |
|
|
# Absolute path of the directory that holds this script.
_SCRIPT_DIR = os.path.split(os.path.abspath(__file__))[0]
# Three directory levels above the script directory; main() resolves the
# results/ folders relative to this path.
_PROJECT_DIR = os.path.normpath(os.path.join(_SCRIPT_DIR, '../../..'))
|
|
|
|
class ChainRenamer(Select):
    """Pass-through ``Select`` subclass that carries a chain-ID mapping.

    Every ``accept_*`` hook returns 1 unconditionally, so this class never
    rejects a chain, residue, or atom. The mapping is only stored on the
    instance; none of the methods defined here apply it.
    """

    def __init__(self, chain_map):
        """Store ``chain_map`` (old chain ID -> new chain ID) on the instance."""
        self.chain_map = chain_map

    def accept_chain(self, chain):
        """Accept every chain."""
        return 1

    def accept_residue(self, residue):
        """Accept every residue."""
        return 1

    def accept_atom(self, atom):
        """Accept every atom."""
        return 1
|
|
|
|
# Characters used as fallback single-character chain IDs, in search order.
_PDB_CHAIN_IDS = (
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abcdefghijklmnopqrstuvwxyz'
    '0123456789'
)


def _assign_chain_ids(old_ids):
    """Map each chain ID in ``old_ids`` to a unique single-character ID.

    IDs that are already one character long keep their value and are
    reserved up front, so a renamed chain is never given an ID that another
    chain still holds. A longer ID takes its first character (an empty ID
    takes 'A'); if that is taken, the next free character of
    ``_PDB_CHAIN_IDS`` after it (wrapping around) is used instead.

    Raises:
        ValueError: if a fallback is needed but every pool character is used.
    """
    old_ids = list(old_ids)
    chain_map = {cid: cid for cid in old_ids if len(cid) == 1}
    used_ids = set(chain_map)

    for old_id in old_ids:
        if old_id in chain_map:
            continue
        new_id = old_id[0] if old_id else 'A'
        if new_id in used_ids:
            # Search the pool starting just after the colliding character;
            # find() returns -1 for characters outside the pool, which
            # makes the search start at the beginning.
            start = _PDB_CHAIN_IDS.find(new_id) + 1
            rotated = _PDB_CHAIN_IDS[start:] + _PDB_CHAIN_IDS[:start]
            new_id = next((c for c in rotated if c not in used_ids), None)
            if new_id is None:
                raise ValueError(
                    f"No free single-character chain ID left for {old_id!r}"
                )
        used_ids.add(new_id)
        chain_map[old_id] = new_id

    return chain_map


def convert_cif_to_pdb(cif_path, pdb_path):
    """Convert a single CIF file to PDB format.

    Chains of the first model are renamed to single-character IDs and every
    residue named 'xpb' (case-insensitive) is renamed to 'GLY'. The whole
    structure is then written to ``pdb_path``.

    Args:
        cif_path: Path of the mmCIF file to read.
        pdb_path: Path of the PDB file to write.

    Returns:
        True once the file has been written.

    Raises:
        ValueError: if no free single-character chain ID is available.
        Errors raised by the parser or writer are propagated unchanged.
    """
    parser = MMCIFParser(QUIET=True)
    structure = parser.get_structure('s', cif_path)
    # NOTE: only the first model is processed; chains of any further models
    # keep their original IDs and residue names.
    model = structure[0]

    # Snapshot the chains before renaming so iteration is not affected by
    # the ID changes.
    chains = list(model.get_chains())
    chain_map = _assign_chain_ids(chain.id for chain in chains)

    for chain in chains:
        new_id = chain_map[chain.id]
        if chain.id != new_id:
            chain.id = new_id

        for residue in chain.get_residues():
            if residue.resname.strip().lower() == 'xpb':
                residue.resname = 'GLY'

    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_path)
    return True
|
|
|
|
def _converted_basename(cif_path, src_dir):
    """Return the output ``.pdb`` file name for ``cif_path`` under ``src_dir``.

    Only the file extension is swapped for ``.pdb``. If one of the
    directories between ``src_dir`` and the file starts with ``round_``,
    the first such directory name is prepended to the file name.
    """
    stem, _ext = os.path.splitext(os.path.basename(cif_path))
    basename = f"{stem}.pdb"

    # Look at directory components only; the file name itself must not be
    # mistaken for a round directory.
    parent_parts = os.path.relpath(cif_path, src_dir).split(os.sep)[:-1]
    round_part = next((p for p in parent_parts if p.startswith('round_')), None)
    if round_part is not None:
        basename = f"{round_part}_{basename}"
    return basename


def convert_directory(src_dir, method_name):
    """Convert all CIF files in a directory tree to PDB.

    Searches ``src_dir`` recursively for ``*.cif`` files whose file name
    contains 'sample' (case-insensitive) and writes the converted files
    into ``<src_dir>/converted_pdbs``. A failed conversion is reported and
    skipped so the remaining files are still processed.

    Args:
        src_dir: Root directory to search.
        method_name: Label of the method being converted; currently unused.

    Returns:
        Number of files converted successfully.
    """
    pattern = os.path.join(src_dir, '**/*.cif')
    cif_files = sorted(
        path for path in glob(pattern, recursive=True)
        if 'sample' in os.path.basename(path).lower()
    )

    if not cif_files:
        print(f" No CIF files found in {src_dir}")
        return 0

    converted_dir = os.path.join(src_dir, 'converted_pdbs')
    os.makedirs(converted_dir, exist_ok=True)

    n_converted = 0
    written = set()
    for cif_path in cif_files:
        pdb_path = os.path.join(
            converted_dir, _converted_basename(cif_path, src_dir)
        )
        # All outputs share one flat directory, so inputs with the same
        # name in different subdirectories collide; make that visible.
        if pdb_path in written:
            print(f" Warning: {cif_path} overwrites earlier output {pdb_path}")
        try:
            convert_cif_to_pdb(cif_path, pdb_path)
        except Exception as e:
            # Best-effort batch: report the failure and continue.
            print(f" Failed {cif_path}: {e}")
        else:
            n_converted += 1
            written.add(pdb_path)

    print(f" Converted {n_converted}/{len(cif_files)} CIF -> PDB in {converted_dir}")
    return n_converted
|
|
|
|
def main():
    """Convert the CIF outputs of every configured PXDesign method.

    Each method's results directory is resolved relative to
    ``_PROJECT_DIR``. Missing directories are reported and skipped, and the
    total number of converted files is printed at the end.
    """
    method_dirs = (
        ('pxdesign_guided', 'results/pxdesign_guided'),
        ('pxdesign_tds', 'results/pxdesign_tds'),
        ('pxdesign_smc', 'results/pxdesign_smc'),
    )

    grand_total = 0
    for method, rel_dir in method_dirs:
        results_dir = os.path.join(_PROJECT_DIR, rel_dir)
        print(f"\n{method}:")
        if not os.path.exists(results_dir):
            print(f" Directory not found: {results_dir}")
            continue
        grand_total += convert_directory(results_dir, method)

    print(f"\nTotal converted: {grand_total}")
|
|
|
|
# Run the batch conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|