File size: 4,146 Bytes
ad9572d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""
Convert PXDesign CIF outputs to PDB format for evaluation pipeline.

PXDesign outputs .cif files with:
- Chain IDs like A0/B0 (multi-char, not PDB-compatible)
- Non-standard residue name 'xpb' for designed binder residues

This script converts them to PDB format with:
- Single-char chain IDs (A, B)
- Non-standard 'xpb' residues renamed to 'GLY' (all other residue names are preserved)

Usage:
    python code/scripts/pxdesign_guidance/convert_cif_to_pdb.py
"""
import os
import sys
from glob import glob

from Bio.PDB import MMCIFParser, PDBIO, Select

# Absolute path of the directory that contains this script.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Three directory levels above the script directory; main() resolves the
# 'results/...' input directories relative to this path.
_PROJECT_DIR = os.path.abspath(os.path.join(_SCRIPT_DIR, '../../..'))


class ChainRenamer(Select):
    """Accept-everything ``Select`` subclass that carries a chain-ID mapping.

    The mapping passed to the constructor is stored on the instance as
    ``chain_map``; none of the ``accept_*`` hooks defined here read it, and
    each hook returns 1 for whatever entity it is given.
    """

    def __init__(self, chain_map):
        # Stored as-is; no method of this class consults it.
        self.chain_map = chain_map

    # The three hooks below all return 1 regardless of their argument.

    def accept_atom(self, atom):
        return 1

    def accept_residue(self, residue):
        return 1

    def accept_chain(self, chain):
        return 1


# Single-character chain IDs tried, in this order, when a chain's preferred
# ID (the first character of its original ID) is already taken.
_PDB_CHAIN_ID_POOL = (
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abcdefghijklmnopqrstuvwxyz'
    '0123456789'
)


def _assign_chain_ids(old_ids):
    """Map each original chain ID to a unique single-character chain ID.

    A chain keeps the first character of its original ID ('A' when the ID is
    empty) if that character is still free. Otherwise the next free character
    of ``_PDB_CHAIN_ID_POOL`` is used, searching from the character after the
    preferred one and wrapping around.

    Raises:
        ValueError: if a replacement is needed and the pool is exhausted.
    """
    chain_map = {}
    used_ids = set()
    for old_id in old_ids:
        preferred = old_id[0] if old_id else 'A'
        new_id = preferred
        if new_id in used_ids:
            # find() returns -1 for characters outside the pool, so the search
            # then starts at the beginning of the pool.
            start = _PDB_CHAIN_ID_POOL.find(preferred) + 1
            rotated = _PDB_CHAIN_ID_POOL[start:] + _PDB_CHAIN_ID_POOL[:start]
            new_id = next((c for c in rotated if c not in used_ids), None)
            if new_id is None:
                raise ValueError(
                    f"No free single-character chain ID left for chain {old_id!r}"
                )
        used_ids.add(new_id)
        chain_map[old_id] = new_id
    return chain_map


def convert_cif_to_pdb(cif_path, pdb_path):
    """Convert a single CIF file to PDB format.

    Every model of the parsed structure is normalised before writing:
    chain IDs are reduced to unique single characters (A0->A, B0->B, ...)
    and residues named 'xpb' (case-insensitive) are renamed to 'GLY'.

    Args:
        cif_path: Path of the input mmCIF file.
        pdb_path: Path of the PDB file to write.

    Returns:
        True once the PDB file has been written.

    Raises:
        ValueError: if the parsed structure contains no model, or if unique
            single-character chain IDs cannot be assigned. Errors raised by
            the Biopython parser or writer propagate unchanged.
    """
    parser = MMCIFParser(QUIET=True)
    structure = parser.get_structure('s', cif_path)
    if len(structure) == 0:
        raise ValueError(f"No models found in {cif_path}")

    for model in structure:
        # Snapshot the chains: the model's child list is modified below.
        chains = list(model.get_chains())
        chain_map = _assign_chain_ids(chain.id for chain in chains)

        if any(old_id != new_id for old_id, new_id in chain_map.items()):
            # Assigning chain.id while the chain is attached raises ValueError
            # when the new ID still belongs to a sibling (e.g. 'A0' -> 'A'
            # while a chain 'A' exists). Detach every chain first, then
            # re-attach them in the original order under their new IDs.
            for chain in chains:
                model.detach_child(chain.id)
            for chain in chains:
                chain.id = chain_map[chain.id]
                model.add(chain)

        # Rename 'xpb' residues to 'GLY' (backbone-only binder residues)
        for chain in chains:
            for residue in chain.get_residues():
                if residue.resname.strip().lower() == 'xpb':
                    residue.resname = 'GLY'

    # Write PDB
    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(pdb_path)
    return True


def convert_directory(src_dir, method_name):
    """Convert all sample CIF files in a directory tree to PDB.

    Searches ``src_dir`` recursively for ``*.cif`` files whose base name
    contains 'sample' (case-insensitive) and writes one PDB per file into
    ``<src_dir>/converted_pdbs``. A file located under a ``round_*``
    directory gets that directory name as a prefix. If two inputs would map
    to the same output name, the later one is written under a name built
    from its full relative path instead of overwriting the earlier one.

    Args:
        src_dir: Root directory to search.
        method_name: Label of the method being converted. Not used by the
            conversion itself; kept so existing callers keep working.

    Returns:
        Number of files converted successfully. Failures are printed and
        skipped so one bad file does not abort the batch.
    """
    cif_files = sorted(glob(os.path.join(src_dir, '**/*.cif'), recursive=True))
    cif_files = [f for f in cif_files if 'sample' in os.path.basename(f).lower()]

    if not cif_files:
        print(f"  No CIF files found in {src_dir}")
        return 0

    # Create converted_pdbs directory
    converted_dir = os.path.join(src_dir, 'converted_pdbs')
    os.makedirs(converted_dir, exist_ok=True)

    n_converted = 0
    used_names = set()
    for cif_path in cif_files:
        rel_path = os.path.relpath(cif_path, src_dir)
        parts = rel_path.split(os.sep)
        dir_parts = parts[:-1]
        # splitext swaps only the real extension; str.replace would rewrite
        # every '.cif' occurring anywhere in the file name.
        file_stem = os.path.splitext(parts[-1])[0]

        # For TDS/SMC with round subdirs, include round info. Only directory
        # components are inspected, so a file whose own name starts with
        # 'round_' is not prefixed with itself.
        round_part = next((p for p in dir_parts if p.startswith('round_')), None)
        stem = f"{round_part}_{file_stem}" if round_part is not None else file_stem

        out_name = f"{stem}.pdb"
        if out_name in used_names:
            # Same output name as an earlier file in this run: fall back to
            # the flattened relative path so nothing is silently overwritten.
            flat_stem = '_'.join(dir_parts + [file_stem])
            out_name = f"{flat_stem}.pdb"
            suffix = 1
            while out_name in used_names:
                out_name = f"{flat_stem}_{suffix}.pdb"
                suffix += 1
            print(f"  Name clash for {cif_path}; writing {out_name} instead")
        used_names.add(out_name)

        pdb_path = os.path.join(converted_dir, out_name)
        try:
            convert_cif_to_pdb(cif_path, pdb_path)
            n_converted += 1
        except Exception as e:
            # Best-effort batch: report the failure and continue.
            print(f"  Failed {cif_path}: {e}")

    print(f"  Converted {n_converted}/{len(cif_files)} CIF -> PDB in {converted_dir}")
    return n_converted


def main():
    """Convert the CIF outputs of each PXDesign method under the project's results directory.

    Each method's directory is processed with ``convert_directory`` when it
    exists; a missing directory is reported and skipped. The overall number
    of converted files is printed at the end.
    """
    # Langevin outputs are already PDB (post-hoc refinement)
    targets = (
        ('pxdesign_guided', 'results/pxdesign_guided'),
        ('pxdesign_tds', 'results/pxdesign_tds'),
        ('pxdesign_smc', 'results/pxdesign_smc'),
    )
    methods = {
        label: os.path.join(_PROJECT_DIR, rel_dir) for label, rel_dir in targets
    }

    total = 0
    for name in methods:
        src_dir = methods[name]
        print(f"\n{name}:")
        if not os.path.exists(src_dir):
            print(f"  Directory not found: {src_dir}")
            continue
        total += convert_directory(src_dir, name)

    print(f"\nTotal converted: {total}")


# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()