File size: 8,369 Bytes
a6c9f2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
"""
RMSD Calculation Utilities for RNA Structure Comparison
Fixed version with explicit purine-pyrimidine atom mapping
"""

import numpy as np


def parse_residue_atoms(fname):
    """
    Parse a PDB file and organize its atoms by residue.

    A residue is identified by (chain ID, residue number, insertion code),
    so residues that share a number in different chains, or that differ only
    by insertion code, are kept separate instead of being merged into one
    entry. For a single-chain file without insertion codes the result is
    simply ordered by residue number.

    Args:
        fname: Path to PDB file

    Returns:
        List of residues ordered by chain (in order of first appearance in
        the file), then residue number, then insertion code. Each residue
        is a dict with:
        - 'resnum': residue number
        - 'resname': residue name as written in the file (e.g. A, C, G, U),
          taken from the first atom record seen for that residue
        - 'atoms': dict of {atom_name: [x, y, z]}

    Raises:
        ValueError: If a coordinate record has a residue number or
            coordinate field that cannot be parsed as a number.
    """
    residues = {}
    # Maps chain ID -> rank by first appearance, so chains keep file order.
    chain_rank = {}

    with open(fname) as f:
        for line in f:
            if line[0:6].strip() not in ('ATOM', 'HETATM', 'HETAT'):
                continue

            atomname = line[12:16].strip()
            resname = line[17:20].strip()  # residue name (A, C, G, U)
            chain_id = line[21:22].strip()
            resnum = int(line[22:26].strip())  # residue number
            icode = line[26:27].strip()  # insertion code, usually blank

            x = float(line[30:38].strip())
            y = float(line[38:46].strip())
            z = float(line[46:54].strip())

            # Residue numbers alone are not unique across chains or
            # insertion codes, so all three parts go into the key.
            key = (chain_rank.setdefault(chain_id, len(chain_rank)), resnum, icode)

            residue = residues.get(key)
            if residue is None:
                residue = {
                    'resnum': resnum,
                    'resname': resname,
                    'atoms': {}
                }
                residues[key] = residue

            # A repeated atom name (e.g. alternate locations) keeps the
            # last record seen.
            residue['atoms'][atomname] = [x, y, z]

    # Sorted keys give chain order, then residue number, then insertion code
    return [residues[key] for key in sorted(residues)]


def get_backbone_sugar_coords_from_residue(residue):
    """
    Collect the backbone and sugar atom coordinates of one residue.

    Atoms are returned in a fixed order (phosphate group first, then the
    ribose atoms). Atoms that the residue does not contain are skipped, so
    the result may hold fewer than twelve entries.

    Args:
        residue: Dict with an 'atoms' key mapping atom name -> [x, y, z]

    Returns:
        List of [x, y, z] coordinates in consistent order
    """
    atom_table = residue['atoms']

    # Fixed output order: phosphate group, then sugar ring and hydroxyls
    ordered_names = (
        "P", "OP1", "OP2",
        "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'",
    )

    return [atom_table[name] for name in ordered_names if name in atom_table]


def get_base_coords_from_residue(residue):
    """
    Collect the three representative base atoms of a residue.

    The atoms are returned in this order:
    - Purines (A, G): N9, C8, C4
    - Pyrimidines (C, U): N1, C2, C6

    Position-by-position this pairs N9 <-> N1, C8 <-> C2 and C4 <-> C6 when
    a purine is compared against a pyrimidine. Atoms missing from the
    residue are skipped, and any other residue name yields an empty list.

    NOTE(review): the IUPAC glycosidic torsion chi is defined as
    O4'-C1'-N9-C4 for purines and O4'-C1'-N1-C2 for pyrimidines, which
    suggests C4 <-> C2 and C8 <-> C6 as the geometrically equivalent pairs.
    The pairing here is kept as originally specified; confirm which one is
    intended.

    Args:
        residue: Dict with 'resname' and 'atoms' keys

    Returns:
        List of [x, y, z] coordinates
    """
    kind = residue['resname']
    atom_table = residue['atoms']

    if kind in ('A', 'G'):
        # Purine: glycosidic nitrogen plus its two ring neighbours
        wanted = ('N9', 'C8', 'C4')
    elif kind in ('C', 'U'):
        # Pyrimidine: glycosidic nitrogen plus its two ring neighbours
        wanted = ('N1', 'C2', 'C6')
    else:
        # Unrecognised residue name: contribute no base atoms
        wanted = ()

    return [atom_table[name] for name in wanted if name in atom_table]


def get_backbone_sugar_and_selectbase_coords_fixed(fname):
    """
    Build one coordinate array of backbone, sugar and selected base atoms.

    Residues are processed in the order returned by parse_residue_atoms.
    Each residue contributes, in this order:
    1. Its backbone and sugar atoms (in consistent order)
    2. Three base atoms:
       - Purines (A, G): N9, C8, C4
       - Pyrimidines (C, U): N1, C2, C6

    Keeping this per-residue order fixed lets structures with different
    sequences be compared position by position
    (N9<->N1, C8<->C2, C4<->C6).

    Args:
        fname: Path to PDB file

    Returns:
        Numpy array built from the collected [x, y, z] entries
    """
    collected = []

    for res in parse_residue_atoms(fname):
        # Backbone/sugar atoms first, then the three base atoms
        collected += get_backbone_sugar_coords_from_residue(res)
        collected += get_base_coords_from_residue(res)

    return np.asarray(collected)


def calculate_COM(coords):
    """
    Return the geometric center of a set of points.

    Every point is weighted equally: the result is the column-wise sum
    divided by the number of rows.

    Args:
        coords: Numpy array of shape (N, 3)

    Returns:
        Numpy array of shape (3,) representing the center of mass
    """
    n_points = coords.shape[0]
    column_totals = coords.sum(axis=0)
    return column_totals / float(n_points)


def calculate_rotation_rmsd(coords1, coords2, COM1, COM2):
    """
    Calculate the optimal rotation and RMSD using the Kabsch algorithm.

    Both coordinate sets are centered on their own center of mass. The
    returned matrix U rotates centered structure 2 onto centered
    structure 1 when applied as ``np.dot(coords2 - COM2, U)``. U is always
    a proper rotation (determinant +1), never a reflection.

    Args:
        coords1: Coordinates of structure 1 (N, 3)
        coords2: Coordinates of structure 2 (N, 3)
        COM1: Center of mass of structure 1 (3,)
        COM2: Center of mass of structure 2 (3,)

    Returns:
        U: Rotation matrix (3, 3)
        RMSD: Root mean square deviation (float)
        Both are None if the two coordinate sets differ in length.

    Raises:
        AssertionError: If the coordinate sets are empty.
    """
    # Structures with different atom counts cannot be superposed
    if len(coords1) != len(coords2):
        return None, None

    L = len(coords1)
    # Raised explicitly (not via `assert`) so the check survives `python -O`
    # while keeping the exception type callers may already handle.
    if L == 0:
        raise AssertionError("cannot compute RMSD for empty coordinate sets")

    sel1 = np.asarray(coords1, dtype=float) - COM1
    sel2 = np.asarray(coords2, dtype=float) - COM2

    # Initial residual, see Kabsch.
    R0 = np.sum(sel1 * sel1) + np.sum(sel2 * sel2)

    # Calculate the components of the rotation matrix (V,W)
    # S is used to calculate the error (RMSD)
    V, S, W = np.linalg.svd(np.dot(sel2.T, sel1))

    # V and W are orthogonal, so the product of their determinants is +1 or
    # -1 only up to floating-point rounding. Test the sign rather than
    # comparing with exactly -1.0, which would miss values such as
    # -0.9999999999999998 and leave a reflection in U.
    if np.linalg.det(V) * np.linalg.det(W) < 0.0:
        # Flip the axis of the smallest singular value to get a proper
        # rotation
        S[-1] = -S[-1]
        V[:, -1] = -V[:, -1]

    U = np.dot(V, W)

    # Calculate the RMSD using sigma from the SVD calculation above;
    # abs() guards against a tiny negative residual from rounding.
    RMSD = np.sqrt(abs((R0 - 2.0 * np.sum(S)) / L))

    return U, RMSD


def translate_rotate_coords(coords, COM, U=None):
    """
    Shift coordinates by a center of mass and optionally rotate them.

    The center of mass is subtracted first. When a rotation matrix is
    given, the shifted coordinates are then right-multiplied by it.

    Args:
        coords: Coordinates to transform (N, 3)
        COM: Center of mass to translate by (3,)
        U: Rotation matrix (3, 3), optional

    Returns:
        Transformed coordinates (N, 3)
    """
    shifted = coords - COM
    return shifted if U is None else np.dot(shifted, U)


def get_all_atom_coords(fname):
    """
    Read the coordinates of every atom record in a PDB file.

    Every ATOM/HETATM line contributes one row, in file order; all other
    lines are ignored.

    Args:
        fname: Path to PDB file

    Returns:
        Numpy array of coordinates (N, 3)
    """
    coordinate_records = ('ATOM', 'HETATM', 'HETAT')

    with open(fname) as handle:
        atom_lines = [
            text for text in handle
            if text[0:6].strip() in coordinate_records
        ]

    # Fixed-width x, y, z fields of the PDB coordinate record
    xyz_rows = [
        [
            float(text[30:38].strip()),
            float(text[38:46].strip()),
            float(text[46:54].strip()),
        ]
        for text in atom_lines
    ]

    return np.asarray(xyz_rows)


def apply_transformation_to_pdb(fname, U, COM, output_fname):
    """
    Write a copy of a PDB file with translated and rotated coordinates.

    For every ATOM/HETATM line the center of mass is subtracted from the
    coordinates and the result is right-multiplied by U. The new values
    are written back into the same fixed-width columns with three
    decimals. All other lines are copied unchanged.

    Args:
        fname: Input PDB file path
        U: Rotation matrix (3, 3)
        COM: Center of mass to translate from (3,)
        output_fname: Output PDB file path
    """
    # The input is read completely before the output is opened
    with open(fname) as src:
        original_lines = src.readlines()

    coordinate_records = ('ATOM', 'HETATM', 'HETAT')

    with open(output_fname, 'w') as dst:
        for text in original_lines:
            if text[0:6].strip() not in coordinate_records:
                # Not a coordinate record: pass through untouched
                dst.write(text)
                continue

            position = np.array([
                float(text[30:38].strip()),
                float(text[38:46].strip()),
                float(text[46:54].strip()),
            ])
            moved = np.dot((position - COM), U)

            # Replace only the three coordinate fields; keep everything
            # before and after them as in the source line
            xyz_fields = f"{moved[0]:8.3f}{moved[1]:8.3f}{moved[2]:8.3f}"
            dst.write(text[:30] + xyz_fields + text[54:])