Spaces:
Sleeping
Sleeping
| """ | |
| RMSD Calculation Utilities for RNA Structure Comparison | |
| Fixed version with explicit purine-pyrimidine atom mapping | |
| """ | |
| import numpy as np | |
def parse_residue_atoms(fname):
    """
    Parse a PDB file and group its atoms by residue.

    A residue is identified by its chain ID, residue number and insertion
    code together, so residues from different chains that share a number are
    kept as separate entries instead of being merged into one.

    Args:
        fname: Path to PDB file

    Returns:
        List of residues, where each residue is a dict with:
        - 'resnum': residue number
        - 'resname': residue name (A, C, G, U)
        - 'atoms': dict of {atom_name: [x, y, z]}
        Chains come out in the order they first appear in the file; within a
        chain, residues are sorted by residue number, then insertion code.

    Raises:
        ValueError: If an ATOM/HETATM line has a residue number or coordinate
            field that cannot be converted to a number.
    """
    residues = {}
    # Chain ID -> order of first appearance, so chains keep their file order
    # while residues inside each chain are still sorted numerically.
    chain_rank = {}

    with open(fname) as f:
        for line in f:
            if line[0:6].strip() not in ('ATOM', 'HETATM', 'HETAT'):
                continue

            atomname = line[12:16].strip()
            resname = line[17:20].strip()  # residue name (A, C, G, U)
            chain = line[21:22]
            resnum = int(line[22:26].strip())  # residue number
            icode = line[26:27].strip()
            x = float(line[30:38].strip())
            y = float(line[38:46].strip())
            z = float(line[46:54].strip())

            rank = chain_rank.setdefault(chain, len(chain_rank))
            key = (rank, resnum, icode)

            # Initialize residue if not seen before
            if key not in residues:
                residues[key] = {
                    'resnum': resnum,
                    'resname': resname,
                    'atoms': {}
                }

            # A later record with the same residue key and atom name
            # replaces the earlier coordinates.
            residues[key]['atoms'][atomname] = [x, y, z]

    return [residues[key] for key in sorted(residues)]
def get_backbone_sugar_coords_from_residue(residue):
    """
    Extract backbone and sugar atom coordinates from a residue dict.

    Each atom is looked up under its current PDB name first. If that name is
    absent, the older spelling is tried: O1P/O2P for OP1/OP2, and '*' in
    place of the prime for sugar atoms (e.g. C5* for C5'). Atoms found under
    neither name are skipped, so the result may hold fewer than 12 entries.

    Args:
        residue: Dict with 'atoms' key containing atom coordinates

    Returns:
        List of [x, y, z] coordinates in consistent order
    """
    # Define the order of backbone and sugar atoms
    backbone_sugar_atoms = ["P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"]
    # Phosphate oxygens were renamed outright; sugar atoms only swapped
    # '*' for the prime character.
    legacy_phosphate_names = {"OP1": "O1P", "OP2": "O2P"}

    atoms = residue['atoms']
    coords = []
    for atom_name in backbone_sugar_atoms:
        legacy_name = legacy_phosphate_names.get(atom_name, atom_name.replace("'", "*"))
        if atom_name in atoms:
            coords.append(atoms[atom_name])
        elif legacy_name in atoms:
            coords.append(atoms[legacy_name])
    return coords
def get_base_coords_from_residue(residue):
    """
    Collect the three reference base atoms of a residue.

    The atoms are returned in a fixed order that lines purines up with
    pyrimidines position by position:
    - Purines (A, G): N9, C8, C4
    - Pyrimidines (C, U): N1, C2, C6
    i.e. N9 <-> N1, C8 <-> C2, C4 <-> C6.

    Args:
        residue: Dict with 'resname' and 'atoms' keys

    Returns:
        List of [x, y, z] coordinates. Atoms missing from the residue are
        left out; a residue name other than A, G, C or U gives an empty list.
    """
    purine_atoms = ('N9', 'C8', 'C4')
    pyrimidine_atoms = ('N1', 'C2', 'C6')
    atoms_for_resname = {
        'A': purine_atoms,
        'G': purine_atoms,
        'C': pyrimidine_atoms,
        'U': pyrimidine_atoms,
    }

    residue_name = residue['resname']
    available = residue['atoms']
    # Unknown residue types select no atoms at all.
    wanted = atoms_for_resname.get(residue_name, ())
    return [available[name] for name in wanted if name in available]
def get_backbone_sugar_and_selectbase_coords_fixed(fname):
    """
    Extract backbone, sugar, and select base atom coordinates.

    Ensures proper ordering for purine-pyrimidine mapping.
    For each residue returned by parse_residue_atoms, appends:
    1. The backbone and sugar atoms (in consistent order)
    2. Three base atoms:
       - Purines (A, G): N9, C8, C4
       - Pyrimidines (C, U): N1, C2, C6

    This ordering ensures that when comparing structures with different
    sequences, the atoms are correctly mapped (N9<->N1, C8<->C2, C4<->C6).

    Args:
        fname: Path to PDB file

    Returns:
        Numpy array of coordinates with shape (N, 3). When no atoms are
        selected the array has shape (0, 3).
    """
    residues = parse_residue_atoms(fname)
    all_coords = []
    for residue in residues:
        # Backbone/sugar atoms first, then the three base atoms, so every
        # residue contributes its atoms in the same relative order.
        all_coords.extend(get_backbone_sugar_coords_from_residue(residue))
        all_coords.extend(get_base_coords_from_residue(residue))
    # reshape keeps the (N, 3) layout even for an empty list, which
    # np.asarray alone would turn into shape (0,).
    return np.asarray(all_coords, dtype=float).reshape(-1, 3)
def calculate_COM(coords):
    """
    Calculate center of mass (geometric center) of coordinates.

    All points are weighted equally; no atomic masses are involved.

    Args:
        coords: Array-like of shape (N, 3); a numpy array or nested list

    Returns:
        Numpy array of shape (3,) representing the center of mass

    Raises:
        ValueError: If coords contains no points.
    """
    points = np.asarray(coords, dtype=float)
    if points.size == 0:
        # Dividing by zero points would otherwise yield NaN silently.
        raise ValueError("cannot compute the center of an empty coordinate set")
    return points.sum(axis=0) / float(points.shape[0])
def calculate_rotation_rmsd(coords1, coords2, COM1, COM2):
    """
    Calculate rotation matrix and RMSD using Kabsch algorithm.

    Both coordinate sets are centered on their given centers, then the
    proper rotation (determinant +1) that best superimposes structure 2 onto
    structure 1 is found. The rotation is meant to be applied as
    np.dot(coords2 - COM2, U).

    Args:
        coords1: Coordinates of structure 1 (N, 3)
        coords2: Coordinates of structure 2 (N, 3)
        COM1: Center of mass of structure 1 (3,)
        COM2: Center of mass of structure 2 (3,)

    Returns:
        U: Rotation matrix (3, 3)
        RMSD: Root mean square deviation (float)
        Both are None when the two structures differ in number of points.

    Raises:
        ValueError: If the structures contain no points.
    """
    sel1 = np.asarray(coords1, dtype=float) - COM1
    sel2 = np.asarray(coords2, dtype=float) - COM2

    # Check for consistency
    if len(sel1) != len(sel2):
        return None, None
    n_atoms = len(sel1)
    if n_atoms == 0:
        raise ValueError("cannot superimpose empty coordinate sets")

    # Initial residual, see Kabsch.
    R0 = np.sum(sel1 * sel1) + np.sum(sel2 * sel2)

    # Calculate the components of the rotation matrix (V, W)
    # S is used to calculate the error (RMSD)
    V, S, W = np.linalg.svd(np.dot(sel2.T, sel1))

    # V and W are orthogonal, so the product of their determinants is +1 or
    # -1 only up to round-off. Test the sign rather than equality with -1.0,
    # otherwise a reflection slips through uncorrected.
    if np.linalg.det(V) * np.linalg.det(W) < 0.0:
        S[-1] = -S[-1]
        V[:, -1] = -V[:, -1]

    U = np.dot(V, W)

    # Calculate the RMSD using sigma from the SVD calculation above.
    # abs() guards against a tiny negative value caused by round-off.
    RMSD = np.sqrt(abs((R0 - 2.0 * S.sum()) / n_atoms))
    return U, RMSD
def translate_rotate_coords(coords, COM, U=None):
    """
    Shift coordinates by a center and, if requested, rotate them.

    Args:
        coords: Coordinates to transform (N, 3)
        COM: Center of mass subtracted from every point (3,)
        U: Rotation matrix (3, 3), optional; the shifted points are
            right-multiplied by it

    Returns:
        Transformed coordinates (N, 3)
    """
    centered = coords - COM
    # Without a rotation matrix the result is the translation alone.
    return centered if U is None else np.dot(centered, U)
def get_all_atom_coords(fname):
    """
    Get all atom coordinates from a PDB file.

    Every ATOM/HETATM record contributes one row, in file order.

    Args:
        fname: Path to PDB file

    Returns:
        Numpy array of coordinates (N, 3). A file without atom records gives
        an array of shape (0, 3).

    Raises:
        ValueError: If a coordinate field of an atom record is not numeric.
    """
    coords = []
    with open(fname) as f:
        for line in f:
            if line[0:6].strip() in ('ATOM', 'HETATM', 'HETAT'):
                coords.append([
                    float(line[30:38].strip()),
                    float(line[38:46].strip()),
                    float(line[46:54].strip()),
                ])
    # reshape keeps the (N, 3) layout even when no atom record was found,
    # which np.asarray alone would turn into shape (0,).
    return np.asarray(coords, dtype=float).reshape(-1, 3)
def apply_transformation_to_pdb(fname, U, COM, output_fname):
    """
    Write a copy of a PDB file with its atom coordinates transformed.

    For every ATOM/HETATM record, COM is subtracted from the coordinates and
    the result is right-multiplied by U. The new values replace columns
    31-54 of the record; all other lines are copied unchanged.

    Args:
        fname: Input PDB file path
        U: Rotation matrix (3, 3)
        COM: Center of mass to translate from (3,)
        output_fname: Output PDB file path
    """
    atom_records = ('ATOM', 'HETATM', 'HETAT')

    # The input is read completely before the output is opened.
    with open(fname) as source:
        pdb_lines = source.readlines()

    with open(output_fname, 'w') as target:
        for pdb_line in pdb_lines:
            if pdb_line[0:6].strip() not in atom_records:
                target.write(pdb_line)
                continue

            # Extract coordinates
            old_x = float(pdb_line[30:38].strip())
            old_y = float(pdb_line[38:46].strip())
            old_z = float(pdb_line[46:54].strip())

            # Transform
            moved = np.dot(np.array([old_x, old_y, old_z]) - COM, U)
            new_x, new_y, new_z = moved[0], moved[1], moved[2]

            # Splice the three 8-character fields back between the
            # untouched head and tail of the record.
            head, tail = pdb_line[:30], pdb_line[54:]
            target.write(head + f"{new_x:8.3f}" + f"{new_y:8.3f}" + f"{new_z:8.3f}" + tail)