Spaces:

HouBioLab
/

MotifAlign

Sleeping

File size: 8,369 Bytes

a6c9f2a

"""
RMSD Calculation Utilities for RNA Structure Comparison
Fixed version with explicit purine-pyrimidine atom mapping
"""

import numpy as np


def parse_residue_atoms(fname):
    """
    Read a PDB file and group its atom coordinates per residue.

    Only coordinate records (ATOM / HETATM) are considered. Residues are
    keyed by residue number alone: the residue name stored is the one from
    the first record seen for that number, and a repeated atom name within
    the same residue number overwrites the earlier coordinates.

    Args:
        fname: Path to PDB file

    Returns:
        List of residue dicts in ascending residue-number order, each with:
        - 'resnum': residue number (int)
        - 'resname': residue name as read from the record
        - 'atoms': dict of {atom_name: [x, y, z]}
    """
    coordinate_records = ('ATOM', 'HETATM', 'HETAT')
    by_number = {}

    with open(fname) as handle:
        for entry in handle:
            # Skip everything that is not a coordinate record
            if entry[0:6].strip() not in coordinate_records:
                continue

            # Fixed-width PDB fields
            atom_label = entry[12:16].strip()
            residue_label = entry[17:20].strip()
            number = int(entry[22:26].strip())
            position = [
                float(entry[start:start + 8].strip())
                for start in (30, 38, 46)
            ]

            # Create the residue entry on first sight, then record the atom
            slot = by_number.setdefault(
                number,
                {'resnum': number, 'resname': residue_label, 'atoms': {}},
            )
            slot['atoms'][atom_label] = position

    return [by_number[number] for number in sorted(by_number)]


def get_backbone_sugar_coords_from_residue(residue):
    """
    Collect the backbone and ribose atom coordinates of one residue.

    Atoms are returned in a fixed order (phosphate group first, then the
    sugar atoms). Atoms that are absent from the residue are skipped, so the
    result may hold fewer than twelve entries.

    Args:
        residue: Dict with 'atoms' key containing atom coordinates

    Returns:
        List of [x, y, z] coordinates in consistent order
    """
    # Fixed extraction order for the phosphate and sugar atoms
    ordered_names = (
        "P", "OP1", "OP2",
        "O5'", "C5'", "C4'", "O4'",
        "C3'", "O3'", "C2'", "O2'", "C1'",
    )
    available = residue['atoms']

    return [available[name] for name in ordered_names if name in available]


def get_base_coords_from_residue(residue):
    """
    Extract the three key base atom coordinates from a residue.

    Returns list of [x, y, z] coordinates in the correct order:
    - For purines (A, G): N9, C4, C8
    - For pyrimidines (C, U): N1, C2, C6

    The order places structurally equivalent atoms at the same index:
    N9 <-> N1, C4 <-> C2, C8 <-> C6

    This follows the standard definition of the glycosidic torsion chi
    (O4'-C1'-N9-C4 for purines, O4'-C1'-N1-C2 for pyrimidines): purine C4
    sits where pyrimidine C2 does, and purine C8 where pyrimidine C6 does.

    Atoms missing from the residue are skipped, so fewer than three
    coordinates may be returned.

    Args:
        residue: Dict with 'resname' and 'atoms' keys

    Returns:
        List of [x, y, z] coordinates (empty for unknown residue names)
    """
    resname = residue['resname']
    atoms = residue['atoms']

    if resname in ('A', 'G'):  # Purines
        # C4 before C8 so that index 1 lines up with pyrimidine C2
        base_atoms = ('N9', 'C4', 'C8')
    elif resname in ('C', 'U'):  # Pyrimidines
        base_atoms = ('N1', 'C2', 'C6')
    else:
        # Unknown residue type
        return []

    return [atoms[atom_name] for atom_name in base_atoms if atom_name in atoms]


def get_backbone_sugar_and_selectbase_coords_fixed(fname):
    """
    Build the coordinate array used for structure comparison.

    Residues are read with parse_residue_atoms and processed in ascending
    residue-number order. Each residue contributes, in this order:
    1. Its backbone and sugar atoms
       (see get_backbone_sugar_coords_from_residue)
    2. Its three selected base atoms
       (see get_base_coords_from_residue; N9/C8/C4 are taken from purines
       and N1/C2/C6 from pyrimidines)

    Using the same per-residue layout for every file allows structures with
    different sequences to be compared atom by atom.

    Args:
        fname: Path to PDB file

    Returns:
        Numpy array of coordinates
    """
    collected = []

    for entry in parse_residue_atoms(fname):
        # Backbone/sugar atoms first, then the base atoms of the same residue
        collected += get_backbone_sugar_coords_from_residue(entry)
        collected += get_base_coords_from_residue(entry)

    return np.asarray(collected)


def calculate_COM(coords):
    """
    Compute the geometric centre of a set of points.

    All points are weighted equally (no atomic masses are involved).

    Args:
        coords: Numpy array of shape (N, 3)

    Returns:
        Numpy array of shape (3,) representing the center of mass
    """
    n_points = float(coords.shape[0])
    return np.sum(coords, axis=0) / n_points


def calculate_rotation_rmsd(coords1, coords2, COM1, COM2):
    """
    Calculate rotation matrix and RMSD using Kabsch algorithm.

    Both coordinate sets are centred on the supplied centres, then the
    proper rotation (determinant +1) that best superimposes structure 2
    onto structure 1 is obtained from an SVD of their covariance matrix.

    Args:
        coords1: Coordinates of structure 1 (N, 3)
        coords2: Coordinates of structure 2 (N, 3)
        COM1: Center of mass of structure 1 (3,)
        COM2: Center of mass of structure 2 (3,)

    Returns:
        U: Rotation matrix (3, 3); apply it as np.dot(coords2 - COM2, U)
        RMSD: Root mean square deviation (float)

        (None, None) is returned when the two sets differ in length.
    """
    sel1 = coords1 - COM1
    sel2 = coords2 - COM2

    # Check for consistency
    if len(sel1) != len(sel2):
        return None, None

    L = len(sel1)
    assert L > 0, "cannot superimpose empty coordinate sets"

    # Initial residual, see Kabsch.
    R0 = np.sum(sel1 * sel1) + np.sum(sel2 * sel2)

    # Calculate the components of the rotation matrix (V, W)
    # S is used to calculate the error (RMSD)
    V, S, W = np.linalg.svd(np.dot(sel2.T, sel1))

    # det(V) * det(W) is +1 or -1 only up to floating-point rounding, so the
    # sign is tested rather than comparing against exactly -1.0. A negative
    # product means V.W would be a reflection; flipping the axis of the
    # smallest singular value yields the best proper rotation instead.
    if np.linalg.det(V) * np.linalg.det(W) < 0.0:
        S[-1] = -S[-1]
        V[:, -1] = -V[:, -1]

    U = np.dot(V, W)

    # Calculate the RMSD using sigma from the SVD calculation above.
    # abs() guards against a tiny negative value caused by cancellation.
    RMSD = np.sqrt(abs((R0 - 2.0 * np.sum(S)) / L))

    return U, RMSD


def translate_rotate_coords(coords, COM, U=None):
    """
    Shift coordinates by a centre and, if requested, rotate them.

    The centre is subtracted first; when a rotation matrix is given, the
    shifted coordinates are then right-multiplied by it.

    Args:
        coords: Coordinates to transform (N, 3)
        COM: Center of mass to translate by (3,)
        U: Rotation matrix (3, 3), optional

    Returns:
        Transformed coordinates (N, 3)
    """
    shifted = coords - COM

    if U is not None:
        # Translate and rotate
        return np.dot(shifted, U)

    # Translate only
    return shifted


def get_all_atom_coords(fname):
    """
    Read the coordinates of every atom record in a PDB file.

    Coordinates are returned in file order, one row per ATOM/HETATM record.

    Args:
        fname: Path to PDB file

    Returns:
        Numpy array of coordinates (N, 3)
    """
    coordinate_records = ('ATOM', 'HETATM', 'HETAT')
    xyz_columns = ((30, 38), (38, 46), (46, 54))
    positions = []

    with open(fname) as handle:
        for entry in handle:
            if entry[0:6].strip() in coordinate_records:
                # Fixed-width x, y, z fields
                positions.append(
                    [float(entry[lo:hi].strip()) for lo, hi in xyz_columns]
                )

    return np.asarray(positions)


def apply_transformation_to_pdb(fname, U, COM, output_fname):
    """
    Write a copy of a PDB file with its atom coordinates transformed.

    For every ATOM/HETATM record the centre COM is subtracted from the
    coordinates and the result is right-multiplied by U. The new values are
    written back into the coordinate columns with three decimals; all other
    columns and all other lines are copied unchanged. The input is read in
    full before the output file is opened.

    Args:
        fname: Input PDB file path
        U: Rotation matrix (3, 3)
        COM: Center of mass to translate from (3,)
        output_fname: Output PDB file path
    """
    coordinate_records = ('ATOM', 'HETATM', 'HETAT')

    with open(fname) as source:
        pdb_lines = source.readlines()

    with open(output_fname, 'w') as target:
        for entry in pdb_lines:
            # Pass non-coordinate lines through untouched
            if entry[0:6].strip() not in coordinate_records:
                target.write(entry)
                continue

            # Extract coordinates from the fixed-width fields
            original = np.array([
                float(entry[30:38].strip()),
                float(entry[38:46].strip()),
                float(entry[46:54].strip()),
            ])

            # Transform
            moved = np.dot((original - COM), U)

            # Splice the new coordinates between the untouched column ranges
            formatted = "{:8.3f}{:8.3f}{:8.3f}".format(moved[0], moved[1], moved[2])
            target.write(entry[:30] + formatted + entry[54:])