Spaces:

42Cummer
/

BroteinShake

Sleeping

File size: 7,772 Bytes

import os
from Bio.PDB import PDBParser, Superimposer, PDBIO

def get_core_rmsd(reference_pdb, design_pdb, plddt_threshold=70.0):
    """
    Calculate the C-alpha RMSD between two structures over high-confidence residues.

    Residues of the reference and the design are paired by iteration order
    (not by residue number), and a pair is kept when both residues have a CA
    atom and the design CA's B-factor is at or above the threshold. The
    B-factor column of the design is treated as its pLDDT score.

    Handles both normalized (0-1) and raw (0-100) pLDDT values: if no design
    CA B-factor exceeds 1.0 the column is treated as normalized and the
    threshold is divided by 100.

    Args:
        reference_pdb: Path (or handle) of the reference PDB file.
        design_pdb: Path (or handle) of the design PDB file whose B-factors
            carry the confidence values.
        plddt_threshold: Minimum pLDDT, expressed on the 0-100 scale.

    Returns:
        tuple: (rmsd, number_of_CA_pairs_used).

    Raises:
        ValueError: If the two structures share no CA atoms to superimpose.
    """
    parser = PDBParser(QUIET=True)
    ref_struct = parser.get_structure("ref", reference_pdb)
    des_struct = parser.get_structure("des", design_pdb)

    # Decide the scale from the maximum over ALL design CA atoms. Sampling a
    # single residue misclassifies a normalized file whose first residue is
    # exactly 1.0 (it would be read as raw and nothing would pass the cut).
    design_bfactors = [
        res['CA'].get_bfactor() for res in des_struct.get_residues() if 'CA' in res
    ]
    is_normalized = bool(design_bfactors) and max(design_bfactors) <= 1.0
    actual_threshold = plddt_threshold / 100.0 if is_normalized else plddt_threshold

    # Pair residues positionally and keep the CA atoms that pass the cut.
    ref_atoms = []
    des_atoms = []
    for ref_res, des_res in zip(ref_struct.get_residues(), des_struct.get_residues()):
        if 'CA' not in des_res or 'CA' not in ref_res:
            continue
        if des_res['CA'].get_bfactor() >= actual_threshold:
            ref_atoms.append(ref_res['CA'])
            des_atoms.append(des_res['CA'])

    if not ref_atoms:
        # Fallback to all CA atoms if no high-confidence ones were found,
        # truncated to the shorter structure so the lists stay the same length.
        ref_atoms = [a for a in ref_struct.get_atoms() if a.get_name() == 'CA']
        des_atoms = [a for a in des_struct.get_atoms() if a.get_name() == 'CA']
        min_len = min(len(ref_atoms), len(des_atoms))
        ref_atoms = ref_atoms[:min_len]
        des_atoms = des_atoms[:min_len]

    if not ref_atoms:
        # Superimposing empty lists would yield a NaN RMSD; fail loudly instead.
        raise ValueError("No CA atoms available to superimpose.")

    # set_atoms() computes the optimal superposition and its RMSD. The design
    # structure is local to this function, so its coordinates are not moved.
    super_imposer = Superimposer()
    super_imposer.set_atoms(ref_atoms, des_atoms)

    return super_imposer.rms, len(ref_atoms)

def polish_design(target_pdb_id, uploaded_file_path, plddt_threshold=70.0,
                  output_name="Refined_Shuttle.pdb"):
    """
    Align a design onto a target structure and report tiered C-alpha RMSDs.

    The target is read from ``data/<target_pdb_id lowercased>.pdb``. Residues
    of target and design are paired by iteration order (not residue number).
    The design is superimposed onto the target using the CA atoms whose
    design B-factor (treated as pLDDT) is at or above ``plddt_threshold``,
    the transform is applied to every design atom, and the moved design is
    written to ``output_name``.

    Handles both normalized (0-1) and raw (0-100) pLDDT values: if no design
    CA B-factor exceeds 1.0 the column is treated as normalized and both
    thresholds are divided by 100.

    Args:
        target_pdb_id: Target identifier used to build the target file path.
        uploaded_file_path: Path of the design PDB file.
        plddt_threshold: Minimum pLDDT (0-100 scale) for the alignment atoms.
        output_name: Path the aligned design is saved to.

    Returns:
        tuple: (output_name, global_rmsd, core_rmsd, high_conf_rmsd) where
        ``global_rmsd`` is the best-fit RMSD over all paired CA atoms,
        ``core_rmsd`` is the RMSD of the alignment atoms, and
        ``high_conf_rmsd`` is the best-fit RMSD over CA atoms with
        pLDDT >= 80 (falls back to ``core_rmsd`` when there are none).

    Raises:
        ValueError: If target and design share no CA atoms to superimpose.
    """
    # 1. Setup paths
    target_path = os.path.join("data", f"{target_pdb_id.lower()}.pdb")

    # 2. ALIGNMENT using core-scaffold RMSD (high-confidence residues only)
    parser = PDBParser(QUIET=True)
    target_struct = parser.get_structure("target", target_path)
    design_struct = parser.get_structure("design", uploaded_file_path)

    # Decide the scale from the maximum over ALL design CA atoms. Sampling a
    # single residue misclassifies a normalized file whose first residue is
    # exactly 1.0 (it would be read as raw and nothing would pass the cut).
    design_bfactors = [
        res['CA'].get_bfactor() for res in design_struct.get_residues() if 'CA' in res
    ]
    is_normalized = bool(design_bfactors) and max(design_bfactors) <= 1.0
    actual_threshold = (plddt_threshold / 100.0) if is_normalized else plddt_threshold
    high_conf_threshold = (80.0 / 100.0) if is_normalized else 80.0

    # Collect atoms for alignment (using plddt_threshold)
    # Also collect high-confidence atoms (pLDDT >= 80) for the detailed report
    ref_atoms = []
    des_atoms = []
    ref_atoms_high_conf = []
    des_atoms_high_conf = []
    for ref_res, des_res in zip(target_struct.get_residues(), design_struct.get_residues()):
        if 'CA' not in des_res or 'CA' not in ref_res:
            continue
        plddt = des_res['CA'].get_bfactor()
        if plddt >= actual_threshold:
            ref_atoms.append(ref_res['CA'])
            des_atoms.append(des_res['CA'])
        if plddt >= high_conf_threshold:
            ref_atoms_high_conf.append(ref_res['CA'])
            des_atoms_high_conf.append(des_res['CA'])

    # Fallback to all CA atoms if no high-confidence ones found
    if not ref_atoms:
        print(f"⚠️ No residues with pLDDT >= {plddt_threshold}. Using all residues.")
        ref_atoms = [a for a in target_struct.get_atoms() if a.get_name() == 'CA']
        des_atoms = [a for a in design_struct.get_atoms() if a.get_name() == 'CA']
        min_len = min(len(ref_atoms), len(des_atoms))
        ref_atoms = ref_atoms[:min_len]
        des_atoms = des_atoms[:min_len]

    if not ref_atoms:
        # Superimposing empty lists yields a NaN transform, which would be
        # applied to the design and saved as NaN coordinates; fail loudly.
        raise ValueError("No CA atoms available to superimpose.")

    # Perform alignment using the main threshold atoms and move the whole
    # design into the target's coordinate frame.
    sup = Superimposer()
    sup.set_atoms(ref_atoms, des_atoms)
    sup.apply(design_struct.get_atoms())

    core_rmsd = sup.rms
    num_residues = len(ref_atoms)
    print(f"🎯 Core-Scaffold RMSD (pLDDT > {plddt_threshold}): {core_rmsd:.3f} Å ({num_residues} residues)")

    # Global RMSD over all CA atoms, truncated to the shorter structure.
    # NOTE: set_atoms() fits its own optimal superposition for the atoms it
    # is given, so this is the best-fit global RMSD, not the deviation
    # measured under the core alignment applied above.
    all_ref_atoms = [a for a in target_struct.get_atoms() if a.get_name() == 'CA']
    all_des_atoms = [a for a in design_struct.get_atoms() if a.get_name() == 'CA']
    min_len = min(len(all_ref_atoms), len(all_des_atoms))
    sup_global = Superimposer()
    sup_global.set_atoms(all_ref_atoms[:min_len], all_des_atoms[:min_len])
    global_rmsd = sup_global.rms

    # High-confidence core RMSD (pLDDT >= 80), again as an independent best fit.
    if ref_atoms_high_conf:
        sup_high_conf = Superimposer()
        sup_high_conf.set_atoms(ref_atoms_high_conf, des_atoms_high_conf)
        high_conf_rmsd = sup_high_conf.rms
    else:
        # If no high-confidence atoms, use core_rmsd as fallback
        high_conf_rmsd = core_rmsd

    # 3. EXPORT
    # Saves the design with the core-alignment transform applied, i.e. in the
    # target structure's coordinate frame.
    io = PDBIO()
    io.set_structure(design_struct)
    io.save(output_name)

    return output_name, global_rmsd, core_rmsd, high_conf_rmsd

def process_results(target_pdb_id, result_pdb, global_rmsd, core_rmsd):
    """
    Build the plain-text structural validation report for an aligned design.

    The design status is chosen from the core RMSD alone: below 1.0 is a
    success, below 2.0 is a minor deviation, anything else is flagged as
    possible fold drift.

    Args:
        target_pdb_id: Target PDB ID (upper-cased in the report).
        result_pdb: Path to the aligned result PDB (not read by this function).
        global_rmsd: Global RMSD (all residues), printed with two decimals.
        core_rmsd: High-confidence core RMSD (pLDDT > 80), printed with two
            decimals and used to pick the status.

    Returns:
        str: Formatted validation report, ending with a newline.
    """
    # Ordered (exclusive upper bound, emoji, label) tiers; the first tier
    # whose bound exceeds the core RMSD wins.
    tiers = (
        (1.0, "✅", "Success - High-Precision Core Match"),
        (2.0, "⚠️", "Good - Minor Core Deviation"),
    )
    status_emoji, status = next(
        ((emoji, label) for bound, emoji, label in tiers if core_rmsd < bound),
        ("❌", "Possible Fold Drift - Review Required"),
    )

    report_lines = [
        "🔬 Structural Validation Report",
        "",
        f"Target: {target_pdb_id.upper()}",
        "",
        "RMSD Metrics:",
        f"  • Global RMSD: {global_rmsd:.2f} Å (all residues)",
        f"  • High-Confidence Core RMSD (pLDDT > 80): {core_rmsd:.2f} Å",
        "",
        f"Design Status: {status_emoji} {status}",
        "",
        "Interpretation:",
        "  • Core RMSD < 1.0 Å: Excellent scaffold preservation",
        "  • Core RMSD 1.0-2.0 Å: Good structural match",
        "  • Core RMSD > 2.0 Å: Possible fold drift, review structure",
        "",  # trailing empty entry yields the final newline
    ]
    return "\n".join(report_lines)