File size: 7,772 Bytes
32c275c
91a5709
5e03e0c
e639e39
91a5709
e639e39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91a5709
af2fbfa
 
13d4986
32c275c
e639e39
af2fbfa
32c275c
af2fbfa
32c275c
e639e39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af2fbfa
e639e39
 
 
 
 
 
 
 
 
 
 
 
91a5709
32c275c
e639e39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91a5709
 
 
 
 
 
32c275c
e639e39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50f7884
e639e39
50f7884
e639e39
50f7884
 
 
e639e39
50f7884
e639e39
50f7884
 
 
 
e639e39
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import os
from Bio.PDB import PDBParser, Superimposer, PDBIO

def get_core_rmsd(reference_pdb, design_pdb, plddt_threshold=70.0):
    """
    Calculate the CA RMSD between two structures over high-confidence residues.

    Residues of the reference and the design are paired positionally (in file
    order, not by residue number). A pair is kept when both residues have a CA
    atom and the design CA's B-factor, read as pLDDT, is at or above the
    threshold. If no pair qualifies, all CA atoms of both structures are used,
    truncated to the shorter of the two lists.

    The B-factor scale is detected automatically: if the highest CA B-factor
    in the design is <= 1.0 the values are treated as normalized (0-1) and the
    threshold is divided by 100; otherwise they are treated as raw pLDDT
    (0-100).

    Args:
        reference_pdb: Path (or handle) of the reference PDB file.
        design_pdb: Path (or handle) of the design PDB file; its B-factor
            column is read as pLDDT.
        plddt_threshold: Minimum pLDDT, on the 0-100 scale, for a residue to
            be included.

    Returns:
        tuple: (rmsd, n_atoms) - the RMSD after optimal superposition and the
        number of CA pairs it was computed over.

    Raises:
        ValueError: If no CA atom pairs are available for superposition.
    """
    parser = PDBParser(QUIET=True)
    ref_struct = parser.get_structure("ref", reference_pdb)
    des_struct = parser.get_structure("des", design_pdb)

    # Decide the pLDDT scale from the maximum over ALL design CA atoms rather
    # than a single sample: one residue is not representative, and a
    # normalized confidence of exactly 1.0 must still count as normalized.
    design_plddts = [
        res['CA'].get_bfactor()
        for res in des_struct.get_residues()
        if 'CA' in res
    ]
    is_normalized = bool(design_plddts) and max(design_plddts) <= 1.0

    # Normalized: 70 pLDDT = 0.70; raw pLDDT: use the threshold as-is.
    actual_threshold = plddt_threshold / 100.0 if is_normalized else plddt_threshold

    # ESMFold/AlphaFold store pLDDT in the B-factor column.
    # Only alpha carbons (CA) are used, for a standard backbone alignment.
    ref_atoms = []
    des_atoms = []
    for ref_res, des_res in zip(ref_struct.get_residues(), des_struct.get_residues()):
        if 'CA' in des_res and 'CA' in ref_res:
            if des_res['CA'].get_bfactor() >= actual_threshold:
                ref_atoms.append(ref_res['CA'])
                des_atoms.append(des_res['CA'])

    if not ref_atoms:
        # Fallback to all residues if no high-confidence ones were found.
        ref_atoms = [a for a in ref_struct.get_atoms() if a.get_name() == 'CA']
        des_atoms = [a for a in des_struct.get_atoms() if a.get_name() == 'CA']
        min_len = min(len(ref_atoms), len(des_atoms))
        ref_atoms = ref_atoms[:min_len]
        des_atoms = des_atoms[:min_len]

    if not ref_atoms:
        # Fail with a clear message instead of an opaque numerical error
        # from inside the superposition routine.
        raise ValueError(
            "No CA atoms available for superposition; check that both "
            "structures contain protein residues."
        )

    # set_atoms() computes the optimal superposition and its RMSD. The
    # transform is not applied: both structures are local to this function,
    # so moving the coordinates would have no observable effect.
    super_imposer = Superimposer()
    super_imposer.set_atoms(ref_atoms, des_atoms)

    return super_imposer.rms, len(ref_atoms)

def polish_design(target_pdb_id, uploaded_file_path, plddt_threshold=70.0):
    """
    Superimpose an uploaded design onto a target structure and save the result.

    The target is read from ``data/<target_pdb_id, lower-cased>.pdb``. Target
    and design residues are paired positionally (file order, not residue
    number). The design is fitted onto the target using the CA atoms of pairs
    whose design pLDDT (B-factor column) is at or above ``plddt_threshold``;
    if none qualify, all CA atoms are used, truncated to the shorter list.
    The transformed design is written to ``Refined_Shuttle.pdb`` in the
    current working directory.

    The B-factor scale is detected automatically: if the highest CA B-factor
    in the design is <= 1.0 the values are treated as normalized (0-1) and
    thresholds are divided by 100; otherwise they are treated as raw pLDDT
    (0-100).

    Args:
        target_pdb_id: Identifier of the target; selects the file under
            ``data/``.
        uploaded_file_path: Path of the design PDB file to align.
        plddt_threshold: Minimum pLDDT, on the 0-100 scale, for a residue to
            take part in the alignment.

    Returns:
        tuple: (output_name, global_rmsd, core_rmsd, high_conf_rmsd) where
        ``output_name`` is the saved file name, ``global_rmsd`` is the
        best-fit RMSD over all CA pairs, ``core_rmsd`` is the RMSD over the
        atoms used for the alignment, and ``high_conf_rmsd`` is the best-fit
        RMSD over pairs with pLDDT >= 80 (equal to ``core_rmsd`` when there
        are no such pairs).

    Raises:
        ValueError: If no CA atom pairs are available for superposition.
    """
    # 1. Setup paths
    target_path = os.path.join("data", f"{target_pdb_id.lower()}.pdb")
    output_name = "Refined_Shuttle.pdb"

    # 2. ALIGNMENT using core-scaffold RMSD (high-confidence residues only)
    parser = PDBParser(QUIET=True)
    target_struct = parser.get_structure("target", target_path)
    design_struct = parser.get_structure("design", uploaded_file_path)

    # Decide the pLDDT scale from the maximum over ALL design CA atoms rather
    # than a single sample: one residue is not representative, and a
    # normalized confidence of exactly 1.0 must still count as normalized.
    design_plddts = [
        res['CA'].get_bfactor()
        for res in design_struct.get_residues()
        if 'CA' in res
    ]
    is_normalized = bool(design_plddts) and max(design_plddts) <= 1.0
    actual_threshold = (plddt_threshold / 100.0) if is_normalized else plddt_threshold
    high_conf_threshold = (80.0 / 100.0) if is_normalized else 80.0

    # Collect CA pairs for the alignment (plddt_threshold) and, separately,
    # the stricter high-confidence subset (pLDDT >= 80) for the report.
    ref_atoms = []
    des_atoms = []
    ref_atoms_high_conf = []
    des_atoms_high_conf = []
    for ref_res, des_res in zip(target_struct.get_residues(), design_struct.get_residues()):
        if 'CA' in des_res and 'CA' in ref_res:
            plddt = des_res['CA'].get_bfactor()
            if plddt >= actual_threshold:
                ref_atoms.append(ref_res['CA'])
                des_atoms.append(des_res['CA'])
            if plddt >= high_conf_threshold:
                ref_atoms_high_conf.append(ref_res['CA'])
                des_atoms_high_conf.append(des_res['CA'])

    # All CA atoms of each structure, truncated to a common length. Used for
    # the global RMSD and as the fallback alignment set.
    all_ref_atoms = [a for a in target_struct.get_atoms() if a.get_name() == 'CA']
    all_des_atoms = [a for a in design_struct.get_atoms() if a.get_name() == 'CA']
    min_len = min(len(all_ref_atoms), len(all_des_atoms))
    all_ref_atoms = all_ref_atoms[:min_len]
    all_des_atoms = all_des_atoms[:min_len]

    # Fallback to all CA atoms if no high-confidence ones were found
    if not ref_atoms:
        print(f"⚠️ No residues with pLDDT >= {plddt_threshold}. Using all residues.")
        ref_atoms = list(all_ref_atoms)
        des_atoms = list(all_des_atoms)

    if not ref_atoms:
        # Fail with a clear message instead of an opaque numerical error
        # from inside the superposition routine.
        raise ValueError(
            "No CA atoms available for superposition; check that both "
            "structures contain protein residues."
        )

    # Fit on the main-threshold atoms and move the whole design into the
    # target's coordinate frame.
    sup = Superimposer()
    sup.set_atoms(ref_atoms, des_atoms)
    sup.apply(design_struct.get_atoms())

    core_rmsd = sup.rms
    num_residues = len(ref_atoms)
    print(f"🎯 Core-Scaffold RMSD (pLDDT > {plddt_threshold}): {core_rmsd:.3f} Å ({num_residues} residues)")

    # Global RMSD over all CA pairs. set_atoms() computes its own optimal
    # fit, so this is the best-fit RMSD for the full atom set, not the
    # deviation under the core alignment applied above. Nothing is moved.
    sup_global = Superimposer()
    sup_global.set_atoms(all_ref_atoms, all_des_atoms)
    global_rmsd = sup_global.rms

    # High-confidence core RMSD (pLDDT >= 80), again as its own best fit.
    if ref_atoms_high_conf:
        sup_high_conf = Superimposer()
        sup_high_conf.set_atoms(ref_atoms_high_conf, des_atoms_high_conf)
        high_conf_rmsd = sup_high_conf.rms
    else:
        # If no high-confidence atoms, use core_rmsd as fallback
        high_conf_rmsd = core_rmsd

    # 3. EXPORT
    # Save the design in the target structure's coordinate frame.
    io = PDBIO()
    io.set_structure(design_struct)
    io.save(output_name)

    return output_name, global_rmsd, core_rmsd, high_conf_rmsd

def process_results(target_pdb_id, result_pdb, global_rmsd, core_rmsd):
    """
    Build the plain-text structural validation report.

    The design status is chosen from the core RMSD: below 1.0 is a success,
    below 2.0 is a minor deviation, and anything else is flagged as possible
    fold drift.

    Args:
        target_pdb_id: Target PDB ID; shown upper-cased in the report.
        result_pdb: Path to the aligned result PDB (not used in the report).
        global_rmsd: Global RMSD (all residues), in Å.
        core_rmsd: High-confidence core RMSD (pLDDT > 80), in Å.

    Returns:
        str: Formatted validation report, ending with a newline.
    """
    # Tiers in ascending order of their exclusive upper bound; the first
    # bound the core RMSD falls under decides the verdict.
    verdict_tiers = (
        (1.0, "✅", "Success - High-Precision Core Match"),
        (2.0, "⚠️", "Good - Minor Core Deviation"),
    )

    # Default verdict when no tier matches.
    badge, verdict = "❌", "Possible Fold Drift - Review Required"
    for upper_bound, tier_badge, tier_verdict in verdict_tiers:
        if core_rmsd < upper_bound:
            badge, verdict = tier_badge, tier_verdict
            break

    report_lines = [
        "🔬 Structural Validation Report",
        "",
        f"Target: {target_pdb_id.upper()}",
        "",
        "RMSD Metrics:",
        f"  • Global RMSD: {global_rmsd:.2f} Å (all residues)",
        f"  • High-Confidence Core RMSD (pLDDT > 80): {core_rmsd:.2f} Å",
        "",
        f"Design Status: {badge} {verdict}",
        "",
        "Interpretation:",
        "  • Core RMSD < 1.0 Å: Excellent scaffold preservation",
        "  • Core RMSD 1.0-2.0 Å: Good structural match",
        "  • Core RMSD > 2.0 Å: Possible fold drift, review structure",
    ]
    return "\n".join(report_lines) + "\n"