Spaces:
Running
Running
Upload 5 files
Browse files- scripts/generator.py +75 -50
- scripts/refine.py +176 -13
scripts/generator.py
CHANGED
|
@@ -11,21 +11,14 @@ def run_broteinshake_generator(pdb_path, fixed_chains, variable_chains, num_seqs
|
|
| 11 |
|
| 12 |
Args:
|
| 13 |
pdb_path: Path to the target complex (e.g., 'data/3KAS.pdb').
|
| 14 |
-
fixed_chains: Chains to remain unchanged (e.g., 'A').
|
| 15 |
-
variable_chains: Chains to be redesigned/repainted (e.g., 'B').
|
| 16 |
"""
|
| 17 |
# 1. Setup project identifiers and directories
|
| 18 |
pdb_name = os.path.basename(pdb_path).split('.')[0]
|
| 19 |
output_dir = f"./generated/{pdb_name}"
|
| 20 |
os.makedirs(output_dir, exist_ok=True)
|
| 21 |
|
| 22 |
-
# 2. Parse the PDB into JSONL format for the model
|
| 23 |
-
# parse_multiple_chains.py expects a folder, not a file
|
| 24 |
-
pdb_dir = os.path.dirname(os.path.abspath(pdb_path))
|
| 25 |
-
if not pdb_dir:
|
| 26 |
-
pdb_dir = "."
|
| 27 |
-
jsonl_path = os.path.join(output_dir, "parsed_pdbs.jsonl")
|
| 28 |
-
|
| 29 |
# Get the project root directory (where ProteinMPNN should be)
|
| 30 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 31 |
project_root = os.path.dirname(script_dir)
|
|
@@ -42,56 +35,88 @@ def run_broteinshake_generator(pdb_path, fixed_chains, variable_chains, num_seqs
|
|
| 42 |
stderr=subprocess.DEVNULL
|
| 43 |
)
|
| 44 |
|
| 45 |
-
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
chain_id_json = os.path.join(output_dir, "chain_id_dict.json")
|
| 66 |
-
with open(chain_id_json, 'w') as f:
|
| 67 |
-
json.dump(chain_id_dict, f)
|
| 68 |
-
|
| 69 |
-
# fixed_positions_jsonl is for specific residue positions, not entire chains
|
| 70 |
-
# Since we're fixing entire chains via chain_id_dict, we don't need fixed_positions_jsonl
|
| 71 |
-
fixed_chain_json = os.path.join(output_dir, "fixed_chain_dict.json")
|
| 72 |
-
# Create empty file or omit the argument - let's just not pass it
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
f"--sampling_temp {temp} "
|
| 85 |
-
f"--seed 42"
|
| 86 |
-
)
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
# Suppress warnings by redirecting stderr
|
| 91 |
env = os.environ.copy()
|
| 92 |
env['PYTHONWARNINGS'] = 'ignore'
|
| 93 |
subprocess.run(mpnn_cmd, shell=True, check=True, env=env, stderr=subprocess.DEVNULL)
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
print(f"✅ Success! Fold the top sequences at https://esmatlas.com/resources?action=fold")
|
| 96 |
|
| 97 |
if __name__ == "__main__":
|
|
|
|
| 11 |
|
| 12 |
Args:
|
| 13 |
pdb_path: Path to the target complex (e.g., 'data/3KAS.pdb').
|
| 14 |
+
fixed_chains: Chains to remain unchanged (e.g., 'A'). Empty for single-chain proteins.
|
| 15 |
+
variable_chains: Chains to be redesigned/repainted (e.g., 'B'). For single-chain, this is the only chain.
|
| 16 |
"""
|
| 17 |
# 1. Setup project identifiers and directories
|
| 18 |
pdb_name = os.path.basename(pdb_path).split('.')[0]
|
| 19 |
output_dir = f"./generated/{pdb_name}"
|
| 20 |
os.makedirs(output_dir, exist_ok=True)
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# Get the project root directory (where ProteinMPNN should be)
|
| 23 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 24 |
project_root = os.path.dirname(script_dir)
|
|
|
|
| 35 |
stderr=subprocess.DEVNULL
|
| 36 |
)
|
| 37 |
|
| 38 |
+
mpnn_script = os.path.join(proteinmpnn_dir, "protein_mpnn_run.py")
|
| 39 |
|
| 40 |
+
# 2. Check if single-chain protein (no fixed chains means single-chain)
|
| 41 |
+
if not fixed_chains or len(fixed_chains) == 0:
|
| 42 |
+
# Single-chain protein: use direct PDB path command
|
| 43 |
+
# For single-chain, variable_chains should be the only chain (e.g., "A")
|
| 44 |
+
chain_to_design = variable_chains[0] if variable_chains else "A"
|
| 45 |
+
|
| 46 |
+
mpnn_cmd = (
|
| 47 |
+
f"python -W ignore {mpnn_script} "
|
| 48 |
+
f"--pdb_path {pdb_path} "
|
| 49 |
+
f"--pdb_path_chains {chain_to_design} "
|
| 50 |
+
f"--out_folder {output_dir} "
|
| 51 |
+
f"--num_seq_per_target {num_seqs} "
|
| 52 |
+
f"--sampling_temp {temp} "
|
| 53 |
+
f"--seed 42 "
|
| 54 |
+
f"--batch_size 1"
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
print(f"🚀 Designing sequences for {pdb_name} (single-chain mode)...")
|
| 58 |
+
print(f"✏️ Redesigning chain: {chain_to_design}")
|
| 59 |
+
else:
|
| 60 |
+
# Multi-chain protein: use JSONL-based command
|
| 61 |
+
# 2. Parse the PDB into JSONL format for the model
|
| 62 |
+
pdb_dir = os.path.dirname(os.path.abspath(pdb_path))
|
| 63 |
+
if not pdb_dir:
|
| 64 |
+
pdb_dir = "."
|
| 65 |
+
jsonl_path = os.path.join(output_dir, "parsed_pdbs.jsonl")
|
| 66 |
+
|
| 67 |
+
parse_script = os.path.join(proteinmpnn_dir, "helper_scripts", "parse_multiple_chains.py")
|
| 68 |
+
|
| 69 |
+
parse_cmd = f"python -W ignore {parse_script} --input_path={pdb_dir}/ --output_path={jsonl_path}"
|
| 70 |
+
subprocess.run(parse_cmd, shell=True, check=True, stderr=subprocess.DEVNULL)
|
| 71 |
|
| 72 |
+
# Update the name in parsed JSONL to include "_clones"
|
| 73 |
+
pdb_name_clones = f"{pdb_name}_clones"
|
| 74 |
+
with open(jsonl_path, 'r') as f:
|
| 75 |
+
jsonl_data = json.loads(f.readline())
|
| 76 |
+
jsonl_data['name'] = pdb_name_clones
|
| 77 |
+
with open(jsonl_path, 'w') as f:
|
| 78 |
+
f.write(json.dumps(jsonl_data) + '\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
+
# 3. Generate the Chain Configuration JSONs (The 'Engine' Logic)
|
| 81 |
+
# Format: {"name": [masked_chains_list, visible_chains_list]}
|
| 82 |
+
# masked_chains = chains to redesign, visible_chains = chains to keep fixed
|
| 83 |
+
masked_chains_list = [c for c in variable_chains]
|
| 84 |
+
visible_chains_list = [c for c in fixed_chains]
|
| 85 |
+
chain_id_dict = {pdb_name_clones: [masked_chains_list, visible_chains_list]}
|
| 86 |
+
|
| 87 |
+
chain_id_json = os.path.join(output_dir, "chain_id_dict.json")
|
| 88 |
+
with open(chain_id_json, 'w') as f:
|
| 89 |
+
json.dump(chain_id_dict, f)
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
+
# 4. Execute optimized ProteinMPNN command for multi-chain
|
| 92 |
+
mpnn_cmd = (
|
| 93 |
+
f"python -W ignore {mpnn_script} "
|
| 94 |
+
f"--jsonl_path {jsonl_path} "
|
| 95 |
+
f"--chain_id_jsonl {chain_id_json} "
|
| 96 |
+
f"--out_folder {output_dir} "
|
| 97 |
+
f"--num_seq_per_target {num_seqs} "
|
| 98 |
+
f"--sampling_temp {temp} "
|
| 99 |
+
f"--seed 42"
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
print(f"🚀 Designing sequences for {pdb_name}...")
|
| 103 |
+
print(f"🔒 Fixed: {fixed_chains} | ✏️ Redesigning: {variable_chains}")
|
| 104 |
+
|
| 105 |
# Suppress warnings by redirecting stderr
|
| 106 |
env = os.environ.copy()
|
| 107 |
env['PYTHONWARNINGS'] = 'ignore'
|
| 108 |
subprocess.run(mpnn_cmd, shell=True, check=True, env=env, stderr=subprocess.DEVNULL)
|
| 109 |
|
| 110 |
+
# For single-chain proteins, ProteinMPNN saves as {pdb_name}.fa
|
| 111 |
+
# Rename it to {pdb_name}_clones.fa for consistency
|
| 112 |
+
if not fixed_chains or len(fixed_chains) == 0:
|
| 113 |
+
seqs_dir = os.path.join(output_dir, "seqs")
|
| 114 |
+
old_file = os.path.join(seqs_dir, f"{pdb_name}.fa")
|
| 115 |
+
new_file = os.path.join(seqs_dir, f"{pdb_name}_clones.fa")
|
| 116 |
+
if os.path.exists(old_file) and not os.path.exists(new_file):
|
| 117 |
+
os.rename(old_file, new_file)
|
| 118 |
+
print(f"📝 Renamed {pdb_name}.fa → {pdb_name}_clones.fa")
|
| 119 |
+
|
| 120 |
print(f"✅ Success! Fold the top sequences at https://esmatlas.com/resources?action=fold")
|
| 121 |
|
| 122 |
if __name__ == "__main__":
|
scripts/refine.py
CHANGED
|
@@ -1,31 +1,148 @@
|
|
| 1 |
import os
|
| 2 |
from Bio.PDB import PDBParser, Superimposer, PDBIO
|
| 3 |
|
| 4 |
-
def
|
| 5 |
"""
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
"""
|
| 8 |
# 1. Setup paths
|
| 9 |
-
# target_path should point to your local data/3kas.pdb
|
| 10 |
target_path = os.path.join("data", f"{target_pdb_id.lower()}.pdb")
|
| 11 |
output_name = "Refined_Shuttle.pdb"
|
| 12 |
|
| 13 |
-
# 2. ALIGNMENT (
|
| 14 |
parser = PDBParser(QUIET=True)
|
| 15 |
target_struct = parser.get_structure("target", target_path)
|
| 16 |
design_struct = parser.get_structure("design", uploaded_file_path)
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
sup.apply(design_struct.get_atoms())
|
| 27 |
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# 3. EXPORT
|
| 31 |
# This saves the design in the same 3D coordinate space as the human receptor
|
|
@@ -33,4 +150,50 @@ def polish_design(target_pdb_id, uploaded_file_path):
|
|
| 33 |
io.set_structure(design_struct)
|
| 34 |
io.save(output_name)
|
| 35 |
|
| 36 |
-
return output_name,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
from Bio.PDB import PDBParser, Superimposer, PDBIO
|
| 3 |
|
| 4 |
+
def get_core_rmsd(reference_pdb, design_pdb, plddt_threshold=70.0):
    """
    Calculate the CA RMSD between two structures using only high-confidence
    residues of the design (pLDDT >= threshold).

    The pLDDT value is read from the B-factor of each design CA atom. Both
    normalized (0-1) and raw (0-100) pLDDT scales are handled: the threshold
    is divided by 100 when the design's B-factors look normalized.

    Residues are paired purely by iteration order (first with first, ...);
    no sequence alignment is performed.

    Args:
        reference_pdb: Path to the reference PDB file.
        design_pdb: Path to the designed/predicted PDB file.
        plddt_threshold: Minimum pLDDT on the 0-100 scale for a residue to
            be included in the fit.

    Returns:
        tuple: (rmsd, n_atoms) where rmsd is the RMSD after optimal
        superposition of the selected CA atoms and n_atoms is how many
        CA pairs were used.

    Raises:
        ValueError: If no CA atom pairs can be found in the two structures.
    """
    parser = PDBParser(QUIET=True)
    ref_struct = parser.get_structure("ref", reference_pdb)
    des_struct = parser.get_structure("des", design_pdb)

    # Detect the pLDDT scale from the largest CA B-factor in the design.
    # Looking at the maximum (rather than just the first residue) avoids
    # misclassifying a raw-scale file whose first residue happens to have
    # a very low score, and `<= 1.0` keeps a perfectly confident
    # normalized value (1.0) on the normalized scale.
    max_bfactor = max(
        (res['CA'].get_bfactor() for res in des_struct.get_residues() if 'CA' in res),
        default=None,
    )
    is_normalized = max_bfactor is not None and max_bfactor <= 1.0

    # Normalized: 70 pLDDT == 0.70; raw: use the threshold as-is.
    actual_threshold = plddt_threshold / 100.0 if is_normalized else plddt_threshold

    # Keep CA pairs whose design pLDDT clears the threshold.
    ref_atoms = []
    des_atoms = []
    for ref_res, des_res in zip(ref_struct.get_residues(), des_struct.get_residues()):
        if 'CA' in des_res and 'CA' in ref_res:
            if des_res['CA'].get_bfactor() >= actual_threshold:
                ref_atoms.append(ref_res['CA'])
                des_atoms.append(des_res['CA'])

    if not ref_atoms:
        # Fallback to all CA atoms if no high-confidence ones were found,
        # truncated to the shorter structure so the lists stay paired.
        ref_atoms = [a for a in ref_struct.get_atoms() if a.get_name() == 'CA']
        des_atoms = [a for a in des_struct.get_atoms() if a.get_name() == 'CA']
        min_len = min(len(ref_atoms), len(des_atoms))
        ref_atoms = ref_atoms[:min_len]
        des_atoms = des_atoms[:min_len]

    if not ref_atoms:
        # Fail with a clear message instead of an obscure numerical error
        # from superimposing two empty coordinate sets.
        raise ValueError(
            f"No CA atoms available to superimpose '{reference_pdb}' and '{design_pdb}'"
        )

    # set_atoms computes the optimal fit and stores its RMSD in `.rms`.
    # The transform is not applied: des_struct is local and never returned.
    super_imposer = Superimposer()
    super_imposer.set_atoms(ref_atoms, des_atoms)

    return super_imposer.rms, len(ref_atoms)
|
| 62 |
+
|
| 63 |
+
def polish_design(target_pdb_id, uploaded_file_path, plddt_threshold=70.0):
|
| 64 |
+
"""
|
| 65 |
+
Performs high-precision structural alignment using core-scaffold RMSD.
|
| 66 |
+
Uses only high-confidence residues (pLDDT > threshold) for more meaningful metrics.
|
| 67 |
+
Returns a tuple: (output_name, global_rmsd, core_rmsd, high_conf_rmsd).
|
| 68 |
"""
|
| 69 |
# 1. Setup paths
|
|
|
|
| 70 |
target_path = os.path.join("data", f"{target_pdb_id.lower()}.pdb")
|
| 71 |
output_name = "Refined_Shuttle.pdb"
|
| 72 |
|
| 73 |
+
# 2. ALIGNMENT using core-scaffold RMSD (high-confidence residues only)
|
| 74 |
parser = PDBParser(QUIET=True)
|
| 75 |
target_struct = parser.get_structure("target", target_path)
|
| 76 |
design_struct = parser.get_structure("design", uploaded_file_path)
|
| 77 |
|
| 78 |
+
# Get atoms for alignment - filter by pLDDT if available
|
| 79 |
+
ref_atoms = []
|
| 80 |
+
des_atoms = []
|
| 81 |
+
ref_atoms_high_conf = [] # For pLDDT > 80
|
| 82 |
+
des_atoms_high_conf = [] # For pLDDT > 80
|
| 83 |
+
|
| 84 |
+
# Detect if B-factors are normalized (0-1) or raw pLDDT (0-100)
|
| 85 |
+
sample_bfactor = None
|
| 86 |
+
for res in design_struct.get_residues():
|
| 87 |
+
if 'CA' in res:
|
| 88 |
+
sample_bfactor = res['CA'].get_bfactor()
|
| 89 |
+
break
|
| 90 |
+
|
| 91 |
+
is_normalized = sample_bfactor is not None and sample_bfactor < 1.0
|
| 92 |
+
actual_threshold = (plddt_threshold / 100.0) if is_normalized else plddt_threshold
|
| 93 |
+
high_conf_threshold = (80.0 / 100.0) if is_normalized else 80.0
|
| 94 |
+
|
| 95 |
+
# Collect atoms for alignment (using plddt_threshold)
|
| 96 |
+
# Also collect high-confidence atoms (pLDDT > 80) for detailed report
|
| 97 |
+
for ref_res, des_res in zip(target_struct.get_residues(), design_struct.get_residues()):
|
| 98 |
+
if 'CA' in des_res and 'CA' in ref_res:
|
| 99 |
+
plddt = des_res['CA'].get_bfactor()
|
| 100 |
+
if plddt >= actual_threshold:
|
| 101 |
+
ref_atoms.append(ref_res['CA'])
|
| 102 |
+
des_atoms.append(des_res['CA'])
|
| 103 |
+
if plddt >= high_conf_threshold:
|
| 104 |
+
ref_atoms_high_conf.append(ref_res['CA'])
|
| 105 |
+
des_atoms_high_conf.append(des_res['CA'])
|
| 106 |
|
| 107 |
+
# Fallback to all CA atoms if no high-confidence ones found
|
| 108 |
+
if len(ref_atoms) == 0:
|
| 109 |
+
print(f"⚠️ No residues with pLDDT >= {plddt_threshold}. Using all residues.")
|
| 110 |
+
ref_atoms = [a for a in target_struct.get_atoms() if a.get_name() == 'CA']
|
| 111 |
+
des_atoms = [a for a in design_struct.get_atoms() if a.get_name() == 'CA']
|
| 112 |
+
min_len = min(len(ref_atoms), len(des_atoms))
|
| 113 |
+
ref_atoms = ref_atoms[:min_len]
|
| 114 |
+
des_atoms = des_atoms[:min_len]
|
| 115 |
+
|
| 116 |
+
# Perform alignment using the main threshold atoms
|
| 117 |
+
sup = Superimposer()
|
| 118 |
+
sup.set_atoms(ref_atoms, des_atoms)
|
| 119 |
sup.apply(design_struct.get_atoms())
|
| 120 |
|
| 121 |
+
core_rmsd = sup.rms
|
| 122 |
+
num_residues = len(ref_atoms)
|
| 123 |
+
print(f"🎯 Core-Scaffold RMSD (pLDDT > {plddt_threshold}): {core_rmsd:.3f} Å ({num_residues} residues)")
|
| 124 |
+
|
| 125 |
+
# Calculate global RMSD (all CA atoms)
|
| 126 |
+
all_ref_atoms = [a for a in target_struct.get_atoms() if a.get_name() == 'CA']
|
| 127 |
+
all_des_atoms = [a for a in design_struct.get_atoms() if a.get_name() == 'CA']
|
| 128 |
+
min_len = min(len(all_ref_atoms), len(all_des_atoms))
|
| 129 |
+
all_ref_atoms = all_ref_atoms[:min_len]
|
| 130 |
+
all_des_atoms = all_des_atoms[:min_len]
|
| 131 |
+
|
| 132 |
+
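# Calculate global RMSD from a separate best-fit superposition of all CA atoms (set_atoms refits; it does not reuse the core alignment above)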
|
| 133 |
+
sup_global = Superimposer()
|
| 134 |
+
sup_global.set_atoms(all_ref_atoms, all_des_atoms)
|
| 135 |
+
global_rmsd = sup_global.rms
|
| 136 |
+
|
| 137 |
+
# Calculate high-confidence core RMSD (pLDDT > 80)
|
| 138 |
+
high_conf_rmsd = None
|
| 139 |
+
if len(ref_atoms_high_conf) > 0:
|
| 140 |
+
sup_high_conf = Superimposer()
|
| 141 |
+
sup_high_conf.set_atoms(ref_atoms_high_conf, des_atoms_high_conf)
|
| 142 |
+
high_conf_rmsd = sup_high_conf.rms
|
| 143 |
+
else:
|
| 144 |
+
# If no high-confidence atoms, use core_rmsd as fallback
|
| 145 |
+
high_conf_rmsd = core_rmsd
|
| 146 |
|
| 147 |
# 3. EXPORT
|
| 148 |
# This saves the design in the same 3D coordinate space as the human receptor
|
|
|
|
| 150 |
io.set_structure(design_struct)
|
| 151 |
io.save(output_name)
|
| 152 |
|
| 153 |
+
return output_name, global_rmsd, core_rmsd, high_conf_rmsd
|
| 154 |
+
|
| 155 |
+
def process_results(target_pdb_id, result_pdb, global_rmsd, core_rmsd):
    """
    Build the Markdown structural validation report for an aligned design.

    Args:
        target_pdb_id: Target PDB ID (shown upper-cased in the report)
        result_pdb: Path to the aligned result PDB (accepted but not read here)
        global_rmsd: Global RMSD (all residues), printed with two decimals
        core_rmsd: High-confidence core RMSD (pLDDT > 80); selects the status

    Returns:
        str: Formatted validation report
    """
    # Status tiers as (exclusive upper bound, emoji, label), checked in order.
    tiers = (
        (1.0, "✅", "Success - High-Precision Core Match"),
        (2.0, "⚠️", "Good - Minor Core Deviation"),
    )

    # Anything that matches no tier is reported as possible fold drift.
    status_emoji, status = "❌", "Possible Fold Drift - Review Required"
    for upper_bound, emoji, label in tiers:
        if core_rmsd < upper_bound:
            status_emoji, status = emoji, label
            break

    # The leading and trailing empty entries reproduce the newline right
    # after the opening and right before the closing of the report text.
    report_lines = [
        "",
        "### 🔬 Structural Validation Report",
        "",
        f"**Target:** {target_pdb_id.upper()}",
        "",
        "**RMSD Metrics:**",
        f"- **Global RMSD:** {global_rmsd:.2f} Å (all residues)",
        f"- **High-Confidence Core RMSD (pLDDT > 80):** {core_rmsd:.2f} Å",
        "",
        f"**Design Status:** {status_emoji} {status}",
        "",
        "**Interpretation:**",
        "- Core RMSD < 1.0 Å: Excellent scaffold preservation",
        "- Core RMSD 1.0-2.0 Å: Good structural match",
        "- Core RMSD > 2.0 Å: Possible fold drift, review structure",
        "",
    ]
    return "\n".join(report_lines)
|