Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- app.py +978 -0
- rmsd_utils.py +294 -0
- visualization.py +673 -0
app.py
ADDED
|
@@ -0,0 +1,978 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RNA Motif Structure Comparison Tool
|
| 3 |
+
Streamlit app for comparing RNA motif structures with flexible residue selection
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pandas as pd
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
import io
|
| 11 |
+
import tempfile
|
| 12 |
+
import os
|
| 13 |
+
|
| 14 |
+
# Import our RMSD calculation functions
|
| 15 |
+
from rmsd_utils import (
|
| 16 |
+
parse_residue_atoms,
|
| 17 |
+
get_backbone_sugar_and_selectbase_coords_fixed,
|
| 18 |
+
calculate_COM,
|
| 19 |
+
calculate_rotation_rmsd,
|
| 20 |
+
translate_rotate_coords
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
from visualization import create_structure_visualization
|
| 24 |
+
|
| 25 |
+
# Page configuration: browser-tab title/icon, wide layout, sidebar open on load
_PAGE_SETTINGS = dict(
    page_title="RNA Motif Structure Comparison",
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**_PAGE_SETTINGS)

# Custom CSS: defines the .main-header, .sub-header and .metric-box classes
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
color: #1f77b4;
margin-bottom: 1rem;
}
.sub-header {
font-size: 1.2rem;
color: #666;
margin-bottom: 2rem;
}
.metric-box {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
margin: 0.5rem 0;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def save_uploaded_file(uploaded_file, directory):
    """
    Save an uploaded file into ``directory`` and return the path written.

    Only the final path component of ``uploaded_file.name`` is used, so a
    client-supplied name containing directory parts (e.g. ``../x.pdb``)
    cannot place the file outside ``directory``. Plain file names behave
    exactly as before.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``
        directory: Directory to write the file into

    Returns:
        Path of the file that was written

    Raises:
        ValueError: If the name has no usable file component
    """
    # The name originates from the client, so strip any directory part first
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def get_structure_info(pdb_path):
    """
    Summarise every residue parsed from a PDB file.

    Args:
        pdb_path: Path to PDB file

    Returns:
        One dict per residue, in the order returned by parse_residue_atoms,
        with keys 'index' (0-based position), 'resnum', 'resname' and
        'full_name' (display label that starts with the 1-based position)
    """
    return [
        {
            'index': position,
            'resnum': entry['resnum'],
            'resname': entry['resname'],
            'full_name': f"{position+1}. {entry['resname']} (residue #{entry['resnum']})",
        }
        for position, entry in enumerate(parse_residue_atoms(pdb_path))
    ]
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def display_structure_selector(files, temp_dir, set_name):
    """
    Display structure information and allow users to select residues.

    For every file an expander shows the residue table and a radio button
    offering three selection methods (range, specific residues, all).
    A file in which no residues were found gets a warning and an empty
    selection instead of the selection widgets.

    Args:
        files: List of uploaded files
        temp_dir: Temporary directory containing files
        set_name: Name of the set (e.g., "Reference" or "Query")

    Returns:
        Dict mapping filename to list of selected residue indices (0-based)
    """
    if not files:
        return {}

    st.subheader(f"📋 {set_name} Structure Preview & Selection")

    selections = {}

    for file in files:
        file_path = os.path.join(temp_dir, file.name)
        structure_info = get_structure_info(file_path)

        with st.expander(f"🔍 {file.name} ({len(structure_info)} residues)"):
            if not structure_info:
                # An empty residue list would break the column selection below
                # and give the range inputs max_value (0) < min_value (1).
                st.warning(f"No residues found in {file.name}; nothing to select.")
                selections[file.name] = []
                continue

            # Display residue table
            info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
            info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
            info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
            info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]

            st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38))

            # Selection method
            selection_method = st.radio(
                f"Selection method for {file.name}",
                ["Select by range", "Select specific residues", "Use all residues"],
                key=f"method_{set_name}_{file.name}",
                horizontal=True
            )

            selected_indices = []

            if selection_method == "Select by range":
                col1, col2 = st.columns(2)
                with col1:
                    start_idx = st.number_input(
                        "Start index (1-based)",
                        min_value=1,
                        max_value=len(structure_info),
                        value=1,
                        key=f"start_{set_name}_{file.name}"
                    )
                with col2:
                    end_idx = st.number_input(
                        "End index (1-based, inclusive)",
                        min_value=1,
                        max_value=len(structure_info),
                        value=min(4, len(structure_info)),
                        key=f"end_{set_name}_{file.name}"
                    )

                if start_idx <= end_idx:
                    # Inputs are 1-based inclusive; stored indices are 0-based
                    selected_indices = list(range(start_idx - 1, end_idx))
                    st.info(f"✓ Selected residues: {[i+1 for i in selected_indices]}")
                else:
                    st.error("Start index must be ≤ end index")

            elif selection_method == "Select specific residues":
                # Multi-select for specific residues
                selected_names = st.multiselect(
                    "Select residues",
                    options=[info['full_name'] for info in structure_info],
                    default=[structure_info[i]['full_name'] for i in range(min(4, len(structure_info)))],
                    key=f"specific_{set_name}_{file.name}"
                )

                # Map back to indices
                name_to_idx = {info['full_name']: info['index'] for info in structure_info}
                selected_indices = [name_to_idx[name] for name in selected_names]
                selected_indices.sort()

                if selected_indices:
                    st.info(f"✓ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")

            else:  # Use all residues
                selected_indices = list(range(len(structure_info)))
                st.info(f"✓ Using all {len(selected_indices)} residues")

            # Show selected residues details
            if selected_indices:
                selected_df = info_df[info_df['Index (0-based)'].isin(selected_indices)]
                st.markdown("**Selected residues:**")
                st.dataframe(selected_df, use_container_width=True)

            selections[file.name] = selected_indices

    return selections
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
def save_uploaded_file(uploaded_file, directory):
    """
    Save an uploaded file into ``directory`` and return the path written.

    Only the final path component of ``uploaded_file.name`` is used, so a
    client-supplied name containing directory parts (e.g. ``../x.pdb``)
    cannot place the file outside ``directory``. Plain file names behave
    exactly as before.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``
        directory: Directory to write the file into

    Returns:
        Path of the file that was written

    Raises:
        ValueError: If the name has no usable file component
    """
    # NOTE: this function is defined twice in this file; this later
    # definition is the one bound to the name at runtime.
    # The name originates from the client, so strip any directory part first
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def extract_window_coords(residues, window_indices):
    """
    Collect the coordinates of the residues picked out by window_indices.

    For each index the backbone/sugar coordinates are appended first,
    followed by the base coordinates. Indices that are not smaller than
    len(residues) are skipped.

    Args:
        residues: List of all residues
        window_indices: List of indices to extract

    Returns:
        numpy array built from the collected coordinates
    """
    from rmsd_utils import get_backbone_sugar_coords_from_residue, get_base_coords_from_residue

    residue_count = len(residues)
    gathered = []
    for position in window_indices:
        if position >= residue_count:
            continue
        current = residues[position]
        # Backbone and sugar atoms first, then the base atoms
        gathered.extend(get_backbone_sugar_coords_from_residue(current))
        gathered.extend(get_base_coords_from_residue(current))

    return np.asarray(gathered)
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def compare_structures_with_selection(reference_files, query_files, ref_selections, query_selections, temp_dir):
    """
    Compare reference and query structures using user-selected residues (direct comparison).
    Only compares structures with matching selection sizes; selections with
    fewer than two residues are ignored.

    Each query file is parsed at most once, however many references it is
    compared against.

    Args:
        reference_files: List of reference motif files
        query_files: List of query motif files
        ref_selections: Dict mapping filename to selected residue indices
        query_selections: Dict mapping filename to selected residue indices
        temp_dir: Temporary directory containing files

    Returns:
        DataFrame with one row per comparison (empty if nothing is comparable).
        When calculate_rotation_rmsd returns None for either value, the row
        stores RMSD 999.0 and an identity rotation matrix.
    """
    def describe(indices):
        # 1-based, comma-separated label such as "[1,2,3]"
        return f"[{','.join([str(i+1) for i in indices])}]"

    def sequence_of(residues, indices):
        # Residue names joined in selection order, ignoring out-of-range indices
        return ''.join([residues[i]['resname'] for i in indices if i < len(residues)])

    # Apply the "at least two residues" rule once, for both sets
    usable_refs = [
        (f.name, ref_selections.get(f.name, []))
        for f in reference_files
    ]
    usable_refs = [(name, picked) for name, picked in usable_refs if len(picked) >= 2]
    usable_queries = [
        (f.name, query_selections.get(f.name, []))
        for f in query_files
    ]
    usable_queries = [(name, picked) for name, picked in usable_queries if len(picked) >= 2]

    # Count valid comparisons (same number of selected residues on both sides)
    total_comparisons = sum(
        1
        for _, ref_indices in usable_refs
        for _, query_indices in usable_queries
        if len(ref_indices) == len(query_indices)
    )

    if total_comparisons == 0:
        st.error("No valid comparisons found. Ensure selected regions have matching sizes.")
        return pd.DataFrame()

    progress_bar = st.progress(0)
    status_text = st.empty()

    results = []
    parsed_queries = {}  # query file name -> parsed residues, filled on first use
    comparison_count = 0

    for ref_name, ref_indices in usable_refs:
        ref_path = os.path.join(temp_dir, ref_name)

        # Parse reference motif
        ref_residues = parse_residue_atoms(ref_path)

        # Extract coordinates for selected residues
        ref_coords = extract_window_coords(ref_residues, ref_indices)
        ref_com = calculate_COM(ref_coords)

        # Get residue description
        ref_residue_desc = describe(ref_indices)
        ref_sequence = sequence_of(ref_residues, ref_indices)

        for query_name, query_indices in usable_queries:
            # Only compare if same number of residues
            if len(ref_indices) != len(query_indices):
                continue

            query_path = os.path.join(temp_dir, query_name)

            # Parse query motif (once per file)
            if query_name not in parsed_queries:
                parsed_queries[query_name] = parse_residue_atoms(query_path)
            query_residues = parsed_queries[query_name]

            # Extract coordinates for selected residues
            query_coords = extract_window_coords(query_residues, query_indices)
            query_com = calculate_COM(query_coords)

            # Get residue description
            query_residue_desc = describe(query_indices)
            query_sequence = sequence_of(query_residues, query_indices)

            # Calculate RMSD
            U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)

            if U is None or RMSD is None:
                # Sentinel values for a comparison that produced no result
                RMSD = 999.0
                U = np.eye(3)

            # Store results
            results.append({
                'Reference': ref_name,
                'Ref_Residues': ref_residue_desc,
                'Ref_Sequence': ref_sequence,
                'Ref_Indices': ref_indices,
                'Query': query_name,
                'Query_Residues': query_residue_desc,
                'Query_Sequence': query_sequence,
                'Query_Indices': query_indices,
                'Num_Residues': len(ref_indices),
                'RMSD': RMSD,
                'Rotation_Matrix': U,
                'Ref_COM': ref_com,
                'Query_COM': query_com,
                'Ref_Path': ref_path,
                'Query_Path': query_path
            })

            comparison_count += 1
            progress = comparison_count / total_comparisons
            progress_bar.progress(progress)
            status_text.text(f"Processing: {ref_name}{ref_residue_desc} vs {query_name}{query_residue_desc}")

    progress_bar.empty()
    status_text.empty()

    return pd.DataFrame(results)
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
def compare_structures_with_windows(reference_files, query_files, ref_selections, query_selections,
                                    window_size, window_type, temp_dir):
    """
    Compare reference and query structures using sliding windows on selected residues.
    Allows comparison of different-sized selections.

    Windows are generated once per file and each query file is parsed at
    most once, however many reference windows it is compared against.

    Args:
        reference_files: List of reference motif files
        query_files: List of query motif files
        ref_selections: Dict mapping filename to selected residue indices
        query_selections: Dict mapping filename to selected residue indices
        window_size: Size of comparison window
        window_type: "contiguous" or "non-contiguous" (any value other than
            "contiguous" is treated as non-contiguous)
        temp_dir: Temporary directory containing files

    Returns:
        DataFrame with one row per window pair (empty if nothing is comparable).
        When calculate_rotation_rmsd returns None for either value, the row
        stores RMSD 999.0 and an identity rotation matrix.
    """
    from itertools import combinations

    def generate_windows_from_selection(selected_indices, win_size, win_type):
        """Generate windows from selected indices"""
        if len(selected_indices) < win_size:
            return []

        if win_type == "contiguous":
            # Consecutive runs of win_size entries of the selection list
            return [
                selected_indices[i:i + win_size]
                for i in range(len(selected_indices) - win_size + 1)
            ]
        # non-contiguous: every win_size-element combination of the selection
        return [list(combo) for combo in combinations(selected_indices, win_size)]

    def describe(indices):
        # 1-based, comma-separated label such as "[1,2,3]"
        return f"[{','.join([str(i+1) for i in indices])}]"

    def sequence_of(residues, indices):
        # Residue names joined in window order, ignoring out-of-range indices
        return ''.join([residues[i]['resname'] for i in indices if i < len(residues)])

    # Windows depend only on the selection, so build them once per file
    ref_entries = []
    for ref_file in reference_files:
        ref_indices = ref_selections.get(ref_file.name, [])
        ref_entries.append((
            ref_file.name,
            ref_indices,
            generate_windows_from_selection(ref_indices, window_size, window_type),
        ))

    query_entries = []
    for query_file in query_files:
        query_windows = generate_windows_from_selection(
            query_selections.get(query_file.name, []), window_size, window_type
        )
        if query_windows:
            query_entries.append((query_file.name, query_windows))

    # Count total comparisons
    total_comparisons = sum(
        len(ref_windows) * len(query_windows)
        for _, _, ref_windows in ref_entries
        if ref_windows
        for _, query_windows in query_entries
    )

    if total_comparisons == 0:
        st.error(f"No valid comparisons found. Ensure selected regions have at least {window_size} residues.")
        return pd.DataFrame()

    progress_bar = st.progress(0)
    status_text = st.empty()

    results = []
    parsed_queries = {}  # query file name -> parsed residues, filled on first use
    comparison_count = 0

    for ref_name, ref_indices, ref_windows in ref_entries:
        ref_path = os.path.join(temp_dir, ref_name)

        if not ref_windows:
            st.warning(f"Skipping {ref_name}: selected {len(ref_indices)} residues, need at least {window_size}")
            continue

        # Parse reference motif
        ref_residues = parse_residue_atoms(ref_path)

        for ref_window in ref_windows:
            # Extract coordinates for this window
            ref_coords = extract_window_coords(ref_residues, ref_window)
            ref_com = calculate_COM(ref_coords)

            # Get descriptions
            ref_window_desc = describe(ref_window)
            ref_sequence = sequence_of(ref_residues, ref_window)

            for query_name, query_windows in query_entries:
                query_path = os.path.join(temp_dir, query_name)

                # Parse query motif (once per file)
                if query_name not in parsed_queries:
                    parsed_queries[query_name] = parse_residue_atoms(query_path)
                query_residues = parsed_queries[query_name]

                for query_window in query_windows:
                    # Extract coordinates for this window
                    query_coords = extract_window_coords(query_residues, query_window)
                    query_com = calculate_COM(query_coords)

                    # Get descriptions
                    query_window_desc = describe(query_window)
                    query_sequence = sequence_of(query_residues, query_window)

                    # Calculate RMSD
                    U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)

                    if U is None or RMSD is None:
                        # Sentinel values for a comparison that produced no result
                        RMSD = 999.0
                        U = np.eye(3)

                    # Store results
                    results.append({
                        'Reference': ref_name,
                        'Ref_Residues': ref_window_desc,
                        'Ref_Sequence': ref_sequence,
                        'Ref_Indices': ref_window,
                        'Query': query_name,
                        'Query_Residues': query_window_desc,
                        'Query_Sequence': query_sequence,
                        'Query_Indices': query_window,
                        'Num_Residues': window_size,
                        'RMSD': RMSD,
                        'Rotation_Matrix': U,
                        'Ref_COM': ref_com,
                        'Query_COM': query_com,
                        'Ref_Path': ref_path,
                        'Query_Path': query_path
                    })

                    comparison_count += 1
                    progress = comparison_count / total_comparisons
                    progress_bar.progress(progress)
                    status_text.text(f"Processing: {ref_name}{ref_window_desc} vs {query_name}{query_window_desc}")

    progress_bar.empty()
    status_text.empty()

    return pd.DataFrame(results)
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
def _strip_pdb_extension(filename):
    """Return *filename* without a trailing ``.pdb`` extension, ignoring letter case."""
    suffix = '.pdb'
    if filename.lower().endswith(suffix):
        return filename[:-len(suffix)]
    return filename


def main():
    """
    Render the Streamlit page for RNA motif structure comparison.

    Flow:
        1. Upload reference and query PDB files in the sidebar.
        2. Pick residues per structure and validate the selections.
        3. Choose direct or window-based comparison and run the analysis.
        4. Show summary statistics, a results table filtered by an RMSD
           threshold, a 3D view of one selected comparison, and downloads
           (single structures, CSV table, ZIP archive of aligned pairs).

    Results are kept in ``st.session_state`` so they remain visible after the
    button press that produced them.
    """
    direct_mode = "Direct comparison (same size)"
    window_mode = "Window-based comparison (different sizes)"
    # Placeholder RMSD that the window-based comparison stores when an
    # alignment could not be computed; excluded from summary statistics.
    failed_rmsd = 999.0
    table_columns = [
        'Reference', 'Ref_Residues', 'Ref_Sequence',
        'Query', 'Query_Residues', 'Query_Sequence',
        'Num_Residues', 'RMSD',
    ]

    # Header
    st.markdown('<p class="main-header">🧬 RNA Motif Structure Comparison</p>', unsafe_allow_html=True)
    st.markdown('<p class="sub-header">Compare RNA motifs with flexible residue selection</p>', unsafe_allow_html=True)

    # Sidebar
    st.sidebar.header("⚙️ Configuration")

    # File upload
    st.sidebar.subheader("1️⃣ Upload Structures")
    reference_files = st.sidebar.file_uploader(
        "Upload Reference Motif PDB files (Set A)",
        type=['pdb', 'PDB'],
        accept_multiple_files=True,
        key="reference",
        help="Upload RNA motif structures to use as reference"
    )

    query_files = st.sidebar.file_uploader(
        "Upload Query Motif PDB files (Set B)",
        type=['pdb', 'PDB'],
        accept_multiple_files=True,
        key="query",
        help="Upload RNA motif structures to compare against reference"
    )

    # Main content area: without both file sets there is nothing to analyse,
    # so show the usage guide and stop.
    if not reference_files or not query_files:
        st.info("👈 Please upload reference and query motif PDB files to begin analysis")

        # Show example info
        with st.expander("ℹ️ About this tool"):
            st.markdown("""
            ### Purpose
            This tool compares the 3D structures of RNA motifs with **flexible residue selection** and **multiple comparison modes**.

            ### Workflow
            1. **Upload PDB files** for reference and query motifs
            2. **Preview structures** and see all residues in each file
            3. **Select residues** to include in comparison (e.g., exclude stem bases, keep only loop)
            4. **Choose comparison mode**:
               - **Direct comparison**: Compare selected regions directly (must be same size)
               - **Window-based comparison**: Generate windows from selections (handles different sizes)
            5. **Run analysis** using RMSD-based structural alignment

            ### Comparison Modes

            #### Direct Comparison (Same Size)
            - Compares your exact selections
            - Example: You select 4 loop residues from each structure
            - Result: Direct 4-residue vs 4-residue comparison
            - Best for: When all structures have same-sized regions of interest

            #### Window-Based Comparison (Different Sizes)
            - Generates sliding windows from your selections
            - Example: You select 4 loop residues from ref, 6 loop residues from query
            - Set window size to 4
            - Result: Ref's 4 residues compared against all 4-residue windows from query's 6
            - Best for: When structures have different-sized regions but you want to find similar sub-regions

            ### Selection Methods
            - **By range**: Select consecutive residues (e.g., residues 3-6 for a tetraloop)
            - **Specific residues**: Pick any combination of residues (e.g., 1,3,5,7)
            - **All residues**: Use the entire structure

            ### Method Details
            - RMSD calculated using backbone, sugar, and select base atoms
            - Base atoms mapped: purines (N9,C8,C4) ↔ pyrimidines (N1,C2,C6)
            - Kabsch algorithm for optimal structural alignment

            ### Example Use Cases

            **Case 1: Extract loops from 2+4+2 structures (Direct)**
            - All structures have 8 residues (2 stem + 4 loop + 2 stem)
            - Select residues 3-6 for all structures (the 4-residue loop)
            - Use "Direct comparison"
            - Result: Compare loop vs loop directly

            **Case 2: Compare 4-mer loop vs 6-mer loop (Window-based)**
            - Structure A: Select residues 3-6 (4 loop residues)
            - Structure B: Select residues 2-7 (6 loop residues)
            - Use "Window-based comparison" with window size = 4
            - Result: Structure A compared against 3 windows from Structure B

            **Case 3: Find similar regions in different structures (Window-based)**
            - Reference: Select 5 residues of interest
            - Query: Select 10 residues from larger region
            - Use "Window-based comparison" with window size = 5
            - Result: Find which 5-residue window in query best matches reference

            ### Output
            - RMSD values for all comparisons
            - Interactive 3D visualization of aligned structures
            - Rotation and translation matrices
            - Sequence information for compared regions
            """)

        return

    # Create temporary directory for file processing.
    # NOTE(review): a fresh directory is created on every call and nothing in
    # this function removes it; stored result rows keep paths into it —
    # confirm whether cleanup is handled elsewhere.
    temp_dir = tempfile.mkdtemp()

    # Save uploaded files
    for file in reference_files:
        save_uploaded_file(file, temp_dir)
    for file in query_files:
        save_uploaded_file(file, temp_dir)

    # Display file info
    st.markdown("---")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Reference Motifs", len(reference_files))
    with col2:
        st.metric("Query Motifs", len(query_files))

    # Structure preview and selection
    st.markdown("---")

    # Get residue selections for reference and query sets
    ref_selections = display_structure_selector(reference_files, temp_dir, "Reference")

    st.markdown("---")

    query_selections = display_structure_selector(query_files, temp_dir, "Query")

    # Validate selections
    st.markdown("---")
    valid_selections = True
    min_residues = 2

    for selections in (ref_selections, query_selections):
        for filename, indices in selections.items():
            if len(indices) < min_residues:
                st.error(f"❌ {filename}: Need at least {min_residues} residues selected, got {len(indices)}")
                valid_selections = False

    # Collect the distinct selection sizes across both sets
    all_lengths = {len(indices) for indices in ref_selections.values()}
    all_lengths |= {len(indices) for indices in query_selections.values()}

    # Comparison method
    st.sidebar.markdown("---")
    st.sidebar.subheader("2️⃣ Comparison Method")

    comparison_mode = st.sidebar.radio(
        "How to compare structures?",
        [direct_mode, window_mode],
        help=(
            "\n"
            "Direct: Compare selected regions directly (must have same size)\n"
            "Window-based: Generate sliding windows for flexible comparison\n"
        )
    )

    # The size-mismatch warning only applies to direct comparison; window-based
    # comparison is the mode meant for selections of different sizes.
    if comparison_mode == direct_mode and len(all_lengths) > 1:
        st.warning(f"⚠️ Selected regions have different sizes: {sorted(all_lengths)} residues. Only structures with matching sizes will be compared.")

    window_size = None
    window_type = None

    if comparison_mode == window_mode:
        st.sidebar.markdown("**Window Configuration**")

        window_size = st.sidebar.number_input(
            "Window Size",
            min_value=2,
            max_value=20,
            value=4,
            step=1,
            help="Number of residues per comparison window"
        )

        window_type = st.sidebar.radio(
            "Window Type",
            ["contiguous", "non-contiguous"],
            help="Contiguous: sliding windows. Non-contiguous: all combinations"
        )

    st.sidebar.markdown("---")
    st.sidebar.subheader("3️⃣ Run Analysis")

    if st.sidebar.button("🚀 Run Analysis", type="primary", disabled=not valid_selections):
        if not valid_selections:
            st.error("Please fix selection errors before running analysis")
            return

        with st.spinner("Analyzing structures..."):
            if comparison_mode == direct_mode:
                results_df = compare_structures_with_selection(
                    reference_files,
                    query_files,
                    ref_selections,
                    query_selections,
                    temp_dir
                )
            else:  # Window-based comparison
                results_df = compare_structures_with_windows(
                    reference_files,
                    query_files,
                    ref_selections,
                    query_selections,
                    window_size,
                    window_type,
                    temp_dir
                )

        # Store results in session state so they remain available on later runs
        st.session_state['results_df'] = results_df
        st.session_state['ref_selections'] = ref_selections
        st.session_state['query_selections'] = query_selections
        st.session_state['comparison_mode'] = comparison_mode

        if len(results_df) > 0:
            st.success(f"✅ Analysis complete! {len(results_df)} comparisons performed.")
        else:
            st.warning("⚠️ No comparisons could be performed. Check that structures meet comparison requirements.")

    # Display results if available
    if 'results_df' in st.session_state and len(st.session_state['results_df']) > 0:
        results_df = st.session_state['results_df']

        # Add RMSD threshold filter
        st.sidebar.markdown("---")
        st.sidebar.subheader("4️⃣ Filter Results")
        rmsd_threshold = st.sidebar.slider(
            "RMSD Threshold (Å)",
            min_value=0.0,
            max_value=5.0,
            value=2.0,
            step=0.1,
            help="Only show results below this RMSD value"
        )

        # Show comparison mode
        if 'comparison_mode' in st.session_state:
            mode_display = "Direct" if "Direct" in st.session_state['comparison_mode'] else "Window-based"
            st.sidebar.info(f"**Mode**: {mode_display}")

        # Filter by threshold
        filtered_df = results_df[results_df['RMSD'] <= rmsd_threshold].copy()

        # Summary statistics (placeholder RMSDs of failed alignments are left
        # out so they cannot distort the best/mean values)
        st.markdown("---")
        st.subheader("📊 Summary Statistics")

        valid_rmsd = results_df.loc[results_df['RMSD'] < failed_rmsd, 'RMSD']
        if len(valid_rmsd) > 0:
            best_rmsd_text = f"{valid_rmsd.min():.3f} Å"
            mean_rmsd_text = f"{valid_rmsd.mean():.3f} Å"
        else:
            best_rmsd_text = mean_rmsd_text = "N/A"

        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Total Comparisons", len(results_df))
        with col2:
            st.metric("Below Threshold", len(filtered_df))
        with col3:
            st.metric("Best RMSD", best_rmsd_text)
        with col4:
            st.metric("Mean RMSD", mean_rmsd_text)

        # Results table
        st.markdown("---")
        st.subheader("🔍 Comparison Results")

        # Prepare display dataframe
        display_df = filtered_df[table_columns].copy()
        display_df = display_df.sort_values('RMSD').reset_index(drop=True)
        display_df['RMSD'] = display_df['RMSD'].round(3)

        st.dataframe(
            display_df,
            use_container_width=True,
            height=300
        )

        # Structure selection for visualization
        st.markdown("---")
        st.subheader("🔬 3D Structure Visualization")

        if len(filtered_df) > 0:
            def describe_comparison(i):
                """Build the select-box label for row *i* of the filtered results."""
                row = filtered_df.iloc[i]
                return (
                    f"{row['Reference']}{row['Ref_Residues']} ({row['Ref_Sequence']}) vs "
                    f"{row['Query']}{row['Query_Residues']} ({row['Query_Sequence']}) | "
                    f"RMSD: {row['RMSD']:.3f} Å"
                )

            # Select a comparison to visualize
            selected_idx = st.selectbox(
                "Select a comparison to visualize:",
                range(len(filtered_df)),
                format_func=describe_comparison
            )

            selected_row = filtered_df.iloc[selected_idx]

            # Display RMSD info
            st.info(f"**RMSD: {selected_row['RMSD']:.3f} Å** ({selected_row['Num_Residues']} residues) | Reference: {selected_row['Reference']}{selected_row['Ref_Residues']} ({selected_row['Ref_Sequence']}) | Query: {selected_row['Query']}{selected_row['Query_Residues']} ({selected_row['Query_Sequence']})")

            # Create visualization - wider display (narrow spacer columns on both sides)
            col1, col2, col3 = st.columns([0.5, 4, 0.5])

            with col2:
                try:
                    viz_html = create_structure_visualization(
                        selected_row['Ref_Path'],
                        selected_row['Query_Path'],
                        selected_row['Ref_Indices'],
                        selected_row['Query_Indices'],
                        selected_row['Rotation_Matrix'],
                        selected_row['Ref_COM'],
                        selected_row['Query_COM'],
                        selected_row['RMSD']
                    )
                    st.components.v1.html(viz_html, height=700, scrolling=False)
                except Exception as e:
                    # Keep the rest of the page usable if the viewer cannot be built
                    st.error(f"Error creating visualization: {str(e)}")

            # Show transformation details
            with st.expander("🔧 Transformation Details"):
                col1, col2 = st.columns(2)

                with col1:
                    st.markdown("**Rotation Matrix (U):**")
                    st.dataframe(
                        pd.DataFrame(selected_row['Rotation_Matrix']).round(4),
                        use_container_width=True
                    )

                with col2:
                    st.markdown("**Translation Vectors:**")
                    st.write(f"Reference COM: [{selected_row['Ref_COM'][0]:.3f}, {selected_row['Ref_COM'][1]:.3f}, {selected_row['Ref_COM'][2]:.3f}]")
                    st.write(f"Query COM: [{selected_row['Query_COM'][0]:.3f}, {selected_row['Query_COM'][1]:.3f}, {selected_row['Query_COM'][2]:.3f}]")

            # Download aligned structures
            with st.expander("💾 Download Structure Files"):
                st.markdown("**Download extracted and aligned structures for external visualization**")

                from visualization import extract_window_pdb, transform_pdb_string

                # Extract reference window
                ref_pdb = extract_window_pdb(
                    selected_row['Ref_Path'],
                    selected_row['Ref_Indices']
                )

                # Extract and transform query window
                query_pdb = extract_window_pdb(
                    selected_row['Query_Path'],
                    selected_row['Query_Indices']
                )

                query_aligned_pdb = transform_pdb_string(
                    query_pdb,
                    selected_row['Rotation_Matrix'],
                    selected_row['Query_COM'],
                    selected_row['Ref_COM']
                )

                # File name parts: base name without extension and 1-based residue positions
                ref_stem = _strip_pdb_extension(selected_row['Reference'])
                query_stem = _strip_pdb_extension(selected_row['Query'])
                ref_tag = '_'.join(str(i + 1) for i in selected_row['Ref_Indices'])
                query_tag = '_'.join(str(i + 1) for i in selected_row['Query_Indices'])

                col1, col2, col3 = st.columns(3)

                with col1:
                    # Reference structure
                    st.download_button(
                        label="📥 Reference PDB",
                        data=ref_pdb,
                        file_name=f"ref_{ref_stem}_{ref_tag}.pdb",
                        mime="chemical/x-pdb",
                        help="Original reference structure (selected residues only)"
                    )

                with col2:
                    # Query structure (original position)
                    st.download_button(
                        label="📥 Query PDB (Original)",
                        data=query_pdb,
                        file_name=f"query_{query_stem}_{query_tag}.pdb",
                        mime="chemical/x-pdb",
                        help="Original query structure (selected residues only)"
                    )

                with col3:
                    # Query structure (aligned)
                    st.download_button(
                        label="📥 Query PDB (Aligned)",
                        data=query_aligned_pdb,
                        file_name=f"query_aligned_{query_stem}_{query_tag}.pdb",
                        mime="chemical/x-pdb",
                        help="Query structure aligned to reference"
                    )

                st.info("💡 **Tip:** Load reference and aligned query together in PyMOL/Chimera to examine the superposition")
        else:
            st.warning("No comparisons below the RMSD threshold. Try increasing the threshold.")

        # Download results
        st.markdown("---")
        st.subheader("💾 Export Results")

        col1, col2 = st.columns(2)

        with col1:
            st.markdown("**Export Results Table**")
            # The CSV contains every comparison, not only those below the threshold
            export_df = results_df[table_columns].copy()
            export_df = export_df.sort_values('RMSD').reset_index(drop=True)

            csv = export_df.to_csv(index=False)
            st.download_button(
                label="📥 Download Results (CSV)",
                data=csv,
                file_name="rna_motif_comparison_results.csv",
                mime="text/csv"
            )

        with col2:
            st.markdown("**Export All Aligned Structures**")
            if st.button("📦 Generate PDB Archive", help="Create a ZIP file with all aligned structure pairs"):
                with st.spinner("Generating PDB files..."):
                    import zipfile
                    import io
                    from visualization import extract_window_pdb, transform_pdb_string

                    # Create ZIP file in memory
                    zip_buffer = io.BytesIO()

                    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                        # One folder per comparison that passed the RMSD filter
                        for idx, row in filtered_df.iterrows():
                            comp_name = f"comparison_{idx:03d}_rmsd_{row['RMSD']:.3f}"
                            ref_stem = _strip_pdb_extension(row['Reference'])
                            query_stem = _strip_pdb_extension(row['Query'])

                            # Extract reference
                            ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Indices'])
                            zip_file.writestr(f"{comp_name}/reference_{ref_stem}.pdb", ref_pdb)

                            # Extract query (original)
                            query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Indices'])
                            zip_file.writestr(f"{comp_name}/query_original_{query_stem}.pdb", query_pdb)

                            # Extract and align query
                            query_aligned_pdb = transform_pdb_string(
                                query_pdb,
                                row['Rotation_Matrix'],
                                row['Query_COM'],
                                row['Ref_COM']
                            )
                            zip_file.writestr(f"{comp_name}/query_aligned_{query_stem}.pdb", query_aligned_pdb)

                            # Add a README for this comparison
                            readme_content = "\n".join([
                                f"Comparison #{idx}",
                                f"RMSD: {row['RMSD']:.3f} Å",
                                f"Residues Compared: {row['Num_Residues']}",
                                "",
                                "Reference:",
                                f"File: {row['Reference']}",
                                f"Residues: {row['Ref_Residues']}",
                                f"Sequence: {row['Ref_Sequence']}",
                                "",
                                "Query:",
                                f"File: {row['Query']}",
                                f"Residues: {row['Query_Residues']}",
                                f"Sequence: {row['Query_Sequence']}",
                                "",
                                "Files:",
                                "- reference_*.pdb: Reference structure (selected residues)",
                                "- query_original_*.pdb: Query structure (original position)",
                                "- query_aligned_*.pdb: Query structure (aligned to reference)",
                                "",
                                "To visualize in PyMOL:",
                                "load reference_*.pdb",
                                "load query_aligned_*.pdb",
                                "",
                                "To visualize in Chimera:",
                                "File → Open → Select both reference and query_aligned PDB files",
                                "",
                            ])
                            zip_file.writestr(f"{comp_name}/README.txt", readme_content)

                    zip_buffer.seek(0)

                    st.download_button(
                        label="📥 Download PDB Archive (ZIP)",
                        data=zip_buffer.getvalue(),
                        file_name="aligned_structures.zip",
                        mime="application/zip",
                        help=f"Contains {len(filtered_df)} comparison sets with reference, original query, and aligned query PDBs"
                    )

                    st.success(f"✅ Archive ready! Contains {len(filtered_df)} comparisons with 3 PDB files each.")
|
| 975 |
+
|
| 976 |
+
|
| 977 |
+
# Call main() only when this file is run as the entry script, not when it is imported.
if __name__ == "__main__":
    main()
|
rmsd_utils.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RMSD Calculation Utilities for RNA Structure Comparison
|
| 3 |
+
Fixed version with explicit purine-pyrimidine atom mapping
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def parse_residue_atoms(fname):
    """
    Parse a PDB file and group atom coordinates by residue.

    A residue is identified by its chain ID, residue number and insertion
    code, so residues from different chains (or with insertion codes) that
    share a residue number are kept as separate entries instead of having
    their atoms merged into one.

    Args:
        fname: Path to PDB file

    Returns:
        List of residues sorted by (chain, residue number, insertion code),
        where each residue is a dict with:
            - 'resnum': residue number
            - 'resname': residue name as written in the file (e.g. A, C, G, U)
            - 'chain': chain identifier ('' when the column is blank)
            - 'atoms': dict of {atom_name: [x, y, z]}

    Raises:
        ValueError: If an ATOM/HETATM record has a non-numeric residue
            number or coordinate field.
    """
    residues = {}

    with open(fname) as f:
        for line in f:
            if line[0:6].strip() not in ('ATOM', 'HETATM', 'HETAT'):
                continue

            # Fixed-column fields of a PDB coordinate record
            atomname = line[12:16].strip()
            resname = line[17:20].strip()
            chain = line[21:22].strip()
            resnum = int(line[22:26].strip())
            icode = line[26:27].strip()

            x = float(line[30:38].strip())
            y = float(line[38:46].strip())
            z = float(line[46:54].strip())

            key = (chain, resnum, icode)

            # Initialize residue if not seen before
            residue = residues.get(key)
            if residue is None:
                residue = residues[key] = {
                    'resnum': resnum,
                    'resname': resname,
                    'chain': chain,
                    'atoms': {}
                }

            # A repeated atom name within a residue keeps the last coordinates read
            residue['atoms'][atomname] = [x, y, z]

    # For a single chain without insertion codes this is plain residue-number order
    return [residues[key] for key in sorted(residues)]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def get_backbone_sugar_coords_from_residue(residue):
    """
    Collect the backbone and sugar atom coordinates of one residue.

    Atoms are returned in a fixed order (phosphate group first, then the
    sugar ring atoms); atoms that the residue does not contain are skipped.

    Args:
        residue: Dict with 'atoms' key containing atom coordinates

    Returns:
        List of [x, y, z] coordinates in consistent order
    """
    ordered_names = (
        "P", "OP1", "OP2",
        "O5'", "C5'", "C4'", "O4'",
        "C3'", "O3'", "C2'", "O2'", "C1'",
    )
    atom_table = residue['atoms']

    return [atom_table[name] for name in ordered_names if name in atom_table]
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def get_base_coords_from_residue(residue):
    """
    Collect the three reference base atoms of one residue.

    The atoms are chosen by residue name and returned in this order:
        - Purines (A, G): N9, C8, C4
        - Pyrimidines (C, U): N1, C2, C6

    The positions line up across the two families
    (N9 <-> N1, C8 <-> C2, C4 <-> C6). Atoms missing from the residue are
    skipped, and any other residue name yields an empty list.

    Args:
        residue: Dict with 'resname' and 'atoms' keys

    Returns:
        List of [x, y, z] coordinates
    """
    base_name = residue['resname']
    atom_table = residue['atoms']

    # (residue names, atom names in mapping order) for each base family
    families = (
        (('A', 'G'), ('N9', 'C8', 'C4')),   # Purines
        (('C', 'U'), ('N1', 'C2', 'C6')),   # Pyrimidines
    )

    for members, atom_names in families:
        if base_name in members:
            return [atom_table[name] for name in atom_names if name in atom_table]

    # Unknown residue type
    return []
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def get_backbone_sugar_and_selectbase_coords_fixed(fname):
    """
    Build the coordinate array used for comparison from a PDB file.

    Residues are read with parse_residue_atoms(); for each residue, in order,
    the output contains:
        1. Its backbone and sugar atoms (fixed order)
        2. Its three base atoms:
            - Purines (A, G): N9, C8, C4
            - Pyrimidines (C, U): N1, C2, C6

    Keeping the base atoms in this order lets structures with different
    sequences be compared atom-by-atom (N9<->N1, C8<->C2, C4<->C6).

    Args:
        fname: Path to PDB file

    Returns:
        Numpy array of coordinates
    """
    # Per residue: backbone/sugar coordinates followed by base coordinates
    per_residue = (
        get_backbone_sugar_coords_from_residue(entry) + get_base_coords_from_residue(entry)
        for entry in parse_residue_atoms(fname)
    )

    flattened = [xyz for group in per_residue for xyz in group]

    return np.asarray(flattened)
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def calculate_COM(coords):
    """
    Return the geometric center of a set of points.

    Every point has the same weight, i.e. this is the plain average of the rows.

    Args:
        coords: Numpy array of shape (N, 3)

    Returns:
        Numpy array of shape (3,) representing the center of mass
    """
    n_points = float(coords.shape[0])
    column_totals = np.sum(coords, axis=0)
    return column_totals / n_points
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def calculate_rotation_rmsd(coords1, coords2, COM1, COM2):
    """
    Calculate rotation matrix and RMSD using the Kabsch algorithm.

    Both coordinate sets are centered on their given centers of mass. The
    returned matrix U is a proper rotation (determinant +1) such that
    ``np.dot(coords2 - COM2, U)`` is the best least-squares fit onto
    ``coords1 - COM1``.

    Args:
        coords1: Coordinates of structure 1 (N, 3)
        coords2: Coordinates of structure 2 (N, 3)
        COM1: Center of mass of structure 1 (3,)
        COM2: Center of mass of structure 2 (3,)

    Returns:
        U: Rotation matrix (3, 3)
        RMSD: Root mean square deviation (float)

        (None, None) is returned when the two sets have different numbers
        of points or contain no points.
    """
    sel1 = np.asarray(coords1, dtype=float) - COM1
    sel2 = np.asarray(coords2, dtype=float) - COM2

    # Check for consistency: points are paired one-to-one, and at least one
    # pair is needed for the RMSD to be defined
    n_points = len(sel1)
    if n_points != len(sel2) or n_points == 0:
        return None, None

    # Initial residual, see Kabsch.
    R0 = np.sum(sel1 * sel1) + np.sum(sel2 * sel2)

    # Calculate the components of the rotation matrix (V, W)
    # S is used to calculate the error (RMSD)
    V, S, W = np.linalg.svd(np.dot(sel2.T, sel1))

    # A negative determinant product means V.W would be a reflection.
    # The product is only approximately +/-1 in floating point, so test the
    # sign rather than comparing against -1.0 exactly.
    if np.linalg.det(V) * np.linalg.det(W) < 0.0:
        S[-1] = -S[-1]
        V[:, -1] = -V[:, -1]

    U = np.dot(V, W)

    # Calculate the RMSD using sigma from the SVD calculation above;
    # abs() guards against a tiny negative value caused by rounding
    RMSD = np.sqrt(abs((R0 - 2.0 * np.sum(S)) / n_points))

    return U, RMSD
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
def translate_rotate_coords(coords, COM, U=None):
    """
    Shift coordinates by a center of mass and, if given, rotate them.

    The center of mass is subtracted first; when a rotation matrix is
    supplied, the shifted coordinates are then right-multiplied by it.

    Args:
        coords: Coordinates to transform (N, 3)
        COM: Center of mass to translate by (3,)
        U: Rotation matrix (3, 3), optional

    Returns:
        Transformed coordinates (N, 3)
    """
    centered = coords - COM

    # Without a rotation matrix the translation is the whole transformation
    return centered if U is None else np.dot(centered, U)
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def get_all_atom_coords(fname):
    """
    Read the coordinates of every atom record in a PDB file.

    Only ATOM / HETATM records are used; all other lines are ignored.
    Coordinates are returned in file order.

    Args:
        fname: Path to PDB file

    Returns:
        Numpy array of coordinates (N, 3)
    """
    with open(fname) as handle:
        records = handle.readlines()

    atom_tags = ('ATOM', 'HETATM', 'HETAT')

    # x, y and z occupy fixed 8-character columns starting at offset 30
    xyz_rows = [
        [
            float(rec[30:38].strip()),
            float(rec[38:46].strip()),
            float(rec[46:54].strip()),
        ]
        for rec in records
        if rec[0:6].strip() in atom_tags
    ]

    return np.asarray(xyz_rows)
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
def apply_transformation_to_pdb(fname, U, COM, output_fname):
    """
    Apply rotation and translation to a PDB file and save result.

    Each ATOM/HETATM coordinate is replaced by ``(coord - COM) @ U``; every
    other line is copied through unchanged. The whole output is built in
    memory before ``output_fname`` is opened, so a malformed record cannot
    leave a truncated output file behind (or destroy the input when
    ``output_fname`` is the same path as ``fname``).

    Args:
        fname: Input PDB file path
        U: Rotation matrix (3, 3)
        COM: Center of mass to translate from (3,)
        output_fname: Output PDB file path

    Raises:
        ValueError: If a coordinate field of an atom record is not a number;
            in that case nothing is written
    """
    atom_records = ('ATOM', 'HETATM', 'HETAT')

    with open(fname) as f:
        lines = f.readlines()

    out_lines = []
    for line in lines:
        if line[0:6].strip() not in atom_records:
            out_lines.append(line)
            continue

        # Extract coordinates from the fixed-width columns 31-54
        coord = np.array([
            float(line[30:38]),
            float(line[38:46]),
            float(line[46:54]),
        ])

        # Transform
        new_coord = np.dot((coord - COM), U)

        # Rebuild the record, keeping everything outside the x/y/z columns
        out_lines.append(
            line[:30] +
            f"{new_coord[0]:8.3f}" +
            f"{new_coord[1]:8.3f}" +
            f"{new_coord[2]:8.3f}" +
            line[54:]
        )

    # Only touch the destination once every line was transformed successfully
    with open(output_fname, 'w') as f:
        f.writelines(out_lines)
|
visualization.py
ADDED
|
@@ -0,0 +1,673 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
3D Visualization Module for RNA Structure Comparison
|
| 3 |
+
Uses py3Dmol for interactive molecular visualization
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import numpy as np
|
| 7 |
+
from rmsd_utils import (
|
| 8 |
+
parse_residue_atoms,
|
| 9 |
+
translate_rotate_coords,
|
| 10 |
+
calculate_COM,
|
| 11 |
+
get_backbone_sugar_and_selectbase_coords_fixed
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _js_string_literal(text):
    """Return *text* encoded as a JavaScript string literal safe inside <script>."""
    import json

    # json.dumps escapes quotes, backslashes, newlines and non-ASCII
    # characters; "<" is escaped as well so the data can never form
    # "</script>" or "<!--" and terminate the surrounding script element.
    return json.dumps(text).replace("<", "\\u003c")


def create_structure_visualization(ref_path, query_path, ref_window_indices, query_window_indices,
                                   rotation_matrix, ref_com, query_com, rmsd=None):
    """
    Create an interactive 3D visualization of aligned structures.

    Only the window residues of each structure are shown. The query window is
    transformed as ``(coord - query_com) @ rotation_matrix + ref_com`` before
    being embedded, so both models share the reference coordinate frame.

    Args:
        ref_path: Path to reference motif PDB file
        query_path: Path to query motif PDB file
        ref_window_indices: List of residue indices for the reference window
        query_window_indices: List of residue indices for the query window
        rotation_matrix: Rotation matrix from RMSD calculation
        ref_com: Center of mass of reference window
        query_com: Center of mass of query window
        rmsd: RMSD value (optional, for display)

    Returns:
        HTML string containing the py3Dmol visualization
    """
    # Extract only the window residues from both structures
    ref_window_pdb = extract_window_pdb(ref_path, ref_window_indices)
    query_window_pdb = extract_window_pdb(query_path, query_window_indices)

    # Transform query window to align with reference window
    # Proper alignment: translate to origin, rotate, translate to reference position
    transformed_query_pdb = transform_pdb_string(
        query_window_pdb,
        rotation_matrix,
        query_com,
        ref_com  # Reference COM moves the rotated query onto the reference
    )

    # PDB text comes from user-supplied files, so it is embedded as an
    # escaped string literal rather than pasted raw into the script.
    ref_js = _js_string_literal(ref_window_pdb)
    query_js = _js_string_literal(transformed_query_pdb)

    rmsd_text = f"{rmsd:.3f}" if rmsd is not None else "N/A"

    # Create py3Dmol visualization
    html = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
        <style>
            #container {{
                width: 100%;
                height: 700px;
                position: relative;
                border: 1px solid #ddd;
            }}
            .control-panel {{
                position: absolute;
                top: 10px;
                right: 10px;
                background: rgba(255, 255, 255, 0.95);
                padding: 15px;
                border-radius: 8px;
                font-family: Arial, sans-serif;
                font-size: 13px;
                z-index: 1000;
                box-shadow: 0 2px 8px rgba(0,0,0,0.1);
                max-width: 220px;
            }}
            .control-panel h4 {{
                margin: 0 0 10px 0;
                font-size: 14px;
                color: #333;
            }}
            .control-section {{
                margin-bottom: 12px;
                padding-bottom: 12px;
                border-bottom: 1px solid #eee;
            }}
            .control-section:last-child {{
                border-bottom: none;
                margin-bottom: 0;
            }}
            .control-section label {{
                display: block;
                margin: 6px 0;
                cursor: pointer;
            }}
            .control-section input[type="checkbox"] {{
                margin-right: 8px;
            }}
            .control-section select {{
                width: 100%;
                padding: 4px;
                margin-top: 5px;
                border: 1px solid #ccc;
                border-radius: 4px;
            }}
            .legend {{
                position: absolute;
                top: 10px;
                left: 10px;
                background: rgba(255, 255, 255, 0.95);
                padding: 15px;
                border-radius: 8px;
                font-family: Arial, sans-serif;
                font-size: 13px;
                z-index: 1000;
                box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            }}
            .legend h4 {{
                margin: 0 0 10px 0;
                font-size: 14px;
                color: #333;
            }}
            .legend-item {{
                margin: 6px 0;
                display: flex;
                align-items: center;
            }}
            .color-box {{
                width: 24px;
                height: 16px;
                margin-right: 10px;
                border: 1px solid #333;
                border-radius: 2px;
            }}
            .rmsd-info {{
                position: absolute;
                bottom: 10px;
                left: 10px;
                background: rgba(255, 255, 255, 0.95);
                padding: 10px 15px;
                border-radius: 8px;
                font-family: Arial, sans-serif;
                font-size: 13px;
                z-index: 1000;
                box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            }}
            .section-title {{
                font-weight: bold;
                color: #555;
                margin-bottom: 5px;
                font-size: 12px;
                text-transform: uppercase;
            }}
        </style>
    </head>
    <body>
        <div id="container"></div>

        <div class="legend">
            <h4>🧬 Structures</h4>
            <div class="legend-item">
                <div class="color-box" style="background: #4A90E2;"></div>
                <span>Reference</span>
            </div>
            <div class="legend-item">
                <div class="color-box" style="background: #E94B3C;"></div>
                <span>Query (Aligned)</span>
            </div>
        </div>

        <div class="control-panel">
            <h4>⚙️ Display Options</h4>

            <div class="control-section">
                <div class="section-title">Structures</div>
                <label>
                    <input type="checkbox" id="showRef" checked onchange="updateDisplay()">
                    Reference
                </label>
                <label>
                    <input type="checkbox" id="showQuery" checked onchange="updateDisplay()">
                    Query
                </label>
            </div>

            <div class="control-section">
                <div class="section-title">Style</div>
                <select id="styleMode" onchange="updateDisplay()">
                    <option value="sticks">Sticks</option>
                    <option value="cartoon">Cartoon</option>
                    <option value="spheres">Spheres</option>
                    <option value="lines">Lines</option>
                    <option value="cartoon_sticks">Cartoon + Sticks</option>
                </select>
            </div>

            <div class="control-section">
                <div class="section-title">Components</div>
                <label>
                    <input type="checkbox" id="showBackbone" checked onchange="updateDisplay()">
                    Backbone/Sugar
                </label>
                <label>
                    <input type="checkbox" id="showBases" checked onchange="updateDisplay()">
                    Bases
                </label>
            </div>

            <div class="control-section">
                <div class="section-title">Labels</div>
                <label>
                    <input type="checkbox" id="showLabels" onchange="updateDisplay()">
                    Residue Labels
                </label>
                <label>
                    <input type="checkbox" id="showNumbers" onchange="updateDisplay()">
                    Residue Numbers
                </label>
                <label>
                    <input type="checkbox" id="showAtoms" onchange="updateDisplay()">
                    Atom Names
                </label>
                <select id="atomLabelMode" style="margin-top: 5px; font-size: 11px;" onchange="updateDisplay()">
                    <option value="all">All Atoms</option>
                    <option value="backbone">Backbone Only</option>
                    <option value="sidechain">Bases Only</option>
                </select>
            </div>

            <div class="control-section">
                <div class="section-title">Background</div>
                <select id="bgColor" onchange="updateBackground()">
                    <option value="white">White</option>
                    <option value="black">Black</option>
                    <option value="gray">Gray</option>
                </select>
            </div>
        </div>

        <div class="rmsd-info">
            <strong>RMSD:</strong> <span style="color: #E94B3C; font-weight: bold;">{rmsd_text} Å</span>
        </div>

        <script>
            let viewer = null;
            let refModel = null;
            let queryModel = null;
            const refPDB = {ref_js};
            const queryPDB = {query_js};

            // RNA backbone atoms
            const backboneAtoms = ['P', 'OP1', 'OP2', "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"];

            function initViewer() {{
                try {{
                    viewer = $3Dmol.createViewer("container", {{
                        backgroundColor: 'white'
                    }});

                    if (!refPDB || refPDB.length < 10) {{
                        throw new Error("Reference PDB data is empty");
                    }}

                    if (!queryPDB || queryPDB.length < 10) {{
                        throw new Error("Query PDB data is empty");
                    }}

                    updateDisplay();
                    viewer.zoomTo();
                    viewer.render();

                }} catch (error) {{
                    console.error("Error initializing viewer:", error);
                    document.getElementById("container").innerHTML =
                        '<div style="padding: 20px; color: red; text-align: center;">Error loading visualization: ' + error.message + '</div>';
                }}
            }}

            function updateBackground() {{
                const bgColor = document.getElementById('bgColor').value;
                viewer.setBackgroundColor(bgColor);
                viewer.render();
            }}

            function updateDisplay() {{
                if (!viewer) return;

                try {{
                    // Clear everything
                    viewer.removeAllModels();
                    viewer.removeAllLabels();

                    const showRef = document.getElementById('showRef').checked;
                    const showQuery = document.getElementById('showQuery').checked;
                    const showBackbone = document.getElementById('showBackbone').checked;
                    const showBases = document.getElementById('showBases').checked;
                    const showLabels = document.getElementById('showLabels').checked;
                    const showNumbers = document.getElementById('showNumbers').checked;
                    const showAtoms = document.getElementById('showAtoms').checked;
                    const styleMode = document.getElementById('styleMode').value;

                    // Reference structure (blue)
                    if (showRef) {{
                        refModel = viewer.addModel(refPDB, "pdb");
                        applyStyle(refModel, '#4A90E2', '#5BA3F5', styleMode, showBackbone, showBases);

                        if (showLabels || showNumbers) {{
                            addResidueLabels(refModel, '#4A90E2', showLabels, showNumbers);
                        }}
                        if (showAtoms) {{
                            addAtomLabels(refModel, '#4A90E2');
                        }}
                    }}

                    // Query structure (red)
                    if (showQuery) {{
                        queryModel = viewer.addModel(queryPDB, "pdb");
                        applyStyle(queryModel, '#E94B3C', '#FF6B6B', styleMode, showBackbone, showBases);

                        if (showLabels || showNumbers) {{
                            addResidueLabels(queryModel, '#E94B3C', showLabels, showNumbers);
                        }}
                        if (showAtoms) {{
                            addAtomLabels(queryModel, '#E94B3C');
                        }}
                    }}

                    viewer.zoomTo();
                    viewer.render();

                }} catch (error) {{
                    console.error("Error updating display:", error);
                }}
            }}

            function applyStyle(model, backboneColor, baseColor, styleMode, showBackbone, showBases) {{
                // Clear any existing styles
                viewer.setStyle({{model: model}}, {{}});

                if (styleMode === 'cartoon') {{
                    // Cartoon representation
                    viewer.setStyle({{model: model}}, {{
                        cartoon: {{
                            color: backboneColor,
                            thickness: 0.5,
                            opacity: 0.8
                        }}
                    }});
                }} else if (styleMode === 'cartoon_sticks') {{
                    // Cartoon + sticks for bases
                    viewer.setStyle({{model: model}}, {{
                        cartoon: {{
                            color: backboneColor,
                            thickness: 0.5,
                            opacity: 0.7
                        }}
                    }});
                    if (showBases) {{
                        viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
                            stick: {{
                                color: baseColor,
                                radius: 0.15
                            }}
                        }});
                    }}
                }} else if (styleMode === 'spheres') {{
                    // Sphere representation
                    if (showBackbone) {{
                        viewer.setStyle({{model: model, atom: backboneAtoms}}, {{
                            sphere: {{
                                color: backboneColor,
                                radius: 0.4
                            }}
                        }});
                    }}
                    if (showBases) {{
                        viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
                            sphere: {{
                                color: baseColor,
                                radius: 0.35
                            }}
                        }});
                    }}
                }} else if (styleMode === 'lines') {{
                    // Line representation
                    if (showBackbone) {{
                        viewer.setStyle({{model: model, atom: backboneAtoms}}, {{
                            line: {{
                                color: backboneColor,
                                linewidth: 2
                            }}
                        }});
                    }}
                    if (showBases) {{
                        viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
                            line: {{
                                color: baseColor,
                                linewidth: 2
                            }}
                        }});
                    }}
                }} else {{
                    // Stick representation (default)
                    if (showBackbone) {{
                        viewer.setStyle({{model: model, atom: backboneAtoms}}, {{
                            stick: {{
                                color: backboneColor,
                                radius: 0.2
                            }},
                            sphere: {{
                                color: backboneColor,
                                radius: 0.3
                            }}
                        }});
                    }}
                    if (showBases) {{
                        viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
                            stick: {{
                                color: baseColor,
                                radius: 0.15
                            }},
                            sphere: {{
                                color: baseColor,
                                radius: 0.25
                            }}
                        }});
                    }}
                }}
            }}

            function addResidueLabels(model, color, showLabels, showNumbers) {{
                const atoms = viewer.selectedAtoms({{model: model}});
                const residues = {{}};

                // Group atoms by residue
                atoms.forEach(atom => {{
                    const key = atom.chain + '_' + atom.resi;
                    if (!residues[key]) {{
                        residues[key] = atom;
                    }}
                }});

                // Add labels for each residue
                Object.values(residues).forEach(atom => {{
                    let labelText = '';
                    if (showLabels && showNumbers) {{
                        labelText = atom.resn + atom.resi;
                    }} else if (showLabels) {{
                        labelText = atom.resn;
                    }} else if (showNumbers) {{
                        labelText = atom.resi.toString();
                    }}

                    if (labelText) {{
                        viewer.addLabel(labelText, {{
                            position: atom,
                            backgroundColor: color,
                            backgroundOpacity: 0.7,
                            fontColor: 'white',
                            fontSize: 11,
                            fontWeight: 'bold',
                            showBackground: true,
                            borderRadius: 3
                        }});
                    }}
                }});
            }}

            function addAtomLabels(model, color) {{
                const atomLabelMode = document.getElementById('atomLabelMode').value;
                const atoms = viewer.selectedAtoms({{model: model}});

                // Filter atoms based on mode
                let filteredAtoms = atoms;
                if (atomLabelMode === 'backbone') {{
                    // Only backbone atoms
                    filteredAtoms = atoms.filter(atom => backboneAtoms.includes(atom.atom));
                }} else if (atomLabelMode === 'sidechain') {{
                    // Only base/sidechain atoms (not backbone)
                    filteredAtoms = atoms.filter(atom => !backboneAtoms.includes(atom.atom));
                }}
                // 'all' mode uses all atoms (no filtering)

                // Add label for each atom
                filteredAtoms.forEach(atom => {{
                    // Use atom name (e.g., P, C1', N1, O4, etc.)
                    const atomName = atom.atom;

                    viewer.addLabel(atomName, {{
                        position: atom,
                        backgroundColor: color,
                        backgroundOpacity: 0.6,
                        fontColor: 'white',
                        fontSize: 9,
                        fontWeight: 'normal',
                        showBackground: true,
                        borderRadius: 2,
                        borderThickness: 0.5
                    }});
                }});
            }}

            // Initialize on load
            initViewer();
        </script>
    </body>
    </html>
    """

    return html
|
| 541 |
+
|
| 542 |
+
|
| 543 |
+
def extract_window_pdb(pdb_path, window_indices):
    """
    Extract specific residues from a PDB file based on window indices.

    Window indices are positions in the residue list returned by
    ``parse_residue_atoms``; they are mapped to residue numbers and every
    ATOM/HETATM line whose residue-number field (columns 23-26) matches is
    kept, together with HEADER/TITLE/MODEL/ENDMDL lines. Matching is by
    residue number only, so chain IDs and insertion codes are not
    distinguished.

    Args:
        pdb_path: Path to PDB file
        window_indices: List of residue indices (0-based)

    Returns:
        String containing PDB data for only the specified residues, terminated
        by an END record. The full file content is returned instead when
        parsing yields no residues, no index is valid, or the extracted text
        is suspiciously small.
    """
    with open(pdb_path) as f:
        lines = f.readlines()

    # Get all residue numbers from the file
    residues = parse_residue_atoms(pdb_path)

    if not residues:
        # If parsing failed, return original file
        return ''.join(lines)

    # NOTE(review): compared against int() of the column text below, so this
    # assumes parse_residue_atoms reports 'resnum' as int — confirm.
    residue_numbers = [res['resnum'] for res in residues]

    # Map window indices to actual residue numbers
    target_resnums = {
        residue_numbers[idx]
        for idx in window_indices
        if idx < len(residue_numbers)
    }

    if not target_resnums:
        # If no valid residues, return original file
        return ''.join(lines)

    # Extract lines for these residues
    window_lines = []
    for line in lines:
        if len(line) < 6:
            continue

        record = line[0:6].strip()
        if record in ('ATOM', 'HETATM', 'HETAT'):
            resnum_str = line[22:26].strip()
            if not resnum_str:
                continue
            try:
                resnum = int(resnum_str)
            except ValueError:
                # Non-numeric residue field: skip the atom
                continue
            if resnum in target_resnums:
                window_lines.append(line)
        elif record in ('HEADER', 'TITLE', 'MODEL', 'ENDMDL'):
            window_lines.append(line)

    # Always add END record. Source END lines are never kept above, so one is
    # always missing here; a substring test for 'END' would be fooled by
    # ENDMDL records or by titles such as "ENDONUCLEASE".
    if window_lines:
        if not window_lines[-1].endswith('\n'):
            # Last line of a file without trailing newline: avoid "...END"
            window_lines[-1] += '\n'
        window_lines.append('END\n')

    result = ''.join(window_lines)

    # Debug: print info about extraction
    if not result or len(result) < 50:
        print(f"Warning: Empty or very small PDB extracted from {pdb_path}")
        print(f"  Window indices: {window_indices}")
        print(f"  Target residue numbers: {target_resnums}")
        print(f"  Result length: {len(result)}")
        # Return full structure if extraction failed
        return ''.join(lines)

    return result
|
| 612 |
+
|
| 613 |
+
|
| 614 |
+
def transform_pdb_string(pdb_string, rotation_matrix, query_com, ref_com=None):
    """
    Rotate and translate every atom record of a PDB string onto the reference.

    For each ATOM/HETATM line the new position is
    ``(coord - query_com) @ rotation_matrix + ref_com``:
    1. Subtract query_com so the query sits at the origin
    2. Right-multiply by the rotation matrix
    3. Add ref_com to move it to the reference position

    Lines shorter than 54 characters, non-atom records, and atom records
    whose coordinates cannot be parsed are passed through untouched.

    Args:
        pdb_string: PDB format string
        rotation_matrix: 3x3 rotation matrix
        query_com: Center of mass of query structure (to translate FROM)
        ref_com: Center of mass of reference structure (to translate TO), optional

    Returns:
        Transformed PDB string with aligned coordinates
    """
    atom_records = ['ATOM', 'HETATM', 'HETAT']

    # Without a reference COM the rotated query simply stays at the origin
    destination = np.array([0.0, 0.0, 0.0]) if ref_com is None else ref_com

    def _align(text):
        # Too short to hold x/y/z columns, or not an atom record: keep as is
        if len(text) < 54:
            return text
        if text[0:6].strip() not in atom_records:
            return text

        try:
            xyz = np.array([
                float(text[30:38].strip()),
                float(text[38:46].strip()),
                float(text[46:54].strip()),
            ])
            moved = np.dot(xyz - query_com, rotation_matrix) + destination
            fields = f"{moved[0]:8.3f}" + f"{moved[1]:8.3f}" + f"{moved[2]:8.3f}"
        except (ValueError, IndexError):
            # Best effort: an unparsable record is left unchanged
            return text

        return text[:30] + fields + text[54:]

    return '\n'.join(_align(text) for text in pdb_string.split('\n'))
|