Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +971 -33
src/streamlit_app.py
CHANGED
|
@@ -1,40 +1,978 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import numpy as np
|
| 3 |
import pandas as pd
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
"""
|
| 7 |
-
# Welcome to Streamlit!
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
indices = np.linspace(0, 1, num_points)
|
| 20 |
-
theta = 2 * np.pi * num_turns * indices
|
| 21 |
-
radius = indices
|
| 22 |
-
|
| 23 |
-
x = radius * np.cos(theta)
|
| 24 |
-
y = radius * np.sin(theta)
|
| 25 |
-
|
| 26 |
-
df = pd.DataFrame({
|
| 27 |
-
"x": x,
|
| 28 |
-
"y": y,
|
| 29 |
-
"idx": indices,
|
| 30 |
-
"rand": np.random.randn(num_points),
|
| 31 |
-
})
|
| 32 |
-
|
| 33 |
-
st.altair_chart(alt.Chart(df, height=700, width=700)
|
| 34 |
-
.mark_point(filled=True)
|
| 35 |
-
.encode(
|
| 36 |
-
x=alt.X("x", axis=None),
|
| 37 |
-
y=alt.Y("y", axis=None),
|
| 38 |
-
color=alt.Color("idx", legend=None, scale=alt.Scale()),
|
| 39 |
-
size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
|
| 40 |
-
))
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RNA Motif Structure Comparison Tool
|
| 3 |
+
Streamlit app for comparing RNA motif structures with flexible residue selection
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import streamlit as st
|
| 7 |
import numpy as np
|
| 8 |
import pandas as pd
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
import io
|
| 11 |
+
import tempfile
|
| 12 |
+
import os
|
| 13 |
+
|
| 14 |
+
# Import our RMSD calculation functions
|
| 15 |
+
from rmsd_utils import (
|
| 16 |
+
parse_residue_atoms,
|
| 17 |
+
get_backbone_sugar_and_selectbase_coords_fixed,
|
| 18 |
+
calculate_COM,
|
| 19 |
+
calculate_rotation_rmsd,
|
| 20 |
+
translate_rotate_coords
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
from visualization import create_structure_visualization
|
| 24 |
+
|
| 25 |
+
# Page configuration
|
| 26 |
+
# Browser-tab title/icon and overall layout for the app.
# The icon must be a real emoji (or an image); the DNA emoji is used here.
st.set_page_config(
    page_title="RNA Motif Structure Comparison",
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
| 32 |
+
|
| 33 |
+
# Custom CSS
|
| 34 |
+
# Stylesheet for the header, sub-header and metric boxes used by the page.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
font-weight: bold;
color: #1f77b4;
margin-bottom: 1rem;
}
.sub-header {
font-size: 1.2rem;
color: #666;
margin-bottom: 2rem;
}
.metric-box {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 0.5rem;
margin: 0.5rem 0;
}
</style>
"""

# unsafe_allow_html is required so the <style> tag is injected rather than escaped.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def save_uploaded_file(uploaded_file, directory):
    """
    Save an uploaded file into a directory and return the path written.

    The file name is supplied by the client, so only its final path
    component is used. Directory parts such as ``../`` or an absolute
    prefix are dropped so the write cannot land outside ``directory``.

    NOTE(review): this function is defined a second time further down in
    this module; the later definition is the one in effect at runtime.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``
        directory: Existing directory to write the file into

    Returns:
        Path of the saved file
    """
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def get_structure_info(pdb_path):
    """
    Summarise the residues parsed from a PDB file.

    Args:
        pdb_path: Path to PDB file

    Returns:
        One dict per residue, in parse order, with keys ``index``
        (0-based position), ``resnum``, ``resname`` and ``full_name``
        (a display label that uses the 1-based position).
    """
    return [
        {
            'index': position,
            'resnum': residue['resnum'],
            'resname': residue['resname'],
            'full_name': f"{position + 1}. {residue['resname']} (residue #{residue['resnum']})",
        }
        for position, residue in enumerate(parse_residue_atoms(pdb_path))
    ]
|
| 87 |
|
|
|
|
|
|
|
| 88 |
|
| 89 |
+
def display_structure_selector(files, temp_dir, set_name):
    """
    Display structure information and allow users to select residues.

    For every file an expander shows the residue table and a choice of
    three selection methods (range, specific residues, all residues).
    A file from which no residues could be parsed gets a warning and an
    empty selection instead of crashing the page.

    Args:
        files: List of uploaded files (each exposing ``name``)
        temp_dir: Temporary directory containing files
        set_name: Name of the set (e.g., "Reference" or "Query"); also used
            to build the widget keys

    Returns:
        Dict mapping filename to list of selected residue indices (0-based)
    """
    if not files:
        return {}

    st.subheader(f"π {set_name} Structure Preview & Selection")

    selections = {}

    for file in files:
        file_path = os.path.join(temp_dir, file.name)
        structure_info = get_structure_info(file_path)
        residue_count = len(structure_info)

        with st.expander(f"π {file.name} ({residue_count} residues)"):
            if not structure_info:
                # An empty table has no 'index'/'resnum'/'resname' columns to
                # select, and the range inputs below would be created with
                # max_value=0 < min_value=1. Report it and select nothing.
                st.warning(f"No residues could be read from {file.name}.")
                selections[file.name] = []
                continue

            # Display residue table
            info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
            info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
            info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
            info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]

            # Height grows with the row count up to a cap of 300.
            st.dataframe(info_df, use_container_width=True, height=min(300, residue_count * 35 + 38))

            # Selection method
            selection_method = st.radio(
                f"Selection method for {file.name}",
                ["Select by range", "Select specific residues", "Use all residues"],
                key=f"method_{set_name}_{file.name}",
                horizontal=True
            )

            selected_indices = []

            if selection_method == "Select by range":
                col1, col2 = st.columns(2)
                with col1:
                    start_idx = st.number_input(
                        "Start index (1-based)",
                        min_value=1,
                        max_value=residue_count,
                        value=1,
                        key=f"start_{set_name}_{file.name}"
                    )
                with col2:
                    end_idx = st.number_input(
                        "End index (1-based, inclusive)",
                        min_value=1,
                        max_value=residue_count,
                        value=min(4, residue_count),
                        key=f"end_{set_name}_{file.name}"
                    )

                if start_idx <= end_idx:
                    # Inputs are 1-based and inclusive; stored indices are 0-based.
                    selected_indices = list(range(start_idx - 1, end_idx))
                    st.info(f"β Selected residues: {[i+1 for i in selected_indices]}")
                else:
                    st.error("Start index must be ≤ end index")

            elif selection_method == "Select specific residues":
                # Multi-select for specific residues
                selected_names = st.multiselect(
                    "Select residues",
                    options=[info['full_name'] for info in structure_info],
                    default=[info['full_name'] for info in structure_info[:4]],
                    key=f"specific_{set_name}_{file.name}"
                )

                # Map display labels back to 0-based indices
                name_to_idx = {info['full_name']: info['index'] for info in structure_info}
                selected_indices = sorted(name_to_idx[name] for name in selected_names)

                if selected_indices:
                    st.info(f"β Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")

            else:  # Use all residues
                selected_indices = list(range(residue_count))
                st.info(f"β Using all {len(selected_indices)} residues")

            # Show selected residues details
            if selected_indices:
                selected_df = info_df[info_df['Index (0-based)'].isin(selected_indices)]
                st.markdown("**Selected residues:**")
                st.dataframe(selected_df, use_container_width=True)

            selections[file.name] = selected_indices

    return selections
|
| 186 |
|
| 187 |
+
|
| 188 |
+
def save_uploaded_file(uploaded_file, directory):
    """
    Save an uploaded file into a directory and return the path written.

    The file name is supplied by the client, so only its final path
    component is used. Directory parts such as ``../`` or an absolute
    prefix are dropped so the write cannot land outside ``directory``.

    NOTE(review): a function with this name is also defined earlier in
    this module; this later definition is the one in effect at runtime.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``
        directory: Existing directory to write the file into

    Returns:
        Path of the saved file
    """
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def extract_window_coords(residues, window_indices):
    """
    Collect the coordinates of the residues named by ``window_indices``.

    For each index, the backbone/sugar coordinates are appended first and
    the base coordinates second. Indices at or beyond ``len(residues)``
    are skipped without error.

    Args:
        residues: List of all residues
        window_indices: List of indices to extract

    Returns:
        numpy array built from the collected coordinates
    """
    from rmsd_utils import get_backbone_sugar_coords_from_residue, get_base_coords_from_residue

    residue_total = len(residues)
    gathered = []
    for position in window_indices:
        if position >= residue_total:
            continue
        entry = residues[position]
        # Backbone and sugar atoms first, then the base atoms
        gathered.extend(get_backbone_sugar_coords_from_residue(entry))
        gathered.extend(get_base_coords_from_residue(entry))

    return np.asarray(gathered)
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def compare_structures_with_selection(reference_files, query_files, ref_selections, query_selections, temp_dir):
    """
    Compare reference and query structures using user-selected residues (direct comparison).
    Only compares structures with matching selection sizes.

    Selections with fewer than two residues are ignored. Each query file is
    parsed at most once and reused for every reference it is compared with.

    Args:
        reference_files: List of reference motif files
        query_files: List of query motif files
        ref_selections: Dict mapping filename to selected residue indices
        query_selections: Dict mapping filename to selected residue indices
        temp_dir: Temporary directory containing files

    Returns:
        DataFrame with one row per comparison; empty if nothing is comparable
    """
    def describe(indices):
        # 1-based, comma-separated label such as "[1,2,3]"
        return f"[{','.join([str(i+1) for i in indices])}]"

    def sequence_of(residues, indices):
        return ''.join([residues[i]['resname'] for i in indices if i < len(residues)])

    # Keep only files whose selection has at least two residues.
    usable_refs = [
        (ref_file, indices)
        for ref_file in reference_files
        for indices in [ref_selections.get(ref_file.name, [])]
        if len(indices) >= 2
    ]
    usable_queries = [
        (query_file, indices)
        for query_file in query_files
        for indices in [query_selections.get(query_file.name, [])]
        if len(indices) >= 2
    ]

    # Count valid comparisons (same number of selected residues on both sides)
    total_comparisons = sum(
        1
        for _, ref_indices in usable_refs
        for _, query_indices in usable_queries
        if len(ref_indices) == len(query_indices)
    )

    if total_comparisons == 0:
        st.error("No valid comparisons found. Ensure selected regions have matching sizes.")
        return pd.DataFrame()

    progress_bar = st.progress(0)
    status_text = st.empty()

    results = []
    comparison_count = 0
    parsed_queries = {}  # query filename -> parsed residues, filled on first use

    for ref_file, ref_indices in usable_refs:
        ref_name = ref_file.name
        ref_path = os.path.join(temp_dir, ref_name)

        # Parse reference motif and extract coordinates for selected residues
        ref_residues = parse_residue_atoms(ref_path)
        ref_coords = extract_window_coords(ref_residues, ref_indices)
        ref_com = calculate_COM(ref_coords)

        ref_residue_desc = describe(ref_indices)
        ref_sequence = sequence_of(ref_residues, ref_indices)

        for query_file, query_indices in usable_queries:
            # Only compare if same number of residues
            if len(ref_indices) != len(query_indices):
                continue

            query_name = query_file.name
            query_path = os.path.join(temp_dir, query_name)

            # Parse query motif once; later references reuse the result
            if query_name not in parsed_queries:
                parsed_queries[query_name] = parse_residue_atoms(query_path)
            query_residues = parsed_queries[query_name]

            query_coords = extract_window_coords(query_residues, query_indices)
            query_com = calculate_COM(query_coords)

            query_residue_desc = describe(query_indices)
            query_sequence = sequence_of(query_residues, query_indices)

            # Calculate RMSD
            U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)

            if U is None or RMSD is None:
                # Failed superposition: sentinel RMSD and identity rotation
                RMSD = 999.0
                U = np.eye(3)

            # Store results
            results.append({
                'Reference': ref_name,
                'Ref_Residues': ref_residue_desc,
                'Ref_Sequence': ref_sequence,
                'Ref_Indices': ref_indices,
                'Query': query_name,
                'Query_Residues': query_residue_desc,
                'Query_Sequence': query_sequence,
                'Query_Indices': query_indices,
                'Num_Residues': len(ref_indices),
                'RMSD': RMSD,
                'Rotation_Matrix': U,
                'Ref_COM': ref_com,
                'Query_COM': query_com,
                'Ref_Path': ref_path,
                'Query_Path': query_path
            })

            comparison_count += 1
            progress_bar.progress(comparison_count / total_comparisons)
            status_text.text(f"Processing: {ref_name}{ref_residue_desc} vs {query_name}{query_residue_desc}")

    progress_bar.empty()
    status_text.empty()

    return pd.DataFrame(results)
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
def compare_structures_with_windows(reference_files, query_files, ref_selections, query_selections,
                                    window_size, window_type, temp_dir):
    """
    Compare reference and query structures using sliding windows on selected residues.
    Allows comparison of different-sized selections.

    Windows are generated once per file and each query file is parsed at
    most once, instead of repeating that work for every reference window.

    Args:
        reference_files: List of reference motif files
        query_files: List of query motif files
        ref_selections: Dict mapping filename to selected residue indices
        query_selections: Dict mapping filename to selected residue indices
        window_size: Size of comparison window
        window_type: "contiguous" or "non-contiguous"
        temp_dir: Temporary directory containing files

    Returns:
        DataFrame with one row per window pair; empty if nothing is comparable
    """
    from itertools import combinations

    def generate_windows_from_selection(selected_indices, win_size, win_type):
        """Generate windows from selected indices"""
        if len(selected_indices) < win_size:
            return []

        if win_type == "contiguous":
            return [
                selected_indices[i:i + win_size]
                for i in range(len(selected_indices) - win_size + 1)
            ]
        # non-contiguous: every combination of win_size selected residues
        return [list(combo) for combo in combinations(selected_indices, win_size)]

    def describe(window):
        # 1-based, comma-separated label such as "[1,2,3]"
        return f"[{','.join([str(i+1) for i in window])}]"

    def sequence_of(residues, window):
        return ''.join([residues[i]['resname'] for i in window if i < len(residues)])

    # Build the windows once per file; both the count and the main loop use them.
    ref_windows_by_name = {
        ref_file.name: generate_windows_from_selection(
            ref_selections.get(ref_file.name, []), window_size, window_type)
        for ref_file in reference_files
    }
    query_windows_by_name = {
        query_file.name: generate_windows_from_selection(
            query_selections.get(query_file.name, []), window_size, window_type)
        for query_file in query_files
    }

    # Count total comparisons (files without windows contribute zero)
    total_comparisons = sum(
        len(ref_windows_by_name[ref_file.name]) * len(query_windows_by_name[query_file.name])
        for ref_file in reference_files
        for query_file in query_files
    )

    if total_comparisons == 0:
        st.error(f"No valid comparisons found. Ensure selected regions have at least {window_size} residues.")
        return pd.DataFrame()

    progress_bar = st.progress(0)
    status_text = st.empty()

    results = []
    comparison_count = 0
    parsed_queries = {}  # query filename -> parsed residues, filled on first use

    for ref_file in reference_files:
        ref_name = ref_file.name
        ref_path = os.path.join(temp_dir, ref_name)
        ref_windows = ref_windows_by_name[ref_name]

        if not ref_windows:
            ref_indices = ref_selections.get(ref_name, [])
            st.warning(f"Skipping {ref_name}: selected {len(ref_indices)} residues, need at least {window_size}")
            continue

        # Parse reference motif
        ref_residues = parse_residue_atoms(ref_path)

        for ref_window in ref_windows:
            # Extract coordinates for this window
            ref_coords = extract_window_coords(ref_residues, ref_window)
            ref_com = calculate_COM(ref_coords)

            ref_window_desc = describe(ref_window)
            ref_sequence = sequence_of(ref_residues, ref_window)

            for query_file in query_files:
                query_name = query_file.name
                query_path = os.path.join(temp_dir, query_name)
                query_windows = query_windows_by_name[query_name]

                if not query_windows:
                    continue

                # Parse query motif once; later windows reuse the result
                if query_name not in parsed_queries:
                    parsed_queries[query_name] = parse_residue_atoms(query_path)
                query_residues = parsed_queries[query_name]

                for query_window in query_windows:
                    # Extract coordinates for this window
                    query_coords = extract_window_coords(query_residues, query_window)
                    query_com = calculate_COM(query_coords)

                    query_window_desc = describe(query_window)
                    query_sequence = sequence_of(query_residues, query_window)

                    # Calculate RMSD
                    U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)

                    if U is None or RMSD is None:
                        # Failed superposition: sentinel RMSD and identity rotation
                        RMSD = 999.0
                        U = np.eye(3)

                    # Store results
                    results.append({
                        'Reference': ref_name,
                        'Ref_Residues': ref_window_desc,
                        'Ref_Sequence': ref_sequence,
                        'Ref_Indices': ref_window,
                        'Query': query_name,
                        'Query_Residues': query_window_desc,
                        'Query_Sequence': query_sequence,
                        'Query_Indices': query_window,
                        'Num_Residues': window_size,
                        'RMSD': RMSD,
                        'Rotation_Matrix': U,
                        'Ref_COM': ref_com,
                        'Query_COM': query_com,
                        'Ref_Path': ref_path,
                        'Query_Path': query_path
                    })

                    comparison_count += 1
                    progress_bar.progress(comparison_count / total_comparisons)
                    status_text.text(f"Processing: {ref_name}{ref_window_desc} vs {query_name}{query_window_desc}")

    progress_bar.empty()
    status_text.empty()

    return pd.DataFrame(results)
|
| 482 |
+
|
| 483 |
+
|
| 484 |
+
def main():
    """Render the Streamlit page for RNA motif structure comparison.

    Flow: upload reference and query PDB files in the sidebar, select the
    residues to compare for every structure, choose direct or window-based
    comparison, run the analysis, then browse, visualize and export the
    results. Results are stored in ``st.session_state`` so they remain
    available across the reruns Streamlit performs on widget interaction.
    """
    # NOTE(review): emoji and the Å sign in the UI strings were restored from
    # mis-encoded text. The glyphs for the upload hint (👈), run button (🚀),
    # summary (📊), results table (📋), selection errors (❌) and the
    # purine/pyrimidine arrow (↔) are best guesses -- confirm against the
    # original file.
    direct_mode = "Direct comparison (same size)"
    window_mode = "Window-based comparison (different sizes)"

    # Header
    st.markdown(
        '<p class="main-header">🧬 RNA Motif Structure Comparison</p>',
        unsafe_allow_html=True,
    )
    st.markdown(
        '<p class="sub-header">Compare RNA motifs with flexible residue selection</p>',
        unsafe_allow_html=True,
    )

    # Sidebar
    st.sidebar.header("⚙️ Configuration")

    # File upload
    st.sidebar.subheader("1️⃣ Upload Structures")
    reference_files = st.sidebar.file_uploader(
        "Upload Reference Motif PDB files (Set A)",
        type=['pdb', 'PDB'],
        accept_multiple_files=True,
        key="reference",
        help="Upload RNA motif structures to use as reference",
    )
    query_files = st.sidebar.file_uploader(
        "Upload Query Motif PDB files (Set B)",
        type=['pdb', 'PDB'],
        accept_multiple_files=True,
        key="query",
        help="Upload RNA motif structures to compare against reference",
    )

    # Nothing to analyse until both sets are present: show the help and stop.
    if not reference_files or not query_files:
        st.info("👈 Please upload reference and query motif PDB files to begin analysis")
        with st.expander("ℹ️ About this tool"):
            st.markdown(_ABOUT_MARKDOWN)
        return

    # Streamlit re-executes this function on every widget interaction, so the
    # working directory is cached per upload set instead of created each run.
    uploaded_files = [*reference_files, *query_files]
    temp_dir = _session_temp_dir(uploaded_files)
    for uploaded in uploaded_files:
        save_uploaded_file(uploaded, temp_dir)

    # Display file info
    st.markdown("---")
    ref_col, query_col = st.columns(2)
    with ref_col:
        st.metric("Reference Motifs", len(reference_files))
    with query_col:
        st.metric("Query Motifs", len(query_files))

    # Structure preview and residue selection for both sets
    st.markdown("---")
    ref_selections = display_structure_selector(reference_files, temp_dir, "Reference")
    st.markdown("---")
    query_selections = display_structure_selector(query_files, temp_dir, "Query")

    # Validate selections
    st.markdown("---")
    valid_selections = True
    min_residues = 2
    for selections in (ref_selections, query_selections):
        for filename, indices in selections.items():
            if len(indices) < min_residues:
                st.error(
                    f"❌ {filename}: Need at least {min_residues} residues selected, "
                    f"got {len(indices)}"
                )
                valid_selections = False

    # Distinct selection sizes across every reference and query structure
    all_lengths = {
        len(indices)
        for selections in (ref_selections, query_selections)
        for indices in selections.values()
    }

    # Comparison method
    st.sidebar.markdown("---")
    st.sidebar.subheader("2️⃣ Comparison Method")
    comparison_mode = st.sidebar.radio(
        "How to compare structures?",
        [direct_mode, window_mode],
        help=(
            "\n"
            "Direct: Compare selected regions directly (must have same size)\n"
            "Window-based: Generate sliding windows for flexible comparison\n"
        ),
    )

    # The size restriction only applies to direct comparison; window mode is
    # the documented way to compare selections of different sizes.
    if len(all_lengths) > 1 and comparison_mode == direct_mode:
        st.warning(
            f"⚠️ Selected regions have different sizes: {sorted(all_lengths)} residues. "
            "Only structures with matching sizes will be compared."
        )

    window_size = None
    window_type = None
    if comparison_mode == window_mode:
        st.sidebar.markdown("**Window Configuration**")
        window_size = st.sidebar.number_input(
            "Window Size",
            min_value=2,
            max_value=20,
            value=4,
            step=1,
            help="Number of residues per comparison window",
        )
        window_type = st.sidebar.radio(
            "Window Type",
            ["contiguous", "non-contiguous"],
            help="Contiguous: sliding windows. Non-contiguous: all combinations",
        )

    st.sidebar.markdown("---")
    st.sidebar.subheader("3️⃣ Run Analysis")

    # The button is disabled while selections are invalid, so a click always
    # implies valid selections.
    if st.sidebar.button("🚀 Run Analysis", type="primary", disabled=not valid_selections):
        with st.spinner("Analyzing structures..."):
            if comparison_mode == direct_mode:
                results_df = compare_structures_with_selection(
                    reference_files,
                    query_files,
                    ref_selections,
                    query_selections,
                    temp_dir,
                )
            else:  # Window-based comparison
                results_df = compare_structures_with_windows(
                    reference_files,
                    query_files,
                    ref_selections,
                    query_selections,
                    window_size,
                    window_type,
                    temp_dir,
                )

        # Persist results so they survive subsequent reruns
        st.session_state['results_df'] = results_df
        st.session_state['ref_selections'] = ref_selections
        st.session_state['query_selections'] = query_selections
        st.session_state['comparison_mode'] = comparison_mode

        if len(results_df) > 0:
            st.success(f"✅ Analysis complete! {len(results_df)} comparisons performed.")
        else:
            st.warning(
                "⚠️ No comparisons could be performed. "
                "Check that structures meet comparison requirements."
            )

    # Display results if available
    if 'results_df' in st.session_state and len(st.session_state['results_df']) > 0:
        _render_results(st.session_state['results_df'])


# Columns shown in the results table and written to the CSV export.
_RESULT_COLUMNS = [
    'Reference', 'Ref_Residues', 'Ref_Sequence',
    'Query', 'Query_Residues', 'Query_Sequence',
    'Num_Residues', 'RMSD',
]

# Help text shown while no files are uploaded.
# NOTE(review): the nesting of the sub-bullets under step 4 is reconstructed;
# confirm against the original file.
_ABOUT_MARKDOWN = """
### Purpose
This tool compares the 3D structures of RNA motifs with **flexible residue selection** and **multiple comparison modes**.

### Workflow
1. **Upload PDB files** for reference and query motifs
2. **Preview structures** and see all residues in each file
3. **Select residues** to include in comparison (e.g., exclude stem bases, keep only loop)
4. **Choose comparison mode**:
   - **Direct comparison**: Compare selected regions directly (must be same size)
   - **Window-based comparison**: Generate windows from selections (handles different sizes)
5. **Run analysis** using RMSD-based structural alignment

### Comparison Modes

#### Direct Comparison (Same Size)
- Compares your exact selections
- Example: You select 4 loop residues from each structure
- Result: Direct 4-residue vs 4-residue comparison
- Best for: When all structures have same-sized regions of interest

#### Window-Based Comparison (Different Sizes)
- Generates sliding windows from your selections
- Example: You select 4 loop residues from ref, 6 loop residues from query
- Set window size to 4
- Result: Ref's 4 residues compared against all 4-residue windows from query's 6
- Best for: When structures have different-sized regions but you want to find similar sub-regions

### Selection Methods
- **By range**: Select consecutive residues (e.g., residues 3-6 for a tetraloop)
- **Specific residues**: Pick any combination of residues (e.g., 1,3,5,7)
- **All residues**: Use the entire structure

### Method Details
- RMSD calculated using backbone, sugar, and select base atoms
- Base atoms mapped: purines (N9,C8,C4) ↔ pyrimidines (N1,C2,C6)
- Kabsch algorithm for optimal structural alignment

### Example Use Cases

**Case 1: Extract loops from 2+4+2 structures (Direct)**
- All structures have 8 residues (2 stem + 4 loop + 2 stem)
- Select residues 3-6 for all structures (the 4-residue loop)
- Use "Direct comparison"
- Result: Compare loop vs loop directly

**Case 2: Compare 4-mer loop vs 6-mer loop (Window-based)**
- Structure A: Select residues 3-6 (4 loop residues)
- Structure B: Select residues 2-7 (6 loop residues)
- Use "Window-based comparison" with window size = 4
- Result: Structure A compared against 3 windows from Structure B

**Case 3: Find similar regions in different structures (Window-based)**
- Reference: Select 5 residues of interest
- Query: Select 10 residues from larger region
- Use "Window-based comparison" with window size = 5
- Result: Find which 5-residue window in query best matches reference

### Output
- RMSD values for all comparisons
- Interactive 3D visualization of aligned structures
- Rotation and translation matrices
- Sequence information for compared regions
"""


def _session_temp_dir(uploaded_files):
    """Return a working directory for the current upload set, reusing it across reruns.

    A new directory is created only when the set of uploads (name and size)
    changes or the cached directory no longer exists. Earlier directories are
    left in place because results stored in the session may still reference
    files inside them.
    """
    import os

    signature = tuple((f.name, getattr(f, 'size', None)) for f in uploaded_files)
    cached_dir = st.session_state.get('upload_temp_dir')
    if (
        cached_dir
        and st.session_state.get('upload_signature') == signature
        and os.path.isdir(cached_dir)
    ):
        return cached_dir

    temp_dir = tempfile.mkdtemp()
    st.session_state['upload_temp_dir'] = temp_dir
    st.session_state['upload_signature'] = signature
    return temp_dir


def _strip_pdb_suffix(filename):
    """Drop a trailing ``.pdb`` extension in any letter case; leave other names as-is."""
    import os

    stem, ext = os.path.splitext(filename)
    return stem if ext.lower() == '.pdb' else filename


def _residue_tag(indices):
    """Join 0-based residue indices as 1-based numbers separated by underscores."""
    return '_'.join(str(i + 1) for i in indices)


def _comparison_label(row):
    """Build the one-line description of a comparison used in the selector."""
    return (
        f"{row['Reference']}{row['Ref_Residues']} ({row['Ref_Sequence']}) vs "
        f"{row['Query']}{row['Query_Residues']} ({row['Query_Sequence']}) | "
        f"RMSD: {row['RMSD']:.3f} Å"
    )


def _render_results(results_df):
    """Show the threshold filter, summary metrics, results table, viewer and exports."""
    # RMSD threshold filter
    st.sidebar.markdown("---")
    st.sidebar.subheader("4️⃣ Filter Results")
    rmsd_threshold = st.sidebar.slider(
        "RMSD Threshold (Å)",
        min_value=0.0,
        max_value=5.0,
        value=2.0,
        step=0.1,
        help="Only show results below this RMSD value",
    )

    # Show which comparison mode produced the stored results
    if 'comparison_mode' in st.session_state:
        mode_display = "Direct" if "Direct" in st.session_state['comparison_mode'] else "Window-based"
        st.sidebar.info(f"**Mode**: {mode_display}")

    filtered_df = results_df[results_df['RMSD'] <= rmsd_threshold].copy()

    # Summary statistics
    st.markdown("---")
    st.subheader("📊 Summary Statistics")
    total_col, below_col, best_col, mean_col = st.columns(4)
    with total_col:
        st.metric("Total Comparisons", len(results_df))
    with below_col:
        st.metric("Below Threshold", len(filtered_df))
    with best_col:
        st.metric("Best RMSD", f"{results_df['RMSD'].min():.3f} Å")
    with mean_col:
        st.metric("Mean RMSD", f"{results_df['RMSD'].mean():.3f} Å")

    # Results table (filtered, best match first)
    st.markdown("---")
    st.subheader("📋 Comparison Results")
    display_df = filtered_df[_RESULT_COLUMNS].copy()
    display_df = display_df.sort_values('RMSD').reset_index(drop=True)
    display_df['RMSD'] = display_df['RMSD'].round(3)
    st.dataframe(display_df, use_container_width=True, height=300)

    # 3D viewer for one selected comparison
    st.markdown("---")
    st.subheader("🔬 3D Structure Visualization")
    if len(filtered_df) > 0:
        _render_selected_comparison(filtered_df)
    else:
        st.warning("No comparisons below the RMSD threshold. Try increasing the threshold.")

    _render_export(results_df, filtered_df)


def _render_selected_comparison(filtered_df):
    """Let the user pick one comparison and show its viewer, transform and downloads."""
    selected_idx = st.selectbox(
        "Select a comparison to visualize:",
        range(len(filtered_df)),
        format_func=lambda i: _comparison_label(filtered_df.iloc[i]),
    )
    selected_row = filtered_df.iloc[selected_idx]

    # Display RMSD info
    st.info(
        f"**RMSD: {selected_row['RMSD']:.3f} Å** ({selected_row['Num_Residues']} residues) | "
        f"Reference: {selected_row['Reference']}{selected_row['Ref_Residues']} "
        f"({selected_row['Ref_Sequence']}) | "
        f"Query: {selected_row['Query']}{selected_row['Query_Residues']} "
        f"({selected_row['Query_Sequence']})"
    )

    # Narrow side columns leave the viewer a wide centre column
    _, viewer_col, _ = st.columns([0.5, 4, 0.5])
    with viewer_col:
        try:
            viz_html = create_structure_visualization(
                selected_row['Ref_Path'],
                selected_row['Query_Path'],
                selected_row['Ref_Indices'],
                selected_row['Query_Indices'],
                selected_row['Rotation_Matrix'],
                selected_row['Ref_COM'],
                selected_row['Query_COM'],
                selected_row['RMSD'],
            )
            st.components.v1.html(viz_html, height=700, scrolling=False)
        except Exception as e:
            # UI boundary: report the failure instead of aborting the page
            st.error(f"Error creating visualization: {str(e)}")

    # Show transformation details
    with st.expander("🔧 Transformation Details"):
        rotation_col, translation_col = st.columns(2)
        with rotation_col:
            st.markdown("**Rotation Matrix (U):**")
            st.dataframe(
                pd.DataFrame(selected_row['Rotation_Matrix']).round(4),
                use_container_width=True,
            )
        with translation_col:
            ref_com = selected_row['Ref_COM']
            query_com = selected_row['Query_COM']
            st.markdown("**Translation Vectors:**")
            st.write(f"Reference COM: [{ref_com[0]:.3f}, {ref_com[1]:.3f}, {ref_com[2]:.3f}]")
            st.write(f"Query COM: [{query_com[0]:.3f}, {query_com[1]:.3f}, {query_com[2]:.3f}]")

    # Download the extracted / aligned structures of the selected comparison
    with st.expander("💾 Download Structure Files"):
        st.markdown("**Download extracted and aligned structures for external visualization**")

        from visualization import extract_window_pdb, transform_pdb_string

        ref_pdb = extract_window_pdb(selected_row['Ref_Path'], selected_row['Ref_Indices'])
        query_pdb = extract_window_pdb(selected_row['Query_Path'], selected_row['Query_Indices'])
        query_aligned_pdb = transform_pdb_string(
            query_pdb,
            selected_row['Rotation_Matrix'],
            selected_row['Query_COM'],
            selected_row['Ref_COM'],
        )

        ref_stem = _strip_pdb_suffix(selected_row['Reference'])
        query_stem = _strip_pdb_suffix(selected_row['Query'])
        ref_tag = _residue_tag(selected_row['Ref_Indices'])
        query_tag = _residue_tag(selected_row['Query_Indices'])

        ref_col, query_col, aligned_col = st.columns(3)
        with ref_col:
            st.download_button(
                label="📥 Reference PDB",
                data=ref_pdb,
                file_name=f"ref_{ref_stem}_{ref_tag}.pdb",
                mime="chemical/x-pdb",
                help="Original reference structure (selected residues only)",
            )
        with query_col:
            st.download_button(
                label="📥 Query PDB (Original)",
                data=query_pdb,
                file_name=f"query_{query_stem}_{query_tag}.pdb",
                mime="chemical/x-pdb",
                help="Original query structure (selected residues only)",
            )
        with aligned_col:
            st.download_button(
                label="📥 Query PDB (Aligned)",
                data=query_aligned_pdb,
                file_name=f"query_aligned_{query_stem}_{query_tag}.pdb",
                mime="chemical/x-pdb",
                help="Query structure aligned to reference",
            )

        st.info(
            "💡 **Tip:** Load reference and aligned query together in PyMOL/Chimera "
            "to examine the superposition"
        )


def _render_export(results_df, filtered_df):
    """Offer the CSV of all results and a ZIP of the structures below the threshold."""
    st.markdown("---")
    st.subheader("💾 Export Results")
    csv_col, archive_col = st.columns(2)

    with csv_col:
        st.markdown("**Export Results Table**")
        export_df = results_df[_RESULT_COLUMNS].copy()
        export_df = export_df.sort_values('RMSD').reset_index(drop=True)
        st.download_button(
            label="📥 Download Results (CSV)",
            data=export_df.to_csv(index=False),
            file_name="rna_motif_comparison_results.csv",
            mime="text/csv",
        )

    with archive_col:
        st.markdown("**Export All Aligned Structures**")
        if st.button("📦 Generate PDB Archive", help="Create a ZIP file with all aligned structure pairs"):
            with st.spinner("Generating PDB files..."):
                archive_bytes = _build_pdb_archive(filtered_df)

            st.download_button(
                label="📥 Download PDB Archive (ZIP)",
                data=archive_bytes,
                file_name="aligned_structures.zip",
                mime="application/zip",
                help=(
                    f"Contains {len(filtered_df)} comparison sets with reference, "
                    "original query, and aligned query PDBs"
                ),
            )
            st.success(
                f"✅ Archive ready! Contains {len(filtered_df)} comparisons with 3 PDB files each."
            )


def _build_pdb_archive(filtered_df):
    """Return ZIP bytes holding, per comparison, three PDB files and a README."""
    import io
    import zipfile

    from visualization import extract_window_pdb, transform_pdb_string

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for idx, row in filtered_df.iterrows():
            # One folder per comparison, named after its result index and RMSD
            comp_name = f"comparison_{idx:03d}_rmsd_{row['RMSD']:.3f}"
            ref_stem = _strip_pdb_suffix(row['Reference'])
            query_stem = _strip_pdb_suffix(row['Query'])

            ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Indices'])
            zip_file.writestr(f"{comp_name}/reference_{ref_stem}.pdb", ref_pdb)

            query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Indices'])
            zip_file.writestr(f"{comp_name}/query_original_{query_stem}.pdb", query_pdb)

            query_aligned_pdb = transform_pdb_string(
                query_pdb,
                row['Rotation_Matrix'],
                row['Query_COM'],
                row['Ref_COM'],
            )
            zip_file.writestr(f"{comp_name}/query_aligned_{query_stem}.pdb", query_aligned_pdb)

            # NOTE(review): indentation of the README detail lines is
            # reconstructed; confirm against the original file.
            readme_lines = [
                f"Comparison #{idx}",
                f"RMSD: {row['RMSD']:.3f} Å",
                f"Residues Compared: {row['Num_Residues']}",
                "",
                "Reference:",
                f"  File: {row['Reference']}",
                f"  Residues: {row['Ref_Residues']}",
                f"  Sequence: {row['Ref_Sequence']}",
                "",
                "Query:",
                f"  File: {row['Query']}",
                f"  Residues: {row['Query_Residues']}",
                f"  Sequence: {row['Query_Sequence']}",
                "",
                "Files:",
                "  - reference_*.pdb: Reference structure (selected residues)",
                "  - query_original_*.pdb: Query structure (original position)",
                "  - query_aligned_*.pdb: Query structure (aligned to reference)",
                "",
                "To visualize in PyMOL:",
                "  load reference_*.pdb",
                "  load query_aligned_*.pdb",
                "",
                "To visualize in Chimera:",
                "  File → Open → Select both reference and query_aligned PDB files",
                "",
            ]
            zip_file.writestr(f"{comp_name}/README.txt", "\n".join(readme_lines))

    return zip_buffer.getvalue()
|
| 975 |
+
|
| 976 |
|
| 977 |
+
# Script entry point: build the page only when this file is executed directly.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|