# MotifAlign / app.py
# Page-scrape residue kept for provenance (not program text): "jiehou's picture" / "Update app.py" / "1c36d3a verified"
"""
RNA Motif Multi-Structure Comparison Tool - Pairwise Mode
Streamlit app for comparing multiple RNA motif structures with separate reference and query sets
Uses dropdown menu for residue configuration and default Backbone + Sugar atom selection
"""
import streamlit as st
import numpy as np
import pandas as pd
from pathlib import Path
import io
import tempfile
import os
from itertools import combinations
# Import our RMSD calculation functions
from rmsd_utils import (
parse_residue_atoms,
get_backbone_sugar_and_selectbase_coords_fixed,
calculate_COM,
calculate_rotation_rmsd,
translate_rotate_coords,
get_backbone_sugar_coords_from_residue,
get_base_coords_from_residue
)
# Page configuration.
# NOTE: this deliberately runs before the optional example-loader import
# below. That import's fallback path calls st.warning(), and Streamlit
# releases that require set_page_config() to be the first Streamlit command
# would otherwise raise whenever example_data_loader is missing.
st.set_page_config(
    page_title="RNA Motif Multi-Structure Comparison - Pairwise",
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Import example data loader (optional dependency).
# EXAMPLES_AVAILABLE records whether the helpers could be imported so the
# rest of the app can branch on it instead of re-attempting the import.
try:
    from example_data_loader import (
        get_example_pdbs,
        load_example_as_uploaded_file,
        get_example_info
    )
    EXAMPLES_AVAILABLE = True
except ImportError:
    EXAMPLES_AVAILABLE = False
    st.warning("Example data loader not available. Please use 'Upload Files' mode.")
from image_annotator import annotate_alignment_image
# Custom CSS, injected once at module level through st.markdown.
# - Rules scoped under `.main` raise font sizes in the main content area
#   (body text, headers, alerts, tables, metrics, inputs, expanders, code).
# - Rules scoped under `section[data-testid="stSidebar"]` tighten margins and
#   padding and pin sidebar text to 0.9rem (sidebar h1-h3 to 1.0rem).
# - `.main-header` / `.sub-header` are custom classes; main() emits elements
#   carrying them for the page title and subtitle.
# unsafe_allow_html=True is passed so the <style> element is emitted as markup
# (presumably Streamlit would otherwise escape it -- confirm against the docs).
st.markdown("""
<style>
/* ========================================
MAIN CONTENT - LARGER FONTS
======================================== */
/* Increase base font size for all main content */
.main .element-container,
.main [data-testid="stMarkdownContainer"],
.main [data-testid="stText"],
.main p,
.main span,
.main div {
font-size: 1.15rem !important;
}
/* Headers in main content */
.main h1 {
font-size: 2.8rem !important;
font-weight: 700 !important;
}
.main h2 {
font-size: 2.0rem !important;
font-weight: 600 !important;
}
.main h3 {
font-size: 1.6rem !important;
font-weight: 600 !important;
}
/* Custom header classes */
.main-header {
font-size: 2.8rem !important;
font-weight: bold;
color: #1f77b4;
margin-bottom: 1rem;
}
.sub-header {
font-size: 1.4rem !important;
color: #666;
margin-bottom: 2rem;
}
/* Info/warning/success boxes */
.main [data-testid="stAlert"] p,
.main [data-testid="stAlert"] {
font-size: 1.1rem !important;
}
/* Dataframes and tables */
.main [data-testid="stDataFrame"],
.main .dataframe,
.main table {
font-size: 1.05rem !important;
}
.main .dataframe th,
.main .dataframe td {
font-size: 1.05rem !important;
padding: 8px !important;
}
/* Metrics */
.main [data-testid="stMetric"] {
font-size: 1.15rem !important;
}
.main [data-testid="stMetricLabel"] {
font-size: 1.1rem !important;
}
.main [data-testid="stMetricValue"] {
font-size: 1.8rem !important;
}
/* Buttons in main content */
.main button p,
.main button span {
font-size: 1.05rem !important;
}
/* Selectbox, radio, and other inputs in main */
.main .stSelectbox label,
.main .stRadio label,
.main .stNumberInput label,
.main .stMultiSelect label {
font-size: 1.1rem !important;
}
.main .stSelectbox [data-baseweb="select"] div,
.main .stRadio [role="radiogroup"] label,
.main .stNumberInput input {
font-size: 1.05rem !important;
}
/* Expander headers */
.main [data-testid="stExpander"] summary {
font-size: 1.15rem !important;
}
/* Code blocks */
.main code,
.main pre {
font-size: 1.0rem !important;
}
/* ========================================
SIDEBAR - COMPACT & NORMAL FONT
======================================== */
/* Ultra-compact sidebar spacing */
section[data-testid="stSidebar"] {
padding-top: 0.2rem !important;
}
section[data-testid="stSidebar"] > div {
padding-top: 0.2rem !important;
}
/* Minimal margins */
section[data-testid="stSidebar"] [data-testid="stMarkdownContainer"] {
margin: 0rem !important;
}
/* Minimal header spacing */
section[data-testid="stSidebar"] h1,
section[data-testid="stSidebar"] h2,
section[data-testid="stSidebar"] h3 {
margin-top: 0.1rem !important;
margin-bottom: 0.2rem !important;
padding: 0rem !important;
line-height: 1.2 !important;
font-size: 1.0rem !important;
}
/* Tight widget spacing */
section[data-testid="stSidebar"] .stSelectbox,
section[data-testid="stSidebar"] .stNumberInput,
section[data-testid="stSidebar"] .stRadio,
section[data-testid="stSidebar"] .stFileUploader {
margin-top: 0.1rem !important;
margin-bottom: 0.2rem !important;
}
section[data-testid="stSidebar"] .stButton {
margin: 0.2rem 0 !important;
}
section[data-testid="stSidebar"] .element-container {
margin: 0.1rem 0 !important;
}
section[data-testid="stSidebar"] .stAlert {
padding: 0.3rem 0.5rem !important;
margin: 0.1rem 0 !important;
}
section[data-testid="stSidebar"] label {
margin-bottom: 0.1rem !important;
font-size: 0.9rem !important;
}
section[data-testid="stSidebar"] .stCaptionContainer {
margin: 0.1rem 0 !important;
}
section[data-testid="stSidebar"] hr {
margin: 0.2rem 0 !important;
}
/* Sidebar font sizes - keep normal/small */
section[data-testid="stSidebar"] * {
font-size: 0.9rem !important;
}
section[data-testid="stSidebar"] p,
section[data-testid="stSidebar"] span,
section[data-testid="stSidebar"] div {
font-size: 0.9rem !important;
}
section[data-testid="stSidebar"] button {
font-size: 0.9rem !important;
}
</style>
""", unsafe_allow_html=True)
def save_uploaded_file(uploaded_file, directory):
    """Save an uploaded file into *directory* and return the path written.

    Only the final path component of ``uploaded_file.name`` is used as the
    on-disk file name. The name is supplied by the client, so a value such as
    ``"../x.pdb"`` or an absolute path must not be able to place the file
    outside *directory*.

    Args:
        uploaded_file: Object exposing ``name`` (str) and ``getbuffer()``
            (bytes-like), e.g. the items produced by ``st.file_uploader``.
        directory: Existing directory to write into.

    Returns:
        The path of the file that was written (*directory* joined with the
        sanitised file name).
    """
    # Treat "\" as a separator too, so Windows-style client paths are reduced
    # to their last component regardless of the server platform.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
def get_structure_info(pdb_path):
    """
    Summarise every residue parsed from a PDB file.
    Args:
        pdb_path: Path to PDB file
    Returns:
        One dict per residue, in the order returned by parse_residue_atoms,
        with keys 'index' (0-based position), 'resnum', 'resname' and
        'full_name' (display label that starts with the 1-based position).
    """
    return [
        {
            'index': position,
            'resnum': residue['resnum'],
            'resname': residue['resname'],
            'full_name': f"{position+1}. {residue['resname']} (residue #{residue['resnum']})",
        }
        for position, residue in enumerate(parse_residue_atoms(pdb_path))
    ]
def load_structure_data(uploaded_files, temp_dir):
    """Write each uploaded file into temp_dir and parse its residues.

    Args:
        uploaded_files: Iterable of upload objects (each needs ``name`` plus
            whatever save_uploaded_file reads from it).
        temp_dir: Directory the files are saved into.

    Returns:
        One dict per upload, in input order, with keys 'name', 'path',
        'residues' and 'num_residues'.
    """
    def _describe(upload):
        # Save first: the residue parser works on a path on disk.
        saved_path = save_uploaded_file(upload, temp_dir)
        parsed = parse_residue_atoms(saved_path)
        return {
            'name': upload.name,
            'path': saved_path,
            'residues': parsed,
            'num_residues': len(parsed),
        }

    return [_describe(upload) for upload in uploaded_files]
def extract_window_coords(residues, window_indices):
    """
    Extract coordinates for a specific window of residues.

    For every index in the window, the backbone/sugar coordinates are
    appended first, followed by the base coordinates of the same residue.

    Args:
        residues: List of all residues
        window_indices: List of indices to extract. An index >= len(residues)
            is skipped silently; a negative index is passed to normal Python
            indexing (it counts from the end).
    Returns:
        numpy array built from the collected coordinates (empty when nothing
        was collected)
    """
    # The two coordinate helpers come from the module-level rmsd_utils import.
    # This function runs in the innermost comparison loop of the analysis, so
    # re-importing them on every call was redundant work.
    residue_count = len(residues)
    all_coords = []
    for idx in window_indices:
        if idx >= residue_count:
            # NOTE(review): a skipped index shortens the returned array, so a
            # reference/query pair could end up with different lengths --
            # confirm callers never pass out-of-range indices.
            continue
        residue = residues[idx]
        # Backbone and sugar atoms first, then base atoms, per residue.
        all_coords.extend(get_backbone_sugar_coords_from_residue(residue))
        all_coords.extend(get_base_coords_from_residue(residue))
    return np.asarray(all_coords)
def generate_windows_from_selection(selected_indices, window_size, window_type):
    """Generate residue-index windows of ``window_size`` from a selection.

    Args:
        selected_indices: Ordered sequence of residue indices (hashable
            values, e.g. ints).
        window_size: Number of indices per window.
        window_type: Controls which windows are produced:
            * "contiguous": sliding windows over consecutive *positions* of
              ``selected_indices`` (not necessarily consecutive residue
              numbers).
            * "non-contiguous": every combination of ``window_size`` indices
              except those sliding windows.
            * anything else (the UI passes "both"): every combination.

    Returns:
        A list of windows, each a new list of indices. Empty when the
        selection has fewer than ``window_size`` entries. When the selection
        has exactly ``window_size`` entries, a single window holding a copy
        of the selection is returned regardless of ``window_type``.
    """
    # Work on a copy so returned windows never alias the caller's list
    # (callers keep selections in session state).
    selection = list(selected_indices)
    n_selected = len(selection)

    if n_selected < window_size:
        return []
    if n_selected == window_size:
        return [selection]

    sliding = [
        selection[start:start + window_size]
        for start in range(n_selected - window_size + 1)
    ]
    if window_type == "contiguous":
        return sliding

    combos = combinations(selection, window_size)
    if window_type == "non-contiguous":
        # Set membership keeps the exclusion test O(1) per combination.
        sliding_keys = {tuple(window) for window in sliding}
        return [list(combo) for combo in combos if combo not in sliding_keys]

    # "both": all combinations, which already include the sliding windows.
    return [list(combo) for combo in combos]
def main():
st.markdown('<h1 class="main-header">🧬 RNA Motif Multi-Structure Comparison</h1>', unsafe_allow_html=True)
st.markdown('<p class="sub-header">Pairwise comparison: Reference structures vs Query structures</p>', unsafe_allow_html=True)
# Create temporary directory
if 'temp_dir' not in st.session_state:
st.session_state['temp_dir'] = tempfile.mkdtemp()
temp_dir = st.session_state['temp_dir']
# Initialize session state
if 'data_mode' not in st.session_state:
st.session_state['data_mode'] = 'upload'
if 'ref_selections' not in st.session_state:
st.session_state['ref_selections'] = {}
if 'query_selections' not in st.session_state:
st.session_state['query_selections'] = {}
# Sidebar: Step 1 - Data Source Selection
st.sidebar.title("βš™οΈ Configuration")
st.sidebar.subheader("1️⃣ Data Source")
# Check if examples are available
if EXAMPLES_AVAILABLE:
data_mode = st.sidebar.radio(
"Choose data source",
["Upload Files", "Use Example Data"],
key="data_mode_radio",
help="Upload your own PDB files or use provided examples"
)
else:
st.sidebar.info("ℹ️ Example data not available. Using upload mode.")
data_mode = "Upload Files"
# Update data mode
if data_mode == "Upload Files":
st.session_state['data_mode'] = 'upload'
# Reset example initialization when switching to upload mode
if 'example_mode_initialized' in st.session_state:
del st.session_state['example_mode_initialized']
else:
st.session_state['data_mode'] = 'example'
# Step 2: File Upload/Selection - SEPARATE FOR REFERENCE AND QUERY
st.sidebar.subheader("2️⃣ Structure Files")
reference_files = []
query_files = []
if st.session_state['data_mode'] == 'upload':
st.sidebar.markdown("**Upload Reference Structures**")
ref_uploaded = st.sidebar.file_uploader(
"Reference PDB files",
type=['pdb'],
accept_multiple_files=True,
key="ref_uploader",
help="Upload one or more reference structures (e.g., Pentaloop)"
)
st.sidebar.markdown("**Upload Query Structures**")
query_uploaded = st.sidebar.file_uploader(
"Query PDB files",
type=['pdb'],
accept_multiple_files=True,
key="query_uploader",
help="Upload one or more query structures (e.g., Tetraloop)"
)
reference_files = ref_uploaded if ref_uploaded else []
query_files = query_uploaded if query_uploaded else []
else: # Example data mode
if not EXAMPLES_AVAILABLE:
st.sidebar.error("❌ Example data loader module not found")
reference_files = []
query_files = []
else:
try:
examples = get_example_pdbs()
if not examples or len(examples) == 0:
st.sidebar.error("❌ No example data available. Please add PDB files to 'data/' folder")
st.sidebar.info("πŸ’‘ Create a 'data/' folder in the same directory as the app and add .pdb files")
reference_files = []
query_files = []
else:
example_names = sorted(list(examples.keys()))
# Auto-select examples when first switching to example mode
if 'example_mode_initialized' not in st.session_state:
st.session_state['example_mode_initialized'] = True
# Auto-select first half as reference, second half as query
mid_point = max(1, len(example_names) // 2)
st.session_state['auto_ref_examples'] = example_names[:mid_point]
st.session_state['auto_query_examples'] = example_names[mid_point:mid_point*2]
st.sidebar.markdown("**Select Reference Examples**")
ref_example_names = st.sidebar.multiselect(
"Reference structures",
options=example_names,
default=st.session_state.get('auto_ref_examples', []),
key="ref_examples",
help="Select example reference structures"
)
if ref_example_names:
st.sidebar.success(f"βœ… {len(ref_example_names)} reference file(s) selected")
st.sidebar.markdown("**Select Query Examples**")
query_example_names = st.sidebar.multiselect(
"Query structures",
options=example_names,
default=st.session_state.get('auto_query_examples', []),
key="query_examples",
help="Select example query structures"
)
if query_example_names:
st.sidebar.success(f"βœ… {len(query_example_names)} query file(s) selected")
# Convert names to paths and load files
try:
reference_files = [load_example_as_uploaded_file(examples[name]) for name in ref_example_names]
query_files = [load_example_as_uploaded_file(examples[name]) for name in query_example_names]
except Exception as load_error:
st.sidebar.error(f"Error loading files: {str(load_error)}")
import traceback
st.sidebar.code(traceback.format_exc())
reference_files = []
query_files = []
except Exception as e:
st.sidebar.error(f"❌ Error loading examples: {str(e)}")
import traceback
st.sidebar.code(traceback.format_exc())
reference_files = []
query_files = []
# Show upload status
if reference_files and query_files:
st.sidebar.success(f"βœ… {len(reference_files)} reference + {len(query_files)} query structures")
elif reference_files:
st.sidebar.info(f"ℹ️ {len(reference_files)} reference structures loaded")
elif query_files:
st.sidebar.info(f"ℹ️ {len(query_files)} query structures loaded")
else:
st.sidebar.warning("⚠️ Upload or select structures")
# Residue trimming controls - add early so they're available when needed
st.sidebar.markdown("---")
st.sidebar.markdown("**πŸ”§ 5'/3' Base Trimming (Reference) **")
col1, col2 = st.sidebar.columns(2)
with col1:
n_term_trim_ref = st.number_input(
"5' trim_ref",
min_value=0,
max_value=10,
value=2,
step=1,
help="Number of bases to remove from 5' end",
key="n_term_trim_ref"
)
with col2:
c_term_trim_ref = st.number_input(
"3' trim_ref",
min_value=0,
max_value=10,
value=2,
step=1,
help="Number of bases to remove from 3' end",
key="c_term_trim_ref"
)
# Residue trimming controls - add early so they're available when needed
st.sidebar.markdown("---")
st.sidebar.markdown("**πŸ”§ 5'/3' Base Trimming (Query) **")
col1, col2 = st.sidebar.columns(2)
with col1:
n_term_trim_query = st.number_input(
"5' trim_query",
min_value=0,
max_value=10,
value=2,
step=1,
help="Number of bases to remove from 5' end",
key="n_term_trim_query"
)
with col2:
c_term_trim_query = st.number_input(
"3' trim_query",
min_value=0,
max_value=10,
value=2,
step=1,
help="Number of bases to remove from 3' end",
key="c_term_trim_query"
)
# Load structure data
ref_structure_data = []
query_structure_data = []
if reference_files:
ref_structure_data = load_structure_data(reference_files, temp_dir)
if query_files:
query_structure_data = load_structure_data(query_files, temp_dir)
# Track current files to reset selections if files change
current_ref_files = set([s['name'] for s in ref_structure_data])
current_query_files = set([s['name'] for s in query_structure_data])
if 'current_ref_files' not in st.session_state:
st.session_state['current_ref_files'] = current_ref_files
if 'current_query_files' not in st.session_state:
st.session_state['current_query_files'] = current_query_files
# Reset selections if files changed
if st.session_state['current_ref_files'] != current_ref_files:
st.session_state['current_ref_files'] = current_ref_files
st.session_state['ref_selections'] = {}
if 'ref_auto_initialized' in st.session_state:
del st.session_state['ref_auto_initialized']
if st.session_state['current_query_files'] != current_query_files:
st.session_state['current_query_files'] = current_query_files
st.session_state['query_selections'] = {}
if 'query_auto_initialized' in st.session_state:
del st.session_state['query_auto_initialized']
# Auto-initialize selections (exclude first and last residue by default)
if 'ref_auto_initialized' not in st.session_state and ref_structure_data:
for struct in ref_structure_data:
num_res = struct['num_residues']
if num_res > n_term_trim_ref + c_term_trim_ref:
auto_selection = list(range(n_term_trim_ref, num_res - c_term_trim_ref))
st.session_state['ref_selections'][struct['name']] = auto_selection
else:
st.session_state['ref_selections'][struct['name']] = list(range(num_res))
st.session_state['ref_auto_initialized'] = True
if 'query_auto_initialized' not in st.session_state and query_structure_data:
for struct in query_structure_data:
num_res = struct['num_residues']
if num_res > n_term_trim_query + c_term_trim_query:
auto_selection = list(range(n_term_trim_query, num_res - c_term_trim_query))
st.session_state['query_selections'][struct['name']] = auto_selection
else:
st.session_state['query_selections'][struct['name']] = list(range(num_res))
st.session_state['query_auto_initialized'] = True
# Step 3: Configure Atom Selections in Main Area
st.markdown("---")
st.subheader("πŸ”¬ Configure Atom Selections")
st.info(f"""ℹ️ **Atom Selection:** Backbone + Sugar\n
- For purines (A, G): N9, C8, C4\n
- For pyrimidines (C, U): N1, C2, C6\n
- For backbone and sugar atoms: "P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"\n
""")
# Create two columns for Reference and Query
col1, col2 = st.columns(2)
with col1:
st.markdown("### πŸ“‹ Reference Structures")
if ref_structure_data:
selected_ref_name = st.selectbox(
"Select structure to configure (excluding two bases in 5' and 3' by default)",
options=[s['name'] for s in ref_structure_data],
key="ref_dropdown",
help="Choose a reference structure to configure its residue selection"
)
selected_ref = next((s for s in ref_structure_data if s['name'] == selected_ref_name), None)
if selected_ref:
st.markdown(f"**{selected_ref['name']}** ({selected_ref['num_residues']} residues)")
# Display residue table
structure_info = get_structure_info(selected_ref['path'])
info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]
with st.expander("πŸ“‹ View Residue Table", expanded=False):
st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38))
# Selection method
selection_method = st.radio(
f"Selection method for {selected_ref['name']}",
["Select by range", "Select specific residues", "Use all residues"],
key=f"method_ref_{selected_ref['name']}",
index=1,
horizontal=True
)
selected_indices = []
if selection_method == "Select by range":
current_selection = st.session_state['ref_selections'].get(selected_ref['name'], [])
default_start = current_selection[0] + n_term_trim_ref if current_selection else n_term_trim_ref
default_end = current_selection[-1] + 1 if current_selection else max(n_term_trim_ref, len(structure_info) - c_term_trim_ref)
c1, c2 = st.columns(2)
with c1:
start_idx = st.number_input(
"Start index (1-based)",
min_value=1,
max_value=len(structure_info),
value=default_start,
key=f"start_ref_{selected_ref['name']}"
)
with c2:
end_idx = st.number_input(
"End index (1-based, inclusive)",
min_value=1,
max_value=len(structure_info),
value=default_end,
key=f"end_ref_{selected_ref['name']}"
)
if start_idx <= end_idx:
selected_indices = list(range(start_idx - 1, end_idx))
st.success(f"βœ“ Selected residues: {[i+1 for i in selected_indices]}")
# Auto-save the selection
st.session_state['ref_selections'][selected_ref['name']] = selected_indices
else:
st.error("Start index must be ≀ end index")
elif selection_method == "Select specific residues":
# Always use current trim values for default selection (updates when trim values change)
default_names = [structure_info[i]['full_name'] for i in range(n_term_trim_ref, len(structure_info)-c_term_trim_ref)]
selected_names = st.multiselect(
"Select residues",
options=[info['full_name'] for info in structure_info],
default=default_names,
key=f"specific_ref_{selected_ref['name']}_n{n_term_trim_ref}_c{c_term_trim_ref}"
)
name_to_idx = {info['full_name']: info['index'] for info in structure_info}
selected_indices = [name_to_idx[name] for name in selected_names]
selected_indices.sort()
if selected_indices:
st.success(f"βœ“ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")
# Auto-save the selection
st.session_state['ref_selections'][selected_ref['name']] = selected_indices
else: # Use all residues
selected_indices = list(range(len(structure_info)))
st.info(f"βœ“ Using all {len(selected_indices)} residues")
# Auto-save the selection
st.session_state['ref_selections'][selected_ref['name']] = selected_indices
# Show current saved selection (now always up-to-date)
if selected_ref['name'] in st.session_state['ref_selections']:
saved_indices = st.session_state['ref_selections'][selected_ref['name']]
st.info(f"**Current saved selection:** {len(saved_indices)} residues: {[i+1 for i in saved_indices]}")
else:
st.info("Upload reference structures to configure")
with col2:
st.markdown("### πŸ“‹ Query Structures")
if query_structure_data:
selected_query_name = st.selectbox(
"Select structure to configure (excluding two bases in 5' and 3' by default)",
options=[s['name'] for s in query_structure_data],
key="query_dropdown",
help="Choose a query structure to configure its residue selection"
)
selected_query = next((s for s in query_structure_data if s['name'] == selected_query_name), None)
if selected_query:
st.markdown(f"**{selected_query['name']}** ({selected_query['num_residues']} residues)")
# Display residue table
structure_info = get_structure_info(selected_query['path'])
info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]
with st.expander("πŸ“‹ View Residue Table", expanded=False):
st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38))
# Selection method
selection_method = st.radio(
f"Selection method for {selected_query['name']}",
["Select by range", "Select specific residues", "Use all residues"],
key=f"method_query_{selected_query['name']}",
index=1,
horizontal=True
)
selected_indices = []
if selection_method == "Select by range":
current_selection = st.session_state['query_selections'].get(selected_query['name'], [])
default_start = current_selection[0] + n_term_trim_query if current_selection else 3
default_end = current_selection[-1] + 1 if current_selection else max(2, len(structure_info) - c_term_trim_query)
c1, c2 = st.columns(2)
with c1:
start_idx = st.number_input(
"Start index (1-based)",
min_value=1,
max_value=len(structure_info),
value=default_start,
key=f"start_query_{selected_query['name']}"
)
with c2:
end_idx = st.number_input(
"End index (1-based, inclusive)",
min_value=1,
max_value=len(structure_info),
value=default_end,
key=f"end_query_{selected_query['name']}"
)
if start_idx <= end_idx:
selected_indices = list(range(start_idx - 1, end_idx))
st.success(f"βœ“ Selected residues: {[i+1 for i in selected_indices]}")
# Auto-save the selection
st.session_state['query_selections'][selected_query['name']] = selected_indices
else:
st.error("Start index must be ≀ end index")
elif selection_method == "Select specific residues":
# Always use current trim values for default selection (updates when trim values change)
default_names = [structure_info[i]['full_name'] for i in range(n_term_trim_query, len(structure_info)-c_term_trim_query)]
selected_names = st.multiselect(
"Select residues",
options=[info['full_name'] for info in structure_info],
default=default_names,
key=f"specific_query_{selected_query['name']}_n{n_term_trim_query}_c{c_term_trim_query}"
)
name_to_idx = {info['full_name']: info['index'] for info in structure_info}
selected_indices = [name_to_idx[name] for name in selected_names]
selected_indices.sort()
if selected_indices:
st.success(f"βœ“ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")
# Auto-save the selection
st.session_state['query_selections'][selected_query['name']] = selected_indices
else: # Use all residues
selected_indices = list(range(len(structure_info)))
st.info(f"βœ“ Using all {len(selected_indices)} residues")
# Auto-save the selection
st.session_state['query_selections'][selected_query['name']] = selected_indices
# Show current saved selection (now always up-to-date)
if selected_query['name'] in st.session_state['query_selections']:
saved_indices = st.session_state['query_selections'][selected_query['name']]
st.info(f"**Current saved selection:** {len(saved_indices)} residues: {[i+1 for i in saved_indices]}")
else:
st.info("Upload query structures to configure")
# Step 4: Window Configuration
st.sidebar.subheader("3️⃣ Window Configuration")
# Check if all structures have selections
all_ref_have_selections = all(s['name'] in st.session_state['ref_selections'] for s in ref_structure_data)
all_query_have_selections = all(s['name'] in st.session_state['query_selections'] for s in query_structure_data)
if all_ref_have_selections and all_query_have_selections and ref_structure_data and query_structure_data:
# Find minimum selection size
all_selections = list(st.session_state['ref_selections'].values()) + list(st.session_state['query_selections'].values())
min_selection_size = min(len(sel) for sel in all_selections)
window_size = st.sidebar.number_input(
"Window Size",
min_value=2,
max_value=min_selection_size,
value=min(4, min_selection_size),
step=1,
help="Number of residues per comparison window"
)
window_type = st.sidebar.radio(
"Window Type",
["contiguous", "non-contiguous", "both"],
index=0,
help="Contiguous: sliding windows. Non-contiguous: all combinations"
)
else:
st.sidebar.warning("⚠️ Configure selections first")
window_size = 4
window_type = "contiguous"
# Step 5: Run Analysis
st.sidebar.subheader("4️⃣ Run Analysis")
can_run = (all_ref_have_selections and all_query_have_selections and
ref_structure_data and query_structure_data)
if st.sidebar.button("πŸš€ Run Pairwise Analysis", type="primary", disabled=not can_run):
if not can_run:
st.error("Please upload and configure both reference and query structures")
return
# Run comparisons
with st.spinner("Analyzing structures..."):
results = []
# For each reference structure
for ref_struct in ref_structure_data:
ref_indices = st.session_state['ref_selections'][ref_struct['name']]
ref_windows = generate_windows_from_selection(ref_indices, window_size, window_type)
if not ref_windows:
continue
# For each reference window
for ref_window in ref_windows:
# Extract reference coords
ref_coords = extract_window_coords(ref_struct['residues'], ref_window)
ref_com = calculate_COM(ref_coords)
ref_sequence = ''.join([ref_struct['residues'][i]['resname'] for i in ref_window])
# Compare against all query structures
for query_struct in query_structure_data:
query_indices = st.session_state['query_selections'][query_struct['name']]
query_windows = generate_windows_from_selection(query_indices, window_size, window_type)
for query_window in query_windows:
# Extract query coords
query_coords = extract_window_coords(query_struct['residues'], query_window)
query_com = calculate_COM(query_coords)
query_sequence = ''.join([query_struct['residues'][i]['resname'] for i in query_window])
# Calculate RMSD
U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)
if U is None or RMSD is None:
RMSD = 999.0
U = np.eye(3)
results.append({
'Reference': ref_struct['name'],
'Ref_Window': ref_window,
'Ref_Sequence': ref_sequence,
'Query': query_struct['name'],
'Query_Window': query_window,
'Query_Sequence': query_sequence,
'RMSD': RMSD,
'Rotation_Matrix': U,
'Ref_COM': ref_com,
'Query_COM': query_com,
'Ref_Path': ref_struct['path'],
'Query_Path': query_struct['path']
})
results_df = pd.DataFrame(results)
st.session_state['results'] = results_df
st.session_state['ref_structure_data'] = ref_structure_data
st.session_state['query_structure_data'] = query_structure_data
st.success(f"βœ… Analysis complete! {len(results_df)} comparisons performed.")
# Display results
if 'results' in st.session_state:
results_df = st.session_state['results']
st.markdown("---")
st.subheader("πŸ“Š Results Summary")
# RMSD threshold filter
col1, col2 = st.columns([1, 3])
with col1:
rmsd_threshold = st.slider(
"RMSD Threshold (Γ…)",
min_value=0.0,
max_value=10.0,
value=3.0,
step=0.1
)
filtered_df = results_df[results_df['RMSD'] <= rmsd_threshold]
with col2:
st.metric("Comparisons Below Threshold", f"{len(filtered_df)} / {len(results_df)}")
# Best match per Reference-Query pair
st.markdown("### πŸ† Best Match per Reference-Query Pair")
if len(filtered_df) > 0:
# Group by Reference and Query to find best match for each pair
best_matches = filtered_df.loc[filtered_df.groupby(['Reference', 'Query'])['RMSD'].idxmin()]
best_display = best_matches[['Reference', 'Query', 'Ref_Sequence', 'Query_Sequence', 'RMSD']].copy()
best_display['RMSD'] = best_display['RMSD'].round(3)
best_display.columns = ['Reference', 'Query', 'Ref Sequence', 'Query Sequence', 'RMSD (Γ…)']
st.dataframe(best_display, use_container_width=True)
else:
st.warning("No matches found below threshold")
# Full results
with st.expander("πŸ“‹ All Comparison Results"):
if len(filtered_df) > 0:
display_df = filtered_df[['Reference', 'Ref_Window', 'Ref_Sequence', 'Query', 'Query_Window', 'Query_Sequence', 'RMSD']].copy()
# Format the window indices to be 1-based
display_df['Ref_Residues'] = display_df['Ref_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
display_df['Query_Residues'] = display_df['Query_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
# Reorder columns
display_df = display_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'RMSD']]
display_df['RMSD'] = display_df['RMSD'].round(3)
display_df = display_df.sort_values('RMSD').reset_index(drop=True)
# Rename columns for better display
display_df.columns = ['Reference', 'Ref_Indices', 'Ref_Sequence', 'Query', 'Query_Indices', 'Query_Sequence', 'RMSD (Γ…)']
st.dataframe(display_df, use_container_width=True, height=400)
else:
st.info("No results to display")
# Visualization
st.markdown("---")
st.subheader("πŸ”¬ 3D Structure Visualization")
if len(filtered_df) > 0:
st.markdown("**Select a comparison to visualize:**")
# Create dropdown options
viz_options = []
for idx, row in filtered_df.iterrows():
ref_res_str = ','.join([str(i+1) for i in row['Ref_Window']])
query_res_str = ','.join([str(i+1) for i in row['Query_Window']])
option_text = f"{row['Reference']}[{ref_res_str}] ({row['Ref_Sequence']}) vs {row['Query']}[{query_res_str}] ({row['Query_Sequence']}) | RMSD: {row['RMSD']:.3f} Γ…"
viz_options.append((idx, option_text))
# Sort by RMSD
viz_options.sort(key=lambda x: filtered_df.loc[x[0], 'RMSD'])
selected_viz_idx = st.selectbox(
"Choose comparison to visualize",
options=[opt[0] for opt in viz_options],
format_func=lambda idx: next(opt[1] for opt in viz_options if opt[0] == idx),
help="All comparisons below RMSD threshold, sorted by RMSD"
)
# Get the selected comparison
selected_row = filtered_df.loc[selected_viz_idx]
# Import visualization function
from visualization import create_structure_visualization
# Display RMSD info
#st.info(f"**RMSD: {selected_row['RMSD']:.3f} Γ…** ({len(selected_row['Query_Indices'])} residues) | Reference: {selected_row['Reference']}{selected_row['Ref_Residues']} ({selected_row['Ref_Sequence']}) | Query: {selected_row['Query']}{selected_row['Query_Residues']} ({selected_row['Query_Sequence']})")
# Create visualization - wider display
col1, col2, col3 = st.columns([0.5, 4, 0.5])
with col2:
try:
viz_html = create_structure_visualization(
selected_row['Ref_Path'],
selected_row['Query_Path'],
selected_row['Ref_Window'],
selected_row['Query_Window'],
selected_row['Rotation_Matrix'],
selected_row['Ref_COM'],
selected_row['Query_COM'],
selected_row['RMSD'],
ref_name=selected_row['Reference'],
query_name=selected_row['Query'],
ref_sequence=selected_row['Ref_Sequence'],
query_sequence=selected_row['Query_Sequence']
)
st.components.v1.html(viz_html, width=1400, height=750, scrolling=False)
except Exception as e:
st.error(f"Error creating visualization: {str(e)}")
import traceback
st.code(traceback.format_exc())
# Automatic Annotation Info
st.markdown("---")
st.success("βœ… **Automatic Annotation:** When you click 'Download PNG' in the 3D viewer above, the image automatically includes RMSD, structure names, and sequences!")
st.info("πŸ’‘ **Customize font size:** Use the 'Annotation Font Size' dropdown in the viewer controls (top-right) to choose from Small, Medium, Large (default), or Extra Large fonts!")
# Show transformation details
with st.expander("πŸ”§ Transformation Details"):
col1, col2 = st.columns(2)
with col1:
st.markdown("**Rotation Matrix (U):**")
st.dataframe(
pd.DataFrame(selected_row['Rotation_Matrix']).round(4),
use_container_width=True
)
with col2:
st.markdown("**Translation Vectors:**")
st.write(f"Reference COM: [{selected_row['Ref_COM'][0]:.3f}, {selected_row['Ref_COM'][1]:.3f}, {selected_row['Ref_COM'][2]:.3f}]")
st.write(f"Query COM: [{selected_row['Query_COM'][0]:.3f}, {selected_row['Query_COM'][1]:.3f}, {selected_row['Query_COM'][2]:.3f}]")
# Download aligned structures
with st.expander("πŸ’Ύ Download Structure Files"):
st.markdown("**Download extracted and aligned structures for external visualization**")
from visualization import extract_window_pdb, transform_pdb_string
# Extract reference window
ref_pdb = extract_window_pdb(
selected_row['Ref_Path'],
selected_row['Ref_Window']
)
# Extract and transform query window
query_pdb = extract_window_pdb(
selected_row['Query_Path'],
selected_row['Query_Window']
)
query_aligned_pdb = transform_pdb_string(
query_pdb,
selected_row['Rotation_Matrix'],
selected_row['Query_COM'],
selected_row['Ref_COM']
)
col1, col2, col3 = st.columns(3)
with col1:
# Reference structure
ref_filename = f"ref_{selected_row['Reference'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Ref_Window']]))}.pdb"
st.download_button(
label="πŸ“₯ Reference PDB",
data=ref_pdb,
file_name=ref_filename,
mime="chemical/x-pdb",
help="Original reference structure (selected residues only)"
)
with col2:
# Query structure (original position)
query_filename = f"query_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Window']]))}.pdb"
st.download_button(
label="πŸ“₯ Query PDB (Original)",
data=query_pdb,
file_name=query_filename,
mime="chemical/x-pdb",
help="Original query structure (selected residues only)"
)
with col3:
# Query structure (aligned)
query_aligned_filename = f"query_aligned_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Window']]))}.pdb"
st.download_button(
label="πŸ“₯ Query PDB (Aligned)",
data=query_aligned_pdb,
file_name=query_aligned_filename,
mime="chemical/x-pdb",
help="Query structure aligned to reference"
)
# Combined aligned structure
st.markdown("---")
st.markdown("**Combined Aligned Structure (Reference + Query)**")
# Create combined PDB with both structures
combined_pdb_lines = []
# Add header information as REMARK records
combined_pdb_lines.append(f"REMARK Reference: {selected_row['Reference']}")
combined_pdb_lines.append(f"REMARK Reference Residues: {','.join(map(str, [i+1 for i in selected_row['Ref_Window']]))}")
combined_pdb_lines.append(f"REMARK Reference Sequence: {selected_row['Ref_Sequence']}")
combined_pdb_lines.append(f"REMARK Query: {selected_row['Query']}")
combined_pdb_lines.append(f"REMARK Query Residues: {','.join(map(str, [i+1 for i in selected_row['Query_Window']]))}")
combined_pdb_lines.append(f"REMARK Query Sequence: {selected_row['Query_Sequence']}")
combined_pdb_lines.append(f"REMARK RMSD: {selected_row['RMSD']:.3f} Angstroms")
combined_pdb_lines.append("MODEL 1")
# Add reference atoms with chain A
for line in ref_pdb.split('\n'):
if line.startswith(('ATOM', 'HETATM')):
# Set chain to A for reference
modified_line = line[:21] + 'A' + line[22:]
combined_pdb_lines.append(modified_line)
combined_pdb_lines.append("ENDMDL")
combined_pdb_lines.append("MODEL 2")
# Add aligned query atoms with chain B
for line in query_aligned_pdb.split('\n'):
if line.startswith(('ATOM', 'HETATM')):
# Set chain to B for query
modified_line = line[:21] + 'B' + line[22:]
combined_pdb_lines.append(modified_line)
combined_pdb_lines.append("ENDMDL")
combined_pdb_lines.append("END")
combined_pdb = '\n'.join(combined_pdb_lines)
combined_filename = f"aligned_{selected_row['Reference'].replace('.pdb', '')}_{selected_row['Query'].replace('.pdb', '')}_rmsd_{selected_row['RMSD']:.3f}.pdb"
st.download_button(
label="πŸ“₯ Download Combined Aligned Structure",
data=combined_pdb,
file_name=combined_filename,
mime="chemical/x-pdb",
help="Reference (chain A) and aligned query (chain B) in one file",
use_container_width=True
)
st.info("πŸ’‘ **Tip:** The combined PDB contains reference (chain A) and aligned query (chain B) - ready for PyMOL/Chimera")
else:
st.warning("No comparisons below RMSD threshold to visualize")
# Export Results
st.markdown("---")
st.subheader("πŸ’Ύ Export Results")
col1, col2 = st.columns(2)
with col1:
st.markdown("**Download Results Table**")
if len(filtered_df) > 0:
export_df = filtered_df[['Reference', 'Ref_Window', 'Ref_Sequence', 'Query', 'Query_Window', 'Query_Sequence', 'RMSD']].copy()
export_df['Ref_Residues'] = export_df['Ref_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
export_df['Query_Residues'] = export_df['Query_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
export_df = export_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'RMSD']]
export_df = export_df.sort_values('RMSD').reset_index(drop=True)
csv = export_df.to_csv(index=False)
st.download_button(
label="πŸ“₯ Download Results (CSV)",
data=csv,
file_name="rna_pairwise_comparison_results.csv",
mime="text/csv"
)
else:
st.info("No results to export")
with col2:
st.markdown("**Download Aligned Structures**")
if len(filtered_df) > 0 and st.button("πŸ“¦ Generate PDB Archive"):
with st.spinner("Creating archive..."):
import zipfile
from visualization_multi import extract_window_pdb, transform_pdb_string
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
for idx, row in filtered_df.iterrows():
comp_name = f"comp_{idx:03d}_rmsd_{row['RMSD']:.3f}"
# Reference
ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Window'])
zip_file.writestr(f"{comp_name}/reference.pdb", ref_pdb)
# Query original
query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Window'])
zip_file.writestr(f"{comp_name}/query_original.pdb", query_pdb)
# Query aligned
query_aligned = transform_pdb_string(
query_pdb,
row['Rotation_Matrix'],
row['Query_COM'],
row['Ref_COM']
)
zip_file.writestr(f"{comp_name}/query_aligned.pdb", query_aligned)
# README
readme = f"""Comparison #{idx}
RMSD: {row['RMSD']:.3f} Γ…
Atom Selection: Backbone + Sugar (default)
Reference: {row['Reference']}
Residues: {','.join([str(i+1) for i in row['Ref_Window']])}
Sequence: {row['Ref_Sequence']}
Query: {row['Query']}
Residues: {','.join([str(i+1) for i in row['Query_Window']])}
Sequence: {row['Query_Sequence']}
"""
zip_file.writestr(f"{comp_name}/README.txt", readme)
zip_buffer.seek(0)
st.download_button(
label="πŸ“₯ Download PDB Archive (ZIP)",
data=zip_buffer.getvalue(),
file_name="aligned_structures.zip",
mime="application/zip",
help=f"Contains {len(filtered_df)} comparison sets with reference, original query, and aligned query PDBs"
)
st.success(f"βœ… Archive ready! Contains {len(filtered_df)} comparisons with 3 PDB files each.")
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()