Spaces:

HouBioLab
/

MotifAlign

Sleeping

App Files Files Community

jiehou committed on Oct 27, 2025

Commit

4a5024a

verified ·

1 Parent(s): a6c9f2a

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +971 -33

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,978 @@
-import altair as alt
 import numpy as np
 import pandas as pd
-import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
 """
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

+"""
+RNA Motif Structure Comparison Tool
+Streamlit app for comparing RNA motif structures with flexible residue selection
+"""
+import streamlit as st
 import numpy as np
 import pandas as pd
+from pathlib import Path
+import io
+import tempfile
+import os
+# Import our RMSD calculation functions
+from rmsd_utils import (
+    parse_residue_atoms,
+    get_backbone_sugar_and_selectbase_coords_fixed,
+    calculate_COM,
+    calculate_rotation_rmsd,
+    translate_rotate_coords
+)
+from visualization import create_structure_visualization
+# Page configuration: browser-tab title and icon, full-width layout, and the
+# sidebar shown expanded on first load.
+# NOTE(review): Streamlit has historically required set_page_config to run before
+# any other st.* call - keep it ahead of the st.markdown below; confirm for the
+# pinned Streamlit version.
+st.set_page_config(
+    page_title="RNA Motif Structure Comparison",
+    page_icon="🧬",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS injected once per run. `main-header` and `sub-header` are the classes
+# used by the header markup emitted in main(); `metric-box` is not referenced in the
+# visible part of this file. The stylesheet is passed with unsafe_allow_html=True so
+# the <style> element is emitted as HTML.
+st.markdown("""
+<style>
+    .main-header {
+        font-size: 2.5rem;
+        font-weight: bold;
+        color: #1f77b4;
+        margin-bottom: 1rem;
+    }
+    .sub-header {
+        font-size: 1.2rem;
+        color: #666;
+        margin-bottom: 2rem;
+    }
+    .metric-box {
+        background-color: #f0f2f6;
+        padding: 1rem;
+        border-radius: 0.5rem;
+        margin: 0.5rem 0;
+    }
+</style>
+""", unsafe_allow_html=True)
+def save_uploaded_file(uploaded_file, directory):
+    """
+    Save an uploaded file into a directory and return the path that was written.
+    Args:
+        uploaded_file: Object exposing `.name` (file name) and `.getbuffer()` (file bytes)
+        directory: Existing directory that receives the file
+    Returns:
+        Path of the written file inside `directory`
+    Raises:
+        ValueError: If the name has no usable final component (empty, "." or "..")
+    NOTE: `save_uploaded_file` is defined a second time further down in this file;
+    at import time the later definition is the one that stays bound.
+    """
+    # The name comes from the uploader's client, so treat it as untrusted: keep only
+    # the final path component so the write cannot land outside `directory`.
+    safe_name = os.path.basename(uploaded_file.name)
+    if safe_name in ("", ".", ".."):
+        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")
+    file_path = os.path.join(directory, safe_name)
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    return file_path
+def get_structure_info(pdb_path):
+    """
+    Summarise every residue that `parse_residue_atoms` yields for a PDB file.
+    Args:
+        pdb_path: Path to PDB file
+    Returns:
+        One dict per residue, in parse order, with keys `index` (0-based position),
+        `resnum`, `resname` and `full_name` (display label numbered from 1)
+    """
+    return [
+        {
+            'index': position,
+            'resnum': residue['resnum'],
+            'resname': residue['resname'],
+            'full_name': f"{position+1}. {residue['resname']} (residue #{residue['resnum']})",
+        }
+        for position, residue in enumerate(parse_residue_atoms(pdb_path))
+    ]
+def display_structure_selector(files, temp_dir, set_name):
+    """
+    Render a residue preview table and selection widgets for each uploaded structure.
+    Args:
+        files: List of uploaded files; each is read from temp_dir under its `.name`
+        temp_dir: Temporary directory containing files
+        set_name: Name of the set (e.g., "Reference" or "Query"); also part of every widget key
+    Returns:
+        Dict mapping filename to the ascending list of selected 0-based residue indices.
+        A file with no parsed residues, or with start > end in range mode, maps to [].
+    """
+    if not files:
+        return {}
+    st.subheader(f"📋 {set_name} Structure Preview & Selection")
+    selections = {}
+    for file in files:
+        file_path = os.path.join(temp_dir, file.name)
+        structure_info = get_structure_info(file_path)
+        if not structure_info:
+            # Nothing was parsed: the column selection below would raise KeyError on an
+            # empty DataFrame and the range inputs would get max_value 0 < min_value 1.
+            # Record an empty selection so the caller's size validation reports the file.
+            st.warning(f"⚠️ {file.name}: no residues could be parsed from this file")
+            selections[file.name] = []
+            continue
+        with st.expander(f"🔍 {file.name} ({len(structure_info)} residues)"):
+            # Display residue table with both 1-based (user-facing) and 0-based (internal) indices
+            info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
+            info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
+            info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
+            info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]
+            st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38))
+            # Selection method
+            selection_method = st.radio(
+                f"Selection method for {file.name}",
+                ["Select by range", "Select specific residues", "Use all residues"],
+                key=f"method_{set_name}_{file.name}",
+                horizontal=True
+            )
+            selected_indices = []
+            if selection_method == "Select by range":
+                col1, col2 = st.columns(2)
+                with col1:
+                    start_idx = st.number_input(
+                        "Start index (1-based)",
+                        min_value=1,
+                        max_value=len(structure_info),
+                        value=1,
+                        key=f"start_{set_name}_{file.name}"
+                    )
+                with col2:
+                    end_idx = st.number_input(
+                        "End index (1-based, inclusive)",
+                        min_value=1,
+                        max_value=len(structure_info),
+                        value=min(4, len(structure_info)),
+                        key=f"end_{set_name}_{file.name}"
+                    )
+                if start_idx <= end_idx:
+                    # Inputs are 1-based inclusive; stored indices are 0-based.
+                    selected_indices = list(range(start_idx - 1, end_idx))
+                    st.info(f"✓ Selected residues: {[i+1 for i in selected_indices]}")
+                else:
+                    st.error("Start index must be ≤ end index")
+            elif selection_method == "Select specific residues":
+                # Multi-select for specific residues; defaults to the first (up to) four
+                selected_names = st.multiselect(
+                    "Select residues",
+                    options=[info['full_name'] for info in structure_info],
+                    default=[structure_info[i]['full_name'] for i in range(min(4, len(structure_info)))],
+                    key=f"specific_{set_name}_{file.name}"
+                )
+                # Map display labels back to 0-based indices, in structure order
+                name_to_idx = {info['full_name']: info['index'] for info in structure_info}
+                selected_indices = [name_to_idx[name] for name in selected_names]
+                selected_indices.sort()
+                if selected_indices:
+                    st.info(f"✓ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")
+            else:  # Use all residues
+                selected_indices = list(range(len(structure_info)))
+                st.info(f"✓ Using all {len(selected_indices)} residues")
+            # Show selected residues details
+            if selected_indices:
+                selected_df = info_df[info_df['Index (0-based)'].isin(selected_indices)]
+                st.markdown("**Selected residues:**")
+                st.dataframe(selected_df, use_container_width=True)
+            selections[file.name] = selected_indices
+    return selections
+def save_uploaded_file(uploaded_file, directory):
+    """
+    Save an uploaded file into a directory and return the path that was written.
+    Args:
+        uploaded_file: Object exposing `.name` (file name) and `.getbuffer()` (file bytes)
+        directory: Existing directory that receives the file
+    Returns:
+        Path of the written file inside `directory`
+    Raises:
+        ValueError: If the name has no usable final component (empty, "." or "..")
+    NOTE: this repeats a `save_uploaded_file` defined earlier in this file; being the
+    later definition, this is the one that stays bound at import time.
+    """
+    # The name comes from the uploader's client, so treat it as untrusted: keep only
+    # the final path component so the write cannot land outside `directory`.
+    safe_name = os.path.basename(uploaded_file.name)
+    if safe_name in ("", ".", ".."):
+        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")
+    file_path = os.path.join(directory, safe_name)
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    return file_path
+def extract_window_coords(residues, window_indices):
+    """
+    Collect the coordinates of the residues named by a window, in window order.
+    Args:
+        residues: List of all residues
+        window_indices: List of indices to extract; an index >= len(residues) is skipped
+    Returns:
+        numpy array built from, per residue, its backbone/sugar coordinates followed
+        by its base coordinates
+    """
+    from rmsd_utils import get_backbone_sugar_coords_from_residue, get_base_coords_from_residue
+    residue_count = len(residues)
+    collected = []
+    for position in window_indices:
+        if position >= residue_count:
+            continue
+        current = residues[position]
+        # Backbone and sugar atoms first, then the base atoms of the same residue
+        collected += get_backbone_sugar_coords_from_residue(current)
+        collected += get_base_coords_from_residue(current)
+    return np.asarray(collected)
+def compare_structures_with_selection(reference_files, query_files, ref_selections, query_selections, temp_dir):
+    """
+    Compare reference and query structures using user-selected residues (direct comparison).
+    Only pairs whose selections both hold at least 2 residues and have equal length are compared.
+    Each query file is parsed once and reused for every reference it is compared against.
+    Args:
+        reference_files: List of reference motif files
+        query_files: List of query motif files
+        ref_selections: Dict mapping filename to selected residue indices
+        query_selections: Dict mapping filename to selected residue indices
+        temp_dir: Temporary directory containing files
+    Returns:
+        DataFrame with one row per comparison (empty if no pair qualifies). When
+        calculate_rotation_rmsd returns None for either value, the row is kept with
+        RMSD 999.0 and an identity rotation matrix.
+    """
+    min_selected = 2
+    def usable(files, selections):
+        """Pair each file name with its selection, dropping selections that are too small."""
+        named = [(f.name, selections.get(f.name, [])) for f in files]
+        return [(name, indices) for name, indices in named if len(indices) >= min_selected]
+    def load_selection(name, indices):
+        """Parse one file and derive coordinates, COM, label and sequence for its selection."""
+        path = os.path.join(temp_dir, name)
+        residues = parse_residue_atoms(path)
+        coords = extract_window_coords(residues, indices)
+        return {
+            'path': path,
+            'coords': coords,
+            'com': calculate_COM(coords),
+            'desc': f"[{','.join([str(i+1) for i in indices])}]",
+            'sequence': ''.join([residues[i]['resname'] for i in indices if i < len(residues)]),
+        }
+    refs = usable(reference_files, ref_selections)
+    queries = usable(query_files, query_selections)
+    # Count valid comparisons up front so the progress bar has a denominator
+    total_comparisons = sum(
+        1
+        for _, ref_indices in refs
+        for _, query_indices in queries
+        if len(ref_indices) == len(query_indices)
+    )
+    if total_comparisons == 0:
+        st.error("No valid comparisons found. Ensure selected regions have matching sizes.")
+        return pd.DataFrame()
+    progress_bar = st.progress(0)
+    status_text = st.empty()
+    results = []
+    comparison_count = 0
+    # Filled lazily, so a query is parsed only if some reference has a matching size
+    query_cache = {}
+    for ref_name, ref_indices in refs:
+        ref = load_selection(ref_name, ref_indices)
+        for query_name, query_indices in queries:
+            # Only compare if same number of residues
+            if len(ref_indices) != len(query_indices):
+                continue
+            query = query_cache.get(query_name)
+            if query is None:
+                query = query_cache[query_name] = load_selection(query_name, query_indices)
+            # Calculate RMSD
+            U, RMSD = calculate_rotation_rmsd(ref['coords'], query['coords'], ref['com'], query['com'])
+            if U is None or RMSD is None:
+                RMSD = 999.0
+                U = np.eye(3)
+            # Store results
+            results.append({
+                'Reference': ref_name,
+                'Ref_Residues': ref['desc'],
+                'Ref_Sequence': ref['sequence'],
+                'Ref_Indices': ref_indices,
+                'Query': query_name,
+                'Query_Residues': query['desc'],
+                'Query_Sequence': query['sequence'],
+                'Query_Indices': query_indices,
+                'Num_Residues': len(ref_indices),
+                'RMSD': RMSD,
+                'Rotation_Matrix': U,
+                'Ref_COM': ref['com'],
+                'Query_COM': query['com'],
+                'Ref_Path': ref['path'],
+                'Query_Path': query['path']
+            })
+            comparison_count += 1
+            progress_bar.progress(comparison_count / total_comparisons)
+            status_text.text(f"Processing: {ref_name}{ref['desc']} vs {query_name}{query['desc']}")
+    progress_bar.empty()
+    status_text.empty()
+    return pd.DataFrame(results)
+def compare_structures_with_windows(reference_files, query_files, ref_selections, query_selections,
+                                    window_size, window_type, temp_dir):
+    """
+    Compare reference and query structures using sliding windows on selected residues.
+    Allows comparison of different-sized selections. Windows are generated once per file
+    and each file is parsed once, instead of once per reference window.
+    Args:
+        reference_files: List of reference motif files
+        query_files: List of query motif files
+        ref_selections: Dict mapping filename to selected residue indices
+        query_selections: Dict mapping filename to selected residue indices
+        window_size: Size of comparison window
+        window_type: "contiguous" or "non-contiguous"
+        temp_dir: Temporary directory containing files
+    Returns:
+        DataFrame with one row per (reference window, query window) pair, ordered by
+        reference file, reference window, query file, query window (empty if no file
+        pair yields windows). When calculate_rotation_rmsd returns None for either value,
+        the row is kept with RMSD 999.0 and an identity rotation matrix.
+    """
+    from itertools import combinations
+    def generate_windows_from_selection(selected_indices, win_size, win_type):
+        """Generate windows from selected indices"""
+        if len(selected_indices) < win_size:
+            return []
+        if win_type == "contiguous":
+            # Slides over the selection list itself, so a window follows the selection
+            # order even when the selected residue indices are not consecutive.
+            return [
+                selected_indices[i:i + win_size]
+                for i in range(len(selected_indices) - win_size + 1)
+            ]
+        # non-contiguous: every combination of win_size selected indices
+        return [list(combo) for combo in combinations(selected_indices, win_size)]
+    def plan(files, selections):
+        """List (name, selected indices, windows) for every file, generating windows once."""
+        planned = []
+        for f in files:
+            indices = selections.get(f.name, [])
+            planned.append((f.name, indices, generate_windows_from_selection(indices, window_size, window_type)))
+        return planned
+    def describe_windows(path, windows):
+        """Parse `path` once and precompute coords, COM, label and sequence for each window."""
+        residues = parse_residue_atoms(path)
+        described = []
+        for window in windows:
+            coords = extract_window_coords(residues, window)
+            described.append({
+                'indices': window,
+                'coords': coords,
+                'com': calculate_COM(coords),
+                'desc': f"[{','.join([str(i+1) for i in window])}]",
+                'sequence': ''.join([residues[i]['resname'] for i in window if i < len(residues)]),
+            })
+        return described
+    ref_plan = plan(reference_files, ref_selections)
+    query_plan = plan(query_files, query_selections)
+    # Every reference window meets every query window; files without windows add 0.
+    total_comparisons = (
+        sum(len(windows) for _, _, windows in ref_plan)
+        * sum(len(windows) for _, _, windows in query_plan)
+    )
+    if total_comparisons == 0:
+        st.error(f"No valid comparisons found. Ensure selected regions have at least {window_size} residues.")
+        return pd.DataFrame()
+    progress_bar = st.progress(0)
+    status_text = st.empty()
+    results = []
+    comparison_count = 0
+    # Filled on first use so each query file is parsed and described exactly once
+    query_cache = {}
+    for ref_name, ref_indices, ref_windows in ref_plan:
+        if not ref_windows:
+            st.warning(f"Skipping {ref_name}: selected {len(ref_indices)} residues, need at least {window_size}")
+            continue
+        ref_path = os.path.join(temp_dir, ref_name)
+        for ref in describe_windows(ref_path, ref_windows):
+            for query_name, _, query_windows in query_plan:
+                if not query_windows:
+                    continue
+                query_path = os.path.join(temp_dir, query_name)
+                if query_name not in query_cache:
+                    query_cache[query_name] = describe_windows(query_path, query_windows)
+                for query in query_cache[query_name]:
+                    # Calculate RMSD
+                    U, RMSD = calculate_rotation_rmsd(ref['coords'], query['coords'], ref['com'], query['com'])
+                    if U is None or RMSD is None:
+                        RMSD = 999.0
+                        U = np.eye(3)
+                    # Store results
+                    results.append({
+                        'Reference': ref_name,
+                        'Ref_Residues': ref['desc'],
+                        'Ref_Sequence': ref['sequence'],
+                        'Ref_Indices': ref['indices'],
+                        'Query': query_name,
+                        'Query_Residues': query['desc'],
+                        'Query_Sequence': query['sequence'],
+                        'Query_Indices': query['indices'],
+                        'Num_Residues': window_size,
+                        'RMSD': RMSD,
+                        'Rotation_Matrix': U,
+                        'Ref_COM': ref['com'],
+                        'Query_COM': query['com'],
+                        'Ref_Path': ref_path,
+                        'Query_Path': query_path
+                    })
+                    comparison_count += 1
+                    progress_bar.progress(comparison_count / total_comparisons)
+                    status_text.text(f"Processing: {ref_name}{ref['desc']} vs {query_name}{query['desc']}")
+    progress_bar.empty()
+    status_text.empty()
+    return pd.DataFrame(results)
+def main():
+    # Header
+    st.markdown('<p class="main-header">🧬 RNA Motif Structure Comparison</p>', unsafe_allow_html=True)
+    st.markdown('<p class="sub-header">Compare RNA motifs with flexible residue selection</p>', unsafe_allow_html=True)
+    # Sidebar
+    st.sidebar.header("⚙️ Configuration")
+    # File upload
+    st.sidebar.subheader("1️⃣ Upload Structures")
+    reference_files = st.sidebar.file_uploader(
+        "Upload Reference Motif PDB files (Set A)",
+        type=['pdb', 'PDB'],
+        accept_multiple_files=True,
+        key="reference",
+        help="Upload RNA motif structures to use as reference"
+    )
+    query_files = st.sidebar.file_uploader(
+        "Upload Query Motif PDB files (Set B)",
+        type=['pdb', 'PDB'],
+        accept_multiple_files=True,
+        key="query",
+        help="Upload RNA motif structures to compare against reference"
+    )
+    # Main content area
+    if not reference_files or not query_files:
+        st.info("👈 Please upload reference and query motif PDB files to begin analysis")
+        # Show example info
+        with st.expander("ℹ️ About this tool"):
+            st.markdown("""
+            ### Purpose
+            This tool compares the 3D structures of RNA motifs with **flexible residue selection** and **multiple comparison modes**.
+            ### Workflow
+            1. **Upload PDB files** for reference and query motifs
+            2. **Preview structures** and see all residues in each file
+            3. **Select residues** to include in comparison (e.g., exclude stem bases, keep only loop)
+            4. **Choose comparison mode**:
+               - **Direct comparison**: Compare selected regions directly (must be same size)
+               - **Window-based comparison**: Generate windows from selections (handles different sizes)
+            5. **Run analysis** using RMSD-based structural alignment
+            ### Comparison Modes
+            #### Direct Comparison (Same Size)
+            - Compares your exact selections
+            - Example: You select 4 loop residues from each structure
+            - Result: Direct 4-residue vs 4-residue comparison
+            - Best for: When all structures have same-sized regions of interest
+            #### Window-Based Comparison (Different Sizes)
+            - Generates sliding windows from your selections
+            - Example: You select 4 loop residues from ref, 6 loop residues from query
+            - Set window size to 4
+            - Result: Ref's 4 residues compared against all 4-residue windows from query's 6
+            - Best for: When structures have different-sized regions but you want to find similar sub-regions
+            ### Selection Methods
+            - **By range**: Select consecutive residues (e.g., residues 3-6 for a tetraloop)
+            - **Specific residues**: Pick any combination of residues (e.g., 1,3,5,7)
+            - **All residues**: Use the entire structure
+            ### Method Details
+            - RMSD calculated using backbone, sugar, and select base atoms
+            - Base atoms mapped: purines (N9,C8,C4) ↔ pyrimidines (N1,C2,C6)
+            - Kabsch algorithm for optimal structural alignment
+            ### Example Use Cases
+            **Case 1: Extract loops from 2+4+2 structures (Direct)**
+            - All structures have 8 residues (2 stem + 4 loop + 2 stem)
+            - Select residues 3-6 for all structures (the 4-residue loop)
+            - Use "Direct comparison"
+            - Result: Compare loop vs loop directly
+            **Case 2: Compare 4-mer loop vs 6-mer loop (Window-based)**
+            - Structure A: Select residues 3-6 (4 loop residues)
+            - Structure B: Select residues 2-7 (6 loop residues)
+            - Use "Window-based comparison" with window size = 4
+            - Result: Structure A compared against 3 windows from Structure B
+            **Case 3: Find similar regions in different structures (Window-based)**
+            - Reference: Select 5 residues of interest
+            - Query: Select 10 residues from larger region
+            - Use "Window-based comparison" with window size = 5
+            - Result: Find which 5-residue window in query best matches reference
+            ### Output
+            - RMSD values for all comparisons
+            - Interactive 3D visualization of aligned structures
+            - Rotation and translation matrices
+            - Sequence information for compared regions
+            """)
+        return
+    # Create temporary directory for file processing
+    temp_dir = tempfile.mkdtemp()
+    # Save uploaded files
+    for file in reference_files:
+        save_uploaded_file(file, temp_dir)
+    for file in query_files:
+        save_uploaded_file(file, temp_dir)
+    # Display file info
+    st.markdown("---")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.metric("Reference Motifs", len(reference_files))
+    with col2:
+        st.metric("Query Motifs", len(query_files))
+    # Structure preview and selection
+    st.markdown("---")
+    # Get residue selections for reference and query sets
+    ref_selections = display_structure_selector(reference_files, temp_dir, "Reference")
+    st.markdown("---")
+    query_selections = display_structure_selector(query_files, temp_dir, "Query")
+    # Validate selections
+    st.markdown("---")
+    valid_selections = True
+    min_residues = 2
+    for filename, indices in ref_selections.items():
+        if len(indices) < min_residues:
+            st.error(f"❌ {filename}: Need at least {min_residues} residues selected, got {len(indices)}")
+            valid_selections = False
+    for filename, indices in query_selections.items():
+        if len(indices) < min_residues:
+            st.error(f"❌ {filename}: Need at least {min_residues} residues selected, got {len(indices)}")
+            valid_selections = False
+    # Check if all selections have the same number of residues
+    ref_lengths = set(len(indices) for indices in ref_selections.values())
+    query_lengths = set(len(indices) for indices in query_selections.values())
+    all_lengths = ref_lengths.union(query_lengths)
+    if len(all_lengths) > 1:
+        st.warning(f"⚠️ Selected regions have different sizes: {sorted(all_lengths)} residues. Only structures with matching sizes will be compared.")
+    # Run analysis button
+    st.sidebar.markdown("---")
+    st.sidebar.subheader("2️⃣ Comparison Method")
+    comparison_mode = st.sidebar.radio(
+        "How to compare structures?",
+        ["Direct comparison (same size)", "Window-based comparison (different sizes)"],
+        help="""
+        Direct: Compare selected regions directly (must have same size)
+        Window-based: Generate sliding windows for flexible comparison
+        """
+    )
+    window_size = None
+    window_type = None
+    if comparison_mode == "Window-based comparison (different sizes)":
+        st.sidebar.markdown("**Window Configuration**")
+        window_size = st.sidebar.number_input(
+            "Window Size",
+            min_value=2,
+            max_value=20,
+            value=4,
+            step=1,
+            help="Number of residues per comparison window"
+        )
+        window_type = st.sidebar.radio(
+            "Window Type",
+            ["contiguous", "non-contiguous"],
+            help="Contiguous: sliding windows. Non-contiguous: all combinations"
+        )
+    st.sidebar.markdown("---")
+    st.sidebar.subheader("3️⃣ Run Analysis")
+    if st.sidebar.button("🚀 Run Analysis", type="primary", disabled=not valid_selections):
+        if not valid_selections:
+            st.error("Please fix selection errors before running analysis")
+            return
+        with st.spinner("Analyzing structures..."):
+            if comparison_mode == "Direct comparison (same size)":
+                results_df = compare_structures_with_selection(
+                    reference_files,
+                    query_files,
+                    ref_selections,
+                    query_selections,
+                    temp_dir
+                )
+            else:  # Window-based comparison
+                results_df = compare_structures_with_windows(
+                    reference_files,
+                    query_files,
+                    ref_selections,
+                    query_selections,
+                    window_size,
+                    window_type,
+                    temp_dir
+                )
+            # Store results in session state
+            st.session_state['results_df'] = results_df
+            st.session_state['ref_selections'] = ref_selections
+            st.session_state['query_selections'] = query_selections
+            st.session_state['comparison_mode'] = comparison_mode
+            if len(results_df) > 0:
+                st.success(f"✅ Analysis complete! {len(results_df)} comparisons performed.")
+            else:
+                st.warning("⚠️ No comparisons could be performed. Check that structures meet comparison requirements.")
+    # Display results if available
+    if 'results_df' in st.session_state and len(st.session_state['results_df']) > 0:
+        results_df = st.session_state['results_df']
+        # Add RMSD threshold filter
+        st.sidebar.markdown("---")
+        st.sidebar.subheader("4️⃣ Filter Results")
+        rmsd_threshold = st.sidebar.slider(
+            "RMSD Threshold (Å)",
+            min_value=0.0,
+            max_value=5.0,
+            value=2.0,
+            step=0.1,
+            help="Only show results below this RMSD value"
+        )
+        # Show comparison mode
+        if 'comparison_mode' in st.session_state:
+            mode_display = "Direct" if "Direct" in st.session_state['comparison_mode'] else "Window-based"
+            st.sidebar.info(f"**Mode**: {mode_display}")
+        # Filter by threshold
+        filtered_df = results_df[results_df['RMSD'] <= rmsd_threshold].copy()
+        # Summary statistics
+        st.markdown("---")
+        st.subheader("📊 Summary Statistics")
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            st.metric("Total Comparisons", len(results_df))
+        with col2:
+            st.metric("Below Threshold", len(filtered_df))
+        with col3:
+            st.metric("Best RMSD", f"{results_df['RMSD'].min():.3f} Å")
+        with col4:
+            st.metric("Mean RMSD", f"{results_df['RMSD'].mean():.3f} Å")
+        # Results table
+        st.markdown("---")
+        st.subheader("🔍 Comparison Results")
+        # Prepare display dataframe
+        display_df = filtered_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'Num_Residues', 'RMSD']].copy()
+        display_df = display_df.sort_values('RMSD').reset_index(drop=True)
+        display_df['RMSD'] = display_df['RMSD'].round(3)
+        # Display with selection
+        st.dataframe(
+            display_df,
+            use_container_width=True,
+            height=300
+        )
+        # Structure selection for visualization
+        st.markdown("---")
+        st.subheader("🔬 3D Structure Visualization")
+        if len(filtered_df) > 0:
+            # Select a comparison to visualize
+            selected_idx = st.selectbox(
+                "Select a comparison to visualize:",
+                range(len(filtered_df)),
+                format_func=lambda i: f"{filtered_df.iloc[i]['Reference']}{filtered_df.iloc[i]['Ref_Residues']} ({filtered_df.iloc[i]['Ref_Sequence']}) vs {filtered_df.iloc[i]['Query']}{filtered_df.iloc[i]['Query_Residues']} ({filtered_df.iloc[i]['Query_Sequence']}) | RMSD: {filtered_df.iloc[i]['RMSD']:.3f} Å"
+            )
+            selected_row = filtered_df.iloc[selected_idx]
+            # Display RMSD info
+            st.info(f"**RMSD: {selected_row['RMSD']:.3f} Å** ({selected_row['Num_Residues']} residues) | Reference: {selected_row['Reference']}{selected_row['Ref_Residues']} ({selected_row['Ref_Sequence']}) | Query: {selected_row['Query']}{selected_row['Query_Residues']} ({selected_row['Query_Sequence']})")
+            # Create visualization - wider display
+            col1, col2, col3 = st.columns([0.5, 4, 0.5])
+            with col2:
+                try:
+                    viz_html = create_structure_visualization(
+                        selected_row['Ref_Path'],
+                        selected_row['Query_Path'],
+                        selected_row['Ref_Indices'],
+                        selected_row['Query_Indices'],
+                        selected_row['Rotation_Matrix'],
+                        selected_row['Ref_COM'],
+                        selected_row['Query_COM'],
+                        selected_row['RMSD']
+                    )
+                    st.components.v1.html(viz_html, height=700, scrolling=False)
+                except Exception as e:
+                    st.error(f"Error creating visualization: {str(e)}")
+            # Show transformation details
+            with st.expander("🔧 Transformation Details"):
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.markdown("**Rotation Matrix (U):**")
+                    st.dataframe(
+                        pd.DataFrame(selected_row['Rotation_Matrix']).round(4),
+                        use_container_width=True
+                    )
+                with col2:
+                    st.markdown("**Translation Vectors:**")
+                    st.write(f"Reference COM: [{selected_row['Ref_COM'][0]:.3f}, {selected_row['Ref_COM'][1]:.3f}, {selected_row['Ref_COM'][2]:.3f}]")
+                    st.write(f"Query COM: [{selected_row['Query_COM'][0]:.3f}, {selected_row['Query_COM'][1]:.3f}, {selected_row['Query_COM'][2]:.3f}]")
+            # Download aligned structures
+            with st.expander("💾 Download Structure Files"):
+                st.markdown("**Download extracted and aligned structures for external visualization**")
+                from visualization import extract_window_pdb, transform_pdb_string
+                # Extract reference window
+                ref_pdb = extract_window_pdb(
+                    selected_row['Ref_Path'],
+                    selected_row['Ref_Indices']
+                )
+                # Extract and transform query window
+                query_pdb = extract_window_pdb(
+                    selected_row['Query_Path'],
+                    selected_row['Query_Indices']
+                )
+                query_aligned_pdb = transform_pdb_string(
+                    query_pdb,
+                    selected_row['Rotation_Matrix'],
+                    selected_row['Query_COM'],
+                    selected_row['Ref_COM']
+                )
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    # Reference structure
+                    ref_filename = f"ref_{selected_row['Reference'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Ref_Indices']]))}.pdb"
+                    st.download_button(
+                        label="📥 Reference PDB",
+                        data=ref_pdb,
+                        file_name=ref_filename,
+                        mime="chemical/x-pdb",
+                        help="Original reference structure (selected residues only)"
+                    )
+                with col2:
+                    # Query structure (original position)
+                    query_filename = f"query_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Indices']]))}.pdb"
+                    st.download_button(
+                        label="📥 Query PDB (Original)",
+                        data=query_pdb,
+                        file_name=query_filename,
+                        mime="chemical/x-pdb",
+                        help="Original query structure (selected residues only)"
+                    )
+                with col3:
+                    # Query structure (aligned)
+                    query_aligned_filename = f"query_aligned_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Indices']]))}.pdb"
+                    st.download_button(
+                        label="📥 Query PDB (Aligned)",
+                        data=query_aligned_pdb,
+                        file_name=query_aligned_filename,
+                        mime="chemical/x-pdb",
+                        help="Query structure aligned to reference"
+                    )
+                st.info("💡 **Tip:** Load reference and aligned query together in PyMOL/Chimera to examine the superposition")
+        else:
+            st.warning("No comparisons below the RMSD threshold. Try increasing the threshold.")
+        # Download results
+        st.markdown("---")
+        st.subheader("💾 Export Results")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.markdown("**Export Results Table**")
+            # Prepare CSV - make sure all columns exist
+            export_columns = ['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'Num_Residues', 'RMSD']
+            export_df = results_df[export_columns].copy()
+            export_df = export_df.sort_values('RMSD').reset_index(drop=True)
+            csv = export_df.to_csv(index=False)
+            st.download_button(
+                label="📥 Download Results (CSV)",
+                data=csv,
+                file_name="rna_motif_comparison_results.csv",
+                mime="text/csv"
+            )
+        with col2:
+            st.markdown("**Export All Aligned Structures**")
+            if st.button("📦 Generate PDB Archive", help="Create a ZIP file with all aligned structure pairs"):
+                with st.spinner("Generating PDB files..."):
+                    import zipfile
+                    import io
+                    from visualization import extract_window_pdb, transform_pdb_string
+                    # Create ZIP file in memory
+                    zip_buffer = io.BytesIO()
+                    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+                        # Process each comparison
+                        for idx, row in filtered_df.iterrows():
+                            # Create a directory name for this comparison
+                            comp_name = f"comparison_{idx:03d}_rmsd_{row['RMSD']:.3f}"
+                            # Extract reference
+                            ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Indices'])
+                            ref_filename = f"{comp_name}/reference_{row['Reference'].replace('.pdb', '')}.pdb"
+                            zip_file.writestr(ref_filename, ref_pdb)
+                            # Extract query (original)
+                            query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Indices'])
+                            query_filename = f"{comp_name}/query_original_{row['Query'].replace('.pdb', '')}.pdb"
+                            zip_file.writestr(query_filename, query_pdb)
+                            # Extract and align query
+                            query_aligned_pdb = transform_pdb_string(
+                                query_pdb,
+                                row['Rotation_Matrix'],
+                                row['Query_COM'],
+                                row['Ref_COM']
+                            )
+                            query_aligned_filename = f"{comp_name}/query_aligned_{row['Query'].replace('.pdb', '')}.pdb"
+                            zip_file.writestr(query_aligned_filename, query_aligned_pdb)
+                            # Add a README for this comparison
+                            readme_content = f"""Comparison #{idx}
+RMSD: {row['RMSD']:.3f} Å
+Residues Compared: {row['Num_Residues']}
+Reference:
+  File: {row['Reference']}
+  Residues: {row['Ref_Residues']}
+  Sequence: {row['Ref_Sequence']}
+Query:
+  File: {row['Query']}
+  Residues: {row['Query_Residues']}
+  Sequence: {row['Query_Sequence']}
+Files:
+  - reference_*.pdb: Reference structure (selected residues)
+  - query_original_*.pdb: Query structure (original position)
+  - query_aligned_*.pdb: Query structure (aligned to reference)
+To visualize in PyMOL:
+  load reference_*.pdb
+  load query_aligned_*.pdb
+To visualize in Chimera:
+  File → Open → Select both reference and query_aligned PDB files
 """
+                            readme_filename = f"{comp_name}/README.txt"
+                            zip_file.writestr(readme_filename, readme_content)
+                    zip_buffer.seek(0)
+                    st.download_button(
+                        label="📥 Download PDB Archive (ZIP)",
+                        data=zip_buffer.getvalue(),
+                        file_name="aligned_structures.zip",
+                        mime="application/zip",
+                        help=f"Contains {len(filtered_df)} comparison sets with reference, original query, and aligned query PDBs"
+                    )
+                    st.success(f"✅ Archive ready! Contains {len(filtered_df)} comparisons with 3 PDB files each.")
+# Script entry point: call main() only when this file is executed directly,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()