Spaces:

HouBioLab
/

MotifAlign

Sleeping

App Files Files Community

jiehou committed on Oct 27, 2025

Commit

a6c9f2a

verified ·

1 Parent(s): fc2db95

Upload 3 files

Browse files

Files changed (3) hide show

app.py +978 -0
rmsd_utils.py +294 -0
visualization.py +673 -0

app.py ADDED Viewed

	@@ -0,0 +1,978 @@

+"""
+RNA Motif Structure Comparison Tool
+Streamlit app for comparing RNA motif structures with flexible residue selection
+"""
+import streamlit as st
+import numpy as np
+import pandas as pd
+from pathlib import Path
+import io
+import tempfile
+import os
+# Import our RMSD calculation functions
+from rmsd_utils import (
+    parse_residue_atoms,
+    get_backbone_sugar_and_selectbase_coords_fixed,
+    calculate_COM,
+    calculate_rotation_rmsd,
+    translate_rotate_coords
+)
+from visualization import create_structure_visualization
+# Page configuration: browser-tab title and icon, full-width layout, and a
+# sidebar that starts expanded.
+st.set_page_config(
+    page_title="RNA Motif Structure Comparison",
+    page_icon="🧬",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS, injected as raw HTML (hence unsafe_allow_html=True). It defines
+# three classes: .main-header and .sub-header for the page title and tagline,
+# and .metric-box for a padded, rounded, light-grey container.
+st.markdown("""
+<style>
+    .main-header {
+        font-size: 2.5rem;
+        font-weight: bold;
+        color: #1f77b4;
+        margin-bottom: 1rem;
+    }
+    .sub-header {
+        font-size: 1.2rem;
+        color: #666;
+        margin-bottom: 2rem;
+    }
+    .metric-box {
+        background-color: #f0f2f6;
+        padding: 1rem;
+        border-radius: 0.5rem;
+        margin: 0.5rem 0;
+    }
+</style>
+""", unsafe_allow_html=True)
+def save_uploaded_file(uploaded_file, directory):
+    """Save an uploaded file to a temporary directory"""
+    file_path = os.path.join(directory, uploaded_file.name)
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    return file_path
+def get_structure_info(pdb_path):
+    """
+    Get information about a structure's residues.
+    Args:
+        pdb_path: Path to PDB file
+    Returns:
+        List of dicts with residue info: [{index, resnum, resname, full_name}, ...]
+    """
+    residues = parse_residue_atoms(pdb_path)
+    structure_info = []
+    for idx, res in enumerate(residues):
+        structure_info.append({
+            'index': idx,
+            'resnum': res['resnum'],
+            'resname': res['resname'],
+            'full_name': f"{idx+1}. {res['resname']} (residue #{res['resnum']})"
+        })
+    return structure_info
+def display_structure_selector(files, temp_dir, set_name):
+    """
+    Display structure information and allow users to select residues.
+    Args:
+        files: List of uploaded files
+        temp_dir: Temporary directory containing files
+        set_name: Name of the set (e.g., "Reference" or "Query")
+    Returns:
+        Dict mapping filename to list of selected residue indices
+    """
+    if not files:
+        return {}
+    st.subheader(f"📋 {set_name} Structure Preview & Selection")
+    selections = {}
+    for file in files:
+        file_path = os.path.join(temp_dir, file.name)
+        structure_info = get_structure_info(file_path)
+        with st.expander(f"🔍 {file.name} ({len(structure_info)} residues)"):
+            # Display residue table
+            info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
+            info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
+            info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
+            info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]
+            st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38))
+            # Selection method
+            selection_method = st.radio(
+                f"Selection method for {file.name}",
+                ["Select by range", "Select specific residues", "Use all residues"],
+                key=f"method_{set_name}_{file.name}",
+                horizontal=True
+            )
+            selected_indices = []
+            if selection_method == "Select by range":
+                col1, col2 = st.columns(2)
+                with col1:
+                    start_idx = st.number_input(
+                        "Start index (1-based)",
+                        min_value=1,
+                        max_value=len(structure_info),
+                        value=1,
+                        key=f"start_{set_name}_{file.name}"
+                    )
+                with col2:
+                    end_idx = st.number_input(
+                        "End index (1-based, inclusive)",
+                        min_value=1,
+                        max_value=len(structure_info),
+                        value=min(4, len(structure_info)),
+                        key=f"end_{set_name}_{file.name}"
+                    )
+                if start_idx <= end_idx:
+                    selected_indices = list(range(start_idx - 1, end_idx))
+                    st.info(f"✓ Selected residues: {[i+1 for i in selected_indices]}")
+                else:
+                    st.error("Start index must be ≤ end index")
+            elif selection_method == "Select specific residues":
+                # Multi-select for specific residues
+                selected_names = st.multiselect(
+                    "Select residues",
+                    options=[info['full_name'] for info in structure_info],
+                    default=[structure_info[i]['full_name'] for i in range(min(4, len(structure_info)))],
+                    key=f"specific_{set_name}_{file.name}"
+                )
+                # Map back to indices
+                name_to_idx = {info['full_name']: info['index'] for info in structure_info}
+                selected_indices = [name_to_idx[name] for name in selected_names]
+                selected_indices.sort()
+                if selected_indices:
+                    st.info(f"✓ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")
+            else:  # Use all residues
+                selected_indices = list(range(len(structure_info)))
+                st.info(f"✓ Using all {len(selected_indices)} residues")
+            # Show selected residues details
+            if selected_indices:
+                selected_df = info_df[info_df['Index (0-based)'].isin(selected_indices)]
+                st.markdown("**Selected residues:**")
+                st.dataframe(selected_df, use_container_width=True)
+            selections[file.name] = selected_indices
+    return selections
+def save_uploaded_file(uploaded_file, directory):
+    """Save an uploaded file to a temporary directory"""
+    file_path = os.path.join(directory, uploaded_file.name)
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    return file_path
+def extract_window_coords(residues, window_indices):
+    """
+    Extract coordinates for a specific window of residues.
+    Args:
+        residues: List of all residues
+        window_indices: List of indices to extract
+    Returns:
+        numpy array of coordinates
+    """
+    from rmsd_utils import get_backbone_sugar_coords_from_residue, get_base_coords_from_residue
+    all_coords = []
+    for idx in window_indices:
+        if idx < len(residues):
+            residue = residues[idx]
+            # Get backbone and sugar coordinates
+            backbone_coords = get_backbone_sugar_coords_from_residue(residue)
+            all_coords.extend(backbone_coords)
+            # Get base coordinates
+            base_coords = get_base_coords_from_residue(residue)
+            all_coords.extend(base_coords)
+    return np.asarray(all_coords)
+def compare_structures_with_selection(reference_files, query_files, ref_selections, query_selections, temp_dir):
+    """
+    Compare reference and query structures using user-selected residues (direct comparison).
+    Only compares structures with matching selection sizes.
+    Args:
+        reference_files: List of reference motif files
+        query_files: List of query motif files
+        ref_selections: Dict mapping filename to selected residue indices
+        query_selections: Dict mapping filename to selected residue indices
+        temp_dir: Temporary directory containing files
+    Returns:
+        DataFrame with comparison results
+    """
+    results = []
+    # Count valid comparisons
+    total_comparisons = 0
+    for ref_file in reference_files:
+        ref_indices = ref_selections.get(ref_file.name, [])
+        if len(ref_indices) < 2:
+            continue
+        for query_file in query_files:
+            query_indices = query_selections.get(query_file.name, [])
+            if len(query_indices) < 2:
+                continue
+            # Only compare if they have the same number of selected residues
+            if len(ref_indices) == len(query_indices):
+                total_comparisons += 1
+    if total_comparisons == 0:
+        st.error("No valid comparisons found. Ensure selected regions have matching sizes.")
+        return pd.DataFrame()
+    progress_bar = st.progress(0)
+    status_text = st.empty()
+    comparison_count = 0
+    for ref_file in reference_files:
+        ref_name = ref_file.name
+        ref_path = os.path.join(temp_dir, ref_name)
+        ref_indices = ref_selections.get(ref_name, [])
+        if len(ref_indices) < 2:
+            continue
+        # Parse reference motif
+        ref_residues = parse_residue_atoms(ref_path)
+        # Extract coordinates for selected residues
+        ref_coords = extract_window_coords(ref_residues, ref_indices)
+        ref_com = calculate_COM(ref_coords)
+        # Get residue description
+        ref_residue_desc = f"[{','.join([str(i+1) for i in ref_indices])}]"
+        ref_sequence = ''.join([ref_residues[i]['resname'] for i in ref_indices if i < len(ref_residues)])
+        for query_file in query_files:
+            query_name = query_file.name
+            query_path = os.path.join(temp_dir, query_name)
+            query_indices = query_selections.get(query_name, [])
+            if len(query_indices) < 2:
+                continue
+            # Only compare if same number of residues
+            if len(ref_indices) != len(query_indices):
+                continue
+            # Parse query motif
+            query_residues = parse_residue_atoms(query_path)
+            # Extract coordinates for selected residues
+            query_coords = extract_window_coords(query_residues, query_indices)
+            query_com = calculate_COM(query_coords)
+            # Get residue description
+            query_residue_desc = f"[{','.join([str(i+1) for i in query_indices])}]"
+            query_sequence = ''.join([query_residues[i]['resname'] for i in query_indices if i < len(query_residues)])
+            # Calculate RMSD
+            U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)
+            if U is None or RMSD is None:
+                RMSD = 999.0
+                U = np.eye(3)
+            # Store results
+            results.append({
+                'Reference': ref_name,
+                'Ref_Residues': ref_residue_desc,
+                'Ref_Sequence': ref_sequence,
+                'Ref_Indices': ref_indices,
+                'Query': query_name,
+                'Query_Residues': query_residue_desc,
+                'Query_Sequence': query_sequence,
+                'Query_Indices': query_indices,
+                'Num_Residues': len(ref_indices),
+                'RMSD': RMSD,
+                'Rotation_Matrix': U,
+                'Ref_COM': ref_com,
+                'Query_COM': query_com,
+                'Ref_Path': ref_path,
+                'Query_Path': query_path
+            })
+            comparison_count += 1
+            progress = comparison_count / total_comparisons
+            progress_bar.progress(progress)
+            status_text.text(f"Processing: {ref_name}{ref_residue_desc} vs {query_name}{query_residue_desc}")
+    progress_bar.empty()
+    status_text.empty()
+    return pd.DataFrame(results)
+def compare_structures_with_windows(reference_files, query_files, ref_selections, query_selections,
+                                    window_size, window_type, temp_dir):
+    """
+    Compare reference and query structures using sliding windows on selected residues.
+    Allows comparison of different-sized selections.
+    Args:
+        reference_files: List of reference motif files
+        query_files: List of query motif files
+        ref_selections: Dict mapping filename to selected residue indices
+        query_selections: Dict mapping filename to selected residue indices
+        window_size: Size of comparison window
+        window_type: "contiguous" or "non-contiguous"
+        temp_dir: Temporary directory containing files
+    Returns:
+        DataFrame with comparison results
+    """
+    from itertools import combinations
+    results = []
+    def generate_windows_from_selection(selected_indices, win_size, win_type):
+        """Generate windows from selected indices"""
+        if len(selected_indices) < win_size:
+            return []
+        if win_type == "contiguous":
+            windows = []
+            for i in range(len(selected_indices) - win_size + 1):
+                windows.append(selected_indices[i:i + win_size])
+            return windows
+        else:  # non-contiguous
+            return [list(combo) for combo in combinations(selected_indices, win_size)]
+    # Count total comparisons
+    total_comparisons = 0
+    for ref_file in reference_files:
+        ref_indices = ref_selections.get(ref_file.name, [])
+        ref_windows = generate_windows_from_selection(ref_indices, window_size, window_type)
+        if not ref_windows:
+            continue
+        for query_file in query_files:
+            query_indices = query_selections.get(query_file.name, [])
+            query_windows = generate_windows_from_selection(query_indices, window_size, window_type)
+            if not query_windows:
+                continue
+            total_comparisons += len(ref_windows) * len(query_windows)
+    if total_comparisons == 0:
+        st.error(f"No valid comparisons found. Ensure selected regions have at least {window_size} residues.")
+        return pd.DataFrame()
+    progress_bar = st.progress(0)
+    status_text = st.empty()
+    comparison_count = 0
+    for ref_file in reference_files:
+        ref_name = ref_file.name
+        ref_path = os.path.join(temp_dir, ref_name)
+        ref_indices = ref_selections.get(ref_name, [])
+        # Generate windows from selected residues
+        ref_windows = generate_windows_from_selection(ref_indices, window_size, window_type)
+        if not ref_windows:
+            st.warning(f"Skipping {ref_name}: selected {len(ref_indices)} residues, need at least {window_size}")
+            continue
+        # Parse reference motif
+        ref_residues = parse_residue_atoms(ref_path)
+        for ref_window in ref_windows:
+            # Extract coordinates for this window
+            ref_coords = extract_window_coords(ref_residues, ref_window)
+            ref_com = calculate_COM(ref_coords)
+            # Get descriptions
+            ref_window_desc = f"[{','.join([str(i+1) for i in ref_window])}]"
+            ref_sequence = ''.join([ref_residues[i]['resname'] for i in ref_window if i < len(ref_residues)])
+            for query_file in query_files:
+                query_name = query_file.name
+                query_path = os.path.join(temp_dir, query_name)
+                query_indices = query_selections.get(query_name, [])
+                # Generate windows from selected residues
+                query_windows = generate_windows_from_selection(query_indices, window_size, window_type)
+                if not query_windows:
+                    continue
+                # Parse query motif
+                query_residues = parse_residue_atoms(query_path)
+                for query_window in query_windows:
+                    # Extract coordinates for this window
+                    query_coords = extract_window_coords(query_residues, query_window)
+                    query_com = calculate_COM(query_coords)
+                    # Get descriptions
+                    query_window_desc = f"[{','.join([str(i+1) for i in query_window])}]"
+                    query_sequence = ''.join([query_residues[i]['resname'] for i in query_window if i < len(query_residues)])
+                    # Calculate RMSD
+                    U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)
+                    if U is None or RMSD is None:
+                        RMSD = 999.0
+                        U = np.eye(3)
+                    # Store results
+                    results.append({
+                        'Reference': ref_name,
+                        'Ref_Residues': ref_window_desc,
+                        'Ref_Sequence': ref_sequence,
+                        'Ref_Indices': ref_window,
+                        'Query': query_name,
+                        'Query_Residues': query_window_desc,
+                        'Query_Sequence': query_sequence,
+                        'Query_Indices': query_window,
+                        'Num_Residues': window_size,
+                        'RMSD': RMSD,
+                        'Rotation_Matrix': U,
+                        'Ref_COM': ref_com,
+                        'Query_COM': query_com,
+                        'Ref_Path': ref_path,
+                        'Query_Path': query_path
+                    })
+                    comparison_count += 1
+                    progress = comparison_count / total_comparisons
+                    progress_bar.progress(progress)
+                    status_text.text(f"Processing: {ref_name}{ref_window_desc} vs {query_name}{query_window_desc}")
+    progress_bar.empty()
+    status_text.empty()
+    return pd.DataFrame(results)
+def main():
+    # Header
+    st.markdown('<p class="main-header">🧬 RNA Motif Structure Comparison</p>', unsafe_allow_html=True)
+    st.markdown('<p class="sub-header">Compare RNA motifs with flexible residue selection</p>', unsafe_allow_html=True)
+    # Sidebar
+    st.sidebar.header("⚙️ Configuration")
+    # File upload
+    st.sidebar.subheader("1️⃣ Upload Structures")
+    reference_files = st.sidebar.file_uploader(
+        "Upload Reference Motif PDB files (Set A)",
+        type=['pdb', 'PDB'],
+        accept_multiple_files=True,
+        key="reference",
+        help="Upload RNA motif structures to use as reference"
+    )
+    query_files = st.sidebar.file_uploader(
+        "Upload Query Motif PDB files (Set B)",
+        type=['pdb', 'PDB'],
+        accept_multiple_files=True,
+        key="query",
+        help="Upload RNA motif structures to compare against reference"
+    )
+    # Main content area
+    if not reference_files or not query_files:
+        st.info("👈 Please upload reference and query motif PDB files to begin analysis")
+        # Show example info
+        with st.expander("ℹ️ About this tool"):
+            st.markdown("""
+            ### Purpose
+            This tool compares the 3D structures of RNA motifs with **flexible residue selection** and **multiple comparison modes**.
+            ### Workflow
+            1. **Upload PDB files** for reference and query motifs
+            2. **Preview structures** and see all residues in each file
+            3. **Select residues** to include in comparison (e.g., exclude stem bases, keep only loop)
+            4. **Choose comparison mode**:
+               - **Direct comparison**: Compare selected regions directly (must be same size)
+               - **Window-based comparison**: Generate windows from selections (handles different sizes)
+            5. **Run analysis** using RMSD-based structural alignment
+            ### Comparison Modes
+            #### Direct Comparison (Same Size)
+            - Compares your exact selections
+            - Example: You select 4 loop residues from each structure
+            - Result: Direct 4-residue vs 4-residue comparison
+            - Best for: When all structures have same-sized regions of interest
+            #### Window-Based Comparison (Different Sizes)
+            - Generates sliding windows from your selections
+            - Example: You select 4 loop residues from ref, 6 loop residues from query
+            - Set window size to 4
+            - Result: Ref's 4 residues compared against all 4-residue windows from query's 6
+            - Best for: When structures have different-sized regions but you want to find similar sub-regions
+            ### Selection Methods
+            - **By range**: Select consecutive residues (e.g., residues 3-6 for a tetraloop)
+            - **Specific residues**: Pick any combination of residues (e.g., 1,3,5,7)
+            - **All residues**: Use the entire structure
+            ### Method Details
+            - RMSD calculated using backbone, sugar, and select base atoms
+            - Base atoms mapped: purines (N9,C8,C4) ↔ pyrimidines (N1,C2,C6)
+            - Kabsch algorithm for optimal structural alignment
+            ### Example Use Cases
+            **Case 1: Extract loops from 2+4+2 structures (Direct)**
+            - All structures have 8 residues (2 stem + 4 loop + 2 stem)
+            - Select residues 3-6 for all structures (the 4-residue loop)
+            - Use "Direct comparison"
+            - Result: Compare loop vs loop directly
+            **Case 2: Compare 4-mer loop vs 6-mer loop (Window-based)**
+            - Structure A: Select residues 3-6 (4 loop residues)
+            - Structure B: Select residues 2-7 (6 loop residues)
+            - Use "Window-based comparison" with window size = 4
+            - Result: Structure A compared against 3 windows from Structure B
+            **Case 3: Find similar regions in different structures (Window-based)**
+            - Reference: Select 5 residues of interest
+            - Query: Select 10 residues from larger region
+            - Use "Window-based comparison" with window size = 5
+            - Result: Find which 5-residue window in query best matches reference
+            ### Output
+            - RMSD values for all comparisons
+            - Interactive 3D visualization of aligned structures
+            - Rotation and translation matrices
+            - Sequence information for compared regions
+            """)
+        return
+    # Create temporary directory for file processing
+    temp_dir = tempfile.mkdtemp()
+    # Save uploaded files
+    for file in reference_files:
+        save_uploaded_file(file, temp_dir)
+    for file in query_files:
+        save_uploaded_file(file, temp_dir)
+    # Display file info
+    st.markdown("---")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.metric("Reference Motifs", len(reference_files))
+    with col2:
+        st.metric("Query Motifs", len(query_files))
+    # Structure preview and selection
+    st.markdown("---")
+    # Get residue selections for reference and query sets
+    ref_selections = display_structure_selector(reference_files, temp_dir, "Reference")
+    st.markdown("---")
+    query_selections = display_structure_selector(query_files, temp_dir, "Query")
+    # Validate selections
+    st.markdown("---")
+    valid_selections = True
+    min_residues = 2
+    for filename, indices in ref_selections.items():
+        if len(indices) < min_residues:
+            st.error(f"❌ {filename}: Need at least {min_residues} residues selected, got {len(indices)}")
+            valid_selections = False
+    for filename, indices in query_selections.items():
+        if len(indices) < min_residues:
+            st.error(f"❌ {filename}: Need at least {min_residues} residues selected, got {len(indices)}")
+            valid_selections = False
+    # Check if all selections have the same number of residues
+    ref_lengths = set(len(indices) for indices in ref_selections.values())
+    query_lengths = set(len(indices) for indices in query_selections.values())
+    all_lengths = ref_lengths.union(query_lengths)
+    if len(all_lengths) > 1:
+        st.warning(f"⚠️ Selected regions have different sizes: {sorted(all_lengths)} residues. Only structures with matching sizes will be compared.")
+    # Run analysis button
+    st.sidebar.markdown("---")
+    st.sidebar.subheader("2️⃣ Comparison Method")
+    comparison_mode = st.sidebar.radio(
+        "How to compare structures?",
+        ["Direct comparison (same size)", "Window-based comparison (different sizes)"],
+        help="""
+        Direct: Compare selected regions directly (must have same size)
+        Window-based: Generate sliding windows for flexible comparison
+        """
+    )
+    window_size = None
+    window_type = None
+    if comparison_mode == "Window-based comparison (different sizes)":
+        st.sidebar.markdown("**Window Configuration**")
+        window_size = st.sidebar.number_input(
+            "Window Size",
+            min_value=2,
+            max_value=20,
+            value=4,
+            step=1,
+            help="Number of residues per comparison window"
+        )
+        window_type = st.sidebar.radio(
+            "Window Type",
+            ["contiguous", "non-contiguous"],
+            help="Contiguous: sliding windows. Non-contiguous: all combinations"
+        )
+    st.sidebar.markdown("---")
+    st.sidebar.subheader("3️⃣ Run Analysis")
+    if st.sidebar.button("🚀 Run Analysis", type="primary", disabled=not valid_selections):
+        if not valid_selections:
+            st.error("Please fix selection errors before running analysis")
+            return
+        with st.spinner("Analyzing structures..."):
+            if comparison_mode == "Direct comparison (same size)":
+                results_df = compare_structures_with_selection(
+                    reference_files,
+                    query_files,
+                    ref_selections,
+                    query_selections,
+                    temp_dir
+                )
+            else:  # Window-based comparison
+                results_df = compare_structures_with_windows(
+                    reference_files,
+                    query_files,
+                    ref_selections,
+                    query_selections,
+                    window_size,
+                    window_type,
+                    temp_dir
+                )
+            # Store results in session state
+            st.session_state['results_df'] = results_df
+            st.session_state['ref_selections'] = ref_selections
+            st.session_state['query_selections'] = query_selections
+            st.session_state['comparison_mode'] = comparison_mode
+            if len(results_df) > 0:
+                st.success(f"✅ Analysis complete! {len(results_df)} comparisons performed.")
+            else:
+                st.warning("⚠️ No comparisons could be performed. Check that structures meet comparison requirements.")
+    # Display results if available
+    if 'results_df' in st.session_state and len(st.session_state['results_df']) > 0:
+        results_df = st.session_state['results_df']
+        # Add RMSD threshold filter
+        st.sidebar.markdown("---")
+        st.sidebar.subheader("4️⃣ Filter Results")
+        rmsd_threshold = st.sidebar.slider(
+            "RMSD Threshold (Å)",
+            min_value=0.0,
+            max_value=5.0,
+            value=2.0,
+            step=0.1,
+            help="Only show results below this RMSD value"
+        )
+        # Show comparison mode
+        if 'comparison_mode' in st.session_state:
+            mode_display = "Direct" if "Direct" in st.session_state['comparison_mode'] else "Window-based"
+            st.sidebar.info(f"**Mode**: {mode_display}")
+        # Filter by threshold
+        filtered_df = results_df[results_df['RMSD'] <= rmsd_threshold].copy()
+        # Summary statistics
+        st.markdown("---")
+        st.subheader("📊 Summary Statistics")
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            st.metric("Total Comparisons", len(results_df))
+        with col2:
+            st.metric("Below Threshold", len(filtered_df))
+        with col3:
+            st.metric("Best RMSD", f"{results_df['RMSD'].min():.3f} Å")
+        with col4:
+            st.metric("Mean RMSD", f"{results_df['RMSD'].mean():.3f} Å")
+        # Results table
+        st.markdown("---")
+        st.subheader("🔍 Comparison Results")
+        # Prepare display dataframe
+        display_df = filtered_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'Num_Residues', 'RMSD']].copy()
+        display_df = display_df.sort_values('RMSD').reset_index(drop=True)
+        display_df['RMSD'] = display_df['RMSD'].round(3)
+        # Display with selection
+        st.dataframe(
+            display_df,
+            use_container_width=True,
+            height=300
+        )
+        # Structure selection for visualization
+        st.markdown("---")
+        st.subheader("🔬 3D Structure Visualization")
+        if len(filtered_df) > 0:
+            # Select a comparison to visualize
+            selected_idx = st.selectbox(
+                "Select a comparison to visualize:",
+                range(len(filtered_df)),
+                format_func=lambda i: f"{filtered_df.iloc[i]['Reference']}{filtered_df.iloc[i]['Ref_Residues']} ({filtered_df.iloc[i]['Ref_Sequence']}) vs {filtered_df.iloc[i]['Query']}{filtered_df.iloc[i]['Query_Residues']} ({filtered_df.iloc[i]['Query_Sequence']}) | RMSD: {filtered_df.iloc[i]['RMSD']:.3f} Å"
+            )
+            selected_row = filtered_df.iloc[selected_idx]
+            # Display RMSD info
+            st.info(f"**RMSD: {selected_row['RMSD']:.3f} Å** ({selected_row['Num_Residues']} residues) | Reference: {selected_row['Reference']}{selected_row['Ref_Residues']} ({selected_row['Ref_Sequence']}) | Query: {selected_row['Query']}{selected_row['Query_Residues']} ({selected_row['Query_Sequence']})")
+            # Create visualization - wider display
+            col1, col2, col3 = st.columns([0.5, 4, 0.5])
+            with col2:
+                try:
+                    viz_html = create_structure_visualization(
+                        selected_row['Ref_Path'],
+                        selected_row['Query_Path'],
+                        selected_row['Ref_Indices'],
+                        selected_row['Query_Indices'],
+                        selected_row['Rotation_Matrix'],
+                        selected_row['Ref_COM'],
+                        selected_row['Query_COM'],
+                        selected_row['RMSD']
+                    )
+                    st.components.v1.html(viz_html, height=700, scrolling=False)
+                except Exception as e:
+                    st.error(f"Error creating visualization: {str(e)}")
+            # Show transformation details
+            with st.expander("🔧 Transformation Details"):
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.markdown("**Rotation Matrix (U):**")
+                    st.dataframe(
+                        pd.DataFrame(selected_row['Rotation_Matrix']).round(4),
+                        use_container_width=True
+                    )
+                with col2:
+                    st.markdown("**Translation Vectors:**")
+                    st.write(f"Reference COM: [{selected_row['Ref_COM'][0]:.3f}, {selected_row['Ref_COM'][1]:.3f}, {selected_row['Ref_COM'][2]:.3f}]")
+                    st.write(f"Query COM: [{selected_row['Query_COM'][0]:.3f}, {selected_row['Query_COM'][1]:.3f}, {selected_row['Query_COM'][2]:.3f}]")
+            # Download aligned structures
+            with st.expander("💾 Download Structure Files"):
+                st.markdown("**Download extracted and aligned structures for external visualization**")
+                from visualization import extract_window_pdb, transform_pdb_string
+                # Extract reference window
+                ref_pdb = extract_window_pdb(
+                    selected_row['Ref_Path'],
+                    selected_row['Ref_Indices']
+                )
+                # Extract and transform query window
+                query_pdb = extract_window_pdb(
+                    selected_row['Query_Path'],
+                    selected_row['Query_Indices']
+                )
+                query_aligned_pdb = transform_pdb_string(
+                    query_pdb,
+                    selected_row['Rotation_Matrix'],
+                    selected_row['Query_COM'],
+                    selected_row['Ref_COM']
+                )
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    # Reference structure
+                    ref_filename = f"ref_{selected_row['Reference'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Ref_Indices']]))}.pdb"
+                    st.download_button(
+                        label="📥 Reference PDB",
+                        data=ref_pdb,
+                        file_name=ref_filename,
+                        mime="chemical/x-pdb",
+                        help="Original reference structure (selected residues only)"
+                    )
+                with col2:
+                    # Query structure (original position)
+                    query_filename = f"query_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Indices']]))}.pdb"
+                    st.download_button(
+                        label="📥 Query PDB (Original)",
+                        data=query_pdb,
+                        file_name=query_filename,
+                        mime="chemical/x-pdb",
+                        help="Original query structure (selected residues only)"
+                    )
+                with col3:
+                    # Query structure (aligned)
+                    query_aligned_filename = f"query_aligned_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Indices']]))}.pdb"
+                    st.download_button(
+                        label="📥 Query PDB (Aligned)",
+                        data=query_aligned_pdb,
+                        file_name=query_aligned_filename,
+                        mime="chemical/x-pdb",
+                        help="Query structure aligned to reference"
+                    )
+                st.info("💡 **Tip:** Load reference and aligned query together in PyMOL/Chimera to examine the superposition")
+        else:
+            st.warning("No comparisons below the RMSD threshold. Try increasing the threshold.")
+        # Download results
+        st.markdown("---")
+        st.subheader("💾 Export Results")
+        col1, col2 = st.columns(2)
+        with col1:
+            st.markdown("**Export Results Table**")
+            # Prepare CSV - make sure all columns exist
+            export_columns = ['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'Num_Residues', 'RMSD']
+            export_df = results_df[export_columns].copy()
+            export_df = export_df.sort_values('RMSD').reset_index(drop=True)
+            csv = export_df.to_csv(index=False)
+            st.download_button(
+                label="📥 Download Results (CSV)",
+                data=csv,
+                file_name="rna_motif_comparison_results.csv",
+                mime="text/csv"
+            )
+        with col2:
+            st.markdown("**Export All Aligned Structures**")
+            if st.button("📦 Generate PDB Archive", help="Create a ZIP file with all aligned structure pairs"):
+                with st.spinner("Generating PDB files..."):
+                    import zipfile
+                    import io
+                    from visualization import extract_window_pdb, transform_pdb_string
+                    # Create ZIP file in memory
+                    zip_buffer = io.BytesIO()
+                    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+                        # Process each comparison
+                        for idx, row in filtered_df.iterrows():
+                            # Create a directory name for this comparison
+                            comp_name = f"comparison_{idx:03d}_rmsd_{row['RMSD']:.3f}"
+                            # Extract reference
+                            ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Indices'])
+                            ref_filename = f"{comp_name}/reference_{row['Reference'].replace('.pdb', '')}.pdb"
+                            zip_file.writestr(ref_filename, ref_pdb)
+                            # Extract query (original)
+                            query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Indices'])
+                            query_filename = f"{comp_name}/query_original_{row['Query'].replace('.pdb', '')}.pdb"
+                            zip_file.writestr(query_filename, query_pdb)
+                            # Extract and align query
+                            query_aligned_pdb = transform_pdb_string(
+                                query_pdb,
+                                row['Rotation_Matrix'],
+                                row['Query_COM'],
+                                row['Ref_COM']
+                            )
+                            query_aligned_filename = f"{comp_name}/query_aligned_{row['Query'].replace('.pdb', '')}.pdb"
+                            zip_file.writestr(query_aligned_filename, query_aligned_pdb)
+                            # Add a README for this comparison
+                            readme_content = f"""Comparison #{idx}
+RMSD: {row['RMSD']:.3f} Å
+Residues Compared: {row['Num_Residues']}
+Reference:
+  File: {row['Reference']}
+  Residues: {row['Ref_Residues']}
+  Sequence: {row['Ref_Sequence']}
+Query:
+  File: {row['Query']}
+  Residues: {row['Query_Residues']}
+  Sequence: {row['Query_Sequence']}
+Files:
+  - reference_*.pdb: Reference structure (selected residues)
+  - query_original_*.pdb: Query structure (original position)
+  - query_aligned_*.pdb: Query structure (aligned to reference)
+To visualize in PyMOL:
+  load reference_*.pdb
+  load query_aligned_*.pdb
+To visualize in Chimera:
+  File → Open → Select both reference and query_aligned PDB files
+"""
+                            readme_filename = f"{comp_name}/README.txt"
+                            zip_file.writestr(readme_filename, readme_content)
+                    zip_buffer.seek(0)
+                    st.download_button(
+                        label="📥 Download PDB Archive (ZIP)",
+                        data=zip_buffer.getvalue(),
+                        file_name="aligned_structures.zip",
+                        mime="application/zip",
+                        help=f"Contains {len(filtered_df)} comparison sets with reference, original query, and aligned query PDBs"
+                    )
+                    st.success(f"✅ Archive ready! Contains {len(filtered_df)} comparisons with 3 PDB files each.")
+if __name__ == "__main__":
+    main()

rmsd_utils.py ADDED Viewed

	@@ -0,0 +1,294 @@

+"""
+RMSD Calculation Utilities for RNA Structure Comparison
+Fixed version with explicit purine-pyrimidine atom mapping
+"""
+import numpy as np
def parse_residue_atoms(fname):
    """
    Read a PDB file and group its atoms into per-residue records.

    Only lines whose record field (columns 1-6) is ATOM or HETATM are used.
    Residues are keyed by residue number alone (columns 23-26), so records
    that share a number are merged into one entry; the residue name is taken
    from the first record seen for that number, and a repeated atom name
    overwrites the earlier coordinates.

    Args:
        fname: Path to PDB file
    Returns:
        List of residue dicts in ascending residue-number order, each with:
        - 'resnum': residue number (int)
        - 'resname': residue name as written in columns 18-20
        - 'atoms': dict of {atom_name: [x, y, z]}
    """
    atom_records = ('ATOM', 'HETATM', 'HETAT')
    # Fixed-width column slices holding the x, y and z coordinates
    xyz_columns = ((30, 38), (38, 46), (46, 54))
    by_number = {}
    with open(fname) as handle:
        pdb_lines = handle.readlines()
    for pdb_line in pdb_lines:
        if pdb_line[0:6].strip() not in atom_records:
            continue
        atom_label = pdb_line[12:16].strip()
        residue_label = pdb_line[17:20].strip()
        number = int(pdb_line[22:26].strip())
        position = [float(pdb_line[lo:hi].strip()) for lo, hi in xyz_columns]
        # First sighting of a residue number creates its record
        entry = by_number.setdefault(number, {
            'resnum': number,
            'resname': residue_label,
            'atoms': {}
        })
        entry['atoms'][atom_label] = position
    # Emit residues ordered by residue number
    return [by_number[number] for number in sorted(by_number)]
def get_backbone_sugar_coords_from_residue(residue):
    """
    Collect the backbone and sugar atom coordinates of one residue.

    Atoms are returned in a fixed order (phosphate group first, then the
    ribose atoms). Atoms absent from the residue are skipped, so the result
    may hold fewer than twelve entries.

    Args:
        residue: Dict with 'atoms' key containing atom coordinates
    Returns:
        List of [x, y, z] coordinates in consistent order
    """
    ordered_names = (
        "P", "OP1", "OP2",
        "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'",
    )
    atom_table = residue['atoms']
    return [atom_table[name] for name in ordered_names if name in atom_table]
def get_base_coords_from_residue(residue):
    """
    Collect the three anchor base-atom coordinates of one residue.

    The atoms are listed so that position i of a purine corresponds to
    position i of a pyrimidine (N9 <-> N1, C8 <-> C2, C4 <-> C6):
    - Purines (A, G): N9, C8, C4
    - Pyrimidines (C, U): N1, C2, C6
    Atoms absent from the residue are skipped; a residue name other than
    A, G, C or U yields an empty list.

    Args:
        residue: Dict with 'resname' and 'atoms' keys
    Returns:
        List of [x, y, z] coordinates
    """
    base_type = residue['resname']
    atom_table = residue['atoms']
    families = (
        (('A', 'G'), ('N9', 'C8', 'C4')),  # Purines
        (('C', 'U'), ('N1', 'C2', 'C6')),  # Pyrimidines
    )
    for members, anchor_names in families:
        if base_type in members:
            return [atom_table[name] for name in anchor_names if name in atom_table]
    # Unknown residue type
    return []
def get_backbone_sugar_and_selectbase_coords_fixed(fname):
    """
    Build the coordinate array used for superposition from a PDB file.

    Residues are visited in the order returned by parse_residue_atoms. For
    each one, the backbone/sugar atoms come first, followed by the three
    anchor base atoms:
       - Purines (A, G): N9, C8, C4
       - Pyrimidines (C, U): N1, C2, C6
    Keeping this order lets structures with different sequences be compared
    atom by atom (N9<->N1, C8<->C2, C4<->C6).

    Args:
        fname: Path to PDB file
    Returns:
        Numpy array of coordinates
    """
    collected = []
    for entry in parse_residue_atoms(fname):
        # Backbone and sugar atoms first, then the base anchors
        collected += get_backbone_sugar_coords_from_residue(entry)
        collected += get_base_coords_from_residue(entry)
    return np.asarray(collected)
def calculate_COM(coords):
    """
    Return the unweighted centroid of a set of points.

    Every point counts equally; no atomic masses are involved.

    Args:
        coords: Numpy array of shape (N, 3)
    Returns:
        Numpy array of shape (3,) representing the center of mass
    """
    n_points = coords.shape[0]
    column_totals = np.sum(coords, axis=0)
    return column_totals / float(n_points)
def calculate_rotation_rmsd(coords1, coords2, COM1, COM2):
    """
    Calculate rotation matrix and RMSD using Kabsch algorithm.

    Both coordinate sets are centred on their own COM. The returned matrix U
    is meant to be applied to row vectors of structure 2, i.e.
    ``np.dot(coords2 - COM2, U)`` superimposes structure 2 onto
    ``coords1 - COM1``. U is always a proper rotation (determinant +1).

    Args:
        coords1: Coordinates of structure 1 (N, 3)
        coords2: Coordinates of structure 2 (N, 3)
        COM1: Center of mass of structure 1 (3,)
        COM2: Center of mass of structure 2 (3,)
    Returns:
        U: Rotation matrix (3, 3)
        RMSD: Root mean square deviation (float)
        Both are None when the two sets hold a different number of points.
    Raises:
        AssertionError: If the coordinate sets are empty.
    """
    sel1 = coords1 - COM1
    sel2 = coords2 - COM2
    # Check for consistency
    if len(sel1) != len(sel2):
        return None, None
    L = len(sel1)
    if L == 0:
        # Raised explicitly (same exception type as the former assert) so the
        # check still runs when Python is started with -O.
        raise AssertionError("cannot superimpose empty coordinate sets")
    # Initial residual, see Kabsch.
    R0 = np.sum(sel1 * sel1) + np.sum(sel2 * sel2)
    # Calculate the components of the rotation matrix (V,W)
    # S is used to calculate the error (RMSD)
    V, S, W = np.linalg.svd(np.dot(sel2.T, sel1))
    # V and W are orthogonal, so det(V) * det(W) is +1 or -1 only up to
    # rounding error. Test the sign instead of comparing with exactly -1.0,
    # otherwise the reflection correction can be silently skipped.
    if np.linalg.det(V) * np.linalg.det(W) < 0.0:
        S[-1] = -S[-1]
        V[:, -1] = -V[:, -1]
    U = np.dot(V, W)
    # Calculate the RMSD using sigma from the SVD calculation above;
    # abs() guards against a tiny negative residual caused by rounding.
    RMSD = np.sqrt(abs((R0 - 2.0 * np.sum(S)) / L))
    return U, RMSD
def translate_rotate_coords(coords, COM, U=None):
    """
    Shift coordinates by a centre and, when a matrix is given, rotate them.

    The shift subtracts COM from every point; the rotation right-multiplies
    the shifted row vectors by U.

    Args:
        coords: Coordinates to transform (N, 3)
        COM: Center of mass to translate by (3,)
        U: Rotation matrix (3, 3), optional
    Returns:
        Transformed coordinates (N, 3)
    """
    centered = coords - COM
    # Without a rotation matrix only the translation is applied
    return centered if U is None else np.dot(centered, U)
def get_all_atom_coords(fname):
    """
    Read the coordinates of every ATOM/HETATM record in a PDB file.

    Records are returned in file order, with no grouping by residue.

    Args:
        fname: Path to PDB file
    Returns:
        Numpy array of coordinates (N, 3)
    """
    atom_records = ('ATOM', 'HETATM', 'HETAT')
    # Fixed-width column slices holding the x, y and z coordinates
    xyz_columns = ((30, 38), (38, 46), (46, 54))
    with open(fname) as handle:
        pdb_lines = handle.readlines()
    positions = [
        [float(pdb_line[lo:hi].strip()) for lo, hi in xyz_columns]
        for pdb_line in pdb_lines
        if pdb_line[0:6].strip() in atom_records
    ]
    return np.asarray(positions)
def apply_transformation_to_pdb(fname, U, COM, output_fname):
    """
    Write a copy of a PDB file with transformed atom coordinates.

    Each ATOM/HETATM record has COM subtracted from its coordinates and the
    result right-multiplied by U; the new values are written back into
    columns 31-54 with three decimals. All other lines are copied unchanged.
    The input is read completely before the output file is opened.

    Args:
        fname: Input PDB file path
        U: Rotation matrix (3, 3)
        COM: Center of mass to translate from (3,)
        output_fname: Output PDB file path
    """
    atom_records = ('ATOM', 'HETATM', 'HETAT')
    # Fixed-width column slices holding the x, y and z coordinates
    xyz_columns = ((30, 38), (38, 46), (46, 54))
    with open(fname) as source:
        pdb_lines = source.readlines()
    with open(output_fname, 'w') as target:
        for pdb_line in pdb_lines:
            if pdb_line[0:6].strip() not in atom_records:
                # Non-coordinate records pass through untouched
                target.write(pdb_line)
                continue
            position = np.array([float(pdb_line[lo:hi].strip()) for lo, hi in xyz_columns])
            moved = np.dot((position - COM), U)
            # Splice the new coordinates between the untouched line ends
            coordinate_text = "".join(f"{moved[axis]:8.3f}" for axis in range(3))
            target.write(pdb_line[:30] + coordinate_text + pdb_line[54:])

visualization.py ADDED Viewed

	@@ -0,0 +1,673 @@

+"""
+3D Visualization Module for RNA Structure Comparison
+Uses py3Dmol for interactive molecular visualization
+"""
+import numpy as np
+from rmsd_utils import (
+    parse_residue_atoms,
+    translate_rotate_coords,
+    calculate_COM,
+    get_backbone_sugar_and_selectbase_coords_fixed
+)
+def create_structure_visualization(ref_path, query_path, ref_window_indices, query_window_indices,
+                                   rotation_matrix, ref_com, query_com, rmsd=None):
+    """
+    Create an interactive 3D visualization of aligned structures.
+    Args:
+        ref_path: Path to reference motif PDB file
+        query_path: Path to query motif PDB file
+        ref_window_indices: List of residue indices for the reference window
+        query_window_indices: List of residue indices for the query window
+        rotation_matrix: Rotation matrix from RMSD calculation
+        ref_com: Center of mass of reference window
+        query_com: Center of mass of query window
+        rmsd: RMSD value (optional, for display)
+    Returns:
+        HTML string containing the py3Dmol visualization
+    """
+    # Read PDB files
+    with open(ref_path) as f:
+        ref_pdb = f.read()
+    with open(query_path) as f:
+        query_pdb_full = f.read()
+    # Extract only the window residues from both structures
+    ref_residues = parse_residue_atoms(ref_path)
+    query_residues = parse_residue_atoms(query_path)
+    ref_window_pdb = extract_window_pdb(ref_path, ref_window_indices)
+    query_window_pdb = extract_window_pdb(query_path, query_window_indices)
+    # Parse window coordinates for transformation
+    from rmsd_utils import get_backbone_sugar_coords_from_residue, get_base_coords_from_residue
+    ref_window_coords = []
+    for idx in ref_window_indices:
+        if idx < len(ref_residues):
+            residue = ref_residues[idx]
+            backbone_coords = get_backbone_sugar_coords_from_residue(residue)
+            ref_window_coords.extend(backbone_coords)
+            base_coords = get_base_coords_from_residue(residue)
+            ref_window_coords.extend(base_coords)
+    ref_window_coords = np.asarray(ref_window_coords)
+    query_window_coords = []
+    for idx in query_window_indices:
+        if idx < len(query_residues):
+            residue = query_residues[idx]
+            backbone_coords = get_backbone_sugar_coords_from_residue(residue)
+            query_window_coords.extend(backbone_coords)
+            base_coords = get_base_coords_from_residue(residue)
+            query_window_coords.extend(base_coords)
+    query_window_coords = np.asarray(query_window_coords)
+    # Transform query window to align with reference window
+    # Proper alignment: translate to origin, rotate, translate to reference position
+    # Note: We need both query_com and ref_com for proper alignment
+    transformed_query_pdb = transform_pdb_string(
+        query_window_pdb,
+        rotation_matrix,
+        query_com,
+        ref_com  # Add reference COM for proper alignment
+    )
+    # Create py3Dmol visualization
+    html = f"""
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
+        <style>
+            #container {{
+                width: 100%;
+                height: 700px;
+                position: relative;
+                border: 1px solid #ddd;
+            }}
+            .control-panel {{
+                position: absolute;
+                top: 10px;
+                right: 10px;
+                background: rgba(255, 255, 255, 0.95);
+                padding: 15px;
+                border-radius: 8px;
+                font-family: Arial, sans-serif;
+                font-size: 13px;
+                z-index: 1000;
+                box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+                max-width: 220px;
+            }}
+            .control-panel h4 {{
+                margin: 0 0 10px 0;
+                font-size: 14px;
+                color: #333;
+            }}
+            .control-section {{
+                margin-bottom: 12px;
+                padding-bottom: 12px;
+                border-bottom: 1px solid #eee;
+            }}
+            .control-section:last-child {{
+                border-bottom: none;
+                margin-bottom: 0;
+            }}
+            .control-section label {{
+                display: block;
+                margin: 6px 0;
+                cursor: pointer;
+            }}
+            .control-section input[type="checkbox"] {{
+                margin-right: 8px;
+            }}
+            .control-section select {{
+                width: 100%;
+                padding: 4px;
+                margin-top: 5px;
+                border: 1px solid #ccc;
+                border-radius: 4px;
+            }}
+            .legend {{
+                position: absolute;
+                top: 10px;
+                left: 10px;
+                background: rgba(255, 255, 255, 0.95);
+                padding: 15px;
+                border-radius: 8px;
+                font-family: Arial, sans-serif;
+                font-size: 13px;
+                z-index: 1000;
+                box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+            }}
+            .legend h4 {{
+                margin: 0 0 10px 0;
+                font-size: 14px;
+                color: #333;
+            }}
+            .legend-item {{
+                margin: 6px 0;
+                display: flex;
+                align-items: center;
+            }}
+            .color-box {{
+                width: 24px;
+                height: 16px;
+                margin-right: 10px;
+                border: 1px solid #333;
+                border-radius: 2px;
+            }}
+            .rmsd-info {{
+                position: absolute;
+                bottom: 10px;
+                left: 10px;
+                background: rgba(255, 255, 255, 0.95);
+                padding: 10px 15px;
+                border-radius: 8px;
+                font-family: Arial, sans-serif;
+                font-size: 13px;
+                z-index: 1000;
+                box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+            }}
+            .section-title {{
+                font-weight: bold;
+                color: #555;
+                margin-bottom: 5px;
+                font-size: 12px;
+                text-transform: uppercase;
+            }}
+        </style>
+    </head>
+    <body>
+        <div id="container"></div>
+        <div class="legend">
+            <h4>🧬 Structures</h4>
+            <div class="legend-item">
+                <div class="color-box" style="background: #4A90E2;"></div>
+                <span>Reference</span>
+            </div>
+            <div class="legend-item">
+                <div class="color-box" style="background: #E94B3C;"></div>
+                <span>Query (Aligned)</span>
+            </div>
+        </div>
+        <div class="control-panel">
+            <h4>⚙️ Display Options</h4>
+            <div class="control-section">
+                <div class="section-title">Structures</div>
+                <label>
+                    <input type="checkbox" id="showRef" checked onchange="updateDisplay()">
+                    Reference
+                </label>
+                <label>
+                    <input type="checkbox" id="showQuery" checked onchange="updateDisplay()">
+                    Query
+                </label>
+            </div>
+            <div class="control-section">
+                <div class="section-title">Style</div>
+                <select id="styleMode" onchange="updateDisplay()">
+                    <option value="sticks">Sticks</option>
+                    <option value="cartoon">Cartoon</option>
+                    <option value="spheres">Spheres</option>
+                    <option value="lines">Lines</option>
+                    <option value="cartoon_sticks">Cartoon + Sticks</option>
+                </select>
+            </div>
+            <div class="control-section">
+                <div class="section-title">Components</div>
+                <label>
+                    <input type="checkbox" id="showBackbone" checked onchange="updateDisplay()">
+                    Backbone/Sugar
+                </label>
+                <label>
+                    <input type="checkbox" id="showBases" checked onchange="updateDisplay()">
+                    Bases
+                </label>
+            </div>
+            <div class="control-section">
+                <div class="section-title">Labels</div>
+                <label>
+                    <input type="checkbox" id="showLabels" onchange="updateDisplay()">
+                    Residue Labels
+                </label>
+                <label>
+                    <input type="checkbox" id="showNumbers" onchange="updateDisplay()">
+                    Residue Numbers
+                </label>
+                <label>
+                    <input type="checkbox" id="showAtoms" onchange="updateDisplay()">
+                    Atom Names
+                </label>
+                <select id="atomLabelMode" style="margin-top: 5px; font-size: 11px;" onchange="updateDisplay()">
+                    <option value="all">All Atoms</option>
+                    <option value="backbone">Backbone Only</option>
+                    <option value="sidechain">Bases Only</option>
+                </select>
+            </div>
+            <div class="control-section">
+                <div class="section-title">Background</div>
+                <select id="bgColor" onchange="updateBackground()">
+                    <option value="white">White</option>
+                    <option value="black">Black</option>
+                    <option value="gray">Gray</option>
+                </select>
+            </div>
+        </div>
+        <div class="rmsd-info">
+            <strong>RMSD:</strong> <span style="color: #E94B3C; font-weight: bold;">{f"{rmsd:.3f}" if rmsd is not None else "N/A"} Å</span>
+        </div>
+        <script>
+            let viewer = null;
+            let refModel = null;
+            let queryModel = null;
+            const refPDB = `{ref_window_pdb}`;
+            const queryPDB = `{transformed_query_pdb}`;
+            // RNA backbone atoms
+            const backboneAtoms = ['P', 'OP1', 'OP2', "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"];
+            function initViewer() {{
+                try {{
+                    viewer = $3Dmol.createViewer("container", {{
+                        backgroundColor: 'white'
+                    }});
+                    if (!refPDB || refPDB.length < 10) {{
+                        throw new Error("Reference PDB data is empty");
+                    }}
+                    if (!queryPDB || queryPDB.length < 10) {{
+                        throw new Error("Query PDB data is empty");
+                    }}
+                    updateDisplay();
+                    viewer.zoomTo();
+                    viewer.render();
+                }} catch (error) {{
+                    console.error("Error initializing viewer:", error);
+                    document.getElementById("container").innerHTML =
+                        '<div style="padding: 20px; color: red; text-align: center;">Error loading visualization: ' + error.message + '</div>';
+                }}
+            }}
+            function updateBackground() {{
+                const bgColor = document.getElementById('bgColor').value;
+                viewer.setBackgroundColor(bgColor);
+                viewer.render();
+            }}
+            function updateDisplay() {{
+                if (!viewer) return;
+                try {{
+                    // Clear everything
+                    viewer.removeAllModels();
+                    viewer.removeAllLabels();
+                    const showRef = document.getElementById('showRef').checked;
+                    const showQuery = document.getElementById('showQuery').checked;
+                    const showBackbone = document.getElementById('showBackbone').checked;
+                    const showBases = document.getElementById('showBases').checked;
+                    const showLabels = document.getElementById('showLabels').checked;
+                    const showNumbers = document.getElementById('showNumbers').checked;
+                    const showAtoms = document.getElementById('showAtoms').checked;
+                    const styleMode = document.getElementById('styleMode').value;
+                    // Reference structure (blue)
+                    if (showRef) {{
+                        refModel = viewer.addModel(refPDB, "pdb");
+                        applyStyle(refModel, '#4A90E2', '#5BA3F5', styleMode, showBackbone, showBases);
+                        if (showLabels || showNumbers) {{
+                            addResidueLabels(refModel, '#4A90E2', showLabels, showNumbers);
+                        }}
+                        if (showAtoms) {{
+                            addAtomLabels(refModel, '#4A90E2');
+                        }}
+                    }}
+                    // Query structure (red)
+                    if (showQuery) {{
+                        queryModel = viewer.addModel(queryPDB, "pdb");
+                        applyStyle(queryModel, '#E94B3C', '#FF6B6B', styleMode, showBackbone, showBases);
+                        if (showLabels || showNumbers) {{
+                            addResidueLabels(queryModel, '#E94B3C', showLabels, showNumbers);
+                        }}
+                        if (showAtoms) {{
+                            addAtomLabels(queryModel, '#E94B3C');
+                        }}
+                    }}
+                    viewer.zoomTo();
+                    viewer.render();
+                }} catch (error) {{
+                    console.error("Error updating display:", error);
+                }}
+            }}
+            function applyStyle(model, backboneColor, baseColor, styleMode, showBackbone, showBases) {{
+                // Clear any existing styles
+                viewer.setStyle({{model: model}}, {{}});
+                if (styleMode === 'cartoon') {{
+                    // Cartoon representation
+                    viewer.setStyle({{model: model}}, {{
+                        cartoon: {{
+                            color: backboneColor,
+                            thickness: 0.5,
+                            opacity: 0.8
+                        }}
+                    }});
+                }} else if (styleMode === 'cartoon_sticks') {{
+                    // Cartoon + sticks for bases
+                    viewer.setStyle({{model: model}}, {{
+                        cartoon: {{
+                            color: backboneColor,
+                            thickness: 0.5,
+                            opacity: 0.7
+                        }}
+                    }});
+                    if (showBases) {{
+                        viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
+                            stick: {{
+                                color: baseColor,
+                                radius: 0.15
+                            }}
+                        }});
+                    }}
+                }} else if (styleMode === 'spheres') {{
+                    // Sphere representation
+                    if (showBackbone) {{
+                        viewer.setStyle({{model: model, atom: backboneAtoms}}, {{
+                            sphere: {{
+                                color: backboneColor,
+                                radius: 0.4
+                            }}
+                        }});
+                    }}
+                    if (showBases) {{
+                        viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
+                            sphere: {{
+                                color: baseColor,
+                                radius: 0.35
+                            }}
+                        }});
+                    }}
+                }} else if (styleMode === 'lines') {{
+                    // Line representation
+                    if (showBackbone) {{
+                        viewer.setStyle({{model: model, atom: backboneAtoms}}, {{
+                            line: {{
+                                color: backboneColor,
+                                linewidth: 2
+                            }}
+                        }});
+                    }}
+                    if (showBases) {{
+                        viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
+                            line: {{
+                                color: baseColor,
+                                linewidth: 2
+                            }}
+                        }});
+                    }}
+                }} else {{
+                    // Stick representation (default)
+                    if (showBackbone) {{
+                        viewer.setStyle({{model: model, atom: backboneAtoms}}, {{
+                            stick: {{
+                                color: backboneColor,
+                                radius: 0.2
+                            }},
+                            sphere: {{
+                                color: backboneColor,
+                                radius: 0.3
+                            }}
+                        }});
+                    }}
+                    if (showBases) {{
+                        viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
+                            stick: {{
+                                color: baseColor,
+                                radius: 0.15
+                            }},
+                            sphere: {{
+                                color: baseColor,
+                                radius: 0.25
+                            }}
+                        }});
+                    }}
+                }}
+            }}
+            function addResidueLabels(model, color, showLabels, showNumbers) {{
+                const atoms = viewer.selectedAtoms({{model: model}});
+                const residues = {{}};
+                // Group atoms by residue
+                atoms.forEach(atom => {{
+                    const key = atom.chain + '_' + atom.resi;
+                    if (!residues[key]) {{
+                        residues[key] = atom;
+                    }}
+                }});
+                // Add labels for each residue
+                Object.values(residues).forEach(atom => {{
+                    let labelText = '';
+                    if (showLabels && showNumbers) {{
+                        labelText = atom.resn + atom.resi;
+                    }} else if (showLabels) {{
+                        labelText = atom.resn;
+                    }} else if (showNumbers) {{
+                        labelText = atom.resi.toString();
+                    }}
+                    if (labelText) {{
+                        viewer.addLabel(labelText, {{
+                            position: atom,
+                            backgroundColor: color,
+                            backgroundOpacity: 0.7,
+                            fontColor: 'white',
+                            fontSize: 11,
+                            fontWeight: 'bold',
+                            showBackground: true,
+                            borderRadius: 3
+                        }});
+                    }}
+                }});
+            }}
+            function addAtomLabels(model, color) {{
+                const atomLabelMode = document.getElementById('atomLabelMode').value;
+                const atoms = viewer.selectedAtoms({{model: model}});
+                // Filter atoms based on mode
+                let filteredAtoms = atoms;
+                if (atomLabelMode === 'backbone') {{
+                    // Only backbone atoms
+                    filteredAtoms = atoms.filter(atom => backboneAtoms.includes(atom.atom));
+                }} else if (atomLabelMode === 'sidechain') {{
+                    // Only base/sidechain atoms (not backbone)
+                    filteredAtoms = atoms.filter(atom => !backboneAtoms.includes(atom.atom));
+                }}
+                // 'all' mode uses all atoms (no filtering)
+                // Add label for each atom
+                filteredAtoms.forEach(atom => {{
+                    // Use atom name (e.g., P, C1', N1, O4, etc.)
+                    const atomName = atom.atom;
+                    viewer.addLabel(atomName, {{
+                        position: atom,
+                        backgroundColor: color,
+                        backgroundOpacity: 0.6,
+                        fontColor: 'white',
+                        fontSize: 9,
+                        fontWeight: 'normal',
+                        showBackground: true,
+                        borderRadius: 2,
+                        borderThickness: 0.5
+                    }});
+                }});
+            }}
+            // Initialize on load
+            initViewer();
+        </script>
+    </body>
+    </html>
+    """
+    return html
def extract_window_pdb(pdb_path, window_indices):
    """
    Extract specific residues from a PDB file based on window indices.
    Each window index is a position in the residue list returned by
    parse_residue_atoms(); valid positions are mapped to that residue's
    'resnum' and every ATOM/HETATM line whose residue-number field
    (columns 23-26) matches is kept. Matching uses the residue number only;
    the chain ID column is not compared. HEADER, TITLE, MODEL and ENDMDL
    records are copied through and a single END record terminates the output.
    Args:
        pdb_path: Path to PDB file
        window_indices: List of residue indices (0-based); indices outside
            the residue list (including negative ones) are ignored
    Returns:
        String containing PDB data for only the specified residues. The full,
        unmodified file content is returned instead when the residues cannot
        be parsed, no index is valid, or no atom line matches.
    """
    with open(pdb_path) as f:
        lines = f.readlines()
    full_text = ''.join(lines)
    # Get all residue numbers from the file
    residues = parse_residue_atoms(pdb_path)
    if not residues:
        # If parsing failed, return original file
        return full_text
    # NOTE(review): 'resnum' is compared against int() of the PDB column
    # below, so it is presumably an int - confirm in rmsd_utils.
    residue_numbers = [res['resnum'] for res in residues]
    # Map window indices to actual residue numbers. The lower bound matters:
    # a negative index would otherwise wrap around to the end of the list.
    target_resnums = {
        residue_numbers[idx]
        for idx in window_indices
        if 0 <= idx < len(residue_numbers)
    }
    if not target_resnums:
        # If no valid residues, return original file
        return full_text
    # Extract lines for these residues
    window_lines = []
    kept_atoms = 0
    for line in lines:
        if len(line) < 6:
            continue
        record = line[0:6].strip()
        if record in ('ATOM', 'HETATM', 'HETAT'):
            try:
                # A blank or non-numeric residue field raises ValueError
                resnum = int(line[22:26])
            except ValueError:
                continue
            if resnum in target_resnums:
                window_lines.append(line)
                kept_atoms += 1
        elif record in ('HEADER', 'TITLE', 'MODEL', 'ENDMDL'):
            window_lines.append(line)
    # Always terminate with an END record. END records are never copied by
    # the loop above, so none can already be present; a substring test for
    # 'END' would be fooled by ENDMDL records or by header/title text.
    if window_lines:
        if not window_lines[-1].endswith('\n'):
            # Last line of a file without trailing newline: keep END separate
            window_lines[-1] += '\n'
        window_lines.append('END\n')
    result = ''.join(window_lines)
    # Debug: print info about extraction
    if kept_atoms == 0 or len(result) < 50:
        print(f"Warning: Empty or very small PDB extracted from {pdb_path}")
        print(f"  Window indices: {window_indices}")
        print(f"  Target residue numbers: {target_resnums}")
        print(f"  Result length: {len(result)}")
        # Return full structure if extraction failed
        return full_text
    return result
def transform_pdb_string(pdb_string, rotation_matrix, query_com, ref_com=None):
    """
    Rewrite the coordinates in a PDB string so the query overlays the reference.
    Every ATOM/HETATM line of at least 54 characters has its x/y/z columns
    (characters 31-54) replaced by
        (coord - query_com) @ rotation_matrix + ref_com
    i.e. the query is centred on the origin, rotated, then moved onto the
    reference centre. All other lines, and atom lines whose coordinates
    cannot be parsed or transformed, are passed through untouched.
    Args:
        pdb_string: PDB format string
        rotation_matrix: 3x3 rotation matrix
        query_com: Center of mass of query structure (to translate FROM)
        ref_com: Center of mass of reference structure (to translate TO);
            when omitted the rotated structure is left centred at the origin
    Returns:
        Transformed PDB string with aligned coordinates
    """
    # No reference centre given: the final translation becomes a no-op.
    target_com = np.array([0.0, 0.0, 0.0]) if ref_com is None else ref_com
    atom_records = ['ATOM', 'HETATM', 'HETAT']

    def _realign(line):
        """Return `line` with its coordinate columns moved into the reference frame."""
        if len(line) < 54 or line[0:6].strip() not in atom_records:
            return line
        try:
            # Fixed-width coordinate fields start at offsets 30, 38 and 46
            xyz = np.array([float(line[start:start + 8].strip()) for start in (30, 38, 46)])
            shifted = xyz - query_com  # Move query to origin
            moved = np.dot(shifted, rotation_matrix) + target_com  # Rotate, then place
            coord_text = ''.join(f"{moved[axis]:8.3f}" for axis in range(3))
            return line[:30] + coord_text + line[54:]
        except (ValueError, IndexError):
            # Unparseable or untransformable record: keep it exactly as it was
            return line

    return '\n'.join(_realign(line) for line in pdb_string.split('\n'))