Spaces:
Sleeping
Sleeping
| """ | |
| RNA Motif Structure Comparison Tool | |
| Streamlit app for comparing RNA motif structures with flexible residue selection | |
| """ | |
| import streamlit as st | |
| import numpy as np | |
| import pandas as pd | |
| from pathlib import Path | |
| import io | |
| import tempfile | |
| import os | |
| # Import our RMSD calculation functions | |
| from rmsd_utils import ( | |
| parse_residue_atoms, | |
| get_backbone_sugar_and_selectbase_coords_fixed, | |
| calculate_COM, | |
| calculate_rotation_rmsd, | |
| translate_rotate_coords | |
| ) | |
| from visualization import create_structure_visualization | |
# Page configuration: browser tab title/icon and a wide layout with the
# sidebar opened by default.
st.set_page_config(
    page_title="RNA Motif Structure Comparison",
    # The icon was a mis-decoded byte sequence; the UTF-8 bytes correspond to
    # the DNA emoji.
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS: classes used by the header markup emitted in main()
# (.main-header, .sub-header) plus a generic .metric-box style.
st.markdown("""
<style>
.main-header {
    font-size: 2.5rem;
    font-weight: bold;
    color: #1f77b4;
    margin-bottom: 1rem;
}
.sub-header {
    font-size: 1.2rem;
    color: #666;
    margin-bottom: 2rem;
}
.metric-box {
    background-color: #f0f2f6;
    padding: 1rem;
    border-radius: 0.5rem;
    margin: 0.5rem 0;
}
</style>
""", unsafe_allow_html=True)
def save_uploaded_file(uploaded_file, directory):
    """
    Save an uploaded file into a directory and return the saved path.

    Only the final path component of ``uploaded_file.name`` is used, so a
    client-supplied name that contains directory parts (for example
    ``../x.pdb``) cannot be written outside ``directory``.

    NOTE: an identical function with the same name is defined again later in
    this file; at import time the later definition is the one that stays bound.

    Args:
        uploaded_file: Object exposing ``.name`` (str) and ``.getbuffer()``
            (bytes-like content), as used by the callers in this file.
        directory: Existing directory to write into.

    Returns:
        Path of the written file inside ``directory``.
    """
    # The name comes from the uploading client, so never trust its directory part.
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
def get_structure_info(pdb_path):
    """
    Summarise the residues found in a PDB file.

    The file is read with ``parse_residue_atoms``; each parsed residue is
    expected to expose ``'resnum'`` and ``'resname'`` entries, which are copied
    into the summary together with the residue's position in the parsed list.

    Args:
        pdb_path: Path to PDB file

    Returns:
        List of dicts, one per residue, with keys ``index`` (0-based position),
        ``resnum``, ``resname`` and ``full_name`` (a display label that starts
        with the 1-based position).
    """
    return [
        {
            'index': position,
            'resnum': residue['resnum'],
            'resname': residue['resname'],
            'full_name': f"{position+1}. {residue['resname']} (residue #{residue['resnum']})",
        }
        for position, residue in enumerate(parse_residue_atoms(pdb_path))
    ]
def display_structure_selector(files, temp_dir, set_name):
    """
    Display structure information and allow users to select residues.

    For every file an expander shows the residue table and three selection
    modes (range, explicit residues, all residues). Widget keys are built from
    ``set_name`` and the file name so that each file gets its own widgets.

    A file from which no residues were parsed is reported with a warning and
    gets an empty selection instead of building widgets: the residue table and
    the range inputs (minimum 1, maximum = residue count) cannot be created
    for an empty structure.

    Args:
        files: List of uploaded files (each exposing ``.name``)
        temp_dir: Temporary directory containing files
        set_name: Name of the set (e.g., "Reference" or "Query")

    Returns:
        Dict mapping filename to list of selected residue indices (0-based)
    """
    if not files:
        return {}

    st.subheader(f"📋 {set_name} Structure Preview & Selection")
    selections = {}

    for file in files:
        file_path = os.path.join(temp_dir, file.name)
        structure_info = get_structure_info(file_path)
        n_residues = len(structure_info)

        with st.expander(f"📄 {file.name} ({n_residues} residues)"):
            if n_residues == 0:
                # Nothing to tabulate or select; record an empty selection.
                st.warning(f"No residues found in {file.name}; nothing can be selected.")
                selections[file.name] = []
                continue

            # Display residue table
            info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
            info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
            info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
            info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]
            st.dataframe(info_df, use_container_width=True, height=min(300, n_residues * 35 + 38))

            # Selection method
            selection_method = st.radio(
                f"Selection method for {file.name}",
                ["Select by range", "Select specific residues", "Use all residues"],
                key=f"method_{set_name}_{file.name}",
                horizontal=True
            )

            selected_indices = []

            if selection_method == "Select by range":
                col1, col2 = st.columns(2)
                with col1:
                    start_idx = st.number_input(
                        "Start index (1-based)",
                        min_value=1,
                        max_value=n_residues,
                        value=1,
                        key=f"start_{set_name}_{file.name}"
                    )
                with col2:
                    end_idx = st.number_input(
                        "End index (1-based, inclusive)",
                        min_value=1,
                        max_value=n_residues,
                        value=min(4, n_residues),
                        key=f"end_{set_name}_{file.name}"
                    )

                if start_idx <= end_idx:
                    # Convert the inclusive 1-based range to 0-based indices.
                    selected_indices = list(range(start_idx - 1, end_idx))
                    st.info(f"✓ Selected residues: {[i+1 for i in selected_indices]}")
                else:
                    st.error("Start index must be ≤ end index")

            elif selection_method == "Select specific residues":
                # Multi-select for specific residues; the first (up to) four
                # residues are preselected.
                selected_names = st.multiselect(
                    "Select residues",
                    options=[info['full_name'] for info in structure_info],
                    default=[info['full_name'] for info in structure_info[:4]],
                    key=f"specific_{set_name}_{file.name}"
                )
                # Map the display labels back to 0-based indices
                name_to_idx = {info['full_name']: info['index'] for info in structure_info}
                selected_indices = sorted(name_to_idx[name] for name in selected_names)
                if selected_indices:
                    st.info(f"✓ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")

            else:  # Use all residues
                selected_indices = list(range(n_residues))
                st.info(f"✓ Using all {len(selected_indices)} residues")

            # Show selected residues details
            if selected_indices:
                selected_df = info_df[info_df['Index (0-based)'].isin(selected_indices)]
                st.markdown("**Selected residues:**")
                st.dataframe(selected_df, use_container_width=True)

            selections[file.name] = selected_indices

    return selections
def save_uploaded_file(uploaded_file, directory):
    """
    Save an uploaded file into a directory and return the saved path.

    Only the final path component of ``uploaded_file.name`` is used, so a
    client-supplied name that contains directory parts (for example
    ``../x.pdb``) cannot be written outside ``directory``.

    NOTE(review): this repeats a function of the same name defined earlier in
    the file. Being the later definition, this is the one in effect; the
    earlier copy can be deleted.

    Args:
        uploaded_file: Object exposing ``.name`` (str) and ``.getbuffer()``
            (bytes-like content), as used by the callers in this file.
        directory: Existing directory to write into.

    Returns:
        Path of the written file inside ``directory``.
    """
    # The name comes from the uploading client, so never trust its directory part.
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
def extract_window_coords(residues, window_indices):
    """
    Collect the coordinates of the residues named by ``window_indices``.

    For each index, the backbone/sugar coordinates are appended first and the
    base coordinates second, in the order the indices are given. An index that
    is not smaller than ``len(residues)`` is skipped without error.

    Args:
        residues: List of all residues
        window_indices: List of indices to extract

    Returns:
        numpy array built from the collected coordinates
    """
    from rmsd_utils import get_backbone_sugar_coords_from_residue, get_base_coords_from_residue

    residue_count = len(residues)
    collected = []
    for position in window_indices:
        if position >= residue_count:
            # Index beyond the parsed residues: ignore it.
            continue
        current = residues[position]
        # Backbone and sugar atoms first ...
        collected.extend(get_backbone_sugar_coords_from_residue(current))
        # ... then the base atoms of the same residue.
        collected.extend(get_base_coords_from_residue(current))
    return np.asarray(collected)
def compare_structures_with_selection(reference_files, query_files, ref_selections, query_selections, temp_dir):
    """
    Compare reference and query structures using user-selected residues (direct comparison).

    Only pairs whose selections contain the same number of residues (and at
    least two) are compared. Each query file is parsed at most once and the
    parsed residues are reused for every reference it is compared against.

    When ``calculate_rotation_rmsd`` returns ``None`` for the rotation or the
    RMSD, the pair is still recorded, with RMSD 999.0 and an identity rotation.

    Args:
        reference_files: List of reference motif files
        query_files: List of query motif files
        ref_selections: Dict mapping filename to selected residue indices
        query_selections: Dict mapping filename to selected residue indices
        temp_dir: Temporary directory containing files

    Returns:
        DataFrame with one row per comparison (empty if no pair qualifies)
    """
    min_selected = 2

    # Count valid comparisons up front so the progress bar can be scaled.
    ref_sizes = [len(ref_selections.get(f.name, [])) for f in reference_files]
    query_sizes = [len(query_selections.get(f.name, [])) for f in query_files]
    total_comparisons = sum(
        1
        for ref_size in ref_sizes
        if ref_size >= min_selected
        for query_size in query_sizes
        if query_size == ref_size
    )

    if total_comparisons == 0:
        st.error("No valid comparisons found. Ensure selected regions have matching sizes.")
        return pd.DataFrame()

    progress_bar = st.progress(0)
    status_text = st.empty()

    results = []
    comparison_count = 0
    parsed_queries = {}  # query file name -> parsed residues (filled on first use)

    for ref_file in reference_files:
        ref_name = ref_file.name
        ref_path = os.path.join(temp_dir, ref_name)
        ref_indices = ref_selections.get(ref_name, [])
        if len(ref_indices) < min_selected:
            continue

        # Parse reference motif and extract coordinates for selected residues
        ref_residues = parse_residue_atoms(ref_path)
        ref_coords = extract_window_coords(ref_residues, ref_indices)
        ref_com = calculate_COM(ref_coords)

        # Residue description (1-based) and sequence of the selection
        ref_residue_desc = f"[{','.join([str(i+1) for i in ref_indices])}]"
        ref_sequence = ''.join([ref_residues[i]['resname'] for i in ref_indices if i < len(ref_residues)])

        for query_file in query_files:
            query_name = query_file.name
            query_path = os.path.join(temp_dir, query_name)
            query_indices = query_selections.get(query_name, [])

            # Only compare if same number of residues (this also implies the
            # query has at least ``min_selected`` residues).
            if len(query_indices) != len(ref_indices):
                continue

            # Parse query motif once; later references reuse the result.
            if query_name not in parsed_queries:
                parsed_queries[query_name] = parse_residue_atoms(query_path)
            query_residues = parsed_queries[query_name]

            # Extract coordinates for selected residues
            query_coords = extract_window_coords(query_residues, query_indices)
            query_com = calculate_COM(query_coords)

            query_residue_desc = f"[{','.join([str(i+1) for i in query_indices])}]"
            query_sequence = ''.join([query_residues[i]['resname'] for i in query_indices if i < len(query_residues)])

            # Calculate RMSD
            U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)
            if U is None or RMSD is None:
                # Keep the pair in the table, flagged by a sentinel RMSD.
                RMSD = 999.0
                U = np.eye(3)

            # Store results
            results.append({
                'Reference': ref_name,
                'Ref_Residues': ref_residue_desc,
                'Ref_Sequence': ref_sequence,
                'Ref_Indices': ref_indices,
                'Query': query_name,
                'Query_Residues': query_residue_desc,
                'Query_Sequence': query_sequence,
                'Query_Indices': query_indices,
                'Num_Residues': len(ref_indices),
                'RMSD': RMSD,
                'Rotation_Matrix': U,
                'Ref_COM': ref_com,
                'Query_COM': query_com,
                'Ref_Path': ref_path,
                'Query_Path': query_path
            })

            comparison_count += 1
            progress_bar.progress(comparison_count / total_comparisons)
            status_text.text(f"Processing: {ref_name}{ref_residue_desc} vs {query_name}{query_residue_desc}")

    progress_bar.empty()
    status_text.empty()
    return pd.DataFrame(results)
def compare_structures_with_windows(reference_files, query_files, ref_selections, query_selections,
                                    window_size, window_type, temp_dir):
    """
    Compare reference and query structures using sliding windows on selected residues.

    Allows comparison of different-sized selections: every window of
    ``window_size`` residues from a reference selection is compared against
    every window from each query selection.

    Windows are generated once per file and each query file is parsed at most
    once; the results are reused for every reference window.

    When ``calculate_rotation_rmsd`` returns ``None`` for the rotation or the
    RMSD, the pair is still recorded, with RMSD 999.0 and an identity rotation.

    Args:
        reference_files: List of reference motif files
        query_files: List of query motif files
        ref_selections: Dict mapping filename to selected residue indices
        query_selections: Dict mapping filename to selected residue indices
        window_size: Size of comparison window
        window_type: "contiguous" or "non-contiguous"
        temp_dir: Temporary directory containing files

    Returns:
        DataFrame with one row per window pair (empty if no pair qualifies)
    """
    from itertools import combinations

    def generate_windows_from_selection(selected_indices, win_size, win_type):
        """Generate windows from selected indices.

        "contiguous" yields consecutive slices of the selection list; any other
        value yields every ``win_size``-combination of the selection.
        """
        if len(selected_indices) < win_size:
            return []
        if win_type == "contiguous":
            last_start = len(selected_indices) - win_size
            return [selected_indices[i:i + win_size] for i in range(last_start + 1)]
        # non-contiguous
        return [list(combo) for combo in combinations(selected_indices, win_size)]

    # Windows depend only on the selection, so build them once per file.
    ref_windows_per_file = [
        generate_windows_from_selection(ref_selections.get(f.name, []), window_size, window_type)
        for f in reference_files
    ]
    query_windows_per_file = [
        generate_windows_from_selection(query_selections.get(f.name, []), window_size, window_type)
        for f in query_files
    ]

    # Every reference window meets every query window exactly once.
    total_comparisons = (
        sum(len(windows) for windows in ref_windows_per_file)
        * sum(len(windows) for windows in query_windows_per_file)
    )

    if total_comparisons == 0:
        st.error(f"No valid comparisons found. Ensure selected regions have at least {window_size} residues.")
        return pd.DataFrame()

    progress_bar = st.progress(0)
    status_text = st.empty()

    results = []
    comparison_count = 0
    parsed_queries = {}  # query file name -> parsed residues (filled on first use)

    for ref_file, ref_windows in zip(reference_files, ref_windows_per_file):
        ref_name = ref_file.name
        ref_path = os.path.join(temp_dir, ref_name)

        if not ref_windows:
            ref_indices = ref_selections.get(ref_name, [])
            st.warning(f"Skipping {ref_name}: selected {len(ref_indices)} residues, need at least {window_size}")
            continue

        # Parse reference motif
        ref_residues = parse_residue_atoms(ref_path)

        for ref_window in ref_windows:
            # Extract coordinates for this window
            ref_coords = extract_window_coords(ref_residues, ref_window)
            ref_com = calculate_COM(ref_coords)

            # Window description (1-based) and sequence
            ref_window_desc = f"[{','.join([str(i+1) for i in ref_window])}]"
            ref_sequence = ''.join([ref_residues[i]['resname'] for i in ref_window if i < len(ref_residues)])

            for query_file, query_windows in zip(query_files, query_windows_per_file):
                if not query_windows:
                    continue

                query_name = query_file.name
                query_path = os.path.join(temp_dir, query_name)

                # Parse query motif once; later windows/references reuse it.
                if query_name not in parsed_queries:
                    parsed_queries[query_name] = parse_residue_atoms(query_path)
                query_residues = parsed_queries[query_name]

                for query_window in query_windows:
                    # Extract coordinates for this window
                    query_coords = extract_window_coords(query_residues, query_window)
                    query_com = calculate_COM(query_coords)

                    query_window_desc = f"[{','.join([str(i+1) for i in query_window])}]"
                    query_sequence = ''.join([query_residues[i]['resname'] for i in query_window if i < len(query_residues)])

                    # Calculate RMSD
                    U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)
                    if U is None or RMSD is None:
                        # Keep the pair in the table, flagged by a sentinel RMSD.
                        RMSD = 999.0
                        U = np.eye(3)

                    # Store results
                    results.append({
                        'Reference': ref_name,
                        'Ref_Residues': ref_window_desc,
                        'Ref_Sequence': ref_sequence,
                        'Ref_Indices': ref_window,
                        'Query': query_name,
                        'Query_Residues': query_window_desc,
                        'Query_Sequence': query_sequence,
                        'Query_Indices': query_window,
                        'Num_Residues': window_size,
                        'RMSD': RMSD,
                        'Rotation_Matrix': U,
                        'Ref_COM': ref_com,
                        'Query_COM': query_com,
                        'Ref_Path': ref_path,
                        'Query_Path': query_path
                    })

                    comparison_count += 1
                    progress_bar.progress(comparison_count / total_comparisons)
                    status_text.text(f"Processing: {ref_name}{ref_window_desc} vs {query_name}{query_window_desc}")

    progress_bar.empty()
    status_text.empty()
    return pd.DataFrame(results)
def main():
    """
    Render the whole Streamlit page.

    Flow: upload reference/query PDB files in the sidebar, preview them and
    pick residues, choose a comparison mode, run the comparison, then show the
    results table, a 3D view of one selected comparison and download/export
    options. Results are kept in ``st.session_state`` so they survive reruns.

    The working directory for uploaded files is created once per session and
    remembered in ``st.session_state['temp_dir']``; creating a fresh directory
    on every rerun would leave one directory behind per user interaction.
    """

    def pdb_stem(filename):
        """Return the file name without its extension (works for .pdb and .PDB)."""
        return os.path.splitext(filename)[0]

    def indices_tag(indices):
        """Join 0-based indices as 1-based numbers separated by underscores."""
        return '_'.join(str(i + 1) for i in indices)

    # Header
    st.markdown('<p class="main-header">🧬 RNA Motif Structure Comparison</p>', unsafe_allow_html=True)
    st.markdown('<p class="sub-header">Compare RNA motifs with flexible residue selection</p>', unsafe_allow_html=True)

    # Sidebar
    st.sidebar.header("⚙️ Configuration")

    # File upload
    st.sidebar.subheader("1️⃣ Upload Structures")
    reference_files = st.sidebar.file_uploader(
        "Upload Reference Motif PDB files (Set A)",
        type=['pdb', 'PDB'],
        accept_multiple_files=True,
        key="reference",
        help="Upload RNA motif structures to use as reference"
    )
    query_files = st.sidebar.file_uploader(
        "Upload Query Motif PDB files (Set B)",
        type=['pdb', 'PDB'],
        accept_multiple_files=True,
        key="query",
        help="Upload RNA motif structures to compare against reference"
    )

    # Main content area
    if not reference_files or not query_files:
        st.info("👈 Please upload reference and query motif PDB files to begin analysis")

        # Show example info
        with st.expander("ℹ️ About this tool"):
            st.markdown("""
            ### Purpose
            This tool compares the 3D structures of RNA motifs with **flexible residue selection** and **multiple comparison modes**.

            ### Workflow
            1. **Upload PDB files** for reference and query motifs
            2. **Preview structures** and see all residues in each file
            3. **Select residues** to include in comparison (e.g., exclude stem bases, keep only loop)
            4. **Choose comparison mode**:
               - **Direct comparison**: Compare selected regions directly (must be same size)
               - **Window-based comparison**: Generate windows from selections (handles different sizes)
            5. **Run analysis** using RMSD-based structural alignment

            ### Comparison Modes

            #### Direct Comparison (Same Size)
            - Compares your exact selections
            - Example: You select 4 loop residues from each structure
            - Result: Direct 4-residue vs 4-residue comparison
            - Best for: When all structures have same-sized regions of interest

            #### Window-Based Comparison (Different Sizes)
            - Generates sliding windows from your selections
            - Example: You select 4 loop residues from ref, 6 loop residues from query
            - Set window size to 4
            - Result: Ref's 4 residues compared against all 4-residue windows from query's 6
            - Best for: When structures have different-sized regions but you want to find similar sub-regions

            ### Selection Methods
            - **By range**: Select consecutive residues (e.g., residues 3-6 for a tetraloop)
            - **Specific residues**: Pick any combination of residues (e.g., 1,3,5,7)
            - **All residues**: Use the entire structure

            ### Method Details
            - RMSD calculated using backbone, sugar, and select base atoms
            - Base atoms mapped: purines (N9,C8,C4) ↔ pyrimidines (N1,C2,C6)
            - Kabsch algorithm for optimal structural alignment

            ### Example Use Cases

            **Case 1: Extract loops from 2+4+2 structures (Direct)**
            - All structures have 8 residues (2 stem + 4 loop + 2 stem)
            - Select residues 3-6 for all structures (the 4-residue loop)
            - Use "Direct comparison"
            - Result: Compare loop vs loop directly

            **Case 2: Compare 4-mer loop vs 6-mer loop (Window-based)**
            - Structure A: Select residues 3-6 (4 loop residues)
            - Structure B: Select residues 2-7 (6 loop residues)
            - Use "Window-based comparison" with window size = 4
            - Result: Structure A compared against 3 windows from Structure B

            **Case 3: Find similar regions in different structures (Window-based)**
            - Reference: Select 5 residues of interest
            - Query: Select 10 residues from larger region
            - Use "Window-based comparison" with window size = 5
            - Result: Find which 5-residue window in query best matches reference

            ### Output
            - RMSD values for all comparisons
            - Interactive 3D visualization of aligned structures
            - Rotation and translation matrices
            - Sequence information for compared regions
            """)
        return

    # Working directory for file processing: created once per session and
    # reused on every rerun (stored result rows keep paths into it).
    temp_dir = st.session_state.get('temp_dir')
    if not temp_dir or not os.path.isdir(temp_dir):
        temp_dir = tempfile.mkdtemp()
        st.session_state['temp_dir'] = temp_dir

    # Save uploaded files
    for file in reference_files:
        save_uploaded_file(file, temp_dir)
    for file in query_files:
        save_uploaded_file(file, temp_dir)

    # Display file info
    st.markdown("---")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Reference Motifs", len(reference_files))
    with col2:
        st.metric("Query Motifs", len(query_files))

    # Structure preview and selection
    st.markdown("---")
    # Get residue selections for reference and query sets
    ref_selections = display_structure_selector(reference_files, temp_dir, "Reference")
    st.markdown("---")
    query_selections = display_structure_selector(query_files, temp_dir, "Query")

    # Validate selections: every file needs at least ``min_residues`` residues.
    st.markdown("---")
    valid_selections = True
    min_residues = 2
    for selection_set in (ref_selections, query_selections):
        for filename, indices in selection_set.items():
            if len(indices) < min_residues:
                st.error(f"❌ {filename}: Need at least {min_residues} residues selected, got {len(indices)}")
                valid_selections = False

    # Check if all selections have the same number of residues
    ref_lengths = {len(indices) for indices in ref_selections.values()}
    query_lengths = {len(indices) for indices in query_selections.values()}
    all_lengths = ref_lengths | query_lengths
    if len(all_lengths) > 1:
        st.warning(f"⚠️ Selected regions have different sizes: {sorted(all_lengths)} residues. Only structures with matching sizes will be compared.")

    # Comparison method
    st.sidebar.markdown("---")
    st.sidebar.subheader("2️⃣ Comparison Method")
    comparison_mode = st.sidebar.radio(
        "How to compare structures?",
        ["Direct comparison (same size)", "Window-based comparison (different sizes)"],
        help="""
        Direct: Compare selected regions directly (must have same size)
        Window-based: Generate sliding windows for flexible comparison
        """
    )

    window_size = None
    window_type = None
    if comparison_mode == "Window-based comparison (different sizes)":
        st.sidebar.markdown("**Window Configuration**")
        window_size = st.sidebar.number_input(
            "Window Size",
            min_value=2,
            max_value=20,
            value=4,
            step=1,
            help="Number of residues per comparison window"
        )
        window_type = st.sidebar.radio(
            "Window Type",
            ["contiguous", "non-contiguous"],
            help="Contiguous: sliding windows. Non-contiguous: all combinations"
        )

    # Run analysis button
    st.sidebar.markdown("---")
    st.sidebar.subheader("3️⃣ Run Analysis")
    if st.sidebar.button("🚀 Run Analysis", type="primary", disabled=not valid_selections):
        if not valid_selections:
            st.error("Please fix selection errors before running analysis")
            return

        with st.spinner("Analyzing structures..."):
            if comparison_mode == "Direct comparison (same size)":
                results_df = compare_structures_with_selection(
                    reference_files,
                    query_files,
                    ref_selections,
                    query_selections,
                    temp_dir
                )
            else:  # Window-based comparison
                results_df = compare_structures_with_windows(
                    reference_files,
                    query_files,
                    ref_selections,
                    query_selections,
                    window_size,
                    window_type,
                    temp_dir
                )

            # Store results in session state so they survive widget-driven reruns
            st.session_state['results_df'] = results_df
            st.session_state['ref_selections'] = ref_selections
            st.session_state['query_selections'] = query_selections
            st.session_state['comparison_mode'] = comparison_mode

        if len(results_df) > 0:
            st.success(f"✅ Analysis complete! {len(results_df)} comparisons performed.")
        else:
            st.warning("⚠️ No comparisons could be performed. Check that structures meet comparison requirements.")

    # Display results if available
    if 'results_df' in st.session_state and len(st.session_state['results_df']) > 0:
        results_df = st.session_state['results_df']

        # Add RMSD threshold filter
        st.sidebar.markdown("---")
        st.sidebar.subheader("4️⃣ Filter Results")
        rmsd_threshold = st.sidebar.slider(
            "RMSD Threshold (Å)",
            min_value=0.0,
            max_value=5.0,
            value=2.0,
            step=0.1,
            help="Only show results below this RMSD value"
        )

        # Show comparison mode
        if 'comparison_mode' in st.session_state:
            mode_display = "Direct" if "Direct" in st.session_state['comparison_mode'] else "Window-based"
            st.sidebar.info(f"**Mode**: {mode_display}")

        # Filter by threshold
        filtered_df = results_df[results_df['RMSD'] <= rmsd_threshold].copy()

        # Summary statistics (computed over all results, not only the filtered ones)
        st.markdown("---")
        st.subheader("📊 Summary Statistics")
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Total Comparisons", len(results_df))
        with col2:
            st.metric("Below Threshold", len(filtered_df))
        with col3:
            st.metric("Best RMSD", f"{results_df['RMSD'].min():.3f} Å")
        with col4:
            st.metric("Mean RMSD", f"{results_df['RMSD'].mean():.3f} Å")

        # Results table
        st.markdown("---")
        st.subheader("📋 Comparison Results")

        # Prepare display dataframe
        table_columns = ['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'Num_Residues', 'RMSD']
        display_df = filtered_df[table_columns].copy()
        display_df = display_df.sort_values('RMSD').reset_index(drop=True)
        display_df['RMSD'] = display_df['RMSD'].round(3)

        st.dataframe(
            display_df,
            use_container_width=True,
            height=300
        )

        # Structure selection for visualization
        st.markdown("---")
        st.subheader("🔬 3D Structure Visualization")

        if len(filtered_df) > 0:

            def describe_comparison(i):
                """Label for the i-th row (positional) of the filtered results."""
                row = filtered_df.iloc[i]
                return (
                    f"{row['Reference']}{row['Ref_Residues']} ({row['Ref_Sequence']}) vs "
                    f"{row['Query']}{row['Query_Residues']} ({row['Query_Sequence']}) | "
                    f"RMSD: {row['RMSD']:.3f} Å"
                )

            # Select a comparison to visualize
            selected_idx = st.selectbox(
                "Select a comparison to visualize:",
                range(len(filtered_df)),
                format_func=describe_comparison
            )
            selected_row = filtered_df.iloc[selected_idx]

            # Display RMSD info
            st.info(f"**RMSD: {selected_row['RMSD']:.3f} Å** ({selected_row['Num_Residues']} residues) | Reference: {selected_row['Reference']}{selected_row['Ref_Residues']} ({selected_row['Ref_Sequence']}) | Query: {selected_row['Query']}{selected_row['Query_Residues']} ({selected_row['Query_Sequence']})")

            # Create visualization - wider display (narrow spacer columns on both sides)
            col1, col2, col3 = st.columns([0.5, 4, 0.5])
            with col2:
                try:
                    viz_html = create_structure_visualization(
                        selected_row['Ref_Path'],
                        selected_row['Query_Path'],
                        selected_row['Ref_Indices'],
                        selected_row['Query_Indices'],
                        selected_row['Rotation_Matrix'],
                        selected_row['Ref_COM'],
                        selected_row['Query_COM'],
                        selected_row['RMSD']
                    )
                    st.components.v1.html(viz_html, height=700, scrolling=False)
                except Exception as e:
                    # UI boundary: report the failure instead of aborting the page.
                    st.error(f"Error creating visualization: {str(e)}")

            # Show transformation details
            with st.expander("🔧 Transformation Details"):
                col1, col2 = st.columns(2)
                with col1:
                    st.markdown("**Rotation Matrix (U):**")
                    st.dataframe(
                        pd.DataFrame(selected_row['Rotation_Matrix']).round(4),
                        use_container_width=True
                    )
                with col2:
                    st.markdown("**Translation Vectors:**")
                    st.write(f"Reference COM: [{selected_row['Ref_COM'][0]:.3f}, {selected_row['Ref_COM'][1]:.3f}, {selected_row['Ref_COM'][2]:.3f}]")
                    st.write(f"Query COM: [{selected_row['Query_COM'][0]:.3f}, {selected_row['Query_COM'][1]:.3f}, {selected_row['Query_COM'][2]:.3f}]")

            # Download aligned structures
            with st.expander("💾 Download Structure Files"):
                st.markdown("**Download extracted and aligned structures for external visualization**")

                from visualization import extract_window_pdb, transform_pdb_string

                # Extract reference window
                ref_pdb = extract_window_pdb(
                    selected_row['Ref_Path'],
                    selected_row['Ref_Indices']
                )
                # Extract and transform query window
                query_pdb = extract_window_pdb(
                    selected_row['Query_Path'],
                    selected_row['Query_Indices']
                )
                query_aligned_pdb = transform_pdb_string(
                    query_pdb,
                    selected_row['Rotation_Matrix'],
                    selected_row['Query_COM'],
                    selected_row['Ref_COM']
                )

                ref_stem = pdb_stem(selected_row['Reference'])
                query_stem = pdb_stem(selected_row['Query'])
                ref_tag = indices_tag(selected_row['Ref_Indices'])
                query_tag = indices_tag(selected_row['Query_Indices'])

                col1, col2, col3 = st.columns(3)
                with col1:
                    # Reference structure
                    st.download_button(
                        label="📥 Reference PDB",
                        data=ref_pdb,
                        file_name=f"ref_{ref_stem}_{ref_tag}.pdb",
                        mime="chemical/x-pdb",
                        help="Original reference structure (selected residues only)"
                    )
                with col2:
                    # Query structure (original position)
                    st.download_button(
                        label="📥 Query PDB (Original)",
                        data=query_pdb,
                        file_name=f"query_{query_stem}_{query_tag}.pdb",
                        mime="chemical/x-pdb",
                        help="Original query structure (selected residues only)"
                    )
                with col3:
                    # Query structure (aligned)
                    st.download_button(
                        label="📥 Query PDB (Aligned)",
                        data=query_aligned_pdb,
                        file_name=f"query_aligned_{query_stem}_{query_tag}.pdb",
                        mime="chemical/x-pdb",
                        help="Query structure aligned to reference"
                    )

                st.info("💡 **Tip:** Load reference and aligned query together in PyMOL/Chimera to examine the superposition")
        else:
            st.warning("No comparisons below the RMSD threshold. Try increasing the threshold.")

        # Download results
        st.markdown("---")
        st.subheader("💾 Export Results")
        col1, col2 = st.columns(2)

        with col1:
            st.markdown("**Export Results Table**")
            # Prepare CSV from all results (not only those below the threshold)
            export_columns = ['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'Num_Residues', 'RMSD']
            export_df = results_df[export_columns].copy()
            export_df = export_df.sort_values('RMSD').reset_index(drop=True)
            csv = export_df.to_csv(index=False)
            st.download_button(
                label="📥 Download Results (CSV)",
                data=csv,
                file_name="rna_motif_comparison_results.csv",
                mime="text/csv"
            )

        with col2:
            st.markdown("**Export All Aligned Structures**")
            if st.button("📦 Generate PDB Archive", help="Create a ZIP file with all aligned structure pairs"):
                with st.spinner("Generating PDB files..."):
                    import zipfile
                    from visualization import extract_window_pdb, transform_pdb_string

                    # Create ZIP file in memory
                    zip_buffer = io.BytesIO()
                    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
                        # One folder per comparison below the threshold; ``idx``
                        # is the row's index label in the results table.
                        for idx, row in filtered_df.iterrows():
                            comp_name = f"comparison_{idx:03d}_rmsd_{row['RMSD']:.3f}"
                            ref_stem = pdb_stem(row['Reference'])
                            query_stem = pdb_stem(row['Query'])

                            # Extract reference
                            ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Indices'])
                            zip_file.writestr(f"{comp_name}/reference_{ref_stem}.pdb", ref_pdb)

                            # Extract query (original)
                            query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Indices'])
                            zip_file.writestr(f"{comp_name}/query_original_{query_stem}.pdb", query_pdb)

                            # Extract and align query
                            query_aligned_pdb = transform_pdb_string(
                                query_pdb,
                                row['Rotation_Matrix'],
                                row['Query_COM'],
                                row['Ref_COM']
                            )
                            zip_file.writestr(f"{comp_name}/query_aligned_{query_stem}.pdb", query_aligned_pdb)

                            # Add a README for this comparison
                            readme_lines = [
                                f"Comparison #{idx}",
                                f"RMSD: {row['RMSD']:.3f} Å",
                                f"Residues Compared: {row['Num_Residues']}",
                                "Reference:",
                                f"File: {row['Reference']}",
                                f"Residues: {row['Ref_Residues']}",
                                f"Sequence: {row['Ref_Sequence']}",
                                "Query:",
                                f"File: {row['Query']}",
                                f"Residues: {row['Query_Residues']}",
                                f"Sequence: {row['Query_Sequence']}",
                                "Files:",
                                "- reference_*.pdb: Reference structure (selected residues)",
                                "- query_original_*.pdb: Query structure (original position)",
                                "- query_aligned_*.pdb: Query structure (aligned to reference)",
                                "To visualize in PyMOL:",
                                "load reference_*.pdb",
                                "load query_aligned_*.pdb",
                                "To visualize in Chimera:",
                                "File → Open → Select both reference and query_aligned PDB files",
                                "",  # keeps the trailing newline
                            ]
                            zip_file.writestr(f"{comp_name}/README.txt", "\n".join(readme_lines))

                    zip_buffer.seek(0)
                    st.download_button(
                        label="📥 Download PDB Archive (ZIP)",
                        data=zip_buffer.getvalue(),
                        file_name="aligned_structures.zip",
                        mime="application/zip",
                        help=f"Contains {len(filtered_df)} comparison sets with reference, original query, and aligned query PDBs"
                    )
                    st.success(f"✅ Archive ready! Contains {len(filtered_df)} comparisons with 3 PDB files each.")
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()