Spaces:
Sleeping
Sleeping
| """ | |
| RNA Motif Multi-Structure Comparison Tool - Pairwise Mode | |
| Streamlit app for comparing multiple RNA motif structures with separate reference and query sets | |
| Uses dropdown menu for residue configuration and default Backbone + Sugar atom selection | |
| """ | |
| import streamlit as st | |
| import numpy as np | |
| import pandas as pd | |
| from pathlib import Path | |
| import io | |
| import tempfile | |
| import os | |
| from itertools import combinations | |
| # Import our RMSD calculation functions | |
| from rmsd_utils import ( | |
| parse_residue_atoms, | |
| get_backbone_sugar_and_selectbase_coords_fixed, | |
| calculate_COM, | |
| calculate_rotation_rmsd, | |
| translate_rotate_coords, | |
| get_backbone_sugar_coords_from_residue, | |
| get_base_coords_from_residue | |
| ) | |
# Import example data loader (optional dependency).
# Only availability is recorded here. Streamlit documents st.set_page_config()
# as the first Streamlit command of a page (older releases raise
# StreamlitAPIException otherwise), so the user-facing warning for a missing
# loader is deferred until after the page has been configured below.
try:
    from example_data_loader import (
        get_example_pdbs,
        load_example_as_uploaded_file,
        get_example_info
    )
    EXAMPLES_AVAILABLE = True
except ImportError:
    EXAMPLES_AVAILABLE = False

# Page configuration (must run before any other st.* call).
st.set_page_config(
    page_title="RNA Motif Multi-Structure Comparison - Pairwise",
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Now that the page is configured it is safe to emit the warning.
if not EXAMPLES_AVAILABLE:
    st.warning("Example data loader not available. Please use 'Upload Files' mode.")
| from image_annotator import annotate_alignment_image | |
# Custom CSS injected once at import time.
# Main content area: larger fonts. Sidebar: compact spacing and smaller fonts.
_CUSTOM_CSS = """
<style>
/* ========================================
MAIN CONTENT - LARGER FONTS
======================================== */
/* Increase base font size for all main content */
.main .element-container,
.main [data-testid="stMarkdownContainer"],
.main [data-testid="stText"],
.main p,
.main span,
.main div {
font-size: 1.15rem !important;
}
/* Headers in main content */
.main h1 {
font-size: 2.8rem !important;
font-weight: 700 !important;
}
.main h2 {
font-size: 2.0rem !important;
font-weight: 600 !important;
}
.main h3 {
font-size: 1.6rem !important;
font-weight: 600 !important;
}
/* Custom header classes */
.main-header {
font-size: 2.8rem !important;
font-weight: bold;
color: #1f77b4;
margin-bottom: 1rem;
}
.sub-header {
font-size: 1.4rem !important;
color: #666;
margin-bottom: 2rem;
}
/* Info/warning/success boxes */
.main [data-testid="stAlert"] p,
.main [data-testid="stAlert"] {
font-size: 1.1rem !important;
}
/* Dataframes and tables */
.main [data-testid="stDataFrame"],
.main .dataframe,
.main table {
font-size: 1.05rem !important;
}
.main .dataframe th,
.main .dataframe td {
font-size: 1.05rem !important;
padding: 8px !important;
}
/* Metrics */
.main [data-testid="stMetric"] {
font-size: 1.15rem !important;
}
.main [data-testid="stMetricLabel"] {
font-size: 1.1rem !important;
}
.main [data-testid="stMetricValue"] {
font-size: 1.8rem !important;
}
/* Buttons in main content */
.main button p,
.main button span {
font-size: 1.05rem !important;
}
/* Selectbox, radio, and other inputs in main */
.main .stSelectbox label,
.main .stRadio label,
.main .stNumberInput label,
.main .stMultiSelect label {
font-size: 1.1rem !important;
}
.main .stSelectbox [data-baseweb="select"] div,
.main .stRadio [role="radiogroup"] label,
.main .stNumberInput input {
font-size: 1.05rem !important;
}
/* Expander headers */
.main [data-testid="stExpander"] summary {
font-size: 1.15rem !important;
}
/* Code blocks */
.main code,
.main pre {
font-size: 1.0rem !important;
}
/* ========================================
SIDEBAR - COMPACT & NORMAL FONT
======================================== */
/* Ultra-compact sidebar spacing */
section[data-testid="stSidebar"] {
padding-top: 0.2rem !important;
}
section[data-testid="stSidebar"] > div {
padding-top: 0.2rem !important;
}
/* Minimal margins */
section[data-testid="stSidebar"] [data-testid="stMarkdownContainer"] {
margin: 0rem !important;
}
/* Minimal header spacing */
section[data-testid="stSidebar"] h1,
section[data-testid="stSidebar"] h2,
section[data-testid="stSidebar"] h3 {
margin-top: 0.1rem !important;
margin-bottom: 0.2rem !important;
padding: 0rem !important;
line-height: 1.2 !important;
font-size: 1.0rem !important;
}
/* Tight widget spacing */
section[data-testid="stSidebar"] .stSelectbox,
section[data-testid="stSidebar"] .stNumberInput,
section[data-testid="stSidebar"] .stRadio,
section[data-testid="stSidebar"] .stFileUploader {
margin-top: 0.1rem !important;
margin-bottom: 0.2rem !important;
}
section[data-testid="stSidebar"] .stButton {
margin: 0.2rem 0 !important;
}
section[data-testid="stSidebar"] .element-container {
margin: 0.1rem 0 !important;
}
section[data-testid="stSidebar"] .stAlert {
padding: 0.3rem 0.5rem !important;
margin: 0.1rem 0 !important;
}
section[data-testid="stSidebar"] label {
margin-bottom: 0.1rem !important;
font-size: 0.9rem !important;
}
section[data-testid="stSidebar"] .stCaptionContainer {
margin: 0.1rem 0 !important;
}
section[data-testid="stSidebar"] hr {
margin: 0.2rem 0 !important;
}
/* Sidebar font sizes - keep normal/small */
section[data-testid="stSidebar"] * {
font-size: 0.9rem !important;
}
section[data-testid="stSidebar"] p,
section[data-testid="stSidebar"] span,
section[data-testid="stSidebar"] div {
font-size: 0.9rem !important;
}
section[data-testid="stSidebar"] button {
font-size: 0.9rem !important;
}
</style>
"""

# unsafe_allow_html is needed so the <style> element is passed through as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
def save_uploaded_file(uploaded_file, directory):
    """Save an uploaded file into ``directory`` and return the written path.

    Only the final path component of ``uploaded_file.name`` is used, so a
    name carrying directory parts (``../x.pdb``, ``/etc/x``) cannot place the
    file outside ``directory``.

    Args:
        uploaded_file: Object exposing a ``name`` attribute and a
            ``getbuffer()`` method returning the bytes to write.
        directory: Directory the file is written into.

    Returns:
        Path of the written file, i.e. ``directory`` joined with the
        sanitised file name.

    Raises:
        ValueError: If the name has no usable file component
            (empty, ``.`` or ``..``).
    """
    # Drop any directory components supplied with the upload name.
    safe_name = os.path.basename(uploaded_file.name)
    if safe_name in ("", ".", ".."):
        raise ValueError(f"Invalid uploaded file name: {uploaded_file.name!r}")

    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
def get_structure_info(pdb_path):
    """
    Describe every residue parsed from a PDB file.

    Args:
        pdb_path: Path to PDB file

    Returns:
        One dict per residue, in parse order, with keys:
        'index' (0-based position), 'resnum', 'resname' and 'full_name'
        (a display label built from the 1-based position, resname and resnum).
    """
    return [
        {
            'index': position,
            'resnum': residue['resnum'],
            'resname': residue['resname'],
            'full_name': f"{position+1}. {residue['resname']} (residue #{residue['resnum']})",
        }
        for position, residue in enumerate(parse_residue_atoms(pdb_path))
    ]
def load_structure_data(uploaded_files, temp_dir):
    """
    Persist each uploaded file into ``temp_dir`` and parse its residues.

    Args:
        uploaded_files: Iterable of uploaded-file objects (each has ``name``)
        temp_dir: Directory the files are saved into before parsing

    Returns:
        One dict per file, in input order, with keys 'name', 'path',
        'residues' and 'num_residues'.
    """
    def _load_one(upload):
        # Save first: the parser is given the on-disk path.
        saved_path = save_uploaded_file(upload, temp_dir)
        parsed = parse_residue_atoms(saved_path)
        return {
            'name': upload.name,
            'path': saved_path,
            'residues': parsed,
            'num_residues': len(parsed),
        }

    return [_load_one(upload) for upload in uploaded_files]
def extract_window_coords(residues, window_indices):
    """
    Collect the coordinates of the residues named by ``window_indices``.

    For each index, the backbone/sugar coordinates come first, followed by the
    base coordinates. Indices at or beyond ``len(residues)`` are skipped.

    Args:
        residues: List of all residues
        window_indices: Indices into ``residues`` to extract

    Returns:
        numpy array built from the collected coordinates
    """
    from rmsd_utils import get_backbone_sugar_coords_from_residue, get_base_coords_from_residue

    # Order matters: backbone/sugar atoms precede base atoms for every residue.
    extractors = (get_backbone_sugar_coords_from_residue, get_base_coords_from_residue)
    residue_count = len(residues)
    collected = []
    for position in window_indices:
        if position >= residue_count:
            continue
        current = residues[position]
        for extract in extractors:
            collected.extend(extract(current))
    return np.asarray(collected)
def generate_windows_from_selection(selected_indices, window_size, window_type):
    """Generate windows of ``window_size`` indices from a residue selection.

    "Contiguous" means adjacent positions within ``selected_indices``
    (a sliding window over the selection), not adjacency of the index values.

    Args:
        selected_indices: Sequence of (hashable) residue indices.
        window_size: Number of indices per window.
        window_type: ``"contiguous"`` for sliding windows only;
            ``"non-contiguous"`` for every combination that is not one of
            the sliding windows; any other value (the UI passes ``"both"``)
            for every combination.

    Returns:
        List of windows, each a new list of indices. Empty when
        ``window_size`` is below 1 or exceeds the selection length. When the
        selection has exactly ``window_size`` entries, that single window is
        returned regardless of ``window_type``.
    """
    n_selected = len(selected_indices)
    if window_size < 1 or n_selected < window_size:
        return []

    if n_selected == window_size:
        # Return a copy so callers never share the selection list itself.
        return [list(selected_indices)]

    # Sliding windows, kept as tuples so they can be compared with combinations.
    sliding = [
        tuple(selected_indices[start:start + window_size])
        for start in range(n_selected - window_size + 1)
    ]
    if window_type == "contiguous":
        return [list(window) for window in sliding]

    # combinations() is lazy; consume it directly rather than materialising it.
    all_combos = combinations(selected_indices, window_size)
    if window_type == "non-contiguous":
        # Set membership avoids scanning every sliding window per combination.
        excluded = set(sliding)
        return [list(combo) for combo in all_combos if combo not in excluded]

    # Any other window type: every combination, sliding windows included.
    return [list(combo) for combo in all_combos]
| def main(): | |
| st.markdown('<h1 class="main-header">𧬠RNA Motif Multi-Structure Comparison</h1>', unsafe_allow_html=True) | |
| st.markdown('<p class="sub-header">Pairwise comparison: Reference structures vs Query structures</p>', unsafe_allow_html=True) | |
| # Create temporary directory | |
| if 'temp_dir' not in st.session_state: | |
| st.session_state['temp_dir'] = tempfile.mkdtemp() | |
| temp_dir = st.session_state['temp_dir'] | |
| # Initialize session state | |
| if 'data_mode' not in st.session_state: | |
| st.session_state['data_mode'] = 'upload' | |
| if 'ref_selections' not in st.session_state: | |
| st.session_state['ref_selections'] = {} | |
| if 'query_selections' not in st.session_state: | |
| st.session_state['query_selections'] = {} | |
| # Sidebar: Step 1 - Data Source Selection | |
| st.sidebar.title("βοΈ Configuration") | |
| st.sidebar.subheader("1οΈβ£ Data Source") | |
| # Check if examples are available | |
| if EXAMPLES_AVAILABLE: | |
| data_mode = st.sidebar.radio( | |
| "Choose data source", | |
| ["Upload Files", "Use Example Data"], | |
| key="data_mode_radio", | |
| help="Upload your own PDB files or use provided examples" | |
| ) | |
| else: | |
| st.sidebar.info("βΉοΈ Example data not available. Using upload mode.") | |
| data_mode = "Upload Files" | |
| # Update data mode | |
| if data_mode == "Upload Files": | |
| st.session_state['data_mode'] = 'upload' | |
| # Reset example initialization when switching to upload mode | |
| if 'example_mode_initialized' in st.session_state: | |
| del st.session_state['example_mode_initialized'] | |
| else: | |
| st.session_state['data_mode'] = 'example' | |
| # Step 2: File Upload/Selection - SEPARATE FOR REFERENCE AND QUERY | |
| st.sidebar.subheader("2οΈβ£ Structure Files") | |
| reference_files = [] | |
| query_files = [] | |
| if st.session_state['data_mode'] == 'upload': | |
| st.sidebar.markdown("**Upload Reference Structures**") | |
| ref_uploaded = st.sidebar.file_uploader( | |
| "Reference PDB files", | |
| type=['pdb'], | |
| accept_multiple_files=True, | |
| key="ref_uploader", | |
| help="Upload one or more reference structures (e.g., Pentaloop)" | |
| ) | |
| st.sidebar.markdown("**Upload Query Structures**") | |
| query_uploaded = st.sidebar.file_uploader( | |
| "Query PDB files", | |
| type=['pdb'], | |
| accept_multiple_files=True, | |
| key="query_uploader", | |
| help="Upload one or more query structures (e.g., Tetraloop)" | |
| ) | |
| reference_files = ref_uploaded if ref_uploaded else [] | |
| query_files = query_uploaded if query_uploaded else [] | |
| else: # Example data mode | |
| if not EXAMPLES_AVAILABLE: | |
| st.sidebar.error("β Example data loader module not found") | |
| reference_files = [] | |
| query_files = [] | |
| else: | |
| try: | |
| examples = get_example_pdbs() | |
| if not examples or len(examples) == 0: | |
| st.sidebar.error("β No example data available. Please add PDB files to 'data/' folder") | |
| st.sidebar.info("π‘ Create a 'data/' folder in the same directory as the app and add .pdb files") | |
| reference_files = [] | |
| query_files = [] | |
| else: | |
| example_names = sorted(list(examples.keys())) | |
| # Auto-select examples when first switching to example mode | |
| if 'example_mode_initialized' not in st.session_state: | |
| st.session_state['example_mode_initialized'] = True | |
| # Auto-select first half as reference, second half as query | |
| mid_point = max(1, len(example_names) // 2) | |
| st.session_state['auto_ref_examples'] = example_names[:mid_point] | |
| st.session_state['auto_query_examples'] = example_names[mid_point:mid_point*2] | |
| st.sidebar.markdown("**Select Reference Examples**") | |
| ref_example_names = st.sidebar.multiselect( | |
| "Reference structures", | |
| options=example_names, | |
| default=st.session_state.get('auto_ref_examples', []), | |
| key="ref_examples", | |
| help="Select example reference structures" | |
| ) | |
| if ref_example_names: | |
| st.sidebar.success(f"β {len(ref_example_names)} reference file(s) selected") | |
| st.sidebar.markdown("**Select Query Examples**") | |
| query_example_names = st.sidebar.multiselect( | |
| "Query structures", | |
| options=example_names, | |
| default=st.session_state.get('auto_query_examples', []), | |
| key="query_examples", | |
| help="Select example query structures" | |
| ) | |
| if query_example_names: | |
| st.sidebar.success(f"β {len(query_example_names)} query file(s) selected") | |
| # Convert names to paths and load files | |
| try: | |
| reference_files = [load_example_as_uploaded_file(examples[name]) for name in ref_example_names] | |
| query_files = [load_example_as_uploaded_file(examples[name]) for name in query_example_names] | |
| except Exception as load_error: | |
| st.sidebar.error(f"Error loading files: {str(load_error)}") | |
| import traceback | |
| st.sidebar.code(traceback.format_exc()) | |
| reference_files = [] | |
| query_files = [] | |
| except Exception as e: | |
| st.sidebar.error(f"β Error loading examples: {str(e)}") | |
| import traceback | |
| st.sidebar.code(traceback.format_exc()) | |
| reference_files = [] | |
| query_files = [] | |
| # Show upload status | |
| if reference_files and query_files: | |
| st.sidebar.success(f"β {len(reference_files)} reference + {len(query_files)} query structures") | |
| elif reference_files: | |
| st.sidebar.info(f"βΉοΈ {len(reference_files)} reference structures loaded") | |
| elif query_files: | |
| st.sidebar.info(f"βΉοΈ {len(query_files)} query structures loaded") | |
| else: | |
| st.sidebar.warning("β οΈ Upload or select structures") | |
| # Residue trimming controls - add early so they're available when needed | |
| st.sidebar.markdown("---") | |
| st.sidebar.markdown("**π§ 5'/3' Base Trimming (Reference) **") | |
| col1, col2 = st.sidebar.columns(2) | |
| with col1: | |
| n_term_trim_ref = st.number_input( | |
| "5' trim_ref", | |
| min_value=0, | |
| max_value=10, | |
| value=2, | |
| step=1, | |
| help="Number of bases to remove from 5' end", | |
| key="n_term_trim_ref" | |
| ) | |
| with col2: | |
| c_term_trim_ref = st.number_input( | |
| "3' trim_ref", | |
| min_value=0, | |
| max_value=10, | |
| value=2, | |
| step=1, | |
| help="Number of bases to remove from 3' end", | |
| key="c_term_trim_ref" | |
| ) | |
| # Residue trimming controls - add early so they're available when needed | |
| st.sidebar.markdown("---") | |
| st.sidebar.markdown("**π§ 5'/3' Base Trimming (Query) **") | |
| col1, col2 = st.sidebar.columns(2) | |
| with col1: | |
| n_term_trim_query = st.number_input( | |
| "5' trim_query", | |
| min_value=0, | |
| max_value=10, | |
| value=2, | |
| step=1, | |
| help="Number of bases to remove from 5' end", | |
| key="n_term_trim_query" | |
| ) | |
| with col2: | |
| c_term_trim_query = st.number_input( | |
| "3' trim_query", | |
| min_value=0, | |
| max_value=10, | |
| value=2, | |
| step=1, | |
| help="Number of bases to remove from 3' end", | |
| key="c_term_trim_query" | |
| ) | |
| # Load structure data | |
| ref_structure_data = [] | |
| query_structure_data = [] | |
| if reference_files: | |
| ref_structure_data = load_structure_data(reference_files, temp_dir) | |
| if query_files: | |
| query_structure_data = load_structure_data(query_files, temp_dir) | |
| # Track current files to reset selections if files change | |
| current_ref_files = set([s['name'] for s in ref_structure_data]) | |
| current_query_files = set([s['name'] for s in query_structure_data]) | |
| if 'current_ref_files' not in st.session_state: | |
| st.session_state['current_ref_files'] = current_ref_files | |
| if 'current_query_files' not in st.session_state: | |
| st.session_state['current_query_files'] = current_query_files | |
| # Reset selections if files changed | |
| if st.session_state['current_ref_files'] != current_ref_files: | |
| st.session_state['current_ref_files'] = current_ref_files | |
| st.session_state['ref_selections'] = {} | |
| if 'ref_auto_initialized' in st.session_state: | |
| del st.session_state['ref_auto_initialized'] | |
| if st.session_state['current_query_files'] != current_query_files: | |
| st.session_state['current_query_files'] = current_query_files | |
| st.session_state['query_selections'] = {} | |
| if 'query_auto_initialized' in st.session_state: | |
| del st.session_state['query_auto_initialized'] | |
| # Auto-initialize selections (exclude first and last residue by default) | |
| if 'ref_auto_initialized' not in st.session_state and ref_structure_data: | |
| for struct in ref_structure_data: | |
| num_res = struct['num_residues'] | |
| if num_res > n_term_trim_ref + c_term_trim_ref: | |
| auto_selection = list(range(n_term_trim_ref, num_res - c_term_trim_ref)) | |
| st.session_state['ref_selections'][struct['name']] = auto_selection | |
| else: | |
| st.session_state['ref_selections'][struct['name']] = list(range(num_res)) | |
| st.session_state['ref_auto_initialized'] = True | |
| if 'query_auto_initialized' not in st.session_state and query_structure_data: | |
| for struct in query_structure_data: | |
| num_res = struct['num_residues'] | |
| if num_res > n_term_trim_query + c_term_trim_query: | |
| auto_selection = list(range(n_term_trim_query, num_res - c_term_trim_query)) | |
| st.session_state['query_selections'][struct['name']] = auto_selection | |
| else: | |
| st.session_state['query_selections'][struct['name']] = list(range(num_res)) | |
| st.session_state['query_auto_initialized'] = True | |
| # Step 3: Configure Atom Selections in Main Area | |
| st.markdown("---") | |
| st.subheader("π¬ Configure Atom Selections") | |
| st.info(f"""βΉοΈ **Atom Selection:** Backbone + Sugar\n | |
| - For purines (A, G): N9, C8, C4\n | |
| - For pyrimidines (C, U): N1, C2, C6\n | |
| - For backbone and sugar atoms: "P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"\n | |
| """) | |
| # Create two columns for Reference and Query | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.markdown("### π Reference Structures") | |
| if ref_structure_data: | |
| selected_ref_name = st.selectbox( | |
| "Select structure to configure (excluding two bases in 5' and 3' by default)", | |
| options=[s['name'] for s in ref_structure_data], | |
| key="ref_dropdown", | |
| help="Choose a reference structure to configure its residue selection" | |
| ) | |
| selected_ref = next((s for s in ref_structure_data if s['name'] == selected_ref_name), None) | |
| if selected_ref: | |
| st.markdown(f"**{selected_ref['name']}** ({selected_ref['num_residues']} residues)") | |
| # Display residue table | |
| structure_info = get_structure_info(selected_ref['path']) | |
| info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']] | |
| info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type'] | |
| info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1 | |
| info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']] | |
| with st.expander("π View Residue Table", expanded=False): | |
| st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38)) | |
| # Selection method | |
| selection_method = st.radio( | |
| f"Selection method for {selected_ref['name']}", | |
| ["Select by range", "Select specific residues", "Use all residues"], | |
| key=f"method_ref_{selected_ref['name']}", | |
| index=1, | |
| horizontal=True | |
| ) | |
| selected_indices = [] | |
| if selection_method == "Select by range": | |
| current_selection = st.session_state['ref_selections'].get(selected_ref['name'], []) | |
| default_start = current_selection[0] + n_term_trim_ref if current_selection else n_term_trim_ref | |
| default_end = current_selection[-1] + 1 if current_selection else max(n_term_trim_ref, len(structure_info) - c_term_trim_ref) | |
| c1, c2 = st.columns(2) | |
| with c1: | |
| start_idx = st.number_input( | |
| "Start index (1-based)", | |
| min_value=1, | |
| max_value=len(structure_info), | |
| value=default_start, | |
| key=f"start_ref_{selected_ref['name']}" | |
| ) | |
| with c2: | |
| end_idx = st.number_input( | |
| "End index (1-based, inclusive)", | |
| min_value=1, | |
| max_value=len(structure_info), | |
| value=default_end, | |
| key=f"end_ref_{selected_ref['name']}" | |
| ) | |
| if start_idx <= end_idx: | |
| selected_indices = list(range(start_idx - 1, end_idx)) | |
| st.success(f"β Selected residues: {[i+1 for i in selected_indices]}") | |
| # Auto-save the selection | |
| st.session_state['ref_selections'][selected_ref['name']] = selected_indices | |
| else: | |
| st.error("Start index must be β€ end index") | |
| elif selection_method == "Select specific residues": | |
| # Always use current trim values for default selection (updates when trim values change) | |
| default_names = [structure_info[i]['full_name'] for i in range(n_term_trim_ref, len(structure_info)-c_term_trim_ref)] | |
| selected_names = st.multiselect( | |
| "Select residues", | |
| options=[info['full_name'] for info in structure_info], | |
| default=default_names, | |
| key=f"specific_ref_{selected_ref['name']}_n{n_term_trim_ref}_c{c_term_trim_ref}" | |
| ) | |
| name_to_idx = {info['full_name']: info['index'] for info in structure_info} | |
| selected_indices = [name_to_idx[name] for name in selected_names] | |
| selected_indices.sort() | |
| if selected_indices: | |
| st.success(f"β Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}") | |
| # Auto-save the selection | |
| st.session_state['ref_selections'][selected_ref['name']] = selected_indices | |
| else: # Use all residues | |
| selected_indices = list(range(len(structure_info))) | |
| st.info(f"β Using all {len(selected_indices)} residues") | |
| # Auto-save the selection | |
| st.session_state['ref_selections'][selected_ref['name']] = selected_indices | |
| # Show current saved selection (now always up-to-date) | |
| if selected_ref['name'] in st.session_state['ref_selections']: | |
| saved_indices = st.session_state['ref_selections'][selected_ref['name']] | |
| st.info(f"**Current saved selection:** {len(saved_indices)} residues: {[i+1 for i in saved_indices]}") | |
| else: | |
| st.info("Upload reference structures to configure") | |
| with col2: | |
| st.markdown("### π Query Structures") | |
| if query_structure_data: | |
| selected_query_name = st.selectbox( | |
| "Select structure to configure (excluding two bases in 5' and 3' by default)", | |
| options=[s['name'] for s in query_structure_data], | |
| key="query_dropdown", | |
| help="Choose a query structure to configure its residue selection" | |
| ) | |
| selected_query = next((s for s in query_structure_data if s['name'] == selected_query_name), None) | |
| if selected_query: | |
| st.markdown(f"**{selected_query['name']}** ({selected_query['num_residues']} residues)") | |
| # Display residue table | |
| structure_info = get_structure_info(selected_query['path']) | |
| info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']] | |
| info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type'] | |
| info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1 | |
| info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']] | |
| with st.expander("π View Residue Table", expanded=False): | |
| st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38)) | |
| # Selection method | |
| selection_method = st.radio( | |
| f"Selection method for {selected_query['name']}", | |
| ["Select by range", "Select specific residues", "Use all residues"], | |
| key=f"method_query_{selected_query['name']}", | |
| index=1, | |
| horizontal=True | |
| ) | |
| selected_indices = [] | |
| if selection_method == "Select by range": | |
| current_selection = st.session_state['query_selections'].get(selected_query['name'], []) | |
| default_start = current_selection[0] + n_term_trim_query if current_selection else 3 | |
| default_end = current_selection[-1] + 1 if current_selection else max(2, len(structure_info) - c_term_trim_query) | |
| c1, c2 = st.columns(2) | |
| with c1: | |
| start_idx = st.number_input( | |
| "Start index (1-based)", | |
| min_value=1, | |
| max_value=len(structure_info), | |
| value=default_start, | |
| key=f"start_query_{selected_query['name']}" | |
| ) | |
| with c2: | |
| end_idx = st.number_input( | |
| "End index (1-based, inclusive)", | |
| min_value=1, | |
| max_value=len(structure_info), | |
| value=default_end, | |
| key=f"end_query_{selected_query['name']}" | |
| ) | |
| if start_idx <= end_idx: | |
| selected_indices = list(range(start_idx - 1, end_idx)) | |
| st.success(f"β Selected residues: {[i+1 for i in selected_indices]}") | |
| # Auto-save the selection | |
| st.session_state['query_selections'][selected_query['name']] = selected_indices | |
| else: | |
| st.error("Start index must be β€ end index") | |
| elif selection_method == "Select specific residues": | |
| # Always use current trim values for default selection (updates when trim values change) | |
| default_names = [structure_info[i]['full_name'] for i in range(n_term_trim_query, len(structure_info)-c_term_trim_query)] | |
| selected_names = st.multiselect( | |
| "Select residues", | |
| options=[info['full_name'] for info in structure_info], | |
| default=default_names, | |
| key=f"specific_query_{selected_query['name']}_n{n_term_trim_query}_c{c_term_trim_query}" | |
| ) | |
| name_to_idx = {info['full_name']: info['index'] for info in structure_info} | |
| selected_indices = [name_to_idx[name] for name in selected_names] | |
| selected_indices.sort() | |
| if selected_indices: | |
| st.success(f"β Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}") | |
| # Auto-save the selection | |
| st.session_state['query_selections'][selected_query['name']] = selected_indices | |
| else: # Use all residues | |
| selected_indices = list(range(len(structure_info))) | |
| st.info(f"β Using all {len(selected_indices)} residues") | |
| # Auto-save the selection | |
| st.session_state['query_selections'][selected_query['name']] = selected_indices | |
| # Show current saved selection (now always up-to-date) | |
| if selected_query['name'] in st.session_state['query_selections']: | |
| saved_indices = st.session_state['query_selections'][selected_query['name']] | |
| st.info(f"**Current saved selection:** {len(saved_indices)} residues: {[i+1 for i in saved_indices]}") | |
| else: | |
| st.info("Upload query structures to configure") | |
| # Step 4: Window Configuration | |
| st.sidebar.subheader("3οΈβ£ Window Configuration") | |
| # Check if all structures have selections | |
| all_ref_have_selections = all(s['name'] in st.session_state['ref_selections'] for s in ref_structure_data) | |
| all_query_have_selections = all(s['name'] in st.session_state['query_selections'] for s in query_structure_data) | |
| if all_ref_have_selections and all_query_have_selections and ref_structure_data and query_structure_data: | |
| # Find minimum selection size | |
| all_selections = list(st.session_state['ref_selections'].values()) + list(st.session_state['query_selections'].values()) | |
| min_selection_size = min(len(sel) for sel in all_selections) | |
| window_size = st.sidebar.number_input( | |
| "Window Size", | |
| min_value=2, | |
| max_value=min_selection_size, | |
| value=min(4, min_selection_size), | |
| step=1, | |
| help="Number of residues per comparison window" | |
| ) | |
| window_type = st.sidebar.radio( | |
| "Window Type", | |
| ["contiguous", "non-contiguous", "both"], | |
| index=0, | |
| help="Contiguous: sliding windows. Non-contiguous: all combinations" | |
| ) | |
| else: | |
| st.sidebar.warning("β οΈ Configure selections first") | |
| window_size = 4 | |
| window_type = "contiguous" | |
| # Step 5: Run Analysis | |
| st.sidebar.subheader("4οΈβ£ Run Analysis") | |
| can_run = (all_ref_have_selections and all_query_have_selections and | |
| ref_structure_data and query_structure_data) | |
| if st.sidebar.button("π Run Pairwise Analysis", type="primary", disabled=not can_run): | |
| if not can_run: | |
| st.error("Please upload and configure both reference and query structures") | |
| return | |
| # Run comparisons | |
| with st.spinner("Analyzing structures..."): | |
| results = [] | |
| # For each reference structure | |
| for ref_struct in ref_structure_data: | |
| ref_indices = st.session_state['ref_selections'][ref_struct['name']] | |
| ref_windows = generate_windows_from_selection(ref_indices, window_size, window_type) | |
| if not ref_windows: | |
| continue | |
| # For each reference window | |
| for ref_window in ref_windows: | |
| # Extract reference coords | |
| ref_coords = extract_window_coords(ref_struct['residues'], ref_window) | |
| ref_com = calculate_COM(ref_coords) | |
| ref_sequence = ''.join([ref_struct['residues'][i]['resname'] for i in ref_window]) | |
| # Compare against all query structures | |
| for query_struct in query_structure_data: | |
| query_indices = st.session_state['query_selections'][query_struct['name']] | |
| query_windows = generate_windows_from_selection(query_indices, window_size, window_type) | |
| for query_window in query_windows: | |
| # Extract query coords | |
| query_coords = extract_window_coords(query_struct['residues'], query_window) | |
| query_com = calculate_COM(query_coords) | |
| query_sequence = ''.join([query_struct['residues'][i]['resname'] for i in query_window]) | |
| # Calculate RMSD | |
| U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com) | |
| if U is None or RMSD is None: | |
| RMSD = 999.0 | |
| U = np.eye(3) | |
| results.append({ | |
| 'Reference': ref_struct['name'], | |
| 'Ref_Window': ref_window, | |
| 'Ref_Sequence': ref_sequence, | |
| 'Query': query_struct['name'], | |
| 'Query_Window': query_window, | |
| 'Query_Sequence': query_sequence, | |
| 'RMSD': RMSD, | |
| 'Rotation_Matrix': U, | |
| 'Ref_COM': ref_com, | |
| 'Query_COM': query_com, | |
| 'Ref_Path': ref_struct['path'], | |
| 'Query_Path': query_struct['path'] | |
| }) | |
| results_df = pd.DataFrame(results) | |
| st.session_state['results'] = results_df | |
| st.session_state['ref_structure_data'] = ref_structure_data | |
| st.session_state['query_structure_data'] = query_structure_data | |
| st.success(f"β Analysis complete! {len(results_df)} comparisons performed.") | |
| # Display results | |
| if 'results' in st.session_state: | |
| results_df = st.session_state['results'] | |
| st.markdown("---") | |
| st.subheader("π Results Summary") | |
| # RMSD threshold filter | |
| col1, col2 = st.columns([1, 3]) | |
| with col1: | |
| rmsd_threshold = st.slider( | |
| "RMSD Threshold (Γ )", | |
| min_value=0.0, | |
| max_value=10.0, | |
| value=3.0, | |
| step=0.1 | |
| ) | |
| filtered_df = results_df[results_df['RMSD'] <= rmsd_threshold] | |
| with col2: | |
| st.metric("Comparisons Below Threshold", f"{len(filtered_df)} / {len(results_df)}") | |
| # Best match per Reference-Query pair | |
| st.markdown("### π Best Match per Reference-Query Pair") | |
| if len(filtered_df) > 0: | |
| # Group by Reference and Query to find best match for each pair | |
| best_matches = filtered_df.loc[filtered_df.groupby(['Reference', 'Query'])['RMSD'].idxmin()] | |
| best_display = best_matches[['Reference', 'Query', 'Ref_Sequence', 'Query_Sequence', 'RMSD']].copy() | |
| best_display['RMSD'] = best_display['RMSD'].round(3) | |
| best_display.columns = ['Reference', 'Query', 'Ref Sequence', 'Query Sequence', 'RMSD (Γ )'] | |
| st.dataframe(best_display, use_container_width=True) | |
| else: | |
| st.warning("No matches found below threshold") | |
| # Full results | |
| with st.expander("π All Comparison Results"): | |
| if len(filtered_df) > 0: | |
| display_df = filtered_df[['Reference', 'Ref_Window', 'Ref_Sequence', 'Query', 'Query_Window', 'Query_Sequence', 'RMSD']].copy() | |
| # Format the window indices to be 1-based | |
| display_df['Ref_Residues'] = display_df['Ref_Window'].apply(lambda x: ','.join([str(i+1) for i in x])) | |
| display_df['Query_Residues'] = display_df['Query_Window'].apply(lambda x: ','.join([str(i+1) for i in x])) | |
| # Reorder columns | |
| display_df = display_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'RMSD']] | |
| display_df['RMSD'] = display_df['RMSD'].round(3) | |
| display_df = display_df.sort_values('RMSD').reset_index(drop=True) | |
| # Rename columns for better display | |
| display_df.columns = ['Reference', 'Ref_Indices', 'Ref_Sequence', 'Query', 'Query_Indices', 'Query_Sequence', 'RMSD (Γ )'] | |
| st.dataframe(display_df, use_container_width=True, height=400) | |
| else: | |
| st.info("No results to display") | |
| # Visualization | |
| st.markdown("---") | |
| st.subheader("π¬ 3D Structure Visualization") | |
| if len(filtered_df) > 0: | |
| st.markdown("**Select a comparison to visualize:**") | |
| # Create dropdown options | |
| viz_options = [] | |
| for idx, row in filtered_df.iterrows(): | |
| ref_res_str = ','.join([str(i+1) for i in row['Ref_Window']]) | |
| query_res_str = ','.join([str(i+1) for i in row['Query_Window']]) | |
| option_text = f"{row['Reference']}[{ref_res_str}] ({row['Ref_Sequence']}) vs {row['Query']}[{query_res_str}] ({row['Query_Sequence']}) | RMSD: {row['RMSD']:.3f} Γ " | |
| viz_options.append((idx, option_text)) | |
| # Sort by RMSD | |
| viz_options.sort(key=lambda x: filtered_df.loc[x[0], 'RMSD']) | |
| selected_viz_idx = st.selectbox( | |
| "Choose comparison to visualize", | |
| options=[opt[0] for opt in viz_options], | |
| format_func=lambda idx: next(opt[1] for opt in viz_options if opt[0] == idx), | |
| help="All comparisons below RMSD threshold, sorted by RMSD" | |
| ) | |
| # Get the selected comparison | |
| selected_row = filtered_df.loc[selected_viz_idx] | |
| # Import visualization function | |
| from visualization import create_structure_visualization | |
| # Display RMSD info | |
| #st.info(f"**RMSD: {selected_row['RMSD']:.3f} Γ ** ({len(selected_row['Query_Indices'])} residues) | Reference: {selected_row['Reference']}{selected_row['Ref_Residues']} ({selected_row['Ref_Sequence']}) | Query: {selected_row['Query']}{selected_row['Query_Residues']} ({selected_row['Query_Sequence']})") | |
| # Create visualization - wider display | |
| col1, col2, col3 = st.columns([0.5, 4, 0.5]) | |
| with col2: | |
| try: | |
| viz_html = create_structure_visualization( | |
| selected_row['Ref_Path'], | |
| selected_row['Query_Path'], | |
| selected_row['Ref_Window'], | |
| selected_row['Query_Window'], | |
| selected_row['Rotation_Matrix'], | |
| selected_row['Ref_COM'], | |
| selected_row['Query_COM'], | |
| selected_row['RMSD'], | |
| ref_name=selected_row['Reference'], | |
| query_name=selected_row['Query'], | |
| ref_sequence=selected_row['Ref_Sequence'], | |
| query_sequence=selected_row['Query_Sequence'] | |
| ) | |
| st.components.v1.html(viz_html, width=1400, height=750, scrolling=False) | |
| except Exception as e: | |
| st.error(f"Error creating visualization: {str(e)}") | |
| import traceback | |
| st.code(traceback.format_exc()) | |
| # Automatic Annotation Info | |
| st.markdown("---") | |
| st.success("β **Automatic Annotation:** When you click 'Download PNG' in the 3D viewer above, the image automatically includes RMSD, structure names, and sequences!") | |
| st.info("π‘ **Customize font size:** Use the 'Annotation Font Size' dropdown in the viewer controls (top-right) to choose from Small, Medium, Large (default), or Extra Large fonts!") | |
| # Show transformation details | |
| with st.expander("π§ Transformation Details"): | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.markdown("**Rotation Matrix (U):**") | |
| st.dataframe( | |
| pd.DataFrame(selected_row['Rotation_Matrix']).round(4), | |
| use_container_width=True | |
| ) | |
| with col2: | |
| st.markdown("**Translation Vectors:**") | |
| st.write(f"Reference COM: [{selected_row['Ref_COM'][0]:.3f}, {selected_row['Ref_COM'][1]:.3f}, {selected_row['Ref_COM'][2]:.3f}]") | |
| st.write(f"Query COM: [{selected_row['Query_COM'][0]:.3f}, {selected_row['Query_COM'][1]:.3f}, {selected_row['Query_COM'][2]:.3f}]") | |
| # Download aligned structures | |
| with st.expander("πΎ Download Structure Files"): | |
| st.markdown("**Download extracted and aligned structures for external visualization**") | |
| from visualization import extract_window_pdb, transform_pdb_string | |
| # Extract reference window | |
| ref_pdb = extract_window_pdb( | |
| selected_row['Ref_Path'], | |
| selected_row['Ref_Window'] | |
| ) | |
| # Extract and transform query window | |
| query_pdb = extract_window_pdb( | |
| selected_row['Query_Path'], | |
| selected_row['Query_Window'] | |
| ) | |
| query_aligned_pdb = transform_pdb_string( | |
| query_pdb, | |
| selected_row['Rotation_Matrix'], | |
| selected_row['Query_COM'], | |
| selected_row['Ref_COM'] | |
| ) | |
| col1, col2, col3 = st.columns(3) | |
| with col1: | |
| # Reference structure | |
| ref_filename = f"ref_{selected_row['Reference'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Ref_Window']]))}.pdb" | |
| st.download_button( | |
| label="π₯ Reference PDB", | |
| data=ref_pdb, | |
| file_name=ref_filename, | |
| mime="chemical/x-pdb", | |
| help="Original reference structure (selected residues only)" | |
| ) | |
| with col2: | |
| # Query structure (original position) | |
| query_filename = f"query_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Window']]))}.pdb" | |
| st.download_button( | |
| label="π₯ Query PDB (Original)", | |
| data=query_pdb, | |
| file_name=query_filename, | |
| mime="chemical/x-pdb", | |
| help="Original query structure (selected residues only)" | |
| ) | |
| with col3: | |
| # Query structure (aligned) | |
| query_aligned_filename = f"query_aligned_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Window']]))}.pdb" | |
| st.download_button( | |
| label="π₯ Query PDB (Aligned)", | |
| data=query_aligned_pdb, | |
| file_name=query_aligned_filename, | |
| mime="chemical/x-pdb", | |
| help="Query structure aligned to reference" | |
| ) | |
| # Combined aligned structure | |
| st.markdown("---") | |
| st.markdown("**Combined Aligned Structure (Reference + Query)**") | |
| # Create combined PDB with both structures | |
| combined_pdb_lines = [] | |
| # Add header information as REMARK records | |
| combined_pdb_lines.append(f"REMARK Reference: {selected_row['Reference']}") | |
| combined_pdb_lines.append(f"REMARK Reference Residues: {','.join(map(str, [i+1 for i in selected_row['Ref_Window']]))}") | |
| combined_pdb_lines.append(f"REMARK Reference Sequence: {selected_row['Ref_Sequence']}") | |
| combined_pdb_lines.append(f"REMARK Query: {selected_row['Query']}") | |
| combined_pdb_lines.append(f"REMARK Query Residues: {','.join(map(str, [i+1 for i in selected_row['Query_Window']]))}") | |
| combined_pdb_lines.append(f"REMARK Query Sequence: {selected_row['Query_Sequence']}") | |
| combined_pdb_lines.append(f"REMARK RMSD: {selected_row['RMSD']:.3f} Angstroms") | |
| combined_pdb_lines.append("MODEL 1") | |
| # Add reference atoms with chain A | |
| for line in ref_pdb.split('\n'): | |
| if line.startswith(('ATOM', 'HETATM')): | |
| # Set chain to A for reference | |
| modified_line = line[:21] + 'A' + line[22:] | |
| combined_pdb_lines.append(modified_line) | |
| combined_pdb_lines.append("ENDMDL") | |
| combined_pdb_lines.append("MODEL 2") | |
| # Add aligned query atoms with chain B | |
| for line in query_aligned_pdb.split('\n'): | |
| if line.startswith(('ATOM', 'HETATM')): | |
| # Set chain to B for query | |
| modified_line = line[:21] + 'B' + line[22:] | |
| combined_pdb_lines.append(modified_line) | |
| combined_pdb_lines.append("ENDMDL") | |
| combined_pdb_lines.append("END") | |
| combined_pdb = '\n'.join(combined_pdb_lines) | |
| combined_filename = f"aligned_{selected_row['Reference'].replace('.pdb', '')}_{selected_row['Query'].replace('.pdb', '')}_rmsd_{selected_row['RMSD']:.3f}.pdb" | |
| st.download_button( | |
| label="π₯ Download Combined Aligned Structure", | |
| data=combined_pdb, | |
| file_name=combined_filename, | |
| mime="chemical/x-pdb", | |
| help="Reference (chain A) and aligned query (chain B) in one file", | |
| use_container_width=True | |
| ) | |
| st.info("π‘ **Tip:** The combined PDB contains reference (chain A) and aligned query (chain B) - ready for PyMOL/Chimera") | |
| else: | |
| st.warning("No comparisons below RMSD threshold to visualize") | |
| # Export Results | |
| st.markdown("---") | |
| st.subheader("πΎ Export Results") | |
| col1, col2 = st.columns(2) | |
| with col1: | |
| st.markdown("**Download Results Table**") | |
| if len(filtered_df) > 0: | |
| export_df = filtered_df[['Reference', 'Ref_Window', 'Ref_Sequence', 'Query', 'Query_Window', 'Query_Sequence', 'RMSD']].copy() | |
| export_df['Ref_Residues'] = export_df['Ref_Window'].apply(lambda x: ','.join([str(i+1) for i in x])) | |
| export_df['Query_Residues'] = export_df['Query_Window'].apply(lambda x: ','.join([str(i+1) for i in x])) | |
| export_df = export_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'RMSD']] | |
| export_df = export_df.sort_values('RMSD').reset_index(drop=True) | |
| csv = export_df.to_csv(index=False) | |
| st.download_button( | |
| label="π₯ Download Results (CSV)", | |
| data=csv, | |
| file_name="rna_pairwise_comparison_results.csv", | |
| mime="text/csv" | |
| ) | |
| else: | |
| st.info("No results to export") | |
| with col2: | |
| st.markdown("**Download Aligned Structures**") | |
| if len(filtered_df) > 0 and st.button("π¦ Generate PDB Archive"): | |
| with st.spinner("Creating archive..."): | |
| import zipfile | |
| from visualization_multi import extract_window_pdb, transform_pdb_string | |
| zip_buffer = io.BytesIO() | |
| with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file: | |
| for idx, row in filtered_df.iterrows(): | |
| comp_name = f"comp_{idx:03d}_rmsd_{row['RMSD']:.3f}" | |
| # Reference | |
| ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Window']) | |
| zip_file.writestr(f"{comp_name}/reference.pdb", ref_pdb) | |
| # Query original | |
| query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Window']) | |
| zip_file.writestr(f"{comp_name}/query_original.pdb", query_pdb) | |
| # Query aligned | |
| query_aligned = transform_pdb_string( | |
| query_pdb, | |
| row['Rotation_Matrix'], | |
| row['Query_COM'], | |
| row['Ref_COM'] | |
| ) | |
| zip_file.writestr(f"{comp_name}/query_aligned.pdb", query_aligned) | |
| # README | |
| readme = f"""Comparison #{idx} | |
| RMSD: {row['RMSD']:.3f} Γ | |
| Atom Selection: Backbone + Sugar (default) | |
| Reference: {row['Reference']} | |
| Residues: {','.join([str(i+1) for i in row['Ref_Window']])} | |
| Sequence: {row['Ref_Sequence']} | |
| Query: {row['Query']} | |
| Residues: {','.join([str(i+1) for i in row['Query_Window']])} | |
| Sequence: {row['Query_Sequence']} | |
| """ | |
| zip_file.writestr(f"{comp_name}/README.txt", readme) | |
| zip_buffer.seek(0) | |
| st.download_button( | |
| label="π₯ Download PDB Archive (ZIP)", | |
| data=zip_buffer.getvalue(), | |
| file_name="aligned_structures.zip", | |
| mime="application/zip", | |
| help=f"Contains {len(filtered_df)} comparison sets with reference, original query, and aligned query PDBs" | |
| ) | |
| st.success(f"β Archive ready! Contains {len(filtered_df)} comparisons with 3 PDB files each.") | |
# Script entry point: build and render the app only when this file is executed
# directly (for example by the Streamlit runner), not when it is imported.
if __name__ == "__main__":
    main()