"""
RNA Motif Multi-Structure Comparison Tool - Pairwise Mode

Streamlit app for comparing multiple RNA motif structures with separate
reference and query sets.
Uses dropdown menu for residue configuration and default Backbone + Sugar
atom selection.
"""

import io
import os
import tempfile
import traceback
import zipfile
from itertools import combinations
from pathlib import Path

import numpy as np
import pandas as pd
import streamlit as st

# Import our RMSD calculation functions
from rmsd_utils import (
    parse_residue_atoms,
    get_backbone_sugar_and_selectbase_coords_fixed,
    calculate_COM,
    calculate_rotation_rmsd,
    translate_rotate_coords,
    get_backbone_sugar_coords_from_residue,
    get_base_coords_from_residue,
)

# Import example data loader (optional)
try:
    from example_data_loader import (
        get_example_pdbs,
        load_example_as_uploaded_file,
        get_example_info,
    )
    EXAMPLES_AVAILABLE = True
except ImportError:
    EXAMPLES_AVAILABLE = False

# Page configuration. This must run before any other Streamlit command, which
# is why the "loader not available" warning is emitted only afterwards.
st.set_page_config(
    page_title="RNA Motif Multi-Structure Comparison - Pairwise",
    page_icon="🧬",
    layout="wide",
    initial_sidebar_state="expanded"
)

if not EXAMPLES_AVAILABLE:
    st.warning("Example data loader not available. Please use 'Upload Files' mode.")

# NOTE(review): kept after set_page_config, as in the original file, in case
# importing this module issues Streamlit commands -- confirm.
from image_annotator import annotate_alignment_image

# Custom CSS - IMPROVED VERSION with larger fonts
# NOTE(review): the stylesheet body is empty in the visible source; it looks
# like the markup was stripped -- confirm against the original file.
st.markdown(""" """, unsafe_allow_html=True)

# RMSD value recorded when the superposition fails (calculate_rotation_rmsd
# returns None for either output).
_FAILED_RMSD = 999.0

# Column order of the results table. Passing it explicitly keeps the columns
# present even when no comparison was produced.
_RESULT_COLUMNS = [
    'Reference', 'Ref_Window', 'Ref_Sequence',
    'Query', 'Query_Window', 'Query_Sequence',
    'RMSD', 'Rotation_Matrix', 'Ref_COM', 'Query_COM',
    'Ref_Path', 'Query_Path',
]


def save_uploaded_file(uploaded_file, directory):
    """Save an uploaded file into ``directory`` and return the written path.

    Args:
        uploaded_file: Object exposing ``name`` and ``getbuffer()``.
        directory: Target directory; created if it does not exist.

    Returns:
        Path of the file that was written.
    """
    os.makedirs(directory, exist_ok=True)
    # Use only the final path component so a crafted name cannot escape
    # ``directory``.
    file_path = os.path.join(directory, os.path.basename(uploaded_file.name))
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path


def get_structure_info(pdb_path):
    """
    Get information about a structure's residues.

    Args:
        pdb_path: Path to PDB file

    Returns:
        List of dicts with residue info:
        [{index, resnum, resname, full_name}, ...]
    """
    residues = parse_residue_atoms(pdb_path)
    return [
        {
            'index': idx,
            'resnum': res['resnum'],
            'resname': res['resname'],
            'full_name': f"{idx+1}. {res['resname']} (residue #{res['resnum']})",
        }
        for idx, res in enumerate(residues)
    ]


def load_structure_data(uploaded_files, temp_dir):
    """Save each uploaded file to ``temp_dir`` and parse its residues.

    Returns:
        List of dicts with keys ``name``, ``path``, ``residues`` and
        ``num_residues``, one per uploaded file, in input order.
    """
    structure_data = []
    for uploaded_file in uploaded_files:
        file_path = save_uploaded_file(uploaded_file, temp_dir)
        residues = parse_residue_atoms(file_path)
        structure_data.append({
            'name': uploaded_file.name,
            'path': file_path,
            'residues': residues,
            'num_residues': len(residues),
        })
    return structure_data


def extract_window_coords(residues, window_indices):
    """
    Extract coordinates for a specific window of residues.

    Args:
        residues: List of all residues
        window_indices: List of indices to extract

    Returns:
        numpy array of coordinates (backbone/sugar atoms followed by base
        atoms for each residue, in window order). Indices past the end of
        ``residues`` are skipped.
    """
    all_coords = []
    for idx in window_indices:
        if idx < len(residues):
            residue = residues[idx]
            all_coords.extend(get_backbone_sugar_coords_from_residue(residue))
            all_coords.extend(get_base_coords_from_residue(residue))
    return np.asarray(all_coords)


def generate_windows_from_selection(selected_indices, window_size, window_type):
    """Generate comparison windows from selected residue indices.

    Args:
        selected_indices: Ordered residue indices chosen by the user.
        window_size: Number of residues per window.
        window_type: ``"contiguous"`` for sliding windows over the selection,
            ``"non-contiguous"`` for all combinations except the sliding
            windows; any other value (the UI passes ``"both"``) yields all
            combinations.

    Returns:
        List of windows. Empty when the selection is shorter than
        ``window_size``; a single window holding the whole selection when the
        lengths are equal, regardless of ``window_type``.
    """
    n_selected = len(selected_indices)
    if n_selected < window_size:
        return []
    if n_selected == window_size:
        return [selected_indices]

    # "Contiguous" means adjacent positions in the selection, not necessarily
    # adjacent residue numbers.
    sliding = [
        selected_indices[i:i + window_size]
        for i in range(n_selected - window_size + 1)
    ]
    if window_type == "contiguous":
        return sliding

    all_combos = combinations(selected_indices, window_size)
    if window_type == "non-contiguous":
        # Set lookup instead of scanning a list for every combination.
        contiguous = {tuple(window) for window in sliding}
        return [list(combo) for combo in all_combos if combo not in contiguous]

    # "both": every combination, sliding windows included.
    return [list(combo) for combo in all_combos]


def _window_label(window, sep=','):
    """Return the window's indices as 1-based numbers joined by ``sep``."""
    return sep.join(str(i + 1) for i in window)


def _window_sequence(residues, window):
    """Concatenate the residue names of ``window`` into one string."""
    return ''.join(residues[i]['resname'] for i in window)


def _init_session_state():
    """Create the per-session temp directory and default state entries."""
    if 'temp_dir' not in st.session_state:
        st.session_state['temp_dir'] = tempfile.mkdtemp()
    defaults = {'data_mode': 'upload', 'ref_selections': {}, 'query_selections': {}}
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _select_data_mode():
    """Render sidebar step 1 and store the chosen mode in session state."""
    st.sidebar.title("⚙️ Configuration")
    st.sidebar.subheader("1️⃣ Data Source")

    if EXAMPLES_AVAILABLE:
        data_mode = st.sidebar.radio(
            "Choose data source",
            ["Upload Files", "Use Example Data"],
            key="data_mode_radio",
            help="Upload your own PDB files or use provided examples"
        )
    else:
        st.sidebar.info("ℹ️ Example data not available. Using upload mode.")
        data_mode = "Upload Files"

    if data_mode == "Upload Files":
        st.session_state['data_mode'] = 'upload'
        # Reset example initialization when switching to upload mode
        if 'example_mode_initialized' in st.session_state:
            del st.session_state['example_mode_initialized']
    else:
        st.session_state['data_mode'] = 'example'


def _upload_structure_files():
    """Render the two file uploaders; return (reference_files, query_files)."""
    st.sidebar.markdown("**Upload Reference Structures**")
    ref_uploaded = st.sidebar.file_uploader(
        "Reference PDB files",
        type=['pdb'],
        accept_multiple_files=True,
        key="ref_uploader",
        help="Upload one or more reference structures (e.g., Pentaloop)"
    )
    st.sidebar.markdown("**Upload Query Structures**")
    query_uploaded = st.sidebar.file_uploader(
        "Query PDB files",
        type=['pdb'],
        accept_multiple_files=True,
        key="query_uploader",
        help="Upload one or more query structures (e.g., Tetraloop)"
    )
    return (ref_uploaded or []), (query_uploaded or [])


def _example_structure_files():
    """Render the example pickers; return (reference_files, query_files).

    Any failure is reported in the sidebar and yields two empty lists.
    """
    if not EXAMPLES_AVAILABLE:
        st.sidebar.error("❌ Example data loader module not found")
        return [], []

    # Boundary handler: the loader is project code whose failure modes are not
    # visible here, so report whatever it raises instead of crashing the app.
    try:
        examples = get_example_pdbs()
        if not examples:
            st.sidebar.error("❌ No example data available. Please add PDB files to 'data/' folder")
            st.sidebar.info("💡 Create a 'data/' folder in the same directory as the app and add .pdb files")
            return [], []

        example_names = sorted(examples.keys())

        # Auto-select examples when first switching to example mode
        if 'example_mode_initialized' not in st.session_state:
            st.session_state['example_mode_initialized'] = True
            # First half as reference, second half as query
            mid_point = max(1, len(example_names) // 2)
            st.session_state['auto_ref_examples'] = example_names[:mid_point]
            st.session_state['auto_query_examples'] = example_names[mid_point:mid_point*2]

        st.sidebar.markdown("**Select Reference Examples**")
        ref_example_names = st.sidebar.multiselect(
            "Reference structures",
            options=example_names,
            default=st.session_state.get('auto_ref_examples', []),
            key="ref_examples",
            help="Select example reference structures"
        )
        if ref_example_names:
            st.sidebar.success(f"✅ {len(ref_example_names)} reference file(s) selected")

        st.sidebar.markdown("**Select Query Examples**")
        query_example_names = st.sidebar.multiselect(
            "Query structures",
            options=example_names,
            default=st.session_state.get('auto_query_examples', []),
            key="query_examples",
            help="Select example query structures"
        )
        if query_example_names:
            st.sidebar.success(f"✅ {len(query_example_names)} query file(s) selected")

        # Convert names to paths and load files
        try:
            reference_files = [load_example_as_uploaded_file(examples[name]) for name in ref_example_names]
            query_files = [load_example_as_uploaded_file(examples[name]) for name in query_example_names]
        except Exception as load_error:
            st.sidebar.error(f"Error loading files: {str(load_error)}")
            st.sidebar.code(traceback.format_exc())
            return [], []
        return reference_files, query_files
    except Exception as e:
        st.sidebar.error(f"❌ Error loading examples: {str(e)}")
        st.sidebar.code(traceback.format_exc())
        return [], []


def _show_file_status(reference_files, query_files):
    """Summarize in the sidebar how many structures are loaded."""
    if reference_files and query_files:
        st.sidebar.success(f"✅ {len(reference_files)} reference + {len(query_files)} query structures")
    elif reference_files:
        st.sidebar.info(f"ℹ️ {len(reference_files)} reference structures loaded")
    elif query_files:
        st.sidebar.info(f"ℹ️ {len(query_files)} query structures loaded")
    else:
        st.sidebar.warning("⚠️ Upload or select structures")


def _trim_controls(role, title):
    """Render the 5'/3' trim inputs for one role; return (n_trim, c_trim).

    Args:
        role: ``'ref'`` or ``'query'``; used in widget labels and keys.
        title: Human-readable role name shown in the section heading.
    """
    st.sidebar.markdown("---")
    st.sidebar.markdown(f"**🔧 5'/3' Base Trimming ({title}) **")
    left, right = st.sidebar.columns(2)
    with left:
        n_trim = st.number_input(
            f"5' trim_{role}",
            min_value=0,
            max_value=10,
            value=2,
            step=1,
            help="Number of bases to remove from 5' end",
            key=f"n_term_trim_{role}"
        )
    with right:
        c_trim = st.number_input(
            f"3' trim_{role}",
            min_value=0,
            max_value=10,
            value=2,
            step=1,
            help="Number of bases to remove from 3' end",
            key=f"c_term_trim_{role}"
        )
    return n_trim, c_trim


def _sync_selection_state(role, structure_data, n_trim, c_trim):
    """Reset stored selections when the file set changes, then auto-initialize.

    The automatic selection drops ``n_trim`` residues from the start and
    ``c_trim`` from the end of each structure, or keeps every residue when the
    structure is too short for that.
    """
    files_key = f'current_{role}_files'
    selections_key = f'{role}_selections'
    init_key = f'{role}_auto_initialized'

    current_files = {s['name'] for s in structure_data}
    if files_key not in st.session_state:
        st.session_state[files_key] = current_files

    # Reset selections if files changed
    if st.session_state[files_key] != current_files:
        st.session_state[files_key] = current_files
        st.session_state[selections_key] = {}
        if init_key in st.session_state:
            del st.session_state[init_key]

    if init_key not in st.session_state and structure_data:
        for struct in structure_data:
            num_res = struct['num_residues']
            if num_res > n_trim + c_trim:
                selection = list(range(n_trim, num_res - c_trim))
            else:
                selection = list(range(num_res))
            st.session_state[selections_key][struct['name']] = selection
        st.session_state[init_key] = True


def _render_selection_panel(role, noun, title, structure_data, n_trim, c_trim):
    """Render the residue-selection UI for one role inside the current column.

    Args:
        role: ``'ref'`` or ``'query'``; prefix of widget and session keys.
        noun: Lower-case role name used in help/info text.
        title: Capitalized role name used in the panel heading.
        structure_data: Structures loaded for this role.
        n_trim, c_trim: Current 5'/3' trim values for this role.
    """
    st.markdown(f"### 📋 {title} Structures")
    if not structure_data:
        st.info(f"Upload {noun} structures to configure")
        return

    selected_name = st.selectbox(
        "Select structure to configure (excluding two bases in 5' and 3' by default)",
        options=[s['name'] for s in structure_data],
        key=f"{role}_dropdown",
        help=f"Choose a {noun} structure to configure its residue selection"
    )
    selected = next((s for s in structure_data if s['name'] == selected_name), None)
    if not selected:
        return

    name = selected['name']
    selections = st.session_state[f'{role}_selections']
    st.markdown(f"**{name}** ({selected['num_residues']} residues)")

    structure_info = get_structure_info(selected['path'])
    if not structure_info:
        # Nothing to tabulate or select; the table construction below would
        # fail on an empty list.
        st.warning("No residues found in this structure")
        return
    n_res = len(structure_info)

    # Display residue table
    info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
    info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
    info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
    info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]
    with st.expander("📋 View Residue Table", expanded=False):
        st.dataframe(info_df, use_container_width=True, height=min(300, n_res * 35 + 38))

    selection_method = st.radio(
        f"Selection method for {name}",
        ["Select by range", "Select specific residues", "Use all residues"],
        key=f"method_{role}_{name}",
        index=1,
        horizontal=True
    )

    if selection_method == "Select by range":
        current_selection = selections.get(name, [])
        if current_selection:
            # Stored indices are 0-based; the inputs are 1-based.
            default_start = current_selection[0] + 1
            default_end = current_selection[-1] + 1
        else:
            default_start = int(n_trim) + 1
            default_end = n_res - int(c_trim)
        # Clamp so the defaults always lie inside the widgets' allowed range.
        default_start = max(1, min(default_start, n_res))
        default_end = max(default_start, min(default_end, n_res))

        c1, c2 = st.columns(2)
        with c1:
            start_idx = st.number_input(
                "Start index (1-based)",
                min_value=1,
                max_value=n_res,
                value=default_start,
                key=f"start_{role}_{name}"
            )
        with c2:
            end_idx = st.number_input(
                "End index (1-based, inclusive)",
                min_value=1,
                max_value=n_res,
                value=default_end,
                key=f"end_{role}_{name}"
            )

        if start_idx <= end_idx:
            selected_indices = list(range(start_idx - 1, end_idx))
            st.success(f"✓ Selected residues: {[i+1 for i in selected_indices]}")
            # Auto-save the selection
            selections[name] = selected_indices
        else:
            st.error("Start index must be ≤ end index")

    elif selection_method == "Select specific residues":
        # The trim values are part of the widget key, so changing them creates
        # a fresh widget whose default reflects the new trims.
        default_names = [
            structure_info[i]['full_name']
            for i in range(n_trim, n_res - c_trim)
        ]
        selected_names = st.multiselect(
            "Select residues",
            options=[info['full_name'] for info in structure_info],
            default=default_names,
            key=f"specific_{role}_{name}_n{n_trim}_c{c_trim}"
        )
        name_to_idx = {info['full_name']: info['index'] for info in structure_info}
        selected_indices = sorted(name_to_idx[full_name] for full_name in selected_names)

        if selected_indices:
            st.success(f"✓ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")
            # Auto-save the selection
            selections[name] = selected_indices

    else:  # Use all residues
        selected_indices = list(range(n_res))
        st.info(f"✓ Using all {len(selected_indices)} residues")
        # Auto-save the selection
        selections[name] = selected_indices

    # Show current saved selection (now always up-to-date)
    if name in selections:
        saved_indices = selections[name]
        st.info(f"**Current saved selection:** {len(saved_indices)} residues: {[i+1 for i in saved_indices]}")


def _window_configuration(ref_structure_data, query_structure_data):
    """Render sidebar step 3; return (window_size, window_type, can_run)."""
    st.sidebar.subheader("3️⃣ Window Configuration")

    ref_selections = st.session_state['ref_selections']
    query_selections = st.session_state['query_selections']
    all_ref_have_selections = all(s['name'] in ref_selections for s in ref_structure_data)
    all_query_have_selections = all(s['name'] in query_selections for s in query_structure_data)
    ready = bool(
        all_ref_have_selections and all_query_have_selections
        and ref_structure_data and query_structure_data
    )
    if not ready:
        st.sidebar.warning("⚠️ Configure selections first")
        return 4, "contiguous", False

    # Find minimum selection size
    all_selections = list(ref_selections.values()) + list(query_selections.values())
    min_selection_size = min(len(sel) for sel in all_selections)
    if min_selection_size < 2:
        # The window-size input needs max_value >= min_value (2).
        st.sidebar.warning("⚠️ Each structure needs at least 2 selected residues")
        return 4, "contiguous", False

    window_size = st.sidebar.number_input(
        "Window Size",
        min_value=2,
        max_value=min_selection_size,
        value=min(4, min_selection_size),
        step=1,
        help="Number of residues per comparison window"
    )
    window_type = st.sidebar.radio(
        "Window Type",
        ["contiguous", "non-contiguous", "both"],
        index=0,
        help="Contiguous: sliding windows. Non-contiguous: all combinations"
    )
    return window_size, window_type, True


def _prepare_windows(struct, indices, window_size, window_type):
    """Return [(window, coords, com, sequence), ...] for one structure."""
    prepared = []
    for window in generate_windows_from_selection(indices, window_size, window_type):
        coords = extract_window_coords(struct['residues'], window)
        prepared.append((
            window,
            coords,
            calculate_COM(coords),
            _window_sequence(struct['residues'], window),
        ))
    return prepared


def _run_pairwise_analysis(ref_structure_data, query_structure_data, window_size, window_type):
    """Compare every reference window against every query window.

    Returns:
        DataFrame with the columns in ``_RESULT_COLUMNS``, one row per
        comparison, ordered by reference structure, reference window, query
        structure, query window.
    """
    # Query windows do not depend on the reference window, so build them once
    # instead of once per reference window.
    query_sets = [
        (
            query_struct,
            _prepare_windows(
                query_struct,
                st.session_state['query_selections'][query_struct['name']],
                window_size,
                window_type,
            ),
        )
        for query_struct in query_structure_data
    ]

    results = []
    for ref_struct in ref_structure_data:
        ref_indices = st.session_state['ref_selections'][ref_struct['name']]
        ref_prepared = _prepare_windows(ref_struct, ref_indices, window_size, window_type)
        for ref_window, ref_coords, ref_com, ref_sequence in ref_prepared:
            for query_struct, query_prepared in query_sets:
                for query_window, query_coords, query_com, query_sequence in query_prepared:
                    # Pass a copy: the cached query array is reused for later
                    # reference windows. Presumably the helper does not mutate
                    # its inputs, but that is not visible here -- confirm.
                    U, RMSD = calculate_rotation_rmsd(
                        ref_coords, query_coords.copy(), ref_com, query_com
                    )
                    if U is None or RMSD is None:
                        RMSD = _FAILED_RMSD
                        U = np.eye(3)

                    results.append({
                        'Reference': ref_struct['name'],
                        'Ref_Window': ref_window,
                        'Ref_Sequence': ref_sequence,
                        'Query': query_struct['name'],
                        'Query_Window': query_window,
                        'Query_Sequence': query_sequence,
                        'RMSD': RMSD,
                        'Rotation_Matrix': U,
                        'Ref_COM': ref_com,
                        'Query_COM': query_com,
                        'Ref_Path': ref_struct['path'],
                        'Query_Path': query_struct['path'],
                    })

    return pd.DataFrame(results, columns=_RESULT_COLUMNS)


def _render_summary(results_df):
    """Render threshold filter, best matches and full table; return filtered rows."""
    st.markdown("---")
    st.subheader("📊 Results Summary")

    # RMSD threshold filter
    col1, col2 = st.columns([1, 3])
    with col1:
        rmsd_threshold = st.slider(
            "RMSD Threshold (Å)",
            min_value=0.0,
            max_value=10.0,
            value=3.0,
            step=0.1
        )
    filtered_df = results_df[results_df['RMSD'] <= rmsd_threshold]
    with col2:
        st.metric("Comparisons Below Threshold", f"{len(filtered_df)} / {len(results_df)}")

    # Best match per Reference-Query pair
    st.markdown("### 🏆 Best Match per Reference-Query Pair")
    if len(filtered_df) > 0:
        best_matches = filtered_df.loc[filtered_df.groupby(['Reference', 'Query'])['RMSD'].idxmin()]
        best_display = best_matches[['Reference', 'Query', 'Ref_Sequence', 'Query_Sequence', 'RMSD']].copy()
        best_display['RMSD'] = best_display['RMSD'].round(3)
        best_display.columns = ['Reference', 'Query', 'Ref Sequence', 'Query Sequence', 'RMSD (Å)']
        st.dataframe(best_display, use_container_width=True)
    else:
        st.warning("No matches found below threshold")

    # Full results
    with st.expander("📋 All Comparison Results"):
        if len(filtered_df) > 0:
            display_df = filtered_df[['Reference', 'Ref_Window', 'Ref_Sequence',
                                      'Query', 'Query_Window', 'Query_Sequence', 'RMSD']].copy()
            # Format the window indices to be 1-based
            display_df['Ref_Residues'] = display_df['Ref_Window'].apply(_window_label)
            display_df['Query_Residues'] = display_df['Query_Window'].apply(_window_label)
            display_df = display_df[['Reference', 'Ref_Residues', 'Ref_Sequence',
                                     'Query', 'Query_Residues', 'Query_Sequence', 'RMSD']]
            display_df['RMSD'] = display_df['RMSD'].round(3)
            display_df = display_df.sort_values('RMSD').reset_index(drop=True)
            # Rename columns for better display
            display_df.columns = ['Reference', 'Ref_Indices', 'Ref_Sequence',
                                  'Query', 'Query_Indices', 'Query_Sequence', 'RMSD (Å)']
            st.dataframe(display_df, use_container_width=True, height=400)
        else:
            st.info("No results to display")

    return filtered_df


def _render_structure_downloads(selected_row):
    """Render download buttons for the selected comparison's PDB files."""
    st.markdown("**Download extracted and aligned structures for external visualization**")

    from visualization import extract_window_pdb, transform_pdb_string

    ref_pdb = extract_window_pdb(selected_row['Ref_Path'], selected_row['Ref_Window'])
    query_pdb = extract_window_pdb(selected_row['Query_Path'], selected_row['Query_Window'])
    query_aligned_pdb = transform_pdb_string(
        query_pdb,
        selected_row['Rotation_Matrix'],
        selected_row['Query_COM'],
        selected_row['Ref_COM']
    )

    ref_stem = selected_row['Reference'].replace('.pdb', '')
    query_stem = selected_row['Query'].replace('.pdb', '')
    ref_tag = _window_label(selected_row['Ref_Window'], sep='_')
    query_tag = _window_label(selected_row['Query_Window'], sep='_')

    col1, col2, col3 = st.columns(3)
    with col1:
        st.download_button(
            label="📥 Reference PDB",
            data=ref_pdb,
            file_name=f"ref_{ref_stem}_{ref_tag}.pdb",
            mime="chemical/x-pdb",
            help="Original reference structure (selected residues only)"
        )
    with col2:
        st.download_button(
            label="📥 Query PDB (Original)",
            data=query_pdb,
            file_name=f"query_{query_stem}_{query_tag}.pdb",
            mime="chemical/x-pdb",
            help="Original query structure (selected residues only)"
        )
    with col3:
        st.download_button(
            label="📥 Query PDB (Aligned)",
            data=query_aligned_pdb,
            file_name=f"query_aligned_{query_stem}_{query_tag}.pdb",
            mime="chemical/x-pdb",
            help="Query structure aligned to reference"
        )

    # Combined aligned structure
    st.markdown("---")
    st.markdown("**Combined Aligned Structure (Reference + Query)**")

    # Header information as REMARK records
    combined_pdb_lines = [
        f"REMARK Reference: {selected_row['Reference']}",
        f"REMARK Reference Residues: {_window_label(selected_row['Ref_Window'])}",
        f"REMARK Reference Sequence: {selected_row['Ref_Sequence']}",
        f"REMARK Query: {selected_row['Query']}",
        f"REMARK Query Residues: {_window_label(selected_row['Query_Window'])}",
        f"REMARK Query Sequence: {selected_row['Query_Sequence']}",
        f"REMARK RMSD: {selected_row['RMSD']:.3f} Angstroms",
    ]
    # Reference goes in MODEL 1 as chain A, aligned query in MODEL 2 as chain B.
    # Index 21 of an atom record is overwritten with the chain letter.
    for model_number, chain_id, pdb_text in (
        (1, 'A', ref_pdb),
        (2, 'B', query_aligned_pdb),
    ):
        combined_pdb_lines.append(f"MODEL {model_number}")
        combined_pdb_lines.extend(
            line[:21] + chain_id + line[22:]
            for line in pdb_text.split('\n')
            if line.startswith(('ATOM', 'HETATM'))
        )
        combined_pdb_lines.append("ENDMDL")
    combined_pdb_lines.append("END")
    combined_pdb = '\n'.join(combined_pdb_lines)

    st.download_button(
        label="📥 Download Combined Aligned Structure",
        data=combined_pdb,
        file_name=f"aligned_{ref_stem}_{query_stem}_rmsd_{selected_row['RMSD']:.3f}.pdb",
        mime="chemical/x-pdb",
        help="Reference (chain A) and aligned query (chain B) in one file",
        use_container_width=True
    )
    st.info("💡 **Tip:** The combined PDB contains reference (chain A) and aligned query (chain B) - ready for PyMOL/Chimera")


def _render_visualization(filtered_df):
    """Render the 3D viewer and per-comparison details for a chosen row."""
    st.markdown("---")
    st.subheader("🔬 3D Structure Visualization")

    if len(filtered_df) == 0:
        st.warning("No comparisons below RMSD threshold to visualize")
        return

    st.markdown("**Select a comparison to visualize:**")

    # Label and RMSD per row, keyed by DataFrame index, so the selectbox can
    # format an option with one dict lookup.
    option_labels = {}
    rmsd_by_index = {}
    for idx, row in filtered_df.iterrows():
        ref_res_str = _window_label(row['Ref_Window'])
        query_res_str = _window_label(row['Query_Window'])
        option_labels[idx] = f"{row['Reference']}[{ref_res_str}] ({row['Ref_Sequence']}) vs {row['Query']}[{query_res_str}] ({row['Query_Sequence']}) | RMSD: {row['RMSD']:.3f} Å"
        rmsd_by_index[idx] = row['RMSD']

    # Sort by RMSD (stable, so ties keep table order)
    ordered_indices = sorted(option_labels, key=rmsd_by_index.__getitem__)

    selected_viz_idx = st.selectbox(
        "Choose comparison to visualize",
        options=ordered_indices,
        format_func=lambda idx: option_labels[idx],
        help="All comparisons below RMSD threshold, sorted by RMSD"
    )
    selected_row = filtered_df.loc[selected_viz_idx]

    from visualization import create_structure_visualization

    # Create visualization - wider display
    _, center, _ = st.columns([0.5, 4, 0.5])
    with center:
        # Boundary handler: show any viewer failure in the page rather than
        # aborting the whole render.
        try:
            viz_html = create_structure_visualization(
                selected_row['Ref_Path'],
                selected_row['Query_Path'],
                selected_row['Ref_Window'],
                selected_row['Query_Window'],
                selected_row['Rotation_Matrix'],
                selected_row['Ref_COM'],
                selected_row['Query_COM'],
                selected_row['RMSD'],
                ref_name=selected_row['Reference'],
                query_name=selected_row['Query'],
                ref_sequence=selected_row['Ref_Sequence'],
                query_sequence=selected_row['Query_Sequence']
            )
            st.components.v1.html(viz_html, width=1400, height=750, scrolling=False)
        except Exception as e:
            st.error(f"Error creating visualization: {str(e)}")
            st.code(traceback.format_exc())

    # Automatic Annotation Info
    st.markdown("---")
    st.success("✅ **Automatic Annotation:** When you click 'Download PNG' in the 3D viewer above, the image automatically includes RMSD, structure names, and sequences!")
    st.info("💡 **Customize font size:** Use the 'Annotation Font Size' dropdown in the viewer controls (top-right) to choose from Small, Medium, Large (default), or Extra Large fonts!")

    # Show transformation details
    with st.expander("🔧 Transformation Details"):
        col1, col2 = st.columns(2)
        with col1:
            st.markdown("**Rotation Matrix (U):**")
            st.dataframe(
                pd.DataFrame(selected_row['Rotation_Matrix']).round(4),
                use_container_width=True
            )
        with col2:
            st.markdown("**Translation Vectors:**")
            st.write(f"Reference COM: [{selected_row['Ref_COM'][0]:.3f}, {selected_row['Ref_COM'][1]:.3f}, {selected_row['Ref_COM'][2]:.3f}]")
            st.write(f"Query COM: [{selected_row['Query_COM'][0]:.3f}, {selected_row['Query_COM'][1]:.3f}, {selected_row['Query_COM'][2]:.3f}]")

    # Download aligned structures
    with st.expander("💾 Download Structure Files"):
        _render_structure_downloads(selected_row)


def _build_pdb_archive(filtered_df):
    """Return the bytes of a ZIP holding the PDB files of every comparison."""
    # Same module as the single-comparison download, so both exports share one
    # implementation of these helpers.
    from visualization import extract_window_pdb, transform_pdb_string

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for idx, row in filtered_df.iterrows():
            comp_name = f"comp_{idx:03d}_rmsd_{row['RMSD']:.3f}"

            ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Window'])
            zip_file.writestr(f"{comp_name}/reference.pdb", ref_pdb)

            query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Window'])
            zip_file.writestr(f"{comp_name}/query_original.pdb", query_pdb)

            query_aligned = transform_pdb_string(
                query_pdb,
                row['Rotation_Matrix'],
                row['Query_COM'],
                row['Ref_COM']
            )
            zip_file.writestr(f"{comp_name}/query_aligned.pdb", query_aligned)

            # One field per line so the README is readable as plain text.
            readme = '\n'.join([
                f"Comparison #{idx}",
                f"RMSD: {row['RMSD']:.3f} Å",
                "Atom Selection: Backbone + Sugar (default)",
                f"Reference: {row['Reference']}",
                f"Residues: {_window_label(row['Ref_Window'])}",
                f"Sequence: {row['Ref_Sequence']}",
                f"Query: {row['Query']}",
                f"Residues: {_window_label(row['Query_Window'])}",
                f"Sequence: {row['Query_Sequence']}",
                "",
            ])
            zip_file.writestr(f"{comp_name}/README.txt", readme)
    return zip_buffer.getvalue()


def _render_export(filtered_df):
    """Render the CSV and ZIP export controls for the filtered results."""
    st.markdown("---")
    st.subheader("💾 Export Results")

    col1, col2 = st.columns(2)
    with col1:
        st.markdown("**Download Results Table**")
        if len(filtered_df) > 0:
            export_df = filtered_df[['Reference', 'Ref_Window', 'Ref_Sequence',
                                     'Query', 'Query_Window', 'Query_Sequence', 'RMSD']].copy()
            export_df['Ref_Residues'] = export_df['Ref_Window'].apply(_window_label)
            export_df['Query_Residues'] = export_df['Query_Window'].apply(_window_label)
            export_df = export_df[['Reference', 'Ref_Residues', 'Ref_Sequence',
                                   'Query', 'Query_Residues', 'Query_Sequence', 'RMSD']]
            export_df = export_df.sort_values('RMSD').reset_index(drop=True)
            st.download_button(
                label="📥 Download Results (CSV)",
                data=export_df.to_csv(index=False),
                file_name="rna_pairwise_comparison_results.csv",
                mime="text/csv"
            )
        else:
            st.info("No results to export")

    with col2:
        st.markdown("**Download Aligned Structures**")
        if len(filtered_df) > 0 and st.button("📦 Generate PDB Archive"):
            with st.spinner("Creating archive..."):
                archive_bytes = _build_pdb_archive(filtered_df)
            st.download_button(
                label="📥 Download PDB Archive (ZIP)",
                data=archive_bytes,
                file_name="aligned_structures.zip",
                mime="application/zip",
                help=f"Contains {len(filtered_df)} comparison sets with reference, original query, and aligned query PDBs"
            )
            st.success(f"✅ Archive ready! Contains {len(filtered_df)} comparisons with 3 PDB files each.")


def main():
    """Render the app: load structures, configure selections, run, show results."""
    # NOTE(review): these header strings contain only text and blank lines in
    # the visible source; any wrapping HTML appears to have been stripped.
    st.markdown("\n\n🧬 RNA Motif Multi-Structure Comparison\n\n", unsafe_allow_html=True)
    st.markdown("\n\nPairwise comparison: Reference structures vs Query structures\n\n", unsafe_allow_html=True)

    _init_session_state()
    temp_dir = st.session_state['temp_dir']

    # Sidebar: Step 1 - Data Source Selection
    _select_data_mode()

    # Step 2: File Upload/Selection - SEPARATE FOR REFERENCE AND QUERY
    st.sidebar.subheader("2️⃣ Structure Files")
    if st.session_state['data_mode'] == 'upload':
        reference_files, query_files = _upload_structure_files()
    else:
        reference_files, query_files = _example_structure_files()
    _show_file_status(reference_files, query_files)

    # Residue trimming controls - rendered early so the values are available
    # for the automatic selections below.
    n_term_trim_ref, c_term_trim_ref = _trim_controls('ref', 'Reference')
    n_term_trim_query, c_term_trim_query = _trim_controls('query', 'Query')

    # Load structure data. Each role gets its own subdirectory so a reference
    # and a query file with the same name do not overwrite each other.
    ref_structure_data = []
    query_structure_data = []
    if reference_files:
        ref_structure_data = load_structure_data(reference_files, os.path.join(temp_dir, 'reference'))
    if query_files:
        query_structure_data = load_structure_data(query_files, os.path.join(temp_dir, 'query'))

    _sync_selection_state('ref', ref_structure_data, n_term_trim_ref, c_term_trim_ref)
    _sync_selection_state('query', query_structure_data, n_term_trim_query, c_term_trim_query)

    # Step 3: Configure Atom Selections in Main Area
    st.markdown("---")
    st.subheader("🔬 Configure Atom Selections")
    st.info(f"""ℹ️ **Atom Selection:** Backbone + Sugar\n - For purines (A, G): N9, C8, C4\n - For pyrimidines (C, U): N1, C2, C6\n - For backbone and sugar atoms: "P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"\n """)

    col1, col2 = st.columns(2)
    with col1:
        _render_selection_panel('ref', 'reference', 'Reference',
                                ref_structure_data, n_term_trim_ref, c_term_trim_ref)
    with col2:
        _render_selection_panel('query', 'query', 'Query',
                                query_structure_data, n_term_trim_query, c_term_trim_query)

    # Step 4: Window Configuration
    window_size, window_type, can_run = _window_configuration(ref_structure_data, query_structure_data)

    # Step 5: Run Analysis
    st.sidebar.subheader("4️⃣ Run Analysis")
    if st.sidebar.button("🚀 Run Pairwise Analysis", type="primary", disabled=not can_run):
        if not can_run:
            st.error("Please upload and configure both reference and query structures")
            return

        with st.spinner("Analyzing structures..."):
            results_df = _run_pairwise_analysis(
                ref_structure_data, query_structure_data, window_size, window_type
            )
            st.session_state['results'] = results_df
            st.session_state['ref_structure_data'] = ref_structure_data
            st.session_state['query_structure_data'] = query_structure_data
        st.success(f"✅ Analysis complete! {len(results_df)} comparisons performed.")

    # Display results
    if 'results' in st.session_state:
        filtered_df = _render_summary(st.session_state['results'])
        _render_visualization(filtered_df)
        _render_export(filtered_df)


if __name__ == "__main__":
    main()