Spaces:

HouBioLab
/

MotifAlign

Sleeping

App Files Files Community

MotifAlign / app.py

jiehou

Update app.py

1c36d3a verified 5 months ago

raw

history blame contribute delete

55.2 kB

	"""
	RNA Motif Multi-Structure Comparison Tool - Pairwise Mode
	Streamlit app for comparing multiple RNA motif structures with separate reference and query sets
	Uses dropdown menu for residue configuration and default Backbone + Sugar atom selection
	"""

	import streamlit as st
	import numpy as np
	import pandas as pd
	from pathlib import Path
	import io
	import tempfile
	import os
	from itertools import combinations

	# Import our RMSD calculation functions
	from rmsd_utils import (
	parse_residue_atoms,
	get_backbone_sugar_and_selectbase_coords_fixed,
	calculate_COM,
	calculate_rotation_rmsd,
	translate_rotate_coords,
	get_backbone_sugar_coords_from_residue,
	get_base_coords_from_residue
	)

	# Import example data loader
	try:
	from example_data_loader import (
	get_example_pdbs,
	load_example_as_uploaded_file,
	get_example_info
	)
	EXAMPLES_AVAILABLE = True
	except ImportError:
	EXAMPLES_AVAILABLE = False
	st.warning("Example data loader not available. Please use 'Upload Files' mode.")

	# Page configuration
	st.set_page_config(
	page_title="RNA Motif Multi-Structure Comparison - Pairwise",
	page_icon="🧬",
	layout="wide",
	initial_sidebar_state="expanded"
	)

	from image_annotator import annotate_alignment_image


	# Custom CSS - IMPROVED VERSION with larger fonts
	st.markdown("""
	<style>
	/* ========================================
	MAIN CONTENT - LARGER FONTS
	======================================== */

	/* Increase base font size for all main content */
	.main .element-container,
	.main [data-testid="stMarkdownContainer"],
	.main [data-testid="stText"],
	.main p,
	.main span,
	.main div {
	font-size: 1.15rem !important;
	}

	/* Headers in main content */
	.main h1 {
	font-size: 2.8rem !important;
	font-weight: 700 !important;
	}
	.main h2 {
	font-size: 2.0rem !important;
	font-weight: 600 !important;
	}
	.main h3 {
	font-size: 1.6rem !important;
	font-weight: 600 !important;
	}

	/* Custom header classes */
	.main-header {
	font-size: 2.8rem !important;
	font-weight: bold;
	color: #1f77b4;
	margin-bottom: 1rem;
	}
	.sub-header {
	font-size: 1.4rem !important;
	color: #666;
	margin-bottom: 2rem;
	}

	/* Info/warning/success boxes */
	.main [data-testid="stAlert"] p,
	.main [data-testid="stAlert"] {
	font-size: 1.1rem !important;
	}

	/* Dataframes and tables */
	.main [data-testid="stDataFrame"],
	.main .dataframe,
	.main table {
	font-size: 1.05rem !important;
	}
	.main .dataframe th,
	.main .dataframe td {
	font-size: 1.05rem !important;
	padding: 8px !important;
	}

	/* Metrics */
	.main [data-testid="stMetric"] {
	font-size: 1.15rem !important;
	}
	.main [data-testid="stMetricLabel"] {
	font-size: 1.1rem !important;
	}
	.main [data-testid="stMetricValue"] {
	font-size: 1.8rem !important;
	}

	/* Buttons in main content */
	.main button p,
	.main button span {
	font-size: 1.05rem !important;
	}

	/* Selectbox, radio, and other inputs in main */
	.main .stSelectbox label,
	.main .stRadio label,
	.main .stNumberInput label,
	.main .stMultiSelect label {
	font-size: 1.1rem !important;
	}

	.main .stSelectbox [data-baseweb="select"] div,
	.main .stRadio [role="radiogroup"] label,
	.main .stNumberInput input {
	font-size: 1.05rem !important;
	}

	/* Expander headers */
	.main [data-testid="stExpander"] summary {
	font-size: 1.15rem !important;
	}

	/* Code blocks */
	.main code,
	.main pre {
	font-size: 1.0rem !important;
	}

	/* ========================================
	SIDEBAR - COMPACT & NORMAL FONT
	======================================== */

	/* Ultra-compact sidebar spacing */
	section[data-testid="stSidebar"] {
	padding-top: 0.2rem !important;
	}
	section[data-testid="stSidebar"] > div {
	padding-top: 0.2rem !important;
	}

	/* Minimal margins */
	section[data-testid="stSidebar"] [data-testid="stMarkdownContainer"] {
	margin: 0rem !important;
	}

	/* Minimal header spacing */
	section[data-testid="stSidebar"] h1,
	section[data-testid="stSidebar"] h2,
	section[data-testid="stSidebar"] h3 {
	margin-top: 0.1rem !important;
	margin-bottom: 0.2rem !important;
	padding: 0rem !important;
	line-height: 1.2 !important;
	font-size: 1.0rem !important;
	}

	/* Tight widget spacing */
	section[data-testid="stSidebar"] .stSelectbox,
	section[data-testid="stSidebar"] .stNumberInput,
	section[data-testid="stSidebar"] .stRadio,
	section[data-testid="stSidebar"] .stFileUploader {
	margin-top: 0.1rem !important;
	margin-bottom: 0.2rem !important;
	}

	section[data-testid="stSidebar"] .stButton {
	margin: 0.2rem 0 !important;
	}

	section[data-testid="stSidebar"] .element-container {
	margin: 0.1rem 0 !important;
	}

	section[data-testid="stSidebar"] .stAlert {
	padding: 0.3rem 0.5rem !important;
	margin: 0.1rem 0 !important;
	}

	section[data-testid="stSidebar"] label {
	margin-bottom: 0.1rem !important;
	font-size: 0.9rem !important;
	}

	section[data-testid="stSidebar"] .stCaptionContainer {
	margin: 0.1rem 0 !important;
	}

	section[data-testid="stSidebar"] hr {
	margin: 0.2rem 0 !important;
	}

	/* Sidebar font sizes - keep normal/small */
	section[data-testid="stSidebar"] * {
	font-size: 0.9rem !important;
	}

	section[data-testid="stSidebar"] p,
	section[data-testid="stSidebar"] span,
	section[data-testid="stSidebar"] div {
	font-size: 0.9rem !important;
	}

	section[data-testid="stSidebar"] button {
	font-size: 0.9rem !important;
	}
	</style>
	""", unsafe_allow_html=True)


	def save_uploaded_file(uploaded_file, directory):
	"""Save an uploaded file to a temporary directory"""
	file_path = os.path.join(directory, uploaded_file.name)
	with open(file_path, "wb") as f:
	f.write(uploaded_file.getbuffer())
	return file_path


	def get_structure_info(pdb_path):
	"""
	Get information about a structure's residues.

	Args:
	pdb_path: Path to PDB file

	Returns:
	List of dicts with residue info: [{index, resnum, resname, full_name}, ...]
	"""
	residues = parse_residue_atoms(pdb_path)

	structure_info = []
	for idx, res in enumerate(residues):
	structure_info.append({
	'index': idx,
	'resnum': res['resnum'],
	'resname': res['resname'],
	'full_name': f"{idx+1}. {res['resname']} (residue #{res['resnum']})"
	})

	return structure_info


	def load_structure_data(uploaded_files, temp_dir):
	"""Load structure data from uploaded files"""
	structure_data = []

	for uploaded_file in uploaded_files:
	file_path = save_uploaded_file(uploaded_file, temp_dir)
	residues = parse_residue_atoms(file_path)

	structure_data.append({
	'name': uploaded_file.name,
	'path': file_path,
	'residues': residues,
	'num_residues': len(residues)
	})

	return structure_data



	def extract_window_coords(residues, window_indices):
	"""
	Extract coordinates for a specific window of residues.

	Args:
	residues: List of all residues
	window_indices: List of indices to extract

	Returns:
	numpy array of coordinates
	"""
	from rmsd_utils import get_backbone_sugar_coords_from_residue, get_base_coords_from_residue

	all_coords = []
	for idx in window_indices:
	if idx < len(residues):
	residue = residues[idx]
	# Get backbone and sugar coordinates
	backbone_coords = get_backbone_sugar_coords_from_residue(residue)
	all_coords.extend(backbone_coords)
	# Get base coordinates
	base_coords = get_base_coords_from_residue(residue)
	all_coords.extend(base_coords)

	return np.asarray(all_coords)


	def generate_windows_from_selection(selected_indices, window_size, window_type):
	"""Generate windows from selected residue indices"""
	if len(selected_indices) < window_size:
	return []

	windows = []


	if len(selected_indices) == window_size:
	windows.append(selected_indices)
	return windows

	if window_type == "contiguous":
	# Only sliding windows
	for i in range(len(selected_indices) - window_size + 1):
	windows.append(selected_indices[i:i+window_size])

	elif window_type == "non-contiguous":
	from itertools import combinations
	all_combos = list(combinations(selected_indices, window_size))

	# Get the contiguous windows (to exclude them)
	contiguous_windows = []
	for i in range(len(selected_indices) - window_size + 1):
	contiguous_windows.append(tuple(selected_indices[i:i+window_size]))

	# Filter: keep only combinations that are NOT in contiguous_windows
	for combo in all_combos:
	if combo not in contiguous_windows:
	windows.append(list(combo))
	else:
	from itertools import combinations
	all_combos = list(combinations(selected_indices, window_size))

	# Filter: keep only combinations that are NOT in contiguous_windows
	for combo in all_combos:
	windows.append(list(combo))
	return windows

	def main():
	st.markdown('<h1 class="main-header">🧬 RNA Motif Multi-Structure Comparison</h1>', unsafe_allow_html=True)
	st.markdown('<p class="sub-header">Pairwise comparison: Reference structures vs Query structures</p>', unsafe_allow_html=True)

	# Create temporary directory
	if 'temp_dir' not in st.session_state:
	st.session_state['temp_dir'] = tempfile.mkdtemp()
	temp_dir = st.session_state['temp_dir']

	# Initialize session state
	if 'data_mode' not in st.session_state:
	st.session_state['data_mode'] = 'upload'
	if 'ref_selections' not in st.session_state:
	st.session_state['ref_selections'] = {}
	if 'query_selections' not in st.session_state:
	st.session_state['query_selections'] = {}

	# Sidebar: Step 1 - Data Source Selection
	st.sidebar.title("⚙️ Configuration")
	st.sidebar.subheader("1️⃣ Data Source")

	# Check if examples are available
	if EXAMPLES_AVAILABLE:
	data_mode = st.sidebar.radio(
	"Choose data source",
	["Upload Files", "Use Example Data"],
	key="data_mode_radio",
	help="Upload your own PDB files or use provided examples"
	)
	else:
	st.sidebar.info("ℹ️ Example data not available. Using upload mode.")
	data_mode = "Upload Files"

	# Update data mode
	if data_mode == "Upload Files":
	st.session_state['data_mode'] = 'upload'
	# Reset example initialization when switching to upload mode
	if 'example_mode_initialized' in st.session_state:
	del st.session_state['example_mode_initialized']
	else:
	st.session_state['data_mode'] = 'example'

	# Step 2: File Upload/Selection - SEPARATE FOR REFERENCE AND QUERY
	st.sidebar.subheader("2️⃣ Structure Files")

	reference_files = []
	query_files = []

	if st.session_state['data_mode'] == 'upload':
	st.sidebar.markdown("Upload Reference Structures")
	ref_uploaded = st.sidebar.file_uploader(
	"Reference PDB files",
	type=['pdb'],
	accept_multiple_files=True,
	key="ref_uploader",
	help="Upload one or more reference structures (e.g., Pentaloop)"
	)

	st.sidebar.markdown("Upload Query Structures")
	query_uploaded = st.sidebar.file_uploader(
	"Query PDB files",
	type=['pdb'],
	accept_multiple_files=True,
	key="query_uploader",
	help="Upload one or more query structures (e.g., Tetraloop)"
	)

	reference_files = ref_uploaded if ref_uploaded else []
	query_files = query_uploaded if query_uploaded else []

	else: # Example data mode
	if not EXAMPLES_AVAILABLE:
	st.sidebar.error("❌ Example data loader module not found")
	reference_files = []
	query_files = []
	else:
	try:
	examples = get_example_pdbs()

	if not examples or len(examples) == 0:
	st.sidebar.error("❌ No example data available. Please add PDB files to 'data/' folder")
	st.sidebar.info("💡 Create a 'data/' folder in the same directory as the app and add .pdb files")
	reference_files = []
	query_files = []
	else:
	example_names = sorted(list(examples.keys()))

	# Auto-select examples when first switching to example mode
	if 'example_mode_initialized' not in st.session_state:
	st.session_state['example_mode_initialized'] = True
	# Auto-select first half as reference, second half as query
	mid_point = max(1, len(example_names) // 2)
	st.session_state['auto_ref_examples'] = example_names[:mid_point]
	st.session_state['auto_query_examples'] = example_names[mid_point:mid_point*2]

	st.sidebar.markdown("Select Reference Examples")
	ref_example_names = st.sidebar.multiselect(
	"Reference structures",
	options=example_names,
	default=st.session_state.get('auto_ref_examples', []),
	key="ref_examples",
	help="Select example reference structures"
	)

	if ref_example_names:
	st.sidebar.success(f"✅ {len(ref_example_names)} reference file(s) selected")

	st.sidebar.markdown("Select Query Examples")
	query_example_names = st.sidebar.multiselect(
	"Query structures",
	options=example_names,
	default=st.session_state.get('auto_query_examples', []),
	key="query_examples",
	help="Select example query structures"
	)

	if query_example_names:
	st.sidebar.success(f"✅ {len(query_example_names)} query file(s) selected")

	# Convert names to paths and load files
	try:
	reference_files = [load_example_as_uploaded_file(examples[name]) for name in ref_example_names]
	query_files = [load_example_as_uploaded_file(examples[name]) for name in query_example_names]

	except Exception as load_error:
	st.sidebar.error(f"Error loading files: {str(load_error)}")
	import traceback
	st.sidebar.code(traceback.format_exc())
	reference_files = []
	query_files = []
	except Exception as e:
	st.sidebar.error(f"❌ Error loading examples: {str(e)}")
	import traceback
	st.sidebar.code(traceback.format_exc())
	reference_files = []
	query_files = []

	# Show upload status
	if reference_files and query_files:
	st.sidebar.success(f"✅ {len(reference_files)} reference + {len(query_files)} query structures")
	elif reference_files:
	st.sidebar.info(f"ℹ️ {len(reference_files)} reference structures loaded")
	elif query_files:
	st.sidebar.info(f"ℹ️ {len(query_files)} query structures loaded")
	else:
	st.sidebar.warning("⚠️ Upload or select structures")

	# Residue trimming controls - add early so they're available when needed
	st.sidebar.markdown("---")
	st.sidebar.markdown("🔧 5'/3' Base Trimming (Reference) ")
	col1, col2 = st.sidebar.columns(2)
	with col1:
	n_term_trim_ref = st.number_input(
	"5' trim_ref",
	min_value=0,
	max_value=10,
	value=2,
	step=1,
	help="Number of bases to remove from 5' end",
	key="n_term_trim_ref"
	)
	with col2:
	c_term_trim_ref = st.number_input(
	"3' trim_ref",
	min_value=0,
	max_value=10,
	value=2,
	step=1,
	help="Number of bases to remove from 3' end",
	key="c_term_trim_ref"
	)


	# Residue trimming controls - add early so they're available when needed
	st.sidebar.markdown("---")
	st.sidebar.markdown("🔧 5'/3' Base Trimming (Query) ")
	col1, col2 = st.sidebar.columns(2)
	with col1:
	n_term_trim_query = st.number_input(
	"5' trim_query",
	min_value=0,
	max_value=10,
	value=2,
	step=1,
	help="Number of bases to remove from 5' end",
	key="n_term_trim_query"
	)
	with col2:
	c_term_trim_query = st.number_input(
	"3' trim_query",
	min_value=0,
	max_value=10,
	value=2,
	step=1,
	help="Number of bases to remove from 3' end",
	key="c_term_trim_query"
	)

	# Load structure data
	ref_structure_data = []
	query_structure_data = []

	if reference_files:
	ref_structure_data = load_structure_data(reference_files, temp_dir)

	if query_files:
	query_structure_data = load_structure_data(query_files, temp_dir)

	# Track current files to reset selections if files change
	current_ref_files = set([s['name'] for s in ref_structure_data])
	current_query_files = set([s['name'] for s in query_structure_data])

	if 'current_ref_files' not in st.session_state:
	st.session_state['current_ref_files'] = current_ref_files
	if 'current_query_files' not in st.session_state:
	st.session_state['current_query_files'] = current_query_files

	# Reset selections if files changed
	if st.session_state['current_ref_files'] != current_ref_files:
	st.session_state['current_ref_files'] = current_ref_files
	st.session_state['ref_selections'] = {}
	if 'ref_auto_initialized' in st.session_state:
	del st.session_state['ref_auto_initialized']

	if st.session_state['current_query_files'] != current_query_files:
	st.session_state['current_query_files'] = current_query_files
	st.session_state['query_selections'] = {}
	if 'query_auto_initialized' in st.session_state:
	del st.session_state['query_auto_initialized']

	# Auto-initialize selections (exclude first and last residue by default)
	if 'ref_auto_initialized' not in st.session_state and ref_structure_data:
	for struct in ref_structure_data:
	num_res = struct['num_residues']
	if num_res > n_term_trim_ref + c_term_trim_ref:
	auto_selection = list(range(n_term_trim_ref, num_res - c_term_trim_ref))
	st.session_state['ref_selections'][struct['name']] = auto_selection
	else:
	st.session_state['ref_selections'][struct['name']] = list(range(num_res))
	st.session_state['ref_auto_initialized'] = True

	if 'query_auto_initialized' not in st.session_state and query_structure_data:
	for struct in query_structure_data:
	num_res = struct['num_residues']
	if num_res > n_term_trim_query + c_term_trim_query:
	auto_selection = list(range(n_term_trim_query, num_res - c_term_trim_query))
	st.session_state['query_selections'][struct['name']] = auto_selection
	else:
	st.session_state['query_selections'][struct['name']] = list(range(num_res))
	st.session_state['query_auto_initialized'] = True

	# Step 3: Configure Atom Selections in Main Area
	st.markdown("---")
	st.subheader("🔬 Configure Atom Selections")
	st.info(f"""ℹ️ Atom Selection: Backbone + Sugar\n
	- For purines (A, G): N9, C8, C4\n
	- For pyrimidines (C, U): N1, C2, C6\n
	- For backbone and sugar atoms: "P", "OP1", "OP2", "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"\n
	""")


	# Create two columns for Reference and Query
	col1, col2 = st.columns(2)

	with col1:
	st.markdown("### 📋 Reference Structures")
	if ref_structure_data:
	selected_ref_name = st.selectbox(
	"Select structure to configure (excluding two bases in 5' and 3' by default)",
	options=[s['name'] for s in ref_structure_data],
	key="ref_dropdown",
	help="Choose a reference structure to configure its residue selection"
	)

	selected_ref = next((s for s in ref_structure_data if s['name'] == selected_ref_name), None)

	if selected_ref:
	st.markdown(f"{selected_ref['name']} ({selected_ref['num_residues']} residues)")

	# Display residue table
	structure_info = get_structure_info(selected_ref['path'])
	info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
	info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
	info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
	info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]

	with st.expander("📋 View Residue Table", expanded=False):
	st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38))

	# Selection method
	selection_method = st.radio(
	f"Selection method for {selected_ref['name']}",
	["Select by range", "Select specific residues", "Use all residues"],
	key=f"method_ref_{selected_ref['name']}",
	index=1,
	horizontal=True
	)

	selected_indices = []

	if selection_method == "Select by range":
	current_selection = st.session_state['ref_selections'].get(selected_ref['name'], [])
	default_start = current_selection[0] + n_term_trim_ref if current_selection else n_term_trim_ref
	default_end = current_selection[-1] + 1 if current_selection else max(n_term_trim_ref, len(structure_info) - c_term_trim_ref)

	c1, c2 = st.columns(2)
	with c1:
	start_idx = st.number_input(
	"Start index (1-based)",
	min_value=1,
	max_value=len(structure_info),
	value=default_start,
	key=f"start_ref_{selected_ref['name']}"
	)
	with c2:
	end_idx = st.number_input(
	"End index (1-based, inclusive)",
	min_value=1,
	max_value=len(structure_info),
	value=default_end,
	key=f"end_ref_{selected_ref['name']}"
	)

	if start_idx <= end_idx:
	selected_indices = list(range(start_idx - 1, end_idx))
	st.success(f"✓ Selected residues: {[i+1 for i in selected_indices]}")
	# Auto-save the selection
	st.session_state['ref_selections'][selected_ref['name']] = selected_indices
	else:
	st.error("Start index must be ≤ end index")

	elif selection_method == "Select specific residues":
	# Always use current trim values for default selection (updates when trim values change)
	default_names = [structure_info[i]['full_name'] for i in range(n_term_trim_ref, len(structure_info)-c_term_trim_ref)]

	selected_names = st.multiselect(
	"Select residues",
	options=[info['full_name'] for info in structure_info],
	default=default_names,
	key=f"specific_ref_{selected_ref['name']}_n{n_term_trim_ref}_c{c_term_trim_ref}"
	)


	name_to_idx = {info['full_name']: info['index'] for info in structure_info}
	selected_indices = [name_to_idx[name] for name in selected_names]
	selected_indices.sort()

	if selected_indices:
	st.success(f"✓ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")
	# Auto-save the selection
	st.session_state['ref_selections'][selected_ref['name']] = selected_indices

	else: # Use all residues
	selected_indices = list(range(len(structure_info)))
	st.info(f"✓ Using all {len(selected_indices)} residues")
	# Auto-save the selection
	st.session_state['ref_selections'][selected_ref['name']] = selected_indices

	# Show current saved selection (now always up-to-date)
	if selected_ref['name'] in st.session_state['ref_selections']:
	saved_indices = st.session_state['ref_selections'][selected_ref['name']]
	st.info(f"Current saved selection: {len(saved_indices)} residues: {[i+1 for i in saved_indices]}")
	else:
	st.info("Upload reference structures to configure")

	with col2:
	st.markdown("### 📋 Query Structures")
	if query_structure_data:
	selected_query_name = st.selectbox(
	"Select structure to configure (excluding two bases in 5' and 3' by default)",
	options=[s['name'] for s in query_structure_data],
	key="query_dropdown",
	help="Choose a query structure to configure its residue selection"
	)

	selected_query = next((s for s in query_structure_data if s['name'] == selected_query_name), None)

	if selected_query:
	st.markdown(f"{selected_query['name']} ({selected_query['num_residues']} residues)")

	# Display residue table
	structure_info = get_structure_info(selected_query['path'])
	info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
	info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
	info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
	info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]

	with st.expander("📋 View Residue Table", expanded=False):
	st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38))

	# Selection method
	selection_method = st.radio(
	f"Selection method for {selected_query['name']}",
	["Select by range", "Select specific residues", "Use all residues"],
	key=f"method_query_{selected_query['name']}",
	index=1,
	horizontal=True
	)

	selected_indices = []

	if selection_method == "Select by range":
	current_selection = st.session_state['query_selections'].get(selected_query['name'], [])
	default_start = current_selection[0] + n_term_trim_query if current_selection else 3
	default_end = current_selection[-1] + 1 if current_selection else max(2, len(structure_info) - c_term_trim_query)

	c1, c2 = st.columns(2)
	with c1:
	start_idx = st.number_input(
	"Start index (1-based)",
	min_value=1,
	max_value=len(structure_info),
	value=default_start,
	key=f"start_query_{selected_query['name']}"
	)
	with c2:
	end_idx = st.number_input(
	"End index (1-based, inclusive)",
	min_value=1,
	max_value=len(structure_info),
	value=default_end,
	key=f"end_query_{selected_query['name']}"
	)

	if start_idx <= end_idx:
	selected_indices = list(range(start_idx - 1, end_idx))
	st.success(f"✓ Selected residues: {[i+1 for i in selected_indices]}")
	# Auto-save the selection
	st.session_state['query_selections'][selected_query['name']] = selected_indices
	else:
	st.error("Start index must be ≤ end index")

	elif selection_method == "Select specific residues":
	# Always use current trim values for default selection (updates when trim values change)
	default_names = [structure_info[i]['full_name'] for i in range(n_term_trim_query, len(structure_info)-c_term_trim_query)]

	selected_names = st.multiselect(
	"Select residues",
	options=[info['full_name'] for info in structure_info],
	default=default_names,
	key=f"specific_query_{selected_query['name']}_n{n_term_trim_query}_c{c_term_trim_query}"
	)

	name_to_idx = {info['full_name']: info['index'] for info in structure_info}
	selected_indices = [name_to_idx[name] for name in selected_names]
	selected_indices.sort()

	if selected_indices:
	st.success(f"✓ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")
	# Auto-save the selection
	st.session_state['query_selections'][selected_query['name']] = selected_indices

	else: # Use all residues
	selected_indices = list(range(len(structure_info)))
	st.info(f"✓ Using all {len(selected_indices)} residues")
	# Auto-save the selection
	st.session_state['query_selections'][selected_query['name']] = selected_indices

	# Show current saved selection (now always up-to-date)
	if selected_query['name'] in st.session_state['query_selections']:
	saved_indices = st.session_state['query_selections'][selected_query['name']]
	st.info(f"Current saved selection: {len(saved_indices)} residues: {[i+1 for i in saved_indices]}")
	else:
	st.info("Upload query structures to configure")

	# Step 4: Window Configuration
	st.sidebar.subheader("3️⃣ Window Configuration")

	# Check if all structures have selections
	all_ref_have_selections = all(s['name'] in st.session_state['ref_selections'] for s in ref_structure_data)
	all_query_have_selections = all(s['name'] in st.session_state['query_selections'] for s in query_structure_data)

	if all_ref_have_selections and all_query_have_selections and ref_structure_data and query_structure_data:
	# Find minimum selection size
	all_selections = list(st.session_state['ref_selections'].values()) + list(st.session_state['query_selections'].values())
	min_selection_size = min(len(sel) for sel in all_selections)

	window_size = st.sidebar.number_input(
	"Window Size",
	min_value=2,
	max_value=min_selection_size,
	value=min(4, min_selection_size),
	step=1,
	help="Number of residues per comparison window"
	)

	window_type = st.sidebar.radio(
	"Window Type",
	["contiguous", "non-contiguous", "both"],
	index=0,
	help="Contiguous: sliding windows. Non-contiguous: all combinations"
	)
	else:
	st.sidebar.warning("⚠️ Configure selections first")
	window_size = 4
	window_type = "contiguous"


	# Step 5: Run Analysis
	st.sidebar.subheader("4️⃣ Run Analysis")

	can_run = (all_ref_have_selections and all_query_have_selections and
	ref_structure_data and query_structure_data)

	if st.sidebar.button("🚀 Run Pairwise Analysis", type="primary", disabled=not can_run):
	if not can_run:
	st.error("Please upload and configure both reference and query structures")
	return

	# Run comparisons
	with st.spinner("Analyzing structures..."):
	results = []

	# For each reference structure
	for ref_struct in ref_structure_data:
	ref_indices = st.session_state['ref_selections'][ref_struct['name']]
	ref_windows = generate_windows_from_selection(ref_indices, window_size, window_type)

	if not ref_windows:
	continue

	# For each reference window
	for ref_window in ref_windows:
	# Extract reference coords
	ref_coords = extract_window_coords(ref_struct['residues'], ref_window)
	ref_com = calculate_COM(ref_coords)
	ref_sequence = ''.join([ref_struct['residues'][i]['resname'] for i in ref_window])

	# Compare against all query structures
	for query_struct in query_structure_data:
	query_indices = st.session_state['query_selections'][query_struct['name']]
	query_windows = generate_windows_from_selection(query_indices, window_size, window_type)

	for query_window in query_windows:
	# Extract query coords
	query_coords = extract_window_coords(query_struct['residues'], query_window)
	query_com = calculate_COM(query_coords)
	query_sequence = ''.join([query_struct['residues'][i]['resname'] for i in query_window])

	# Calculate RMSD
	U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)

	if U is None or RMSD is None:
	RMSD = 999.0
	U = np.eye(3)

	results.append({
	'Reference': ref_struct['name'],
	'Ref_Window': ref_window,
	'Ref_Sequence': ref_sequence,
	'Query': query_struct['name'],
	'Query_Window': query_window,
	'Query_Sequence': query_sequence,
	'RMSD': RMSD,
	'Rotation_Matrix': U,
	'Ref_COM': ref_com,
	'Query_COM': query_com,
	'Ref_Path': ref_struct['path'],
	'Query_Path': query_struct['path']
	})

	results_df = pd.DataFrame(results)
	st.session_state['results'] = results_df
	st.session_state['ref_structure_data'] = ref_structure_data
	st.session_state['query_structure_data'] = query_structure_data

	st.success(f"✅ Analysis complete! {len(results_df)} comparisons performed.")

	# Display results
	if 'results' in st.session_state:
	results_df = st.session_state['results']

	st.markdown("---")
	st.subheader("📊 Results Summary")

	# RMSD threshold filter
	col1, col2 = st.columns([1, 3])
	with col1:
	rmsd_threshold = st.slider(
	"RMSD Threshold (Å)",
	min_value=0.0,
	max_value=10.0,
	value=3.0,
	step=0.1
	)

	filtered_df = results_df[results_df['RMSD'] <= rmsd_threshold]

	with col2:
	st.metric("Comparisons Below Threshold", f"{len(filtered_df)} / {len(results_df)}")

	# Best match per Reference-Query pair
	st.markdown("### 🏆 Best Match per Reference-Query Pair")

	if len(filtered_df) > 0:
	# Group by Reference and Query to find best match for each pair
	best_matches = filtered_df.loc[filtered_df.groupby(['Reference', 'Query'])['RMSD'].idxmin()]

	best_display = best_matches[['Reference', 'Query', 'Ref_Sequence', 'Query_Sequence', 'RMSD']].copy()
	best_display['RMSD'] = best_display['RMSD'].round(3)
	best_display.columns = ['Reference', 'Query', 'Ref Sequence', 'Query Sequence', 'RMSD (Å)']
	st.dataframe(best_display, use_container_width=True)
	else:
	st.warning("No matches found below threshold")

	# Full results
	with st.expander("📋 All Comparison Results"):
	if len(filtered_df) > 0:
	display_df = filtered_df[['Reference', 'Ref_Window', 'Ref_Sequence', 'Query', 'Query_Window', 'Query_Sequence', 'RMSD']].copy()

	# Format the window indices to be 1-based
	display_df['Ref_Residues'] = display_df['Ref_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
	display_df['Query_Residues'] = display_df['Query_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))

	# Reorder columns
	display_df = display_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'RMSD']]
	display_df['RMSD'] = display_df['RMSD'].round(3)
	display_df = display_df.sort_values('RMSD').reset_index(drop=True)

	# Rename columns for better display
	display_df.columns = ['Reference', 'Ref_Indices', 'Ref_Sequence', 'Query', 'Query_Indices', 'Query_Sequence', 'RMSD (Å)']

	st.dataframe(display_df, use_container_width=True, height=400)
	else:
	st.info("No results to display")

	# Visualization
	st.markdown("---")
	st.subheader("🔬 3D Structure Visualization")

	if len(filtered_df) > 0:
	st.markdown("Select a comparison to visualize:")

	# Create dropdown options
	viz_options = []
	for idx, row in filtered_df.iterrows():
	ref_res_str = ','.join([str(i+1) for i in row['Ref_Window']])
	query_res_str = ','.join([str(i+1) for i in row['Query_Window']])
	option_text = f"{row['Reference']}[{ref_res_str}] ({row['Ref_Sequence']}) vs {row['Query']}[{query_res_str}] ({row['Query_Sequence']}) \| RMSD: {row['RMSD']:.3f} Å"
	viz_options.append((idx, option_text))

	# Sort by RMSD
	viz_options.sort(key=lambda x: filtered_df.loc[x[0], 'RMSD'])

	selected_viz_idx = st.selectbox(
	"Choose comparison to visualize",
	options=[opt[0] for opt in viz_options],
	format_func=lambda idx: next(opt[1] for opt in viz_options if opt[0] == idx),
	help="All comparisons below RMSD threshold, sorted by RMSD"
	)

	# Get the selected comparison
	selected_row = filtered_df.loc[selected_viz_idx]

	# Import visualization function
	from visualization import create_structure_visualization

	# Display RMSD info
	#st.info(f"RMSD: {selected_row['RMSD']:.3f} Å ({len(selected_row['Query_Indices'])} residues) \| Reference: {selected_row['Reference']}{selected_row['Ref_Residues']} ({selected_row['Ref_Sequence']}) \| Query: {selected_row['Query']}{selected_row['Query_Residues']} ({selected_row['Query_Sequence']})")

	# Create visualization - wider display
	col1, col2, col3 = st.columns([0.5, 4, 0.5])

	with col2:
	try:
	viz_html = create_structure_visualization(
	selected_row['Ref_Path'],
	selected_row['Query_Path'],
	selected_row['Ref_Window'],
	selected_row['Query_Window'],
	selected_row['Rotation_Matrix'],
	selected_row['Ref_COM'],
	selected_row['Query_COM'],
	selected_row['RMSD'],
	ref_name=selected_row['Reference'],
	query_name=selected_row['Query'],
	ref_sequence=selected_row['Ref_Sequence'],
	query_sequence=selected_row['Query_Sequence']
	)
	st.components.v1.html(viz_html, width=1400, height=750, scrolling=False)
	except Exception as e:
	st.error(f"Error creating visualization: {str(e)}")
	import traceback
	st.code(traceback.format_exc())

	# Automatic Annotation Info
	st.markdown("---")
	st.success("✅ Automatic Annotation: When you click 'Download PNG' in the 3D viewer above, the image automatically includes RMSD, structure names, and sequences!")
	st.info("💡 Customize font size: Use the 'Annotation Font Size' dropdown in the viewer controls (top-right) to choose from Small, Medium, Large (default), or Extra Large fonts!")





	# Show transformation details
	with st.expander("🔧 Transformation Details"):
	col1, col2 = st.columns(2)

	with col1:
	st.markdown("Rotation Matrix (U):")
	st.dataframe(
	pd.DataFrame(selected_row['Rotation_Matrix']).round(4),
	use_container_width=True
	)

	with col2:
	st.markdown("Translation Vectors:")
	st.write(f"Reference COM: [{selected_row['Ref_COM'][0]:.3f}, {selected_row['Ref_COM'][1]:.3f}, {selected_row['Ref_COM'][2]:.3f}]")
	st.write(f"Query COM: [{selected_row['Query_COM'][0]:.3f}, {selected_row['Query_COM'][1]:.3f}, {selected_row['Query_COM'][2]:.3f}]")


	# Download aligned structures
	with st.expander("💾 Download Structure Files"):
	st.markdown("Download extracted and aligned structures for external visualization")

	from visualization import extract_window_pdb, transform_pdb_string

	# Extract reference window
	ref_pdb = extract_window_pdb(
	selected_row['Ref_Path'],
	selected_row['Ref_Window']
	)

	# Extract and transform query window
	query_pdb = extract_window_pdb(
	selected_row['Query_Path'],
	selected_row['Query_Window']
	)

	query_aligned_pdb = transform_pdb_string(
	query_pdb,
	selected_row['Rotation_Matrix'],
	selected_row['Query_COM'],
	selected_row['Ref_COM']
	)

	col1, col2, col3 = st.columns(3)

	with col1:
	# Reference structure
	ref_filename = f"ref_{selected_row['Reference'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Ref_Window']]))}.pdb"
	st.download_button(
	label="📥 Reference PDB",
	data=ref_pdb,
	file_name=ref_filename,
	mime="chemical/x-pdb",
	help="Original reference structure (selected residues only)"
	)

	with col2:
	# Query structure (original position)
	query_filename = f"query_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Window']]))}.pdb"
	st.download_button(
	label="📥 Query PDB (Original)",
	data=query_pdb,
	file_name=query_filename,
	mime="chemical/x-pdb",
	help="Original query structure (selected residues only)"
	)

	with col3:
	# Query structure (aligned)
	query_aligned_filename = f"query_aligned_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Window']]))}.pdb"
	st.download_button(
	label="📥 Query PDB (Aligned)",
	data=query_aligned_pdb,
	file_name=query_aligned_filename,
	mime="chemical/x-pdb",
	help="Query structure aligned to reference"
	)

	# Combined aligned structure
	st.markdown("---")
	st.markdown("Combined Aligned Structure (Reference + Query)")

	# Create combined PDB with both structures
	combined_pdb_lines = []

	# Add header information as REMARK records
	combined_pdb_lines.append(f"REMARK Reference: {selected_row['Reference']}")
	combined_pdb_lines.append(f"REMARK Reference Residues: {','.join(map(str, [i+1 for i in selected_row['Ref_Window']]))}")
	combined_pdb_lines.append(f"REMARK Reference Sequence: {selected_row['Ref_Sequence']}")
	combined_pdb_lines.append(f"REMARK Query: {selected_row['Query']}")
	combined_pdb_lines.append(f"REMARK Query Residues: {','.join(map(str, [i+1 for i in selected_row['Query_Window']]))}")
	combined_pdb_lines.append(f"REMARK Query Sequence: {selected_row['Query_Sequence']}")
	combined_pdb_lines.append(f"REMARK RMSD: {selected_row['RMSD']:.3f} Angstroms")
	combined_pdb_lines.append("MODEL 1")

	# Add reference atoms with chain A
	for line in ref_pdb.split('\n'):
	if line.startswith(('ATOM', 'HETATM')):
	# Set chain to A for reference
	modified_line = line[:21] + 'A' + line[22:]
	combined_pdb_lines.append(modified_line)

	combined_pdb_lines.append("ENDMDL")
	combined_pdb_lines.append("MODEL 2")

	# Add aligned query atoms with chain B
	for line in query_aligned_pdb.split('\n'):
	if line.startswith(('ATOM', 'HETATM')):
	# Set chain to B for query
	modified_line = line[:21] + 'B' + line[22:]
	combined_pdb_lines.append(modified_line)

	combined_pdb_lines.append("ENDMDL")
	combined_pdb_lines.append("END")

	combined_pdb = '\n'.join(combined_pdb_lines)

	combined_filename = f"aligned_{selected_row['Reference'].replace('.pdb', '')}_{selected_row['Query'].replace('.pdb', '')}_rmsd_{selected_row['RMSD']:.3f}.pdb"

	st.download_button(
	label="📥 Download Combined Aligned Structure",
	data=combined_pdb,
	file_name=combined_filename,
	mime="chemical/x-pdb",
	help="Reference (chain A) and aligned query (chain B) in one file",
	use_container_width=True
	)

	st.info("💡 Tip: The combined PDB contains reference (chain A) and aligned query (chain B) - ready for PyMOL/Chimera")

	else:
	st.warning("No comparisons below RMSD threshold to visualize")

	# Export Results
	st.markdown("---")
	st.subheader("💾 Export Results")

	col1, col2 = st.columns(2)

	with col1:
	st.markdown("Download Results Table")
	if len(filtered_df) > 0:
	export_df = filtered_df[['Reference', 'Ref_Window', 'Ref_Sequence', 'Query', 'Query_Window', 'Query_Sequence', 'RMSD']].copy()
	export_df['Ref_Residues'] = export_df['Ref_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
	export_df['Query_Residues'] = export_df['Query_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
	export_df = export_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'RMSD']]
	export_df = export_df.sort_values('RMSD').reset_index(drop=True)

	csv = export_df.to_csv(index=False)
	st.download_button(
	label="📥 Download Results (CSV)",
	data=csv,
	file_name="rna_pairwise_comparison_results.csv",
	mime="text/csv"
	)
	else:
	st.info("No results to export")

	with col2:
	st.markdown("Download Aligned Structures")
	if len(filtered_df) > 0 and st.button("📦 Generate PDB Archive"):
	with st.spinner("Creating archive..."):
	import zipfile
	from visualization_multi import extract_window_pdb, transform_pdb_string

	zip_buffer = io.BytesIO()

	with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
	for idx, row in filtered_df.iterrows():
	comp_name = f"comp_{idx:03d}_rmsd_{row['RMSD']:.3f}"

	# Reference
	ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Window'])
	zip_file.writestr(f"{comp_name}/reference.pdb", ref_pdb)

	# Query original
	query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Window'])
	zip_file.writestr(f"{comp_name}/query_original.pdb", query_pdb)

	# Query aligned
	query_aligned = transform_pdb_string(
	query_pdb,
	row['Rotation_Matrix'],
	row['Query_COM'],
	row['Ref_COM']
	)
	zip_file.writestr(f"{comp_name}/query_aligned.pdb", query_aligned)

	# README
	readme = f"""Comparison #{idx}
	RMSD: {row['RMSD']:.3f} Å
	Atom Selection: Backbone + Sugar (default)

	Reference: {row['Reference']}
	Residues: {','.join([str(i+1) for i in row['Ref_Window']])}
	Sequence: {row['Ref_Sequence']}

	Query: {row['Query']}
	Residues: {','.join([str(i+1) for i in row['Query_Window']])}
	Sequence: {row['Query_Sequence']}
	"""
	zip_file.writestr(f"{comp_name}/README.txt", readme)

	zip_buffer.seek(0)

	st.download_button(
	label="📥 Download PDB Archive (ZIP)",
	data=zip_buffer.getvalue(),
	file_name="aligned_structures.zip",
	mime="application/zip",
	help=f"Contains {len(filtered_df)} comparison sets with reference, original query, and aligned query PDBs"
	)

	st.success(f"✅ Archive ready! Contains {len(filtered_df)} comparisons with 3 PDB files each.")




	if __name__ == "__main__":
	main()