jiehou committed on
Commit
4a5024a
·
verified ·
1 Parent(s): a6c9f2a

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +971 -33
src/streamlit_app.py CHANGED
@@ -1,40 +1,978 @@
1
- import altair as alt
 
 
 
 
 
2
  import numpy as np
3
  import pandas as pd
4
- import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
+ """
2
+ RNA Motif Structure Comparison Tool
3
+ Streamlit app for comparing RNA motif structures with flexible residue selection
4
+ """
5
+
6
+ import streamlit as st
7
  import numpy as np
8
  import pandas as pd
9
+ from pathlib import Path
10
+ import io
11
+ import tempfile
12
+ import os
13
+
14
+ # Import our RMSD calculation functions
15
+ from rmsd_utils import (
16
+ parse_residue_atoms,
17
+ get_backbone_sugar_and_selectbase_coords_fixed,
18
+ calculate_COM,
19
+ calculate_rotation_rmsd,
20
+ translate_rotate_coords
21
+ )
22
+
23
+ from visualization import create_structure_visualization
24
+
25
+ # Page configuration
26
+ st.set_page_config(
27
+ page_title="RNA Motif Structure Comparison",
28
+ page_icon="🧬",
29
+ layout="wide",
30
+ initial_sidebar_state="expanded"
31
+ )
32
+
33
+ # Custom CSS
34
+ st.markdown("""
35
+ <style>
36
+ .main-header {
37
+ font-size: 2.5rem;
38
+ font-weight: bold;
39
+ color: #1f77b4;
40
+ margin-bottom: 1rem;
41
+ }
42
+ .sub-header {
43
+ font-size: 1.2rem;
44
+ color: #666;
45
+ margin-bottom: 2rem;
46
+ }
47
+ .metric-box {
48
+ background-color: #f0f2f6;
49
+ padding: 1rem;
50
+ border-radius: 0.5rem;
51
+ margin: 0.5rem 0;
52
+ }
53
+ </style>
54
+ """, unsafe_allow_html=True)
55
+
56
+
57
def save_uploaded_file(uploaded_file, directory):
    """Save an uploaded file into ``directory`` and return the path written.

    NOTE: this module defines ``save_uploaded_file`` a second time further
    down; the later definition is the one bound at runtime.

    Args:
        uploaded_file: Object exposing ``name`` (str) and ``getbuffer()``
            (bytes-like), e.g. a Streamlit uploaded file.
        directory: Existing directory to write into.

    Returns:
        Path of the file written inside ``directory``.

    Raises:
        ValueError: If the upload's name has no usable final path component.
    """
    # The name is supplied by the client. Keep only its final component so a
    # crafted name such as "../../x.pdb" cannot escape ``directory``.
    safe_name = os.path.basename(uploaded_file.name)
    if not safe_name:
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
63
+
64
+
65
def get_structure_info(pdb_path):
    """
    Summarise every residue found in a PDB file.

    Args:
        pdb_path: Path to PDB file

    Returns:
        One dict per residue, in the order returned by ``parse_residue_atoms``,
        with keys ``index`` (0-based position), ``resnum``, ``resname`` and
        ``full_name`` (display label that uses the 1-based position).
    """
    return [
        {
            'index': position,
            'resnum': entry['resnum'],
            'resname': entry['resname'],
            'full_name': f"{position+1}. {entry['resname']} (residue #{entry['resnum']})",
        }
        for position, entry in enumerate(parse_residue_atoms(pdb_path))
    ]
87
 
 
 
88
 
89
def display_structure_selector(files, temp_dir, set_name):
    """
    Render a preview and a residue picker for each uploaded structure.

    For every file an expander shows the residue table, a selection-method
    radio ("Select by range", "Select specific residues", "Use all residues")
    and the residues that end up selected.

    Args:
        files: List of uploaded files (each must expose ``name``)
        temp_dir: Temporary directory containing files
        set_name: Name of the set (e.g., "Reference" or "Query"); also used
            to namespace the widget keys

    Returns:
        Dict mapping filename to list of selected residue indices (0-based).
        A file with no parsed residues, or with an invalid range, maps to an
        empty list.
    """
    if not files:
        return {}

    st.subheader(f"📋 {set_name} Structure Preview & Selection")

    selections = {}

    for file in files:
        file_path = os.path.join(temp_dir, file.name)
        structure_info = get_structure_info(file_path)
        residue_count = len(structure_info)

        with st.expander(f"🔍 {file.name} ({residue_count} residues)"):
            if not structure_info:
                # Without this guard an empty structure breaks the column
                # selection on the empty DataFrame below and the range inputs
                # (max_value would be 0, below min_value=1).
                st.warning(f"No residues could be parsed from {file.name}.")
                selections[file.name] = []
                continue

            # Display residue table
            info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
            info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
            info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
            info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]

            st.dataframe(info_df, use_container_width=True, height=min(300, residue_count * 35 + 38))

            # Selection method.
            # NOTE(review): widget keys are built from set_name + file.name, so
            # two same-named files in one set would presumably collide - confirm.
            selection_method = st.radio(
                f"Selection method for {file.name}",
                ["Select by range", "Select specific residues", "Use all residues"],
                key=f"method_{set_name}_{file.name}",
                horizontal=True
            )

            selected_indices = []

            if selection_method == "Select by range":
                col1, col2 = st.columns(2)
                with col1:
                    start_idx = st.number_input(
                        "Start index (1-based)",
                        min_value=1,
                        max_value=residue_count,
                        value=1,
                        key=f"start_{set_name}_{file.name}"
                    )
                with col2:
                    end_idx = st.number_input(
                        "End index (1-based, inclusive)",
                        min_value=1,
                        max_value=residue_count,
                        value=min(4, residue_count),
                        key=f"end_{set_name}_{file.name}"
                    )

                if start_idx <= end_idx:
                    # 1-based inclusive range -> 0-based indices
                    selected_indices = list(range(start_idx - 1, end_idx))
                    st.info(f"✓ Selected residues: {[i+1 for i in selected_indices]}")
                else:
                    st.error("Start index must be ≤ end index")

            elif selection_method == "Select specific residues":
                # Multi-select for specific residues
                selected_names = st.multiselect(
                    "Select residues",
                    options=[info['full_name'] for info in structure_info],
                    default=[info['full_name'] for info in structure_info[:4]],
                    key=f"specific_{set_name}_{file.name}"
                )

                # Map display labels back to 0-based indices
                name_to_idx = {info['full_name']: info['index'] for info in structure_info}
                selected_indices = sorted(name_to_idx[name] for name in selected_names)

                if selected_indices:
                    st.info(f"✓ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")

            else:  # Use all residues
                selected_indices = list(range(residue_count))
                st.info(f"✓ Using all {len(selected_indices)} residues")

            # Show selected residues details
            if selected_indices:
                selected_df = info_df[info_df['Index (0-based)'].isin(selected_indices)]
                st.markdown("**Selected residues:**")
                st.dataframe(selected_df, use_container_width=True)

            selections[file.name] = selected_indices

    return selections
186
 
187
+
188
def save_uploaded_file(uploaded_file, directory):
    """Save an uploaded file into ``directory`` and return the path written.

    NOTE: this repeats a ``save_uploaded_file`` definition that appears
    earlier in the module; being the later one, this is the binding in
    effect at runtime.

    Args:
        uploaded_file: Object exposing ``name`` (str) and ``getbuffer()``
            (bytes-like), e.g. a Streamlit uploaded file.
        directory: Existing directory to write into.

    Returns:
        Path of the file written inside ``directory``.

    Raises:
        ValueError: If the upload's name has no usable final path component.
    """
    # The name is supplied by the client. Keep only its final component so a
    # crafted name such as "../../x.pdb" cannot escape ``directory``.
    safe_name = os.path.basename(uploaded_file.name)
    if not safe_name:
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
194
+
195
+
196
def extract_window_coords(residues, window_indices):
    """
    Gather the atom coordinates of the residues named by ``window_indices``.

    Args:
        residues: List of all residues
        window_indices: List of indices to extract

    Returns:
        numpy array built from, for each selected residue in the order given,
        its backbone/sugar coordinates followed by its base coordinates.
        Indices at or beyond ``len(residues)`` are skipped.
    """
    from rmsd_utils import get_backbone_sugar_coords_from_residue, get_base_coords_from_residue

    residue_count = len(residues)
    collected = []
    for position in window_indices:
        if position >= residue_count:
            continue
        entry = residues[position]
        # Backbone and sugar atoms first, then the base atoms
        collected.extend(get_backbone_sugar_coords_from_residue(entry))
        collected.extend(get_base_coords_from_residue(entry))

    return np.asarray(collected)
221
+
222
+
223
def compare_structures_with_selection(reference_files, query_files, ref_selections, query_selections, temp_dir):
    """
    Compare reference and query structures using user-selected residues (direct comparison).
    Only compares structures with matching selection sizes.

    Files whose selection has fewer than 2 residues are ignored. Each query
    file is parsed and its coordinates extracted once, on first use, rather
    than once per reference file.

    Args:
        reference_files: List of reference motif files
        query_files: List of query motif files
        ref_selections: Dict mapping filename to selected residue indices
        query_selections: Dict mapping filename to selected residue indices
        temp_dir: Temporary directory containing files

    Returns:
        DataFrame with one row per compared pair (columns: Reference,
        Ref_Residues, Ref_Sequence, Ref_Indices, Query, Query_Residues,
        Query_Sequence, Query_Indices, Num_Residues, RMSD, Rotation_Matrix,
        Ref_COM, Query_COM, Ref_Path, Query_Path). An empty DataFrame is
        returned, after an error message, when no pair qualifies.
    """
    min_residues = 2

    def describe(indices):
        # 1-based, bracketed label such as "[3,4,5,6]"
        return f"[{','.join(str(i + 1) for i in indices)}]"

    def sequence_of(residues, indices):
        return ''.join(residues[i]['resname'] for i in indices if i < len(residues))

    # Filter both sides once; the same lists drive counting and comparing.
    usable_refs = []
    for ref_file in reference_files:
        indices = ref_selections.get(ref_file.name, [])
        if len(indices) >= min_residues:
            usable_refs.append((ref_file, indices))

    usable_queries = []
    for query_file in query_files:
        indices = query_selections.get(query_file.name, [])
        if len(indices) >= min_residues:
            usable_queries.append((query_file, indices))

    # Only pairs with the same number of selected residues are compared
    total_comparisons = sum(
        1
        for _, ref_indices in usable_refs
        for _, query_indices in usable_queries
        if len(ref_indices) == len(query_indices)
    )

    if total_comparisons == 0:
        st.error("No valid comparisons found. Ensure selected regions have matching sizes.")
        return pd.DataFrame()

    progress_bar = st.progress(0)
    status_text = st.empty()

    results = []
    comparison_count = 0
    prepared_queries = {}  # query filename -> data derived from that file

    for ref_file, ref_indices in usable_refs:
        ref_name = ref_file.name
        ref_path = os.path.join(temp_dir, ref_name)

        # Parse reference motif and extract coordinates for selected residues
        ref_residues = parse_residue_atoms(ref_path)
        ref_coords = extract_window_coords(ref_residues, ref_indices)
        ref_com = calculate_COM(ref_coords)

        ref_residue_desc = describe(ref_indices)
        ref_sequence = sequence_of(ref_residues, ref_indices)

        for query_file, query_indices in usable_queries:
            # Only compare if same number of residues
            if len(ref_indices) != len(query_indices):
                continue

            query_name = query_file.name
            if query_name not in prepared_queries:
                # Path and selection are both keyed by filename, so the
                # derived data can be reused for every reference.
                query_path = os.path.join(temp_dir, query_name)
                query_residues = parse_residue_atoms(query_path)
                query_coords = extract_window_coords(query_residues, query_indices)
                prepared_queries[query_name] = (
                    query_path,
                    query_coords,
                    calculate_COM(query_coords),
                    describe(query_indices),
                    sequence_of(query_residues, query_indices),
                )
            (query_path, query_coords, query_com,
             query_residue_desc, query_sequence) = prepared_queries[query_name]

            # Calculate RMSD
            U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)

            if U is None or RMSD is None:
                # Helper returned no result: record a sentinel RMSD and an
                # identity rotation so the row can still be stored.
                RMSD = 999.0
                U = np.eye(3)

            # Store results
            results.append({
                'Reference': ref_name,
                'Ref_Residues': ref_residue_desc,
                'Ref_Sequence': ref_sequence,
                'Ref_Indices': ref_indices,
                'Query': query_name,
                'Query_Residues': query_residue_desc,
                'Query_Sequence': query_sequence,
                'Query_Indices': query_indices,
                'Num_Residues': len(ref_indices),
                'RMSD': RMSD,
                'Rotation_Matrix': U,
                'Ref_COM': ref_com,
                'Query_COM': query_com,
                'Ref_Path': ref_path,
                'Query_Path': query_path
            })

            comparison_count += 1
            progress_bar.progress(comparison_count / total_comparisons)
            status_text.text(f"Processing: {ref_name}{ref_residue_desc} vs {query_name}{query_residue_desc}")

    progress_bar.empty()
    status_text.empty()

    return pd.DataFrame(results)
340
+
341
+
342
def compare_structures_with_windows(reference_files, query_files, ref_selections, query_selections,
                                    window_size, window_type, temp_dir):
    """
    Compare reference and query structures using sliding windows on selected residues.
    Allows comparison of different-sized selections.

    Every reference window is compared against every query window. Query
    files are parsed, and their window coordinates extracted, once up front
    rather than once per reference window.

    Args:
        reference_files: List of reference motif files
        query_files: List of query motif files
        ref_selections: Dict mapping filename to selected residue indices
        query_selections: Dict mapping filename to selected residue indices
        window_size: Size of comparison window
        window_type: "contiguous" or "non-contiguous"
        temp_dir: Temporary directory containing files

    Returns:
        DataFrame with one row per (reference window, query window) pair
        (columns: Reference, Ref_Residues, Ref_Sequence, Ref_Indices, Query,
        Query_Residues, Query_Sequence, Query_Indices, Num_Residues, RMSD,
        Rotation_Matrix, Ref_COM, Query_COM, Ref_Path, Query_Path). An empty
        DataFrame is returned, after an error message, when no pair exists.
    """
    from itertools import combinations

    def generate_windows_from_selection(selected_indices, win_size, win_type):
        """Generate windows from selected indices"""
        if len(selected_indices) < win_size:
            return []

        if win_type == "contiguous":
            return [
                selected_indices[i:i + win_size]
                for i in range(len(selected_indices) - win_size + 1)
            ]
        # non-contiguous: every combination of win_size selected indices
        return [list(combo) for combo in combinations(selected_indices, win_size)]

    def describe(window):
        # 1-based, bracketed label such as "[3,4,5,6]"
        return f"[{','.join(str(i + 1) for i in window)}]"

    def sequence_of(residues, window):
        return ''.join(residues[i]['resname'] for i in window if i < len(residues))

    # Windows depend only on the selection, so generate them once per file.
    ref_plan = [
        (
            ref_file,
            generate_windows_from_selection(
                ref_selections.get(ref_file.name, []), window_size, window_type
            ),
        )
        for ref_file in reference_files
    ]

    query_plan = []
    for query_file in query_files:
        query_windows = generate_windows_from_selection(
            query_selections.get(query_file.name, []), window_size, window_type
        )
        if query_windows:
            query_plan.append((query_file, query_windows))

    # Every reference window meets every query window
    total_comparisons = (
        sum(len(windows) for _, windows in ref_plan)
        * sum(len(windows) for _, windows in query_plan)
    )

    if total_comparisons == 0:
        st.error(f"No valid comparisons found. Ensure selected regions have at least {window_size} residues.")
        return pd.DataFrame()

    progress_bar = st.progress(0)
    status_text = st.empty()
    comparison_count = 0
    results = []

    # Parse each query motif and prepare its windows a single time.
    prepared_queries = []
    for query_file, query_windows in query_plan:
        query_name = query_file.name
        query_path = os.path.join(temp_dir, query_name)
        query_residues = parse_residue_atoms(query_path)

        entries = []
        for query_window in query_windows:
            query_coords = extract_window_coords(query_residues, query_window)
            entries.append((
                query_window,
                query_coords,
                calculate_COM(query_coords),
                describe(query_window),
                sequence_of(query_residues, query_window),
            ))
        prepared_queries.append((query_name, query_path, entries))

    for ref_file, ref_windows in ref_plan:
        ref_name = ref_file.name

        if not ref_windows:
            selected_count = len(ref_selections.get(ref_name, []))
            st.warning(f"Skipping {ref_name}: selected {selected_count} residues, need at least {window_size}")
            continue

        # Parse reference motif
        ref_path = os.path.join(temp_dir, ref_name)
        ref_residues = parse_residue_atoms(ref_path)

        for ref_window in ref_windows:
            # Extract coordinates for this window
            ref_coords = extract_window_coords(ref_residues, ref_window)
            ref_com = calculate_COM(ref_coords)

            ref_window_desc = describe(ref_window)
            ref_sequence = sequence_of(ref_residues, ref_window)

            for query_name, query_path, entries in prepared_queries:
                for (query_window, query_coords, query_com,
                     query_window_desc, query_sequence) in entries:
                    # Calculate RMSD
                    U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)

                    if U is None or RMSD is None:
                        # Helper returned no result: record a sentinel RMSD
                        # and an identity rotation so the row is still stored.
                        RMSD = 999.0
                        U = np.eye(3)

                    # Store results
                    results.append({
                        'Reference': ref_name,
                        'Ref_Residues': ref_window_desc,
                        'Ref_Sequence': ref_sequence,
                        'Ref_Indices': ref_window,
                        'Query': query_name,
                        'Query_Residues': query_window_desc,
                        'Query_Sequence': query_sequence,
                        'Query_Indices': query_window,
                        'Num_Residues': window_size,
                        'RMSD': RMSD,
                        'Rotation_Matrix': U,
                        'Ref_COM': ref_com,
                        'Query_COM': query_com,
                        'Ref_Path': ref_path,
                        'Query_Path': query_path
                    })

                    comparison_count += 1
                    progress_bar.progress(comparison_count / total_comparisons)
                    status_text.text(f"Processing: {ref_name}{ref_window_desc} vs {query_name}{query_window_desc}")

    progress_bar.empty()
    status_text.empty()

    return pd.DataFrame(results)
482
+
483
+
484
+ def main():
485
+ # Header
486
+ st.markdown('<p class="main-header">🧬 RNA Motif Structure Comparison</p>', unsafe_allow_html=True)
487
+ st.markdown('<p class="sub-header">Compare RNA motifs with flexible residue selection</p>', unsafe_allow_html=True)
488
+
489
+ # Sidebar
490
+ st.sidebar.header("βš™οΈ Configuration")
491
+
492
+ # File upload
493
+ st.sidebar.subheader("1️⃣ Upload Structures")
494
+ reference_files = st.sidebar.file_uploader(
495
+ "Upload Reference Motif PDB files (Set A)",
496
+ type=['pdb', 'PDB'],
497
+ accept_multiple_files=True,
498
+ key="reference",
499
+ help="Upload RNA motif structures to use as reference"
500
+ )
501
+
502
+ query_files = st.sidebar.file_uploader(
503
+ "Upload Query Motif PDB files (Set B)",
504
+ type=['pdb', 'PDB'],
505
+ accept_multiple_files=True,
506
+ key="query",
507
+ help="Upload RNA motif structures to compare against reference"
508
+ )
509
+
510
+ # Main content area
511
+ if not reference_files or not query_files:
512
+ st.info("πŸ‘ˆ Please upload reference and query motif PDB files to begin analysis")
513
+
514
+ # Show example info
515
+ with st.expander("ℹ️ About this tool"):
516
+ st.markdown("""
517
+ ### Purpose
518
+ This tool compares the 3D structures of RNA motifs with **flexible residue selection** and **multiple comparison modes**.
519
+
520
+ ### Workflow
521
+ 1. **Upload PDB files** for reference and query motifs
522
+ 2. **Preview structures** and see all residues in each file
523
+ 3. **Select residues** to include in comparison (e.g., exclude stem bases, keep only loop)
524
+ 4. **Choose comparison mode**:
525
+ - **Direct comparison**: Compare selected regions directly (must be same size)
526
+ - **Window-based comparison**: Generate windows from selections (handles different sizes)
527
+ 5. **Run analysis** using RMSD-based structural alignment
528
+
529
+ ### Comparison Modes
530
+
531
+ #### Direct Comparison (Same Size)
532
+ - Compares your exact selections
533
+ - Example: You select 4 loop residues from each structure
534
+ - Result: Direct 4-residue vs 4-residue comparison
535
+ - Best for: When all structures have same-sized regions of interest
536
+
537
+ #### Window-Based Comparison (Different Sizes)
538
+ - Generates sliding windows from your selections
539
+ - Example: You select 4 loop residues from ref, 6 loop residues from query
540
+ - Set window size to 4
541
+ - Result: Ref's 4 residues compared against all 4-residue windows from query's 6
542
+ - Best for: When structures have different-sized regions but you want to find similar sub-regions
543
+
544
+ ### Selection Methods
545
+ - **By range**: Select consecutive residues (e.g., residues 3-6 for a tetraloop)
546
+ - **Specific residues**: Pick any combination of residues (e.g., 1,3,5,7)
547
+ - **All residues**: Use the entire structure
548
+
549
+ ### Method Details
550
+ - RMSD calculated using backbone, sugar, and select base atoms
551
+ - Base atoms mapped: purines (N9,C8,C4) ↔ pyrimidines (N1,C2,C6)
552
+ - Kabsch algorithm for optimal structural alignment
553
+
554
+ ### Example Use Cases
555
+
556
+ **Case 1: Extract loops from 2+4+2 structures (Direct)**
557
+ - All structures have 8 residues (2 stem + 4 loop + 2 stem)
558
+ - Select residues 3-6 for all structures (the 4-residue loop)
559
+ - Use "Direct comparison"
560
+ - Result: Compare loop vs loop directly
561
+
562
+ **Case 2: Compare 4-mer loop vs 6-mer loop (Window-based)**
563
+ - Structure A: Select residues 3-6 (4 loop residues)
564
+ - Structure B: Select residues 2-7 (6 loop residues)
565
+ - Use "Window-based comparison" with window size = 4
566
+ - Result: Structure A compared against 3 windows from Structure B
567
+
568
+ **Case 3: Find similar regions in different structures (Window-based)**
569
+ - Reference: Select 5 residues of interest
570
+ - Query: Select 10 residues from larger region
571
+ - Use "Window-based comparison" with window size = 5
572
+ - Result: Find which 5-residue window in query best matches reference
573
+
574
+ ### Output
575
+ - RMSD values for all comparisons
576
+ - Interactive 3D visualization of aligned structures
577
+ - Rotation and translation matrices
578
+ - Sequence information for compared regions
579
+ """)
580
+
581
+ return
582
+
583
+ # Create temporary directory for file processing
584
+ temp_dir = tempfile.mkdtemp()
585
+
586
+ # Save uploaded files
587
+ for file in reference_files:
588
+ save_uploaded_file(file, temp_dir)
589
+ for file in query_files:
590
+ save_uploaded_file(file, temp_dir)
591
+
592
+ # Display file info
593
+ st.markdown("---")
594
+ col1, col2 = st.columns(2)
595
+ with col1:
596
+ st.metric("Reference Motifs", len(reference_files))
597
+ with col2:
598
+ st.metric("Query Motifs", len(query_files))
599
+
600
+ # Structure preview and selection
601
+ st.markdown("---")
602
+
603
+ # Get residue selections for reference and query sets
604
+ ref_selections = display_structure_selector(reference_files, temp_dir, "Reference")
605
+
606
+ st.markdown("---")
607
+
608
+ query_selections = display_structure_selector(query_files, temp_dir, "Query")
609
+
610
+ # Validate selections
611
+ st.markdown("---")
612
+ valid_selections = True
613
+ min_residues = 2
614
+
615
+ for filename, indices in ref_selections.items():
616
+ if len(indices) < min_residues:
617
+ st.error(f"❌ {filename}: Need at least {min_residues} residues selected, got {len(indices)}")
618
+ valid_selections = False
619
+
620
+ for filename, indices in query_selections.items():
621
+ if len(indices) < min_residues:
622
+ st.error(f"❌ {filename}: Need at least {min_residues} residues selected, got {len(indices)}")
623
+ valid_selections = False
624
+
625
+ # Check if all selections have the same number of residues
626
+ ref_lengths = set(len(indices) for indices in ref_selections.values())
627
+ query_lengths = set(len(indices) for indices in query_selections.values())
628
+ all_lengths = ref_lengths.union(query_lengths)
629
+
630
+ if len(all_lengths) > 1:
631
+ st.warning(f"⚠️ Selected regions have different sizes: {sorted(all_lengths)} residues. Only structures with matching sizes will be compared.")
632
+
633
+ # Run analysis button
634
+ st.sidebar.markdown("---")
635
+ st.sidebar.subheader("2️⃣ Comparison Method")
636
+
637
+ comparison_mode = st.sidebar.radio(
638
+ "How to compare structures?",
639
+ ["Direct comparison (same size)", "Window-based comparison (different sizes)"],
640
+ help="""
641
+ Direct: Compare selected regions directly (must have same size)
642
+ Window-based: Generate sliding windows for flexible comparison
643
+ """
644
+ )
645
+
646
+ window_size = None
647
+ window_type = None
648
+
649
+ if comparison_mode == "Window-based comparison (different sizes)":
650
+ st.sidebar.markdown("**Window Configuration**")
651
+
652
+ window_size = st.sidebar.number_input(
653
+ "Window Size",
654
+ min_value=2,
655
+ max_value=20,
656
+ value=4,
657
+ step=1,
658
+ help="Number of residues per comparison window"
659
+ )
660
+
661
+ window_type = st.sidebar.radio(
662
+ "Window Type",
663
+ ["contiguous", "non-contiguous"],
664
+ help="Contiguous: sliding windows. Non-contiguous: all combinations"
665
+ )
666
+
667
+ st.sidebar.markdown("---")
668
+ st.sidebar.subheader("3️⃣ Run Analysis")
669
+
670
+ if st.sidebar.button("πŸš€ Run Analysis", type="primary", disabled=not valid_selections):
671
+ if not valid_selections:
672
+ st.error("Please fix selection errors before running analysis")
673
+ return
674
+
675
+ with st.spinner("Analyzing structures..."):
676
+ if comparison_mode == "Direct comparison (same size)":
677
+ results_df = compare_structures_with_selection(
678
+ reference_files,
679
+ query_files,
680
+ ref_selections,
681
+ query_selections,
682
+ temp_dir
683
+ )
684
+ else: # Window-based comparison
685
+ results_df = compare_structures_with_windows(
686
+ reference_files,
687
+ query_files,
688
+ ref_selections,
689
+ query_selections,
690
+ window_size,
691
+ window_type,
692
+ temp_dir
693
+ )
694
+
695
+ # Store results in session state
696
+ st.session_state['results_df'] = results_df
697
+ st.session_state['ref_selections'] = ref_selections
698
+ st.session_state['query_selections'] = query_selections
699
+ st.session_state['comparison_mode'] = comparison_mode
700
+
701
+ if len(results_df) > 0:
702
+ st.success(f"βœ… Analysis complete! {len(results_df)} comparisons performed.")
703
+ else:
704
+ st.warning("⚠️ No comparisons could be performed. Check that structures meet comparison requirements.")
705
+
706
+ # Display results if available
707
+ if 'results_df' in st.session_state and len(st.session_state['results_df']) > 0:
708
+ results_df = st.session_state['results_df']
709
+
710
+ # Add RMSD threshold filter
711
+ st.sidebar.markdown("---")
712
+ st.sidebar.subheader("4️⃣ Filter Results")
713
+ rmsd_threshold = st.sidebar.slider(
714
+ "RMSD Threshold (Γ…)",
715
+ min_value=0.0,
716
+ max_value=5.0,
717
+ value=2.0,
718
+ step=0.1,
719
+ help="Only show results below this RMSD value"
720
+ )
721
+
722
+ # Show comparison mode
723
+ if 'comparison_mode' in st.session_state:
724
+ mode_display = "Direct" if "Direct" in st.session_state['comparison_mode'] else "Window-based"
725
+ st.sidebar.info(f"**Mode**: {mode_display}")
726
+
727
+ # Filter by threshold
728
+ filtered_df = results_df[results_df['RMSD'] <= rmsd_threshold].copy()
729
+
730
+ # Summary statistics
731
+ st.markdown("---")
732
+ st.subheader("πŸ“Š Summary Statistics")
733
+
734
+ col1, col2, col3, col4 = st.columns(4)
735
+ with col1:
736
+ st.metric("Total Comparisons", len(results_df))
737
+ with col2:
738
+ st.metric("Below Threshold", len(filtered_df))
739
+ with col3:
740
+ st.metric("Best RMSD", f"{results_df['RMSD'].min():.3f} Γ…")
741
+ with col4:
742
+ st.metric("Mean RMSD", f"{results_df['RMSD'].mean():.3f} Γ…")
743
+
744
+ # Results table
745
+ st.markdown("---")
746
+ st.subheader("πŸ” Comparison Results")
747
+
748
+ # Prepare display dataframe
749
+ display_df = filtered_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'Num_Residues', 'RMSD']].copy()
750
+ display_df = display_df.sort_values('RMSD').reset_index(drop=True)
751
+ display_df['RMSD'] = display_df['RMSD'].round(3)
752
+
753
+ # Display with selection
754
+ st.dataframe(
755
+ display_df,
756
+ use_container_width=True,
757
+ height=300
758
+ )
759
+
760
+ # Structure selection for visualization
761
+ st.markdown("---")
762
+ st.subheader("πŸ”¬ 3D Structure Visualization")
763
+
764
+ if len(filtered_df) > 0:
765
+ # Select a comparison to visualize
766
+ selected_idx = st.selectbox(
767
+ "Select a comparison to visualize:",
768
+ range(len(filtered_df)),
769
+ format_func=lambda i: f"{filtered_df.iloc[i]['Reference']}{filtered_df.iloc[i]['Ref_Residues']} ({filtered_df.iloc[i]['Ref_Sequence']}) vs {filtered_df.iloc[i]['Query']}{filtered_df.iloc[i]['Query_Residues']} ({filtered_df.iloc[i]['Query_Sequence']}) | RMSD: {filtered_df.iloc[i]['RMSD']:.3f} Γ…"
770
+ )
771
+
772
+ selected_row = filtered_df.iloc[selected_idx]
773
+
774
+ # Display RMSD info
775
+ st.info(f"**RMSD: {selected_row['RMSD']:.3f} Γ…** ({selected_row['Num_Residues']} residues) | Reference: {selected_row['Reference']}{selected_row['Ref_Residues']} ({selected_row['Ref_Sequence']}) | Query: {selected_row['Query']}{selected_row['Query_Residues']} ({selected_row['Query_Sequence']})")
776
+
777
+ # Create visualization - wider display
778
+ col1, col2, col3 = st.columns([0.5, 4, 0.5])
779
+
780
+ with col2:
781
+ try:
782
+ viz_html = create_structure_visualization(
783
+ selected_row['Ref_Path'],
784
+ selected_row['Query_Path'],
785
+ selected_row['Ref_Indices'],
786
+ selected_row['Query_Indices'],
787
+ selected_row['Rotation_Matrix'],
788
+ selected_row['Ref_COM'],
789
+ selected_row['Query_COM'],
790
+ selected_row['RMSD']
791
+ )
792
+ st.components.v1.html(viz_html, height=700, scrolling=False)
793
+ except Exception as e:
794
+ st.error(f"Error creating visualization: {str(e)}")
795
+
796
+ # Show transformation details
797
+ with st.expander("πŸ”§ Transformation Details"):
798
+ col1, col2 = st.columns(2)
799
+
800
+ with col1:
801
+ st.markdown("**Rotation Matrix (U):**")
802
+ st.dataframe(
803
+ pd.DataFrame(selected_row['Rotation_Matrix']).round(4),
804
+ use_container_width=True
805
+ )
806
+
807
+ with col2:
808
+ st.markdown("**Translation Vectors:**")
809
+ st.write(f"Reference COM: [{selected_row['Ref_COM'][0]:.3f}, {selected_row['Ref_COM'][1]:.3f}, {selected_row['Ref_COM'][2]:.3f}]")
810
+ st.write(f"Query COM: [{selected_row['Query_COM'][0]:.3f}, {selected_row['Query_COM'][1]:.3f}, {selected_row['Query_COM'][2]:.3f}]")
811
+
812
+ # Download aligned structures
813
+ with st.expander("💾 Download Structure Files"):
814
+ st.markdown("**Download extracted and aligned structures for external visualization**")
815
+
816
+ from visualization import extract_window_pdb, transform_pdb_string
817
+
818
+ # Extract reference window
819
+ ref_pdb = extract_window_pdb(
820
+ selected_row['Ref_Path'],
821
+ selected_row['Ref_Indices']
822
+ )
823
+
824
+ # Extract and transform query window
825
+ query_pdb = extract_window_pdb(
826
+ selected_row['Query_Path'],
827
+ selected_row['Query_Indices']
828
+ )
829
+
830
+ query_aligned_pdb = transform_pdb_string(
831
+ query_pdb,
832
+ selected_row['Rotation_Matrix'],
833
+ selected_row['Query_COM'],
834
+ selected_row['Ref_COM']
835
+ )
836
+
837
+ col1, col2, col3 = st.columns(3)
838
+
839
+ with col1:
840
+ # Reference structure
841
+ ref_filename = f"ref_{selected_row['Reference'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Ref_Indices']]))}.pdb"
842
+ st.download_button(
843
+ label="📥 Reference PDB",
844
+ data=ref_pdb,
845
+ file_name=ref_filename,
846
+ mime="chemical/x-pdb",
847
+ help="Original reference structure (selected residues only)"
848
+ )
849
+
850
+ with col2:
851
+ # Query structure (original position)
852
+ query_filename = f"query_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Indices']]))}.pdb"
853
+ st.download_button(
854
+ label="📥 Query PDB (Original)",
855
+ data=query_pdb,
856
+ file_name=query_filename,
857
+ mime="chemical/x-pdb",
858
+ help="Original query structure (selected residues only)"
859
+ )
860
+
861
+ with col3:
862
+ # Query structure (aligned)
863
+ query_aligned_filename = f"query_aligned_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Indices']]))}.pdb"
864
+ st.download_button(
865
+ label="📥 Query PDB (Aligned)",
866
+ data=query_aligned_pdb,
867
+ file_name=query_aligned_filename,
868
+ mime="chemical/x-pdb",
869
+ help="Query structure aligned to reference"
870
+ )
871
+
872
+ st.info("💡 **Tip:** Load reference and aligned query together in PyMOL/Chimera to examine the superposition")
873
+ else:
874
+ st.warning("No comparisons below the RMSD threshold. Try increasing the threshold.")
875
+
876
+ # Download results
877
+ st.markdown("---")
878
+ st.subheader("💾 Export Results")
879
+
880
+ col1, col2 = st.columns(2)
881
+
882
+ with col1:
883
+ st.markdown("**Export Results Table**")
884
+ # Prepare CSV - make sure all columns exist
885
+ export_columns = ['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'Num_Residues', 'RMSD']
886
+ export_df = results_df[export_columns].copy()
887
+ export_df = export_df.sort_values('RMSD').reset_index(drop=True)
888
+
889
+ csv = export_df.to_csv(index=False)
890
+ st.download_button(
891
+ label="📥 Download Results (CSV)",
892
+ data=csv,
893
+ file_name="rna_motif_comparison_results.csv",
894
+ mime="text/csv"
895
+ )
896
+
897
+ with col2:
898
+ st.markdown("**Export All Aligned Structures**")
899
+ if st.button("📦 Generate PDB Archive", help="Create a ZIP file with all aligned structure pairs"):
900
+ with st.spinner("Generating PDB files..."):
901
+ import zipfile
902
+ import io
903
+ from visualization import extract_window_pdb, transform_pdb_string
904
+
905
+ # Create ZIP file in memory
906
+ zip_buffer = io.BytesIO()
907
+
908
+ with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
909
+ # Process each comparison
910
+ for idx, row in filtered_df.iterrows():
911
+ # Create a directory name for this comparison
912
+ comp_name = f"comparison_{idx:03d}_rmsd_{row['RMSD']:.3f}"
913
+
914
+ # Extract reference
915
+ ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Indices'])
916
+ ref_filename = f"{comp_name}/reference_{row['Reference'].replace('.pdb', '')}.pdb"
917
+ zip_file.writestr(ref_filename, ref_pdb)
918
+
919
+ # Extract query (original)
920
+ query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Indices'])
921
+ query_filename = f"{comp_name}/query_original_{row['Query'].replace('.pdb', '')}.pdb"
922
+ zip_file.writestr(query_filename, query_pdb)
923
+
924
+ # Extract and align query
925
+ query_aligned_pdb = transform_pdb_string(
926
+ query_pdb,
927
+ row['Rotation_Matrix'],
928
+ row['Query_COM'],
929
+ row['Ref_COM']
930
+ )
931
+ query_aligned_filename = f"{comp_name}/query_aligned_{row['Query'].replace('.pdb', '')}.pdb"
932
+ zip_file.writestr(query_aligned_filename, query_aligned_pdb)
933
+
934
+ # Add a README for this comparison
935
+ readme_content = f"""Comparison #{idx}
936
+ RMSD: {row['RMSD']:.3f} Å
937
+ Residues Compared: {row['Num_Residues']}
938
+
939
+ Reference:
940
+ File: {row['Reference']}
941
+ Residues: {row['Ref_Residues']}
942
+ Sequence: {row['Ref_Sequence']}
943
+
944
+ Query:
945
+ File: {row['Query']}
946
+ Residues: {row['Query_Residues']}
947
+ Sequence: {row['Query_Sequence']}
948
+
949
+ Files:
950
+ - reference_*.pdb: Reference structure (selected residues)
951
+ - query_original_*.pdb: Query structure (original position)
952
+ - query_aligned_*.pdb: Query structure (aligned to reference)
953
+
954
+ To visualize in PyMOL:
955
+ load reference_*.pdb
956
+ load query_aligned_*.pdb
957
+
958
+ To visualize in Chimera:
959
+ File → Open → Select both reference and query_aligned PDB files
960
  """
961
+ readme_filename = f"{comp_name}/README.txt"
962
+ zip_file.writestr(readme_filename, readme_content)
963
+
964
+ zip_buffer.seek(0)
965
+
966
+ st.download_button(
967
+ label="📥 Download PDB Archive (ZIP)",
968
+ data=zip_buffer.getvalue(),
969
+ file_name="aligned_structures.zip",
970
+ mime="application/zip",
971
+ help=f"Contains {len(filtered_df)} comparison sets with reference, original query, and aligned query PDBs"
972
+ )
973
+
974
+ st.success(f"✅ Archive ready! Contains {len(filtered_df)} comparisons with 3 PDB files each.")
975
+
976
 
977
+ if __name__ == "__main__":  # run the app only when this file is executed as a script, not when it is imported
978
+ main()