jiehou committed on
Commit
a6c9f2a
·
verified ·
1 Parent(s): fc2db95

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +978 -0
  2. rmsd_utils.py +294 -0
  3. visualization.py +673 -0
app.py ADDED
@@ -0,0 +1,978 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RNA Motif Structure Comparison Tool
3
+ Streamlit app for comparing RNA motif structures with flexible residue selection
4
+ """
5
+
6
+ import streamlit as st
7
+ import numpy as np
8
+ import pandas as pd
9
+ from pathlib import Path
10
+ import io
11
+ import tempfile
12
+ import os
13
+
14
+ # Import our RMSD calculation functions
15
+ from rmsd_utils import (
16
+ parse_residue_atoms,
17
+ get_backbone_sugar_and_selectbase_coords_fixed,
18
+ calculate_COM,
19
+ calculate_rotation_rmsd,
20
+ translate_rotate_coords
21
+ )
22
+
23
+ from visualization import create_structure_visualization
24
+
25
+ # Page configuration
26
+ st.set_page_config(
27
+ page_title="RNA Motif Structure Comparison",
28
+ page_icon="🧬",
29
+ layout="wide",
30
+ initial_sidebar_state="expanded"
31
+ )
32
+
33
+ # Custom CSS
34
+ st.markdown("""
35
+ <style>
36
+ .main-header {
37
+ font-size: 2.5rem;
38
+ font-weight: bold;
39
+ color: #1f77b4;
40
+ margin-bottom: 1rem;
41
+ }
42
+ .sub-header {
43
+ font-size: 1.2rem;
44
+ color: #666;
45
+ margin-bottom: 2rem;
46
+ }
47
+ .metric-box {
48
+ background-color: #f0f2f6;
49
+ padding: 1rem;
50
+ border-radius: 0.5rem;
51
+ margin: 0.5rem 0;
52
+ }
53
+ </style>
54
+ """, unsafe_allow_html=True)
55
+
56
+
57
def save_uploaded_file(uploaded_file, directory):
    """Write an uploaded file into ``directory`` and return the path written.

    NOTE: this module defines ``save_uploaded_file`` a second time further
    down; the later definition is the one bound at import time.

    Args:
        uploaded_file: Object exposing ``name`` (client-supplied file name)
            and ``getbuffer()`` (the file's bytes).
        directory: Existing directory to write into.

    Returns:
        Path of the file that was written.

    Raises:
        ValueError: If the upload's name has no usable file-name component.
    """
    # The name comes from the client. Keep only its final component so that
    # directory parts or an absolute path cannot redirect the write outside
    # ``directory`` (os.path.join discards ``directory`` for absolute names).
    safe_name = os.path.basename(uploaded_file.name)
    if not safe_name or safe_name in (".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
63
+
64
+
65
def get_structure_info(pdb_path):
    """
    Summarise the residues found in a PDB file.

    Args:
        pdb_path: Path to PDB file

    Returns:
        One dict per residue, in the order returned by
        ``parse_residue_atoms``, with keys ``index`` (0-based position),
        ``resnum``, ``resname`` and ``full_name`` (a 1-based display label).
    """
    return [
        {
            'index': position,
            'resnum': residue['resnum'],
            'resname': residue['resname'],
            'full_name': f"{position+1}. {residue['resname']} (residue #{residue['resnum']})",
        }
        for position, residue in enumerate(parse_residue_atoms(pdb_path))
    ]
87
+
88
+
89
def display_structure_selector(files, temp_dir, set_name):
    """
    Display structure information and allow users to select residues.

    For each file an expander shows the residue table and one of three
    selection widgets (range, explicit multi-select, or all residues).

    Args:
        files: List of uploaded files
        temp_dir: Temporary directory containing files
        set_name: Name of the set (e.g., "Reference" or "Query"); also used
            to build the Streamlit widget keys

    Returns:
        Dict mapping filename to list of selected residue indices (0-based).
        A file with no parsed residues, or an invalid range, maps to ``[]``.
    """
    if not files:
        return {}

    st.subheader(f"📋 {set_name} Structure Preview & Selection")

    selections = {}

    for file in files:
        file_path = os.path.join(temp_dir, file.name)
        structure_info = get_structure_info(file_path)

        # Guard against structures with no residues: the table below selects
        # columns that do not exist on an empty frame, and the range inputs
        # would be created with max_value=0 below min_value=1.
        if not structure_info:
            st.warning(f"⚠️ {file.name}: no residues could be read from this file")
            selections[file.name] = []
            continue

        residue_count = len(structure_info)

        with st.expander(f"🔍 {file.name} ({residue_count} residues)"):
            # Display residue table
            info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
            info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
            info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
            info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]

            # Height grows with the row count, capped at 300 px.
            st.dataframe(info_df, use_container_width=True, height=min(300, residue_count * 35 + 38))

            # Selection method
            selection_method = st.radio(
                f"Selection method for {file.name}",
                ["Select by range", "Select specific residues", "Use all residues"],
                key=f"method_{set_name}_{file.name}",
                horizontal=True
            )

            selected_indices = []

            if selection_method == "Select by range":
                col1, col2 = st.columns(2)
                with col1:
                    start_idx = st.number_input(
                        "Start index (1-based)",
                        min_value=1,
                        max_value=residue_count,
                        value=1,
                        key=f"start_{set_name}_{file.name}"
                    )
                with col2:
                    end_idx = st.number_input(
                        "End index (1-based, inclusive)",
                        min_value=1,
                        max_value=residue_count,
                        value=min(4, residue_count),
                        key=f"end_{set_name}_{file.name}"
                    )

                if start_idx <= end_idx:
                    # Convert the 1-based inclusive range to 0-based indices.
                    selected_indices = list(range(start_idx - 1, end_idx))
                    st.info(f"✓ Selected residues: {[i+1 for i in selected_indices]}")
                else:
                    st.error("Start index must be ≤ end index")

            elif selection_method == "Select specific residues":
                # Multi-select for specific residues
                selected_names = st.multiselect(
                    "Select residues",
                    options=[info['full_name'] for info in structure_info],
                    default=[info['full_name'] for info in structure_info[:4]],
                    key=f"specific_{set_name}_{file.name}"
                )

                # Map back to indices
                name_to_idx = {info['full_name']: info['index'] for info in structure_info}
                selected_indices = sorted(name_to_idx[name] for name in selected_names)

                if selected_indices:
                    st.info(f"✓ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")

            else:  # Use all residues
                selected_indices = list(range(residue_count))
                st.info(f"✓ Using all {len(selected_indices)} residues")

            # Show selected residues details
            if selected_indices:
                selected_df = info_df[info_df['Index (0-based)'].isin(selected_indices)]
                st.markdown("**Selected residues:**")
                st.dataframe(selected_df, use_container_width=True)

        selections[file.name] = selected_indices

    return selections
186
+
187
+
188
def save_uploaded_file(uploaded_file, directory):
    """Write an uploaded file into ``directory`` and return the path written.

    NOTE: this redefines the identically named function declared earlier in
    this module; being the later definition, it is the one in effect.

    Args:
        uploaded_file: Object exposing ``name`` (client-supplied file name)
            and ``getbuffer()`` (the file's bytes).
        directory: Existing directory to write into.

    Returns:
        Path of the file that was written.

    Raises:
        ValueError: If the upload's name has no usable file-name component.
    """
    # The name comes from the client. Keep only its final component so that
    # directory parts or an absolute path cannot redirect the write outside
    # ``directory`` (os.path.join discards ``directory`` for absolute names).
    safe_name = os.path.basename(uploaded_file.name)
    if not safe_name or safe_name in (".", ".."):
        raise ValueError(f"Invalid upload file name: {uploaded_file.name!r}")

    file_path = os.path.join(directory, safe_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
194
+
195
+
196
def extract_window_coords(residues, window_indices):
    """
    Collect the coordinates of the residues named by ``window_indices``.

    For each index, the backbone/sugar coordinates are appended first,
    followed by the base coordinates. Indices at or beyond the end of
    ``residues`` are skipped without error.

    Args:
        residues: List of all residues
        window_indices: List of indices to extract

    Returns:
        numpy array of coordinates
    """
    from rmsd_utils import get_backbone_sugar_coords_from_residue, get_base_coords_from_residue

    collected = []
    for position in window_indices:
        if position >= len(residues):
            continue
        entry = residues[position]
        # Backbone and sugar atoms go first, then the base atoms.
        collected.extend(get_backbone_sugar_coords_from_residue(entry))
        collected.extend(get_base_coords_from_residue(entry))

    return np.asarray(collected)
221
+
222
+
223
def compare_structures_with_selection(reference_files, query_files, ref_selections, query_selections, temp_dir):
    """
    Compare reference and query structures using user-selected residues (direct comparison).
    Only compares structures with matching selection sizes; selections with
    fewer than two residues are ignored.

    Each query file is parsed and reduced to coordinates once, on first use,
    and reused for every reference it is compared against.

    Args:
        reference_files: List of reference motif files
        query_files: List of query motif files
        ref_selections: Dict mapping filename to selected residue indices
        query_selections: Dict mapping filename to selected residue indices
        temp_dir: Temporary directory containing files

    Returns:
        DataFrame with comparison results (one row per compared pair); an
        empty DataFrame when no pair has matching selection sizes.
    """
    results = []

    def usable(selection):
        """Return True when a selection has enough residues to compare."""
        return len(selection) >= 2

    def prepare(path, indices):
        """Parse a file and derive coords, COM, label and sequence for indices."""
        residues = parse_residue_atoms(path)
        coords = extract_window_coords(residues, indices)
        com = calculate_COM(coords)
        desc = f"[{','.join([str(i+1) for i in indices])}]"
        sequence = ''.join([residues[i]['resname'] for i in indices if i < len(residues)])
        return coords, com, desc, sequence

    # Count valid comparisons up front so the progress bar has a denominator.
    total_comparisons = 0
    for ref_file in reference_files:
        ref_indices = ref_selections.get(ref_file.name, [])
        if not usable(ref_indices):
            continue
        for query_file in query_files:
            query_indices = query_selections.get(query_file.name, [])
            if usable(query_indices) and len(ref_indices) == len(query_indices):
                total_comparisons += 1

    if total_comparisons == 0:
        st.error("No valid comparisons found. Ensure selected regions have matching sizes.")
        return pd.DataFrame()

    progress_bar = st.progress(0)
    status_text = st.empty()

    comparison_count = 0

    # Per-query derived data keyed by file name; the path and the selection
    # are both determined by the name, so the data does not vary per reference.
    query_cache = {}

    for ref_file in reference_files:
        ref_name = ref_file.name
        ref_path = os.path.join(temp_dir, ref_name)
        ref_indices = ref_selections.get(ref_name, [])

        if not usable(ref_indices):
            continue

        ref_coords, ref_com, ref_residue_desc, ref_sequence = prepare(ref_path, ref_indices)

        for query_file in query_files:
            query_name = query_file.name
            query_path = os.path.join(temp_dir, query_name)
            query_indices = query_selections.get(query_name, [])

            if not usable(query_indices):
                continue

            # Only compare if same number of residues
            if len(ref_indices) != len(query_indices):
                continue

            if query_name not in query_cache:
                query_cache[query_name] = prepare(query_path, query_indices)
            query_coords, query_com, query_residue_desc, query_sequence = query_cache[query_name]

            # Calculate RMSD
            U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)

            # Failed alignment: record a sentinel RMSD and identity rotation.
            if U is None or RMSD is None:
                RMSD = 999.0
                U = np.eye(3)

            # Store results
            results.append({
                'Reference': ref_name,
                'Ref_Residues': ref_residue_desc,
                'Ref_Sequence': ref_sequence,
                'Ref_Indices': ref_indices,
                'Query': query_name,
                'Query_Residues': query_residue_desc,
                'Query_Sequence': query_sequence,
                'Query_Indices': query_indices,
                'Num_Residues': len(ref_indices),
                'RMSD': RMSD,
                'Rotation_Matrix': U,
                'Ref_COM': ref_com,
                'Query_COM': query_com,
                'Ref_Path': ref_path,
                'Query_Path': query_path
            })

            comparison_count += 1
            progress = comparison_count / total_comparisons
            progress_bar.progress(progress)
            status_text.text(f"Processing: {ref_name}{ref_residue_desc} vs {query_name}{query_residue_desc}")

    progress_bar.empty()
    status_text.empty()

    return pd.DataFrame(results)
340
+
341
+
342
def compare_structures_with_windows(reference_files, query_files, ref_selections, query_selections,
                                    window_size, window_type, temp_dir):
    """
    Compare reference and query structures using sliding windows on selected residues.
    Allows comparison of different-sized selections.

    Windows are generated once per file. Each query file is parsed and its
    windows reduced to coordinates once, on first use, then reused for every
    reference window.

    Args:
        reference_files: List of reference motif files
        query_files: List of query motif files
        ref_selections: Dict mapping filename to selected residue indices
        query_selections: Dict mapping filename to selected residue indices
        window_size: Size of comparison window
        window_type: "contiguous" or "non-contiguous"
        temp_dir: Temporary directory containing files

    Returns:
        DataFrame with comparison results (one row per window pair); an
        empty DataFrame when no file pair yields any window pair.
    """
    from itertools import combinations

    results = []

    def generate_windows_from_selection(selected_indices, win_size, win_type):
        """Generate windows from selected indices"""
        if len(selected_indices) < win_size:
            return []

        if win_type == "contiguous":
            return [
                selected_indices[i:i + win_size]
                for i in range(len(selected_indices) - win_size + 1)
            ]
        # non-contiguous: every win_size-sized combination of the selection
        return [list(combo) for combo in combinations(selected_indices, win_size)]

    def prepare_windows(path, windows):
        """Parse a file once and derive (window, coords, COM, label, sequence) per window."""
        residues = parse_residue_atoms(path)
        prepared = []
        for window in windows:
            coords = extract_window_coords(residues, window)
            com = calculate_COM(coords)
            desc = f"[{','.join([str(i+1) for i in window])}]"
            sequence = ''.join([residues[i]['resname'] for i in window if i < len(residues)])
            prepared.append((window, coords, com, desc, sequence))
        return prepared

    # Generate each file's windows a single time; they depend only on the
    # file's selection, which is looked up by file name.
    ref_windows_by_name = {
        f.name: generate_windows_from_selection(ref_selections.get(f.name, []), window_size, window_type)
        for f in reference_files
    }
    query_windows_by_name = {
        f.name: generate_windows_from_selection(query_selections.get(f.name, []), window_size, window_type)
        for f in query_files
    }

    # Count total comparisons (files without windows contribute zero).
    total_comparisons = sum(
        len(ref_windows_by_name[ref_file.name]) * len(query_windows_by_name[query_file.name])
        for ref_file in reference_files
        for query_file in query_files
    )

    if total_comparisons == 0:
        st.error(f"No valid comparisons found. Ensure selected regions have at least {window_size} residues.")
        return pd.DataFrame()

    progress_bar = st.progress(0)
    status_text = st.empty()
    comparison_count = 0

    # Prepared query windows keyed by file name, filled on first use.
    query_cache = {}

    for ref_file in reference_files:
        ref_name = ref_file.name
        ref_path = os.path.join(temp_dir, ref_name)
        ref_indices = ref_selections.get(ref_name, [])
        ref_windows = ref_windows_by_name[ref_name]

        if not ref_windows:
            st.warning(f"Skipping {ref_name}: selected {len(ref_indices)} residues, need at least {window_size}")
            continue

        for ref_window, ref_coords, ref_com, ref_window_desc, ref_sequence in prepare_windows(ref_path, ref_windows):
            for query_file in query_files:
                query_name = query_file.name
                query_path = os.path.join(temp_dir, query_name)
                query_windows = query_windows_by_name[query_name]

                if not query_windows:
                    continue

                if query_name not in query_cache:
                    query_cache[query_name] = prepare_windows(query_path, query_windows)

                for query_window, query_coords, query_com, query_window_desc, query_sequence in query_cache[query_name]:
                    # Calculate RMSD
                    U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)

                    # Failed alignment: record a sentinel RMSD and identity rotation.
                    if U is None or RMSD is None:
                        RMSD = 999.0
                        U = np.eye(3)

                    # Store results
                    results.append({
                        'Reference': ref_name,
                        'Ref_Residues': ref_window_desc,
                        'Ref_Sequence': ref_sequence,
                        'Ref_Indices': ref_window,
                        'Query': query_name,
                        'Query_Residues': query_window_desc,
                        'Query_Sequence': query_sequence,
                        'Query_Indices': query_window,
                        'Num_Residues': window_size,
                        'RMSD': RMSD,
                        'Rotation_Matrix': U,
                        'Ref_COM': ref_com,
                        'Query_COM': query_com,
                        'Ref_Path': ref_path,
                        'Query_Path': query_path
                    })

                    comparison_count += 1
                    progress = comparison_count / total_comparisons
                    progress_bar.progress(progress)
                    status_text.text(f"Processing: {ref_name}{ref_window_desc} vs {query_name}{query_window_desc}")

    progress_bar.empty()
    status_text.empty()

    return pd.DataFrame(results)
482
+
483
+
484
+ def main():
485
+ # Header
486
+ st.markdown('<p class="main-header">🧬 RNA Motif Structure Comparison</p>', unsafe_allow_html=True)
487
+ st.markdown('<p class="sub-header">Compare RNA motifs with flexible residue selection</p>', unsafe_allow_html=True)
488
+
489
+ # Sidebar
490
+ st.sidebar.header("⚙️ Configuration")
491
+
492
+ # File upload
493
+ st.sidebar.subheader("1️⃣ Upload Structures")
494
+ reference_files = st.sidebar.file_uploader(
495
+ "Upload Reference Motif PDB files (Set A)",
496
+ type=['pdb', 'PDB'],
497
+ accept_multiple_files=True,
498
+ key="reference",
499
+ help="Upload RNA motif structures to use as reference"
500
+ )
501
+
502
+ query_files = st.sidebar.file_uploader(
503
+ "Upload Query Motif PDB files (Set B)",
504
+ type=['pdb', 'PDB'],
505
+ accept_multiple_files=True,
506
+ key="query",
507
+ help="Upload RNA motif structures to compare against reference"
508
+ )
509
+
510
+ # Main content area
511
+ if not reference_files or not query_files:
512
+ st.info("👈 Please upload reference and query motif PDB files to begin analysis")
513
+
514
+ # Show example info
515
+ with st.expander("ℹ️ About this tool"):
516
+ st.markdown("""
517
+ ### Purpose
518
+ This tool compares the 3D structures of RNA motifs with **flexible residue selection** and **multiple comparison modes**.
519
+
520
+ ### Workflow
521
+ 1. **Upload PDB files** for reference and query motifs
522
+ 2. **Preview structures** and see all residues in each file
523
+ 3. **Select residues** to include in comparison (e.g., exclude stem bases, keep only loop)
524
+ 4. **Choose comparison mode**:
525
+ - **Direct comparison**: Compare selected regions directly (must be same size)
526
+ - **Window-based comparison**: Generate windows from selections (handles different sizes)
527
+ 5. **Run analysis** using RMSD-based structural alignment
528
+
529
+ ### Comparison Modes
530
+
531
+ #### Direct Comparison (Same Size)
532
+ - Compares your exact selections
533
+ - Example: You select 4 loop residues from each structure
534
+ - Result: Direct 4-residue vs 4-residue comparison
535
+ - Best for: When all structures have same-sized regions of interest
536
+
537
+ #### Window-Based Comparison (Different Sizes)
538
+ - Generates sliding windows from your selections
539
+ - Example: You select 4 loop residues from ref, 6 loop residues from query
540
+ - Set window size to 4
541
+ - Result: Ref's 4 residues compared against all 4-residue windows from query's 6
542
+ - Best for: When structures have different-sized regions but you want to find similar sub-regions
543
+
544
+ ### Selection Methods
545
+ - **By range**: Select consecutive residues (e.g., residues 3-6 for a tetraloop)
546
+ - **Specific residues**: Pick any combination of residues (e.g., 1,3,5,7)
547
+ - **All residues**: Use the entire structure
548
+
549
+ ### Method Details
550
+ - RMSD calculated using backbone, sugar, and select base atoms
551
+ - Base atoms mapped: purines (N9,C8,C4) ↔ pyrimidines (N1,C2,C6)
552
+ - Kabsch algorithm for optimal structural alignment
553
+
554
+ ### Example Use Cases
555
+
556
+ **Case 1: Extract loops from 2+4+2 structures (Direct)**
557
+ - All structures have 8 residues (2 stem + 4 loop + 2 stem)
558
+ - Select residues 3-6 for all structures (the 4-residue loop)
559
+ - Use "Direct comparison"
560
+ - Result: Compare loop vs loop directly
561
+
562
+ **Case 2: Compare 4-mer loop vs 6-mer loop (Window-based)**
563
+ - Structure A: Select residues 3-6 (4 loop residues)
564
+ - Structure B: Select residues 2-7 (6 loop residues)
565
+ - Use "Window-based comparison" with window size = 4
566
+ - Result: Structure A compared against 3 windows from Structure B
567
+
568
+ **Case 3: Find similar regions in different structures (Window-based)**
569
+ - Reference: Select 5 residues of interest
570
+ - Query: Select 10 residues from larger region
571
+ - Use "Window-based comparison" with window size = 5
572
+ - Result: Find which 5-residue window in query best matches reference
573
+
574
+ ### Output
575
+ - RMSD values for all comparisons
576
+ - Interactive 3D visualization of aligned structures
577
+ - Rotation and translation matrices
578
+ - Sequence information for compared regions
579
+ """)
580
+
581
+ return
582
+
583
+ # Create temporary directory for file processing
584
+ temp_dir = tempfile.mkdtemp()
585
+
586
+ # Save uploaded files
587
+ for file in reference_files:
588
+ save_uploaded_file(file, temp_dir)
589
+ for file in query_files:
590
+ save_uploaded_file(file, temp_dir)
591
+
592
+ # Display file info
593
+ st.markdown("---")
594
+ col1, col2 = st.columns(2)
595
+ with col1:
596
+ st.metric("Reference Motifs", len(reference_files))
597
+ with col2:
598
+ st.metric("Query Motifs", len(query_files))
599
+
600
+ # Structure preview and selection
601
+ st.markdown("---")
602
+
603
+ # Get residue selections for reference and query sets
604
+ ref_selections = display_structure_selector(reference_files, temp_dir, "Reference")
605
+
606
+ st.markdown("---")
607
+
608
+ query_selections = display_structure_selector(query_files, temp_dir, "Query")
609
+
610
+ # Validate selections
611
+ st.markdown("---")
612
+ valid_selections = True
613
+ min_residues = 2
614
+
615
+ for filename, indices in ref_selections.items():
616
+ if len(indices) < min_residues:
617
+ st.error(f"❌ {filename}: Need at least {min_residues} residues selected, got {len(indices)}")
618
+ valid_selections = False
619
+
620
+ for filename, indices in query_selections.items():
621
+ if len(indices) < min_residues:
622
+ st.error(f"❌ {filename}: Need at least {min_residues} residues selected, got {len(indices)}")
623
+ valid_selections = False
624
+
625
+ # Check if all selections have the same number of residues
626
+ ref_lengths = set(len(indices) for indices in ref_selections.values())
627
+ query_lengths = set(len(indices) for indices in query_selections.values())
628
+ all_lengths = ref_lengths.union(query_lengths)
629
+
630
+ if len(all_lengths) > 1:
631
+ st.warning(f"⚠️ Selected regions have different sizes: {sorted(all_lengths)} residues. Only structures with matching sizes will be compared.")
632
+
633
+ # Run analysis button
634
+ st.sidebar.markdown("---")
635
+ st.sidebar.subheader("2️⃣ Comparison Method")
636
+
637
+ comparison_mode = st.sidebar.radio(
638
+ "How to compare structures?",
639
+ ["Direct comparison (same size)", "Window-based comparison (different sizes)"],
640
+ help="""
641
+ Direct: Compare selected regions directly (must have same size)
642
+ Window-based: Generate sliding windows for flexible comparison
643
+ """
644
+ )
645
+
646
+ window_size = None
647
+ window_type = None
648
+
649
+ if comparison_mode == "Window-based comparison (different sizes)":
650
+ st.sidebar.markdown("**Window Configuration**")
651
+
652
+ window_size = st.sidebar.number_input(
653
+ "Window Size",
654
+ min_value=2,
655
+ max_value=20,
656
+ value=4,
657
+ step=1,
658
+ help="Number of residues per comparison window"
659
+ )
660
+
661
+ window_type = st.sidebar.radio(
662
+ "Window Type",
663
+ ["contiguous", "non-contiguous"],
664
+ help="Contiguous: sliding windows. Non-contiguous: all combinations"
665
+ )
666
+
667
+ st.sidebar.markdown("---")
668
+ st.sidebar.subheader("3️⃣ Run Analysis")
669
+
670
+ if st.sidebar.button("🚀 Run Analysis", type="primary", disabled=not valid_selections):
671
+ if not valid_selections:
672
+ st.error("Please fix selection errors before running analysis")
673
+ return
674
+
675
+ with st.spinner("Analyzing structures..."):
676
+ if comparison_mode == "Direct comparison (same size)":
677
+ results_df = compare_structures_with_selection(
678
+ reference_files,
679
+ query_files,
680
+ ref_selections,
681
+ query_selections,
682
+ temp_dir
683
+ )
684
+ else: # Window-based comparison
685
+ results_df = compare_structures_with_windows(
686
+ reference_files,
687
+ query_files,
688
+ ref_selections,
689
+ query_selections,
690
+ window_size,
691
+ window_type,
692
+ temp_dir
693
+ )
694
+
695
+ # Store results in session state
696
+ st.session_state['results_df'] = results_df
697
+ st.session_state['ref_selections'] = ref_selections
698
+ st.session_state['query_selections'] = query_selections
699
+ st.session_state['comparison_mode'] = comparison_mode
700
+
701
+ if len(results_df) > 0:
702
+ st.success(f"✅ Analysis complete! {len(results_df)} comparisons performed.")
703
+ else:
704
+ st.warning("⚠️ No comparisons could be performed. Check that structures meet comparison requirements.")
705
+
706
+ # Display results if available
707
+ if 'results_df' in st.session_state and len(st.session_state['results_df']) > 0:
708
+ results_df = st.session_state['results_df']
709
+
710
+ # Add RMSD threshold filter
711
+ st.sidebar.markdown("---")
712
+ st.sidebar.subheader("4️⃣ Filter Results")
713
+ rmsd_threshold = st.sidebar.slider(
714
+ "RMSD Threshold (Å)",
715
+ min_value=0.0,
716
+ max_value=5.0,
717
+ value=2.0,
718
+ step=0.1,
719
+ help="Only show results below this RMSD value"
720
+ )
721
+
722
+ # Show comparison mode
723
+ if 'comparison_mode' in st.session_state:
724
+ mode_display = "Direct" if "Direct" in st.session_state['comparison_mode'] else "Window-based"
725
+ st.sidebar.info(f"**Mode**: {mode_display}")
726
+
727
+ # Filter by threshold
728
+ filtered_df = results_df[results_df['RMSD'] <= rmsd_threshold].copy()
729
+
730
+ # Summary statistics
731
+ st.markdown("---")
732
+ st.subheader("📊 Summary Statistics")
733
+
734
+ col1, col2, col3, col4 = st.columns(4)
735
+ with col1:
736
+ st.metric("Total Comparisons", len(results_df))
737
+ with col2:
738
+ st.metric("Below Threshold", len(filtered_df))
739
+ with col3:
740
+ st.metric("Best RMSD", f"{results_df['RMSD'].min():.3f} Å")
741
+ with col4:
742
+ st.metric("Mean RMSD", f"{results_df['RMSD'].mean():.3f} Å")
743
+
744
+ # Results table
745
+ st.markdown("---")
746
+ st.subheader("🔍 Comparison Results")
747
+
748
+ # Prepare display dataframe
749
+ display_df = filtered_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'Num_Residues', 'RMSD']].copy()
750
+ display_df = display_df.sort_values('RMSD').reset_index(drop=True)
751
+ display_df['RMSD'] = display_df['RMSD'].round(3)
752
+
753
+ # Display with selection
754
+ st.dataframe(
755
+ display_df,
756
+ use_container_width=True,
757
+ height=300
758
+ )
759
+
760
+ # Structure selection for visualization
761
+ st.markdown("---")
762
+ st.subheader("🔬 3D Structure Visualization")
763
+
764
+ if len(filtered_df) > 0:
765
+ # Select a comparison to visualize
766
+ selected_idx = st.selectbox(
767
+ "Select a comparison to visualize:",
768
+ range(len(filtered_df)),
769
+ format_func=lambda i: f"{filtered_df.iloc[i]['Reference']}{filtered_df.iloc[i]['Ref_Residues']} ({filtered_df.iloc[i]['Ref_Sequence']}) vs {filtered_df.iloc[i]['Query']}{filtered_df.iloc[i]['Query_Residues']} ({filtered_df.iloc[i]['Query_Sequence']}) | RMSD: {filtered_df.iloc[i]['RMSD']:.3f} Å"
770
+ )
771
+
772
+ selected_row = filtered_df.iloc[selected_idx]
773
+
774
+ # Display RMSD info
775
+ st.info(f"**RMSD: {selected_row['RMSD']:.3f} Å** ({selected_row['Num_Residues']} residues) | Reference: {selected_row['Reference']}{selected_row['Ref_Residues']} ({selected_row['Ref_Sequence']}) | Query: {selected_row['Query']}{selected_row['Query_Residues']} ({selected_row['Query_Sequence']})")
776
+
777
+ # Create visualization - wider display
778
+ col1, col2, col3 = st.columns([0.5, 4, 0.5])
779
+
780
+ with col2:
781
+ try:
782
+ viz_html = create_structure_visualization(
783
+ selected_row['Ref_Path'],
784
+ selected_row['Query_Path'],
785
+ selected_row['Ref_Indices'],
786
+ selected_row['Query_Indices'],
787
+ selected_row['Rotation_Matrix'],
788
+ selected_row['Ref_COM'],
789
+ selected_row['Query_COM'],
790
+ selected_row['RMSD']
791
+ )
792
+ st.components.v1.html(viz_html, height=700, scrolling=False)
793
+ except Exception as e:
794
+ st.error(f"Error creating visualization: {str(e)}")
795
+
796
+ # Show transformation details
797
+ with st.expander("🔧 Transformation Details"):
798
+ col1, col2 = st.columns(2)
799
+
800
+ with col1:
801
+ st.markdown("**Rotation Matrix (U):**")
802
+ st.dataframe(
803
+ pd.DataFrame(selected_row['Rotation_Matrix']).round(4),
804
+ use_container_width=True
805
+ )
806
+
807
+ with col2:
808
+ st.markdown("**Translation Vectors:**")
809
+ st.write(f"Reference COM: [{selected_row['Ref_COM'][0]:.3f}, {selected_row['Ref_COM'][1]:.3f}, {selected_row['Ref_COM'][2]:.3f}]")
810
+ st.write(f"Query COM: [{selected_row['Query_COM'][0]:.3f}, {selected_row['Query_COM'][1]:.3f}, {selected_row['Query_COM'][2]:.3f}]")
811
+
812
+ # Download aligned structures
813
+ with st.expander("💾 Download Structure Files"):
814
+ st.markdown("**Download extracted and aligned structures for external visualization**")
815
+
816
+ from visualization import extract_window_pdb, transform_pdb_string
817
+
818
+ # Extract reference window
819
+ ref_pdb = extract_window_pdb(
820
+ selected_row['Ref_Path'],
821
+ selected_row['Ref_Indices']
822
+ )
823
+
824
+ # Extract and transform query window
825
+ query_pdb = extract_window_pdb(
826
+ selected_row['Query_Path'],
827
+ selected_row['Query_Indices']
828
+ )
829
+
830
+ query_aligned_pdb = transform_pdb_string(
831
+ query_pdb,
832
+ selected_row['Rotation_Matrix'],
833
+ selected_row['Query_COM'],
834
+ selected_row['Ref_COM']
835
+ )
836
+
837
+ col1, col2, col3 = st.columns(3)
838
+
839
+ with col1:
840
+ # Reference structure
841
+ ref_filename = f"ref_{selected_row['Reference'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Ref_Indices']]))}.pdb"
842
+ st.download_button(
843
+ label="📥 Reference PDB",
844
+ data=ref_pdb,
845
+ file_name=ref_filename,
846
+ mime="chemical/x-pdb",
847
+ help="Original reference structure (selected residues only)"
848
+ )
849
+
850
+ with col2:
851
+ # Query structure (original position)
852
+ query_filename = f"query_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Indices']]))}.pdb"
853
+ st.download_button(
854
+ label="📥 Query PDB (Original)",
855
+ data=query_pdb,
856
+ file_name=query_filename,
857
+ mime="chemical/x-pdb",
858
+ help="Original query structure (selected residues only)"
859
+ )
860
+
861
+ with col3:
862
+ # Query structure (aligned)
863
+ query_aligned_filename = f"query_aligned_{selected_row['Query'].replace('.pdb', '')}_{'_'.join(map(str, [i+1 for i in selected_row['Query_Indices']]))}.pdb"
864
+ st.download_button(
865
+ label="📥 Query PDB (Aligned)",
866
+ data=query_aligned_pdb,
867
+ file_name=query_aligned_filename,
868
+ mime="chemical/x-pdb",
869
+ help="Query structure aligned to reference"
870
+ )
871
+
872
+ st.info("💡 **Tip:** Load reference and aligned query together in PyMOL/Chimera to examine the superposition")
873
+ else:
874
+ st.warning("No comparisons below the RMSD threshold. Try increasing the threshold.")
875
+
876
+ # Download results
877
+ st.markdown("---")
878
+ st.subheader("💾 Export Results")
879
+
880
+ col1, col2 = st.columns(2)
881
+
882
+ with col1:
883
+ st.markdown("**Export Results Table**")
884
+ # Prepare CSV - make sure all columns exist
885
+ export_columns = ['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'Num_Residues', 'RMSD']
886
+ export_df = results_df[export_columns].copy()
887
+ export_df = export_df.sort_values('RMSD').reset_index(drop=True)
888
+
889
+ csv = export_df.to_csv(index=False)
890
+ st.download_button(
891
+ label="📥 Download Results (CSV)",
892
+ data=csv,
893
+ file_name="rna_motif_comparison_results.csv",
894
+ mime="text/csv"
895
+ )
896
+
897
+ with col2:
898
+ st.markdown("**Export All Aligned Structures**")
899
+ if st.button("📦 Generate PDB Archive", help="Create a ZIP file with all aligned structure pairs"):
900
+ with st.spinner("Generating PDB files..."):
901
+ import zipfile
902
+ import io
903
+ from visualization import extract_window_pdb, transform_pdb_string
904
+
905
+ # Create ZIP file in memory
906
+ zip_buffer = io.BytesIO()
907
+
908
+ with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
909
+ # Process each comparison
910
+ for idx, row in filtered_df.iterrows():
911
+ # Create a directory name for this comparison
912
+ comp_name = f"comparison_{idx:03d}_rmsd_{row['RMSD']:.3f}"
913
+
914
+ # Extract reference
915
+ ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Indices'])
916
+ ref_filename = f"{comp_name}/reference_{row['Reference'].replace('.pdb', '')}.pdb"
917
+ zip_file.writestr(ref_filename, ref_pdb)
918
+
919
+ # Extract query (original)
920
+ query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Indices'])
921
+ query_filename = f"{comp_name}/query_original_{row['Query'].replace('.pdb', '')}.pdb"
922
+ zip_file.writestr(query_filename, query_pdb)
923
+
924
+ # Extract and align query
925
+ query_aligned_pdb = transform_pdb_string(
926
+ query_pdb,
927
+ row['Rotation_Matrix'],
928
+ row['Query_COM'],
929
+ row['Ref_COM']
930
+ )
931
+ query_aligned_filename = f"{comp_name}/query_aligned_{row['Query'].replace('.pdb', '')}.pdb"
932
+ zip_file.writestr(query_aligned_filename, query_aligned_pdb)
933
+
934
+ # Add a README for this comparison
935
+ readme_content = f"""Comparison #{idx}
936
+ RMSD: {row['RMSD']:.3f} Å
937
+ Residues Compared: {row['Num_Residues']}
938
+
939
+ Reference:
940
+ File: {row['Reference']}
941
+ Residues: {row['Ref_Residues']}
942
+ Sequence: {row['Ref_Sequence']}
943
+
944
+ Query:
945
+ File: {row['Query']}
946
+ Residues: {row['Query_Residues']}
947
+ Sequence: {row['Query_Sequence']}
948
+
949
+ Files:
950
+ - reference_*.pdb: Reference structure (selected residues)
951
+ - query_original_*.pdb: Query structure (original position)
952
+ - query_aligned_*.pdb: Query structure (aligned to reference)
953
+
954
+ To visualize in PyMOL:
955
+ load reference_*.pdb
956
+ load query_aligned_*.pdb
957
+
958
+ To visualize in Chimera:
959
+ File → Open → Select both reference and query_aligned PDB files
960
+ """
961
+ readme_filename = f"{comp_name}/README.txt"
962
+ zip_file.writestr(readme_filename, readme_content)
963
+
964
+ zip_buffer.seek(0)
965
+
966
+ st.download_button(
967
+ label="📥 Download PDB Archive (ZIP)",
968
+ data=zip_buffer.getvalue(),
969
+ file_name="aligned_structures.zip",
970
+ mime="application/zip",
971
+ help=f"Contains {len(filtered_df)} comparison sets with reference, original query, and aligned query PDBs"
972
+ )
973
+
974
+ st.success(f"✅ Archive ready! Contains {len(filtered_df)} comparisons with 3 PDB files each.")
975
+
976
+
977
+ if __name__ == "__main__":
978
+ main()
rmsd_utils.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RMSD Calculation Utilities for RNA Structure Comparison
3
+ Fixed version with explicit purine-pyrimidine atom mapping
4
+ """
5
+
6
+ import numpy as np
7
+
8
+
9
def parse_residue_atoms(fname):
    """
    Parse a PDB file and group its atoms by residue number.

    Only ATOM / HETATM records are read. Residues are keyed by the residue
    number alone (columns 23-26); the chain identifier and insertion code are
    not consulted, so records sharing a residue number end up in one entry.
    If an atom name repeats within a residue, the last record wins.

    Args:
        fname: Path to PDB file

    Returns:
        List of residues sorted by residue number, where each residue is a
        dict with:
            - 'resnum': residue number (int)
            - 'resname': residue name as written in the file (e.g. A, C, G, U)
            - 'atoms': dict of {atom_name: [x, y, z]}

    Raises:
        ValueError: If a coordinate record has a non-numeric residue number
            or coordinate field.
    """
    # 'HETAT' is kept for parity with the other readers in this module.
    coordinate_records = ('ATOM', 'HETATM', 'HETAT')
    residues = {}

    with open(fname) as f:
        for line in f:
            if line[0:6].strip() not in coordinate_records:
                continue

            atomname = line[12:16].strip()
            resname = line[17:20].strip()
            resnum = int(line[22:26].strip())
            xyz = [
                float(line[30:38].strip()),
                float(line[38:46].strip()),
                float(line[46:54].strip()),
            ]

            # The first record seen for a residue number fixes its name.
            entry = residues.setdefault(
                resnum,
                {'resnum': resnum, 'resname': resname, 'atoms': {}},
            )
            entry['atoms'][atomname] = xyz

    return [residues[key] for key in sorted(residues)]
52
+
53
+
54
def get_backbone_sugar_coords_from_residue(residue):
    """
    Extract backbone and sugar atom coordinates from a residue dict.

    Atoms are returned in a fixed order (phosphate group first, then the
    ribose atoms). Atoms missing from the residue are skipped, so the result
    may hold fewer than 12 entries.

    Args:
        residue: Dict with 'atoms' key containing {atom_name: [x, y, z]}

    Returns:
        List of [x, y, z] coordinates in consistent order
    """
    # Fixed order so that two residues yield positionally matching atoms.
    backbone_sugar_atoms = (
        "P", "OP1", "OP2", "O5'", "C5'", "C4'",
        "O4'", "C3'", "O3'", "C2'", "O2'", "C1'",
    )
    atoms = residue['atoms']
    return [atoms[name] for name in backbone_sugar_atoms if name in atoms]
75
+
76
+
77
def get_base_coords_from_residue(residue):
    """
    Extract the three key base atom coordinates from a residue.

    The atoms are returned in an order that pairs purine and pyrimidine
    positions (N9 <-> N1, C8 <-> C2, C4 <-> C6):
        - Purines (A, G): N9, C8, C4
        - Pyrimidines (C, U): N1, C2, C6

    Atoms missing from the residue are skipped. Residue names other than
    A, G, C, U yield an empty list.

    Args:
        residue: Dict with 'resname' and 'atoms' keys

    Returns:
        List of [x, y, z] coordinates
    """
    resname = residue['resname']

    if resname in ('A', 'G'):  # Purines
        base_atoms = ('N9', 'C8', 'C4')
    elif resname in ('C', 'U'):  # Pyrimidines
        base_atoms = ('N1', 'C2', 'C6')
    else:
        # Unknown residue type: contribute no base atoms.
        return []

    atoms = residue['atoms']
    return [atoms[name] for name in base_atoms if name in atoms]
111
+
112
+
113
def get_backbone_sugar_and_selectbase_coords_fixed(fname):
    """
    Extract backbone, sugar, and select base atom coordinates from a PDB file.

    For each residue (in ascending residue-number order) the result contains:
        1. The backbone and sugar atoms that are present, in a fixed order
        2. Up to three base atoms:
            - Purines (A, G): N9, C8, C4
            - Pyrimidines (C, U): N1, C2, C6

    The base-atom ordering pairs purine and pyrimidine positions
    (N9<->N1, C8<->C2, C4<->C6) so structures with different sequences
    can be compared atom by atom.

    Args:
        fname: Path to PDB file

    Returns:
        Numpy array of coordinates with shape (N, 3); (0, 3) when no atoms
        were selected
    """
    all_coords = []

    for residue in parse_residue_atoms(fname):
        all_coords.extend(get_backbone_sugar_coords_from_residue(residue))
        all_coords.extend(get_base_coords_from_residue(residue))

    # reshape keeps the (N, 3) contract even when nothing was selected.
    return np.asarray(all_coords, dtype=float).reshape(-1, 3)
147
+
148
+
149
def calculate_COM(coords):
    """
    Calculate the geometric center (unweighted mean) of coordinates.

    Despite the name, no atomic masses are involved.

    Args:
        coords: Array-like of shape (N, 3)

    Returns:
        Numpy array of shape (3,) holding the mean x, y, z
    """
    coords = np.asarray(coords)
    n_points = coords.shape[0]
    return np.sum(coords, axis=0) / float(n_points)
162
+
163
+
164
def calculate_rotation_rmsd(coords1, coords2, COM1, COM2):
    """
    Calculate the optimal rotation matrix and RMSD using the Kabsch algorithm.

    Both coordinate sets are centered on their respective COM. The returned
    matrix U is a proper rotation (det = +1) such that
    ``np.dot(coords2 - COM2, U)`` best superimposes onto ``coords1 - COM1``.

    Args:
        coords1: Coordinates of structure 1 (N, 3)
        coords2: Coordinates of structure 2 (N, 3)
        COM1: Center of mass of structure 1 (3,)
        COM2: Center of mass of structure 2 (3,)

    Returns:
        U: Rotation matrix (3, 3)
        RMSD: Root mean square deviation (float)
        Both are None when the inputs have different lengths or are empty.
    """
    coords1 = np.asarray(coords1, dtype=float)
    coords2 = np.asarray(coords2, dtype=float)

    # Validate before any arithmetic so bad input yields the (None, None)
    # signal rather than a broadcasting error or a division by zero.
    n_atoms = len(coords1)
    if n_atoms == 0 or n_atoms != len(coords2):
        return None, None

    sel1 = coords1 - COM1
    sel2 = coords2 - COM2

    # Initial residual, see Kabsch.
    R0 = np.sum(sel1 * sel1) + np.sum(sel2 * sel2)

    # SVD of the covariance matrix; S is reused below to get the RMSD.
    V, S, W = np.linalg.svd(np.dot(sel2.T, sel1))

    # A negative determinant product means the best orthogonal transform is a
    # reflection. Test the sign: the product is only approximately -1.0 in
    # floating point, so an equality test would almost never trigger.
    if np.linalg.det(V) * np.linalg.det(W) < 0.0:
        S[-1] = -S[-1]
        V[:, -1] = -V[:, -1]

    U = np.dot(V, W)

    # abs() guards against a tiny negative residual from round-off.
    RMSD = np.sqrt(abs((R0 - 2.0 * np.sum(S)) / n_atoms))

    return U, RMSD
211
+
212
+
213
def translate_rotate_coords(coords, COM, U=None):
    """
    Center coordinates on COM and optionally rotate them.

    Args:
        coords: Coordinates to transform (N, 3)
        COM: Point subtracted from every coordinate (3,)
        U: Rotation matrix (3, 3), optional; applied on the right
            (row-vector convention) after centering

    Returns:
        Transformed coordinates (N, 3)
    """
    centered = np.asarray(coords) - COM

    # Translate only
    if U is None:
        return centered

    # Translate and rotate
    return np.dot(centered, U)
231
+
232
+
233
def get_all_atom_coords(fname):
    """
    Get the coordinates of every ATOM / HETATM record in a PDB file.

    Args:
        fname: Path to PDB file

    Returns:
        Numpy array of coordinates (N, 3), in file order; (0, 3) when the
        file holds no coordinate records

    Raises:
        ValueError: If a coordinate field is not numeric.
    """
    coordinate_records = ('ATOM', 'HETATM', 'HETAT')

    with open(fname) as f:
        coords = [
            [
                float(line[30:38].strip()),
                float(line[38:46].strip()),
                float(line[46:54].strip()),
            ]
            for line in f
            if line[0:6].strip() in coordinate_records
        ]

    # reshape keeps the (N, 3) contract for files without atoms.
    return np.asarray(coords, dtype=float).reshape(-1, 3)
256
+
257
+
258
def apply_transformation_to_pdb(fname, U, COM, output_fname):
    """
    Center and rotate the coordinates of a PDB file and save the result.

    Every ATOM / HETATM coordinate is replaced by ``np.dot(coord - COM, U)``;
    the output is therefore centered on the origin and is not translated to
    any target position. All other lines are copied unchanged.

    Args:
        fname: Input PDB file path
        U: Rotation matrix (3, 3)
        COM: Center of mass to translate from (3,)
        output_fname: Output PDB file path
    """
    coordinate_records = ('ATOM', 'HETATM', 'HETAT')

    # Read everything first so output_fname may be the same path as fname.
    with open(fname) as f:
        lines = f.readlines()

    with open(output_fname, 'w') as f:
        for line in lines:
            if line[0:6].strip() not in coordinate_records:
                f.write(line)
                continue

            coord = np.array([
                float(line[30:38].strip()),
                float(line[38:46].strip()),
                float(line[46:54].strip()),
            ])
            new_coord = np.dot(coord - COM, U)

            # Only the three 8-character coordinate fields are rewritten.
            f.write(
                line[:30]
                + f"{new_coord[0]:8.3f}"
                + f"{new_coord[1]:8.3f}"
                + f"{new_coord[2]:8.3f}"
                + line[54:]
            )
visualization.py ADDED
@@ -0,0 +1,673 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 3D Visualization Module for RNA Structure Comparison
3
+ Uses py3Dmol for interactive molecular visualization
4
+ """
5
+
6
+ import numpy as np
7
+ from rmsd_utils import (
8
+ parse_residue_atoms,
9
+ translate_rotate_coords,
10
+ calculate_COM,
11
+ get_backbone_sugar_and_selectbase_coords_fixed
12
+ )
13
+
14
+
15
def _to_js_string_literal(text):
    """Return *text* as a JavaScript string literal safe inside a <script> tag."""
    import json

    # json.dumps escapes quotes, backslashes, newlines and non-ASCII characters;
    # "</" is additionally broken up so the data can never close the script tag.
    return json.dumps(text).replace("</", "<\\/")


def create_structure_visualization(ref_path, query_path, ref_window_indices, query_window_indices,
                                   rotation_matrix, ref_com, query_com, rmsd=None):
    """
    Create an interactive 3D visualization of aligned structures.

    The selected residues are extracted from both files, the query window is
    moved onto the reference with transform_pdb_string, and both PDB strings
    are embedded in a self-contained HTML page that loads 3Dmol.js from its
    public CDN.

    Args:
        ref_path: Path to reference motif PDB file
        query_path: Path to query motif PDB file
        ref_window_indices: List of residue indices for the reference window
        query_window_indices: List of residue indices for the query window
        rotation_matrix: Rotation matrix from RMSD calculation
        ref_com: Center of mass of reference window
        query_com: Center of mass of query window
        rmsd: RMSD value (optional, for display)

    Returns:
        HTML string containing the 3Dmol.js visualization
    """
    # Extract only the window residues from both structures
    ref_window_pdb = extract_window_pdb(ref_path, ref_window_indices)
    query_window_pdb = extract_window_pdb(query_path, query_window_indices)

    # Align the query window: translate to origin, rotate, translate to the
    # reference position (hence both COMs are needed).
    transformed_query_pdb = transform_pdb_string(
        query_window_pdb,
        rotation_matrix,
        query_com,
        ref_com
    )

    # Embed the PDB text as escaped JS string literals rather than raw template
    # literals, so backticks, "${" or backslashes in the data cannot break the
    # generated script.
    ref_pdb_js = _to_js_string_literal(ref_window_pdb)
    query_pdb_js = _to_js_string_literal(transformed_query_pdb)

    rmsd_text = f"{rmsd:.3f}" if rmsd is not None else "N/A"

    # Create the 3Dmol.js visualization page
    html = f"""
<!DOCTYPE html>
<html>
<head>
    <script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>
    <style>
        #container {{
            width: 100%;
            height: 700px;
            position: relative;
            border: 1px solid #ddd;
        }}
        .control-panel {{
            position: absolute;
            top: 10px;
            right: 10px;
            background: rgba(255, 255, 255, 0.95);
            padding: 15px;
            border-radius: 8px;
            font-family: Arial, sans-serif;
            font-size: 13px;
            z-index: 1000;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            max-width: 220px;
        }}
        .control-panel h4 {{
            margin: 0 0 10px 0;
            font-size: 14px;
            color: #333;
        }}
        .control-section {{
            margin-bottom: 12px;
            padding-bottom: 12px;
            border-bottom: 1px solid #eee;
        }}
        .control-section:last-child {{
            border-bottom: none;
            margin-bottom: 0;
        }}
        .control-section label {{
            display: block;
            margin: 6px 0;
            cursor: pointer;
        }}
        .control-section input[type="checkbox"] {{
            margin-right: 8px;
        }}
        .control-section select {{
            width: 100%;
            padding: 4px;
            margin-top: 5px;
            border: 1px solid #ccc;
            border-radius: 4px;
        }}
        .legend {{
            position: absolute;
            top: 10px;
            left: 10px;
            background: rgba(255, 255, 255, 0.95);
            padding: 15px;
            border-radius: 8px;
            font-family: Arial, sans-serif;
            font-size: 13px;
            z-index: 1000;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }}
        .legend h4 {{
            margin: 0 0 10px 0;
            font-size: 14px;
            color: #333;
        }}
        .legend-item {{
            margin: 6px 0;
            display: flex;
            align-items: center;
        }}
        .color-box {{
            width: 24px;
            height: 16px;
            margin-right: 10px;
            border: 1px solid #333;
            border-radius: 2px;
        }}
        .rmsd-info {{
            position: absolute;
            bottom: 10px;
            left: 10px;
            background: rgba(255, 255, 255, 0.95);
            padding: 10px 15px;
            border-radius: 8px;
            font-family: Arial, sans-serif;
            font-size: 13px;
            z-index: 1000;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }}
        .section-title {{
            font-weight: bold;
            color: #555;
            margin-bottom: 5px;
            font-size: 12px;
            text-transform: uppercase;
        }}
    </style>
</head>
<body>
    <div id="container"></div>

    <div class="legend">
        <h4>🧬 Structures</h4>
        <div class="legend-item">
            <div class="color-box" style="background: #4A90E2;"></div>
            <span>Reference</span>
        </div>
        <div class="legend-item">
            <div class="color-box" style="background: #E94B3C;"></div>
            <span>Query (Aligned)</span>
        </div>
    </div>

    <div class="control-panel">
        <h4>⚙️ Display Options</h4>

        <div class="control-section">
            <div class="section-title">Structures</div>
            <label>
                <input type="checkbox" id="showRef" checked onchange="updateDisplay()">
                Reference
            </label>
            <label>
                <input type="checkbox" id="showQuery" checked onchange="updateDisplay()">
                Query
            </label>
        </div>

        <div class="control-section">
            <div class="section-title">Style</div>
            <select id="styleMode" onchange="updateDisplay()">
                <option value="sticks">Sticks</option>
                <option value="cartoon">Cartoon</option>
                <option value="spheres">Spheres</option>
                <option value="lines">Lines</option>
                <option value="cartoon_sticks">Cartoon + Sticks</option>
            </select>
        </div>

        <div class="control-section">
            <div class="section-title">Components</div>
            <label>
                <input type="checkbox" id="showBackbone" checked onchange="updateDisplay()">
                Backbone/Sugar
            </label>
            <label>
                <input type="checkbox" id="showBases" checked onchange="updateDisplay()">
                Bases
            </label>
        </div>

        <div class="control-section">
            <div class="section-title">Labels</div>
            <label>
                <input type="checkbox" id="showLabels" onchange="updateDisplay()">
                Residue Labels
            </label>
            <label>
                <input type="checkbox" id="showNumbers" onchange="updateDisplay()">
                Residue Numbers
            </label>
            <label>
                <input type="checkbox" id="showAtoms" onchange="updateDisplay()">
                Atom Names
            </label>
            <select id="atomLabelMode" style="margin-top: 5px; font-size: 11px;" onchange="updateDisplay()">
                <option value="all">All Atoms</option>
                <option value="backbone">Backbone Only</option>
                <option value="sidechain">Bases Only</option>
            </select>
        </div>

        <div class="control-section">
            <div class="section-title">Background</div>
            <select id="bgColor" onchange="updateBackground()">
                <option value="white">White</option>
                <option value="black">Black</option>
                <option value="gray">Gray</option>
            </select>
        </div>
    </div>

    <div class="rmsd-info">
        <strong>RMSD:</strong> <span style="color: #E94B3C; font-weight: bold;">{rmsd_text} Å</span>
    </div>

    <script>
        let viewer = null;
        let refModel = null;
        let queryModel = null;
        const refPDB = {ref_pdb_js};
        const queryPDB = {query_pdb_js};

        // RNA backbone atoms
        const backboneAtoms = ['P', 'OP1', 'OP2', "O5'", "C5'", "C4'", "O4'", "C3'", "O3'", "C2'", "O2'", "C1'"];

        function initViewer() {{
            try {{
                viewer = $3Dmol.createViewer("container", {{
                    backgroundColor: 'white'
                }});

                if (!refPDB || refPDB.length < 10) {{
                    throw new Error("Reference PDB data is empty");
                }}

                if (!queryPDB || queryPDB.length < 10) {{
                    throw new Error("Query PDB data is empty");
                }}

                updateDisplay();
                viewer.zoomTo();
                viewer.render();

            }} catch (error) {{
                console.error("Error initializing viewer:", error);
                document.getElementById("container").innerHTML =
                    '<div style="padding: 20px; color: red; text-align: center;">Error loading visualization: ' + error.message + '</div>';
            }}
        }}

        function updateBackground() {{
            const bgColor = document.getElementById('bgColor').value;
            viewer.setBackgroundColor(bgColor);
            viewer.render();
        }}

        function updateDisplay() {{
            if (!viewer) return;

            try {{
                // Clear everything
                viewer.removeAllModels();
                viewer.removeAllLabels();

                const showRef = document.getElementById('showRef').checked;
                const showQuery = document.getElementById('showQuery').checked;
                const showBackbone = document.getElementById('showBackbone').checked;
                const showBases = document.getElementById('showBases').checked;
                const showLabels = document.getElementById('showLabels').checked;
                const showNumbers = document.getElementById('showNumbers').checked;
                const showAtoms = document.getElementById('showAtoms').checked;
                const styleMode = document.getElementById('styleMode').value;

                // Reference structure (blue)
                if (showRef) {{
                    refModel = viewer.addModel(refPDB, "pdb");
                    applyStyle(refModel, '#4A90E2', '#5BA3F5', styleMode, showBackbone, showBases);

                    if (showLabels || showNumbers) {{
                        addResidueLabels(refModel, '#4A90E2', showLabels, showNumbers);
                    }}
                    if (showAtoms) {{
                        addAtomLabels(refModel, '#4A90E2');
                    }}
                }}

                // Query structure (red)
                if (showQuery) {{
                    queryModel = viewer.addModel(queryPDB, "pdb");
                    applyStyle(queryModel, '#E94B3C', '#FF6B6B', styleMode, showBackbone, showBases);

                    if (showLabels || showNumbers) {{
                        addResidueLabels(queryModel, '#E94B3C', showLabels, showNumbers);
                    }}
                    if (showAtoms) {{
                        addAtomLabels(queryModel, '#E94B3C');
                    }}
                }}

                viewer.zoomTo();
                viewer.render();

            }} catch (error) {{
                console.error("Error updating display:", error);
            }}
        }}

        function applyStyle(model, backboneColor, baseColor, styleMode, showBackbone, showBases) {{
            // Clear any existing styles
            viewer.setStyle({{model: model}}, {{}});

            if (styleMode === 'cartoon') {{
                // Cartoon representation
                viewer.setStyle({{model: model}}, {{
                    cartoon: {{
                        color: backboneColor,
                        thickness: 0.5,
                        opacity: 0.8
                    }}
                }});
            }} else if (styleMode === 'cartoon_sticks') {{
                // Cartoon + sticks for bases
                viewer.setStyle({{model: model}}, {{
                    cartoon: {{
                        color: backboneColor,
                        thickness: 0.5,
                        opacity: 0.7
                    }}
                }});
                if (showBases) {{
                    viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
                        stick: {{
                            color: baseColor,
                            radius: 0.15
                        }}
                    }});
                }}
            }} else if (styleMode === 'spheres') {{
                // Sphere representation
                if (showBackbone) {{
                    viewer.setStyle({{model: model, atom: backboneAtoms}}, {{
                        sphere: {{
                            color: backboneColor,
                            radius: 0.4
                        }}
                    }});
                }}
                if (showBases) {{
                    viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
                        sphere: {{
                            color: baseColor,
                            radius: 0.35
                        }}
                    }});
                }}
            }} else if (styleMode === 'lines') {{
                // Line representation
                if (showBackbone) {{
                    viewer.setStyle({{model: model, atom: backboneAtoms}}, {{
                        line: {{
                            color: backboneColor,
                            linewidth: 2
                        }}
                    }});
                }}
                if (showBases) {{
                    viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
                        line: {{
                            color: baseColor,
                            linewidth: 2
                        }}
                    }});
                }}
            }} else {{
                // Stick representation (default)
                if (showBackbone) {{
                    viewer.setStyle({{model: model, atom: backboneAtoms}}, {{
                        stick: {{
                            color: backboneColor,
                            radius: 0.2
                        }},
                        sphere: {{
                            color: backboneColor,
                            radius: 0.3
                        }}
                    }});
                }}
                if (showBases) {{
                    viewer.addStyle({{model: model, not: {{atom: backboneAtoms}}}}, {{
                        stick: {{
                            color: baseColor,
                            radius: 0.15
                        }},
                        sphere: {{
                            color: baseColor,
                            radius: 0.25
                        }}
                    }});
                }}
            }}
        }}

        function addResidueLabels(model, color, showLabels, showNumbers) {{
            const atoms = viewer.selectedAtoms({{model: model}});
            const residues = {{}};

            // Group atoms by residue
            atoms.forEach(atom => {{
                const key = atom.chain + '_' + atom.resi;
                if (!residues[key]) {{
                    residues[key] = atom;
                }}
            }});

            // Add labels for each residue
            Object.values(residues).forEach(atom => {{
                let labelText = '';
                if (showLabels && showNumbers) {{
                    labelText = atom.resn + atom.resi;
                }} else if (showLabels) {{
                    labelText = atom.resn;
                }} else if (showNumbers) {{
                    labelText = atom.resi.toString();
                }}

                if (labelText) {{
                    viewer.addLabel(labelText, {{
                        position: atom,
                        backgroundColor: color,
                        backgroundOpacity: 0.7,
                        fontColor: 'white',
                        fontSize: 11,
                        fontWeight: 'bold',
                        showBackground: true,
                        borderRadius: 3
                    }});
                }}
            }});
        }}

        function addAtomLabels(model, color) {{
            const atomLabelMode = document.getElementById('atomLabelMode').value;
            const atoms = viewer.selectedAtoms({{model: model}});

            // Filter atoms based on mode
            let filteredAtoms = atoms;
            if (atomLabelMode === 'backbone') {{
                // Only backbone atoms
                filteredAtoms = atoms.filter(atom => backboneAtoms.includes(atom.atom));
            }} else if (atomLabelMode === 'sidechain') {{
                // Only base/sidechain atoms (not backbone)
                filteredAtoms = atoms.filter(atom => !backboneAtoms.includes(atom.atom));
            }}
            // 'all' mode uses all atoms (no filtering)

            // Add label for each atom
            filteredAtoms.forEach(atom => {{
                // Use atom name (e.g., P, C1', N1, O4, etc.)
                const atomName = atom.atom;

                viewer.addLabel(atomName, {{
                    position: atom,
                    backgroundColor: color,
                    backgroundOpacity: 0.6,
                    fontColor: 'white',
                    fontSize: 9,
                    fontWeight: 'normal',
                    showBackground: true,
                    borderRadius: 2,
                    borderThickness: 0.5
                }});
            }});
        }}

        // Initialize on load
        initViewer();
    </script>
</body>
</html>
"""

    return html
541
+
542
+
543
def extract_window_pdb(pdb_path, window_indices):
    """
    Extract specific residues from a PDB file based on window indices.

    Indices refer to positions in the residue list returned by
    parse_residue_atoms (residues sorted by residue number). Matching
    ATOM / HETATM lines are kept, together with any HEADER, TITLE, MODEL and
    ENDMDL lines, and a terminating END record is appended.

    The full file content is returned unchanged when no residues can be
    parsed, when no index is in range, or when the extracted text is
    suspiciously short (under 50 characters).

    Args:
        pdb_path: Path to PDB file
        window_indices: List of residue indices (0-based)

    Returns:
        String containing PDB data for only the specified residues
    """
    with open(pdb_path) as f:
        lines = f.readlines()
    full_text = ''.join(lines)

    # Get all residue numbers from the file
    residues = parse_residue_atoms(pdb_path)
    if not residues:
        # Nothing parseable: fall back to the original file
        return full_text

    residue_numbers = [res['resnum'] for res in residues]
    n_residues = len(residue_numbers)

    # Map window indices to actual residue numbers. The lower bound stops
    # negative indices from silently wrapping around to the end of the list.
    target_resnums = {
        residue_numbers[idx]
        for idx in window_indices
        if 0 <= idx < n_residues
    }
    if not target_resnums:
        # No valid residues: fall back to the original file
        return full_text

    coordinate_records = ('ATOM', 'HETATM', 'HETAT')
    passthrough_records = ('HEADER', 'TITLE', 'MODEL', 'ENDMDL')

    window_lines = []
    for line in lines:
        if len(line) < 6:
            continue

        record = line[0:6].strip()
        if record in coordinate_records:
            try:
                resnum = int(line[22:26])
            except ValueError:
                # Blank or non-numeric residue field: not selectable
                continue
            if resnum in target_resnums:
                window_lines.append(line)
        elif record in passthrough_records:
            window_lines.append(line)

    # END records are never copied above, so one is always needed here.
    # (A substring test for 'END' would wrongly match ENDMDL or title text.)
    if window_lines:
        window_lines.append('END\n')

    result = ''.join(window_lines)

    # Debug: print info about extraction
    if not result or len(result) < 50:
        print(f"Warning: Empty or very small PDB extracted from {pdb_path}")
        print(f" Window indices: {window_indices}")
        print(f" Target residue numbers: {target_resnums}")
        print(f" Result length: {len(result)}")
        # Return full structure if extraction failed
        return full_text

    return result
612
+
613
+
614
def transform_pdb_string(pdb_string, rotation_matrix, query_com, ref_com=None):
    """
    Rotate and translate the coordinates of a PDB string onto a reference.

    Every ATOM/HETATM record of at least 54 characters has its x/y/z fields
    (columns 31-54) rewritten as:

        (coord - query_com) . rotation_matrix + ref_com

    i.e. the query is moved to the origin, rotated, then moved to the
    reference position. All other lines, and coordinate records whose
    fields cannot be parsed or transformed, are passed through unchanged.

    Args:
        pdb_string: PDB format string
        rotation_matrix: 3x3 rotation matrix, applied on the right of the
            row-vector coordinate
        query_com: Center of mass of query structure (to translate FROM)
        ref_com: Center of mass of reference structure (to translate TO);
            when omitted the result stays centered on the origin

    Returns:
        Transformed PDB string with aligned coordinates
    """
    # No reference centroid given: leave the rotated structure at the origin
    destination = np.array([0.0, 0.0, 0.0]) if ref_com is None else ref_com

    coordinate_records = ('ATOM', 'HETATM', 'HETAT')
    xyz_columns = ((30, 38), (38, 46), (46, 54))

    output = []
    for row in pdb_string.split('\n'):
        # Guard: too short to hold coordinates, or not a coordinate record
        if len(row) < 54 or row[0:6].strip() not in coordinate_records:
            output.append(row)
            continue

        try:
            xyz = np.array([float(row[lo:hi].strip()) for lo, hi in xyz_columns])
            # Center on the query centroid, rotate, shift to the reference
            moved = np.dot(xyz - query_com, rotation_matrix) + destination
            rewritten = "{}{:8.3f}{:8.3f}{:8.3f}{}".format(
                row[:30], moved[0], moved[1], moved[2], row[54:]
            )
        except (ValueError, IndexError):
            # Keep the record untouched if it cannot be parsed or transformed
            rewritten = row

        output.append(rewritten)

    return '\n'.join(output)