jiehou committed on
Commit
9d7ed31
·
verified ·
1 Parent(s): 5edfe3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +630 -423
app.py CHANGED
@@ -1,7 +1,7 @@
1
  """
2
- RNA Motif Multi-Structure Comparison Tool
3
- Streamlit app for comparing multiple RNA motif structures simultaneously
4
- Based on working pairwise alignment code
5
  """
6
 
7
  import streamlit as st
@@ -25,15 +25,20 @@ from rmsd_utils import (
25
  )
26
 
27
  # Import example data loader
28
- from example_data_loader import (
29
- get_example_pdbs,
30
- load_example_as_uploaded_file,
31
- get_example_info
32
- )
 
 
 
 
 
33
 
34
  # Page configuration
35
  st.set_page_config(
36
- page_title="RNA Motif Multi-Structure Comparison",
37
  page_icon="🧬",
38
  layout="wide",
39
  initial_sidebar_state="expanded"
@@ -226,7 +231,6 @@ st.markdown("""
226
  """, unsafe_allow_html=True)
227
 
228
 
229
-
230
  def save_uploaded_file(uploaded_file, directory):
231
  """Save an uploaded file to a temporary directory"""
232
  file_path = os.path.join(directory, uploaded_file.name)
@@ -236,7 +240,15 @@ def save_uploaded_file(uploaded_file, directory):
236
 
237
 
238
  def get_structure_info(pdb_path):
239
- """Get information about a structure's residues."""
 
 
 
 
 
 
 
 
240
  residues = parse_residue_atoms(pdb_path)
241
 
242
  structure_info = []
@@ -251,351 +263,468 @@ def get_structure_info(pdb_path):
251
  return structure_info
252
 
253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  def extract_window_coords(residues, window_indices):
255
- """Extract coordinates for a specific window of residues."""
256
- all_coords = []
257
  for idx in window_indices:
258
- if idx < len(residues):
259
- residue = residues[idx]
260
- backbone_coords = get_backbone_sugar_coords_from_residue(residue)
261
- all_coords.extend(backbone_coords)
262
- base_coords = get_base_coords_from_residue(residue)
263
- all_coords.extend(base_coords)
264
-
265
- return np.asarray(all_coords)
266
 
267
 
268
- def generate_windows_from_selection(selected_indices, win_size, win_type):
269
- """Generate windows from selected indices"""
270
- if len(selected_indices) < win_size:
271
  return []
272
 
273
- if win_type == "contiguous":
274
- windows = []
275
- for i in range(len(selected_indices) - win_size + 1):
276
- windows.append(selected_indices[i:i + win_size])
277
- return windows
278
  else: # non-contiguous
279
- return [list(combo) for combo in combinations(selected_indices, win_size)]
 
 
 
 
280
 
281
 
282
  def main():
283
- # Header
284
- st.markdown('<p class="main-header">🧬 RNA Motif Multi-Structure Comparison</p>', unsafe_allow_html=True)
285
- st.markdown('<p class="sub-header">Compare multiple RNA motifs simultaneously with window-based alignment</p>', unsafe_allow_html=True)
286
-
287
- # Sidebar
288
- st.sidebar.header("βš™οΈ Configuration")
289
-
290
- # Step 1: File upload or Example data
291
- st.sidebar.subheader("1️⃣ Load Structures")
292
-
293
- # Add tabs for Upload vs Examples
294
- data_source = st.sidebar.radio(
295
- "Data Source",
296
- ["Upload Files", "Use Examples"],
297
- key="data_source",
298
- help="Choose to upload your own files or use example data"
299
- )
300
 
301
- # Clear session state when data source changes
302
- if 'previous_data_source' not in st.session_state:
303
- st.session_state['previous_data_source'] = data_source
304
-
305
- if st.session_state['previous_data_source'] != data_source:
306
- # Data source changed - clear all structure-related session state
307
- if 'selections' in st.session_state:
308
- del st.session_state['selections']
309
- if 'auto_initialized' in st.session_state:
310
- del st.session_state['auto_initialized']
311
- if 'results' in st.session_state:
312
- del st.session_state['results']
313
- if 'structure_data' in st.session_state:
314
- del st.session_state['structure_data']
315
- if 'reference_name' in st.session_state:
316
- del st.session_state['reference_name']
317
- st.session_state['previous_data_source'] = data_source
318
- st.sidebar.info("πŸ”„ Switched data source - session cleared")
319
-
320
- uploaded_files = []
321
-
322
- if data_source == "Upload Files":
323
- uploaded_files_raw = st.sidebar.file_uploader(
324
- "Upload RNA Motif PDB files",
325
- type=['pdb', 'PDB'],
326
- accept_multiple_files=True,
327
- key="structures",
328
- help="Upload all RNA motif structures to compare"
329
  )
330
- if uploaded_files_raw:
331
- uploaded_files = list(uploaded_files_raw)
 
 
 
 
 
 
 
 
 
 
332
 
333
- else: # Use Examples
334
- # Check if data folder exists
335
- data_folder = "data"
336
- examples = get_example_pdbs(data_folder)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
 
338
- if not examples:
339
- st.sidebar.warning(f"⚠️ No example files found in '{data_folder}/' folder")
340
- st.sidebar.info("πŸ’‘ Create a 'data/' folder and add .pdb files to use examples")
 
 
341
  else:
342
- st.sidebar.success(f"πŸ“ Found {len(examples)} example files")
343
-
344
- # Show example info in expander
345
- with st.sidebar.expander("πŸ“‹ View Example Files"):
346
- example_info = get_example_info(data_folder)
347
- for name, info in sorted(example_info.items()):
348
- if 'error' not in info:
349
- st.caption(f"**{name}**")
350
- st.caption(f" β”” {info['atoms']} atoms")
351
-
352
- # Multiselect for examples
353
- selected_names = st.sidebar.multiselect(
354
- "Select example PDB files",
355
- options=sorted(examples.keys()),
356
- default=sorted(examples.keys())[:3] if len(examples) >= 3 else sorted(examples.keys()),
357
- help="Choose one or more example structures"
358
- )
359
-
360
- if selected_names:
361
- # Convert example files to uploaded file format
362
- for name in selected_names:
363
- uploaded_files.append(load_example_as_uploaded_file(examples[name]))
364
 
365
- st.sidebar.success(f"βœ… Loaded {len(uploaded_files)} example file(s)")
366
-
367
- if not uploaded_files:
368
- st.info("πŸ‘ˆ Please upload RNA motif PDB files or select examples to begin analysis")
369
- with st.expander("ℹ️ About this tool"):
370
- st.markdown("""
371
- ### Multi-Structure RNA Motif Comparison
372
-
373
- This tool compares multiple RNA motif structures simultaneously using window-based alignment.
374
-
375
- **Workflow:**
376
- 1. Upload PDB structures or use examples
377
- 2. Structures ranked by length (shortest first)
378
- 3. Select residues for each structure via dropdown
379
- 4. Choose reference structure (default: shortest)
380
- 5. Configure window size and type
381
- 6. Run comparison - all structures aligned to reference
382
- 7. View all structures superimposed in 3D
383
-
384
- **Features:**
385
- - Window-based comparison (contiguous or non-contiguous)
386
- - Best match selection per structure
387
- - Interactive 3D visualization with all structures
388
- - Color-coded structures
389
- - RMSD-based alignment quality
390
- - Example data for testing
391
-
392
- **Using Examples:**
393
- - Create a folder named `data/` in your app directory
394
- - Add .pdb files to the data folder
395
- - Select "Use Examples" to load them
396
- """)
397
- return
398
-
399
- # Create temporary directory
400
- temp_dir = tempfile.mkdtemp()
401
-
402
- # Save uploaded files
403
- for file in uploaded_files:
404
- save_uploaded_file(file, temp_dir)
405
-
406
- # Step 2: Rank structures by length and create dropdown
407
- #st.sidebar.markdown("---")
408
- st.sidebar.subheader("2️⃣ Structure Selection")
409
-
410
- # Get structure sizes
411
- structure_data = []
412
- for file in uploaded_files:
413
- file_path = os.path.join(temp_dir, file.name)
414
- residues = parse_residue_atoms(file_path)
415
- structure_data.append({
416
- 'file': file,
417
- 'name': file.name,
418
- 'path': file_path,
419
- 'num_residues': len(residues),
420
- 'residues': residues
421
- })
422
-
423
- # Sort by number of residues (shortest first)
424
- structure_data.sort(key=lambda x: x['num_residues'])
425
-
426
- # Display ranked structures
427
- st.markdown("---")
428
- st.subheader("πŸ“Š Uploaded Structures (Ranked by Length)")
429
-
430
- rank_df = pd.DataFrame([
431
- {'Rank': i+1, 'Filename': s['name'], 'Residues': s['num_residues']}
432
- for i, s in enumerate(structure_data)
433
- ])
434
- st.dataframe(rank_df, use_container_width=True)
435
-
436
- # Step 3: Atom selection for each structure using dropdown
437
- st.markdown("---")
438
- st.subheader("πŸ”¬ Configure Atom Selections")
439
-
440
- # Dropdown to select structure
441
- selected_structure_name = st.selectbox(
442
- "Select structure to configure (excluding two bases in 5' and 3' by default)",
443
- options=[s['name'] for s in structure_data],
444
- help="Choose a structure to configure its residue selection"
445
- )
446
-
447
- # Initialize session state for selections
448
- if 'selections' not in st.session_state:
449
- st.session_state['selections'] = {}
450
-
451
- # Track current file names to detect if files changed
452
- current_file_names = set([s['name'] for s in structure_data])
453
- if 'current_files' not in st.session_state:
454
- st.session_state['current_files'] = current_file_names
455
-
456
- # If file set changed, reset auto-initialization
457
- if st.session_state['current_files'] != current_file_names:
458
- st.session_state['current_files'] = current_file_names
459
- if 'auto_initialized' in st.session_state:
460
- del st.session_state['auto_initialized']
461
- # Clear old selections that don't match current files
462
- old_selections = set(st.session_state['selections'].keys())
463
- for old_name in old_selections - current_file_names:
464
- del st.session_state['selections'][old_name]
465
-
466
- # Auto-initialize selections for all structures (exclude first and last residue)
467
- if 'auto_initialized' not in st.session_state:
468
- for struct in structure_data:
469
  num_res = struct['num_residues']
470
- if num_res > 4: # Need at least 5 residues to do 2 to len-2
471
- # Auto-select from index 1 to index len-2 (which is residue 2 to residue len-1)
472
  auto_selection = list(range(1, num_res - 1))
473
- st.session_state['selections'][struct['name']] = auto_selection
474
  else:
475
- # For small structures, use all residues
476
- st.session_state['selections'][struct['name']] = list(range(num_res))
477
- st.session_state['auto_initialized'] = True
478
 
479
- # Find selected structure data
480
- selected_struct = next((s for s in structure_data if s['name'] == selected_structure_name), None)
 
 
 
 
 
 
 
481
 
482
- if selected_struct:
483
- st.markdown(f"### {selected_struct['name']} ({selected_struct['num_residues']} residues)")
484
-
485
- # Display residue table
486
- structure_info = get_structure_info(selected_struct['path'])
487
- info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
488
- info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
489
- info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
490
- info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]
491
-
492
- with st.expander(f"πŸ“‹ View Residue Table", expanded=False):
493
- st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38))
494
-
495
- # Selection method
496
- selection_method = st.radio(
497
- f"Selection method for {selected_struct['name']}",
498
- ["Select by range", "Select specific residues", "Use all residues"],
499
- key=f"method_{selected_struct['name']}",
500
- index = 1,
501
- horizontal=True
502
- )
503
-
504
- selected_indices = []
505
-
506
- if selection_method == "Select by range":
507
- # Get current saved selection or auto-initialized values
508
- current_selection = st.session_state['selections'].get(selected_struct['name'], [])
509
- default_start = current_selection[0] + 2 if current_selection else 2
510
- default_end = current_selection[-1] + 1 if current_selection else max(2, len(structure_info) - 2)
511
 
512
- col1, col2 = st.columns(2)
513
- with col1:
514
- start_idx = st.number_input(
515
- "Start index (1-based)",
516
- min_value=1,
517
- max_value=len(structure_info),
518
- value=default_start,
519
- key=f"start_{selected_struct['name']}"
520
- )
521
- with col2:
522
- end_idx = st.number_input(
523
- "End index (1-based, inclusive)",
524
- min_value=1,
525
- max_value=len(structure_info),
526
- value=default_end,
527
- key=f"end_{selected_struct['name']}"
528
- )
529
 
530
- if start_idx <= end_idx:
531
- selected_indices = list(range(start_idx - 1, end_idx))
532
- st.success(f"βœ“ Selected residues: {[i+1 for i in selected_indices]}")
533
- else:
534
- st.error("Start index must be ≀ end index")
535
 
536
- elif selection_method == "Select specific residues":
537
- # Get current selection or default
538
- current_selection = st.session_state['selections'].get(selected_struct['name'], [])
539
- default_names = [structure_info[i]['full_name'] for i in range(2, len(structure_info)-2)] if current_selection else []
540
-
541
-
542
-
543
- selected_names = st.multiselect(
544
- "Select residues",
545
- options=[info['full_name'] for info in structure_info],
546
- default=default_names,
547
- key=f"specific_{selected_struct['name']}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
  )
549
 
550
- name_to_idx = {info['full_name']: info['index'] for info in structure_info}
551
- selected_indices = [name_to_idx[name] for name in selected_names]
552
- selected_indices.sort()
553
 
554
- if selected_indices:
555
- st.success(f"βœ“ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")
556
 
557
- else: # Use all residues
558
- selected_indices = list(range(len(structure_info)))
559
- st.info(f"βœ“ Using all {len(selected_indices)} residues")
560
-
561
- # Save selection button
562
- if st.button(f"πŸ’Ύ Save Selection for {selected_struct['name']}", type="primary"):
563
- st.session_state['selections'][selected_struct['name']] = selected_indices
564
- st.success(f"βœ… Saved selection for {selected_struct['name']}")
565
-
566
- # Show current saved selections
567
- if selected_struct['name'] in st.session_state['selections']:
568
- saved_indices = st.session_state['selections'][selected_struct['name']]
569
- st.info(f"**Current saved selection:** {len(saved_indices)} residues: {[i+1 for i in saved_indices]}")
570
-
571
- # Step 4: Reference structure selection
572
- #st.sidebar.markdown("---")
573
- st.sidebar.subheader("3️⃣ Reference Structure")
574
-
575
- # Default: shortest structure (first in sorted list)
576
- default_ref = structure_data[0]['name']
577
-
578
- reference_structure_name = st.sidebar.selectbox(
579
- "Select reference structure",
580
- options=[s['name'] for s in structure_data],
581
- index=0,
582
- help="All other structures will be aligned to this reference (default: shortest)"
583
- )
584
-
585
- ref_struct = next((s for s in structure_data if s['name'] == reference_structure_name), None)
586
- if ref_struct:
587
- st.sidebar.info(f"**Reference:** {ref_struct['name']} ({ref_struct['num_residues']} residues)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588
 
589
- # Step 5: Window configuration
590
- #st.sidebar.markdown("---")
591
- st.sidebar.subheader("4️⃣ Window Configuration")
592
 
593
  # Check if all structures have selections
594
- all_have_selections = all(s['name'] in st.session_state.get('selections', {}) for s in structure_data)
 
595
 
596
- if all_have_selections:
597
- selections = st.session_state['selections']
598
- min_selection_size = min(len(selections[s['name']]) for s in structure_data)
 
599
 
600
  window_size = st.sidebar.number_input(
601
  "Window Size",
@@ -613,92 +742,83 @@ def main():
613
  help="Contiguous: sliding windows. Non-contiguous: all combinations"
614
  )
615
  else:
616
- st.sidebar.warning("⚠️ Configure selections for all structures first")
617
  window_size = 4
618
  window_type = "contiguous"
619
 
620
- # Step 6: Run analysis
621
- #st.sidebar.markdown("---")
622
- st.sidebar.subheader("5️⃣ Run Analysis")
623
 
624
- if st.sidebar.button("πŸš€ Run Multi-Structure Analysis", type="primary", disabled=not all_have_selections):
625
- if not all_have_selections:
626
- st.error("Please configure atom selections for all structures")
627
- return
628
-
629
- # Get selections
630
- selections = st.session_state['selections']
631
-
632
- # Find reference structure
633
- ref_struct = next((s for s in structure_data if s['name'] == reference_structure_name), None)
634
- ref_indices = selections[ref_struct['name']]
635
-
636
- # Generate reference windows
637
- ref_windows = generate_windows_from_selection(ref_indices, window_size, window_type)
638
-
639
- if not ref_windows:
640
- st.error(f"Reference structure needs at least {window_size} selected residues")
641
  return
642
 
643
  # Run comparisons
644
  with st.spinner("Analyzing structures..."):
645
  results = []
646
 
647
- # For each reference window
648
- for ref_window in ref_windows:
649
- # Extract reference coords
650
- ref_coords = extract_window_coords(ref_struct['residues'], ref_window)
651
- ref_com = calculate_COM(ref_coords)
652
- ref_sequence = ''.join([ref_struct['residues'][i]['resname'] for i in ref_window])
653
 
654
- # Compare against all other structures
655
- for query_struct in structure_data:
656
- if query_struct['name'] == ref_struct['name']:
657
- continue # Skip self-comparison
658
-
659
- query_indices = selections[query_struct['name']]
660
- query_windows = generate_windows_from_selection(query_indices, window_size, window_type)
 
 
661
 
662
- for query_window in query_windows:
663
- # Extract query coords
664
- query_coords = extract_window_coords(query_struct['residues'], query_window)
665
- query_com = calculate_COM(query_coords)
666
- query_sequence = ''.join([query_struct['residues'][i]['resname'] for i in query_window])
667
-
668
- # Calculate RMSD
669
- U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)
670
-
671
- if U is None or RMSD is None:
672
- RMSD = 999.0
673
- U = np.eye(3)
674
 
675
- results.append({
676
- 'Reference': ref_struct['name'],
677
- 'Ref_Window': ref_window,
678
- 'Ref_Sequence': ref_sequence,
679
- 'Query': query_struct['name'],
680
- 'Query_Window': query_window,
681
- 'Query_Sequence': query_sequence,
682
- 'RMSD': RMSD,
683
- 'Rotation_Matrix': U,
684
- 'Ref_COM': ref_com,
685
- 'Query_COM': query_com,
686
- 'Ref_Path': ref_struct['path'],
687
- 'Query_Path': query_struct['path']
688
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
689
 
690
  results_df = pd.DataFrame(results)
691
  st.session_state['results'] = results_df
692
- st.session_state['structure_data'] = structure_data
693
- st.session_state['reference_name'] = reference_structure_name
694
 
695
  st.success(f"βœ… Analysis complete! {len(results_df)} comparisons performed.")
696
 
697
  # Display results
698
  if 'results' in st.session_state:
699
  results_df = st.session_state['results']
700
- structure_data = st.session_state['structure_data']
701
- reference_name = st.session_state['reference_name']
702
 
703
  st.markdown("---")
704
  st.subheader("πŸ“Š Results Summary")
@@ -719,49 +839,57 @@ def main():
719
  with col2:
720
  st.metric("Comparisons Below Threshold", f"{len(filtered_df)} / {len(results_df)}")
721
 
722
- # Best match per structure
723
- st.markdown("### πŸ† Best Match per Structure")
724
- best_matches = results_df.loc[results_df.groupby('Query')['RMSD'].idxmin()]
725
 
726
- best_display = best_matches[['Query', 'Query_Sequence', 'RMSD']].copy()
727
- best_display['RMSD'] = best_display['RMSD'].round(3)
728
- best_display.columns = ['Structure', 'Sequence', 'RMSD (Γ…)']
729
- st.dataframe(best_display, use_container_width=True)
 
 
 
 
 
 
730
 
731
  # Full results
732
  with st.expander("πŸ“‹ All Comparison Results"):
733
- display_df = filtered_df[['Reference', 'Ref_Window', 'Ref_Sequence', 'Query', 'Query_Window', 'Query_Sequence', 'RMSD']].copy()
734
-
735
- # Format the window indices to be 1-based (more intuitive for users)
736
- display_df['Ref_Residues'] = display_df['Ref_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
737
- display_df['Query_Residues'] = display_df['Query_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
738
-
739
- # Reorder columns and drop the window lists
740
- display_df = display_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'RMSD']]
741
- display_df['RMSD'] = display_df['RMSD'].round(3)
742
- display_df = display_df.sort_values('RMSD').reset_index(drop=True)
743
-
744
- # Rename columns for better display
745
- display_df.columns = ['Reference', 'Ref_Indices', 'Ref_Sequence', 'Query', 'Query_Indices', 'Query_Sequence', 'RMSD']
746
-
747
- st.dataframe(display_df, use_container_width=True, height=300)
 
 
 
748
 
749
  # Visualization
750
  st.markdown("---")
751
  st.subheader("πŸ”¬ 3D Structure Visualization")
752
 
753
- st.markdown("**Select a comparison to visualize:**")
754
-
755
- # Create dropdown options showing all comparisons
756
- viz_options = []
757
- for idx, row in filtered_df.iterrows():
758
- ref_res_str = ','.join([str(i+1) for i in row['Ref_Window']])
759
- query_res_str = ','.join([str(i+1) for i in row['Query_Window']])
760
- option_text = f"{row['Reference']}[{ref_res_str}] ({row['Ref_Sequence']}) vs {row['Query']}[{query_res_str}] ({row['Query_Sequence']}) | RMSD: {row['RMSD']:.3f} Γ…"
761
- viz_options.append((idx, option_text))
762
-
763
- if viz_options:
764
- # Sort by RMSD (best first)
765
  viz_options.sort(key=lambda x: filtered_df.loc[x[0], 'RMSD'])
766
 
767
  selected_viz_idx = st.selectbox(
@@ -777,7 +905,7 @@ def main():
777
  # Import visualization function
778
  from visualization_multi import create_pairwise_visualization
779
 
780
- # Create visualization for selected comparison
781
  try:
782
  viz_html = create_pairwise_visualization(
783
  ref_path=selected_comparison['Ref_Path'],
@@ -800,6 +928,85 @@ def main():
800
  st.code(traceback.format_exc())
801
  else:
802
  st.warning("No comparisons below RMSD threshold to visualize")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
803
 
804
 
805
  if __name__ == "__main__":
 
1
  """
2
+ RNA Motif Multi-Structure Comparison Tool - Pairwise Mode
3
+ Streamlit app for comparing multiple RNA motif structures with separate reference and query sets
4
+ Uses dropdown menu for residue configuration and default Backbone + Sugar atom selection
5
  """
6
 
7
  import streamlit as st
 
25
  )
26
 
27
  # Import example data loader
28
+ try:
29
+ from example_data_loader import (
30
+ get_example_pdbs,
31
+ load_example_as_uploaded_file,
32
+ get_example_info
33
+ )
34
+ EXAMPLES_AVAILABLE = True
35
+ except ImportError:
36
+ EXAMPLES_AVAILABLE = False
37
+ st.warning("Example data loader not available. Please use 'Upload Files' mode.")
38
 
39
  # Page configuration
40
  st.set_page_config(
41
+ page_title="RNA Motif Multi-Structure Comparison - Pairwise",
42
  page_icon="🧬",
43
  layout="wide",
44
  initial_sidebar_state="expanded"
 
231
  """, unsafe_allow_html=True)
232
 
233
 
 
234
  def save_uploaded_file(uploaded_file, directory):
235
  """Save an uploaded file to a temporary directory"""
236
  file_path = os.path.join(directory, uploaded_file.name)
 
240
 
241
 
242
  def get_structure_info(pdb_path):
243
+ """
244
+ Get information about a structure's residues.
245
+
246
+ Args:
247
+ pdb_path: Path to PDB file
248
+
249
+ Returns:
250
+ List of dicts with residue info: [{index, resnum, resname, full_name}, ...]
251
+ """
252
  residues = parse_residue_atoms(pdb_path)
253
 
254
  structure_info = []
 
263
  return structure_info
264
 
265
 
266
def load_structure_data(uploaded_files, temp_dir):
    """
    Save each uploaded file via save_uploaded_file and parse its residues.

    Args:
        uploaded_files: Iterable of uploaded-file objects exposing a .name attribute
        temp_dir: Directory passed to save_uploaded_file for every file

    Returns:
        List of dicts, one per file in input order: [{name, path, residues, num_residues}, ...]
    """
    def _describe(upload):
        # Persist first, then parse from the path save_uploaded_file returned
        saved_path = save_uploaded_file(upload, temp_dir)
        parsed = parse_residue_atoms(saved_path)
        return {
            'name': upload.name,
            'path': saved_path,
            'residues': parsed,
            'num_residues': len(parsed)
        }

    return [_describe(upload) for upload in uploaded_files]
282
+
283
+
284
def extract_window_coords(residues, window_indices):
    """Collect the Backbone + Sugar coordinates of every residue in a window into one array"""
    # Flatten the per-residue coordinate lists in window order
    window_coords = [
        coord
        for residue_idx in window_indices
        for coord in get_backbone_sugar_coords_from_residue(residues[residue_idx])
    ]
    return np.array(window_coords)
 
 
 
 
292
 
293
 
294
def generate_windows_from_selection(selected_indices, window_size, window_type):
    """
    Generate residue-index windows from a selection.

    Args:
        selected_indices: Sequence of residue indices; windows follow its order
        window_size: Number of residues per window
        window_type: "contiguous" for sliding windows over the selection;
            any other value yields every combination of window_size indices

    Returns:
        List of windows, each a list of indices. Empty when window_size < 1
        or when the selection holds fewer than window_size indices.
    """
    # Imported locally (as the original block did) so the function does not
    # depend on a module-level import being present.
    from itertools import combinations

    # Normalise to a list so both window types return lists of lists,
    # whatever sequence type the caller passes.
    selected = list(selected_indices)

    # A non-positive size would otherwise yield empty windows ([[]]) rather than none
    if window_size < 1 or len(selected) < window_size:
        return []

    if window_type == "contiguous":
        last_start = len(selected) - window_size
        return [selected[start:start + window_size] for start in range(last_start + 1)]

    # non-contiguous: every ordered subset of the selection
    return [list(combo) for combo in combinations(selected, window_size)]
309
 
310
 
311
  def main():
312
+ st.markdown('<h1 class="main-header">🧬 RNA Motif Multi-Structure Comparison</h1>', unsafe_allow_html=True)
313
+ st.markdown('<p class="sub-header">Pairwise comparison: Reference structures vs Query structures</p>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
 
315
+ # Create temporary directory
316
+ if 'temp_dir' not in st.session_state:
317
+ st.session_state['temp_dir'] = tempfile.mkdtemp()
318
+ temp_dir = st.session_state['temp_dir']
319
+
320
+ # Initialize session state
321
+ if 'data_mode' not in st.session_state:
322
+ st.session_state['data_mode'] = 'upload'
323
+ if 'ref_selections' not in st.session_state:
324
+ st.session_state['ref_selections'] = {}
325
+ if 'query_selections' not in st.session_state:
326
+ st.session_state['query_selections'] = {}
327
+
328
+ # Sidebar: Step 1 - Data Source Selection
329
+ st.sidebar.title("βš™οΈ Configuration")
330
+ st.sidebar.subheader("1️⃣ Data Source")
331
+
332
+ # Check if examples are available
333
+ if EXAMPLES_AVAILABLE:
334
+ data_mode = st.sidebar.radio(
335
+ "Choose data source",
336
+ ["Upload Files", "Use Example Data"],
337
+ key="data_mode_radio",
338
+ help="Upload your own PDB files or use provided examples"
 
 
 
 
339
  )
340
+ else:
341
+ st.sidebar.info("ℹ️ Example data not available. Using upload mode.")
342
+ data_mode = "Upload Files"
343
+
344
+ # Update data mode
345
+ if data_mode == "Upload Files":
346
+ st.session_state['data_mode'] = 'upload'
347
+ # Reset example initialization when switching to upload mode
348
+ if 'example_mode_initialized' in st.session_state:
349
+ del st.session_state['example_mode_initialized']
350
+ else:
351
+ st.session_state['data_mode'] = 'example'
352
 
353
+ # Step 2: File Upload/Selection - SEPARATE FOR REFERENCE AND QUERY
354
+ st.sidebar.subheader("2️⃣ Structure Files")
355
+
356
+ reference_files = []
357
+ query_files = []
358
+
359
+ if st.session_state['data_mode'] == 'upload':
360
+ st.sidebar.markdown("**Upload Reference Structures**")
361
+ ref_uploaded = st.sidebar.file_uploader(
362
+ "Reference PDB files",
363
+ type=['pdb'],
364
+ accept_multiple_files=True,
365
+ key="ref_uploader",
366
+ help="Upload one or more reference structures (e.g., Pentaloop)"
367
+ )
368
+
369
+ st.sidebar.markdown("**Upload Query Structures**")
370
+ query_uploaded = st.sidebar.file_uploader(
371
+ "Query PDB files",
372
+ type=['pdb'],
373
+ accept_multiple_files=True,
374
+ key="query_uploader",
375
+ help="Upload one or more query structures (e.g., Tetraloop)"
376
+ )
377
+
378
+ reference_files = ref_uploaded if ref_uploaded else []
379
+ query_files = query_uploaded if query_uploaded else []
380
 
381
+ else: # Example data mode
382
+ if not EXAMPLES_AVAILABLE:
383
+ st.sidebar.error("❌ Example data loader module not found")
384
+ reference_files = []
385
+ query_files = []
386
  else:
387
+ try:
388
+ examples = get_example_pdbs()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
 
390
+ if not examples or len(examples) == 0:
391
+ st.sidebar.error("❌ No example data available. Please add PDB files to 'data/' folder")
392
+ st.sidebar.info("πŸ’‘ Create a 'data/' folder in the same directory as the app and add .pdb files")
393
+ reference_files = []
394
+ query_files = []
395
+ else:
396
+ example_names = sorted(list(examples.keys()))
397
+
398
+ # Auto-select examples when first switching to example mode
399
+ if 'example_mode_initialized' not in st.session_state:
400
+ st.session_state['example_mode_initialized'] = True
401
+ # Auto-select first half as reference, second half as query
402
+ mid_point = max(1, len(example_names) // 2)
403
+ st.session_state['auto_ref_examples'] = example_names[:mid_point]
404
+ st.session_state['auto_query_examples'] = example_names[mid_point:mid_point*2]
405
+
406
+ st.sidebar.markdown("**Select Reference Examples**")
407
+ ref_example_names = st.sidebar.multiselect(
408
+ "Reference structures",
409
+ options=example_names,
410
+ default=st.session_state.get('auto_ref_examples', []),
411
+ key="ref_examples",
412
+ help="Select example reference structures"
413
+ )
414
+
415
+ if ref_example_names:
416
+ st.sidebar.success(f"βœ… {len(ref_example_names)} reference file(s) selected")
417
+
418
+ st.sidebar.markdown("**Select Query Examples**")
419
+ query_example_names = st.sidebar.multiselect(
420
+ "Query structures",
421
+ options=example_names,
422
+ default=st.session_state.get('auto_query_examples', []),
423
+ key="query_examples",
424
+ help="Select example query structures"
425
+ )
426
+
427
+ if query_example_names:
428
+ st.sidebar.success(f"βœ… {len(query_example_names)} query file(s) selected")
429
+
430
+ # Convert names to paths and load files
431
+ try:
432
+ reference_files = [load_example_as_uploaded_file(examples[name]) for name in ref_example_names]
433
+ query_files = [load_example_as_uploaded_file(examples[name]) for name in query_example_names]
434
+
435
+ except Exception as load_error:
436
+ st.sidebar.error(f"Error loading files: {str(load_error)}")
437
+ import traceback
438
+ st.sidebar.code(traceback.format_exc())
439
+ reference_files = []
440
+ query_files = []
441
+ except Exception as e:
442
+ st.sidebar.error(f"❌ Error loading examples: {str(e)}")
443
+ import traceback
444
+ st.sidebar.code(traceback.format_exc())
445
+ reference_files = []
446
+ query_files = []
447
+
448
+ # Show upload status
449
+ if reference_files and query_files:
450
+ st.sidebar.success(f"βœ… {len(reference_files)} reference + {len(query_files)} query structures")
451
+ elif reference_files:
452
+ st.sidebar.info(f"ℹ️ {len(reference_files)} reference structures loaded")
453
+ elif query_files:
454
+ st.sidebar.info(f"ℹ️ {len(query_files)} query structures loaded")
455
+ else:
456
+ st.sidebar.warning("⚠️ Upload or select structures")
457
+
458
+ # Load structure data
459
+ ref_structure_data = []
460
+ query_structure_data = []
461
+
462
+ if reference_files:
463
+ ref_structure_data = load_structure_data(reference_files, temp_dir)
464
+
465
+ if query_files:
466
+ query_structure_data = load_structure_data(query_files, temp_dir)
467
+
468
+ # Track current files to reset selections if files change
469
+ current_ref_files = set([s['name'] for s in ref_structure_data])
470
+ current_query_files = set([s['name'] for s in query_structure_data])
471
+
472
+ if 'current_ref_files' not in st.session_state:
473
+ st.session_state['current_ref_files'] = current_ref_files
474
+ if 'current_query_files' not in st.session_state:
475
+ st.session_state['current_query_files'] = current_query_files
476
+
477
+ # Reset selections if files changed
478
+ if st.session_state['current_ref_files'] != current_ref_files:
479
+ st.session_state['current_ref_files'] = current_ref_files
480
+ st.session_state['ref_selections'] = {}
481
+ if 'ref_auto_initialized' in st.session_state:
482
+ del st.session_state['ref_auto_initialized']
483
+
484
+ if st.session_state['current_query_files'] != current_query_files:
485
+ st.session_state['current_query_files'] = current_query_files
486
+ st.session_state['query_selections'] = {}
487
+ if 'query_auto_initialized' in st.session_state:
488
+ del st.session_state['query_auto_initialized']
489
+
490
+ # Auto-initialize selections (exclude first and last residue by default)
491
+ if 'ref_auto_initialized' not in st.session_state and ref_structure_data:
492
+ for struct in ref_structure_data:
 
493
  num_res = struct['num_residues']
494
+ if num_res > 4:
 
495
  auto_selection = list(range(1, num_res - 1))
496
+ st.session_state['ref_selections'][struct['name']] = auto_selection
497
  else:
498
+ st.session_state['ref_selections'][struct['name']] = list(range(num_res))
499
+ st.session_state['ref_auto_initialized'] = True
 
500
 
501
+ if 'query_auto_initialized' not in st.session_state and query_structure_data:
502
+ for struct in query_structure_data:
503
+ num_res = struct['num_residues']
504
+ if num_res > 4:
505
+ auto_selection = list(range(1, num_res - 1))
506
+ st.session_state['query_selections'][struct['name']] = auto_selection
507
+ else:
508
+ st.session_state['query_selections'][struct['name']] = list(range(num_res))
509
+ st.session_state['query_auto_initialized'] = True
510
 
511
+ # Step 3: Configure Atom Selections in Main Area
512
+ st.markdown("---")
513
+ st.subheader("🔬 Configure Atom Selections")
514
+ st.info("ℹ️ **Atom Selection:** Backbone + Sugar (default)")
515
+
516
+ # Create two columns for Reference and Query
517
+ col1, col2 = st.columns(2)
518
+
519
+ with col1:
520
+ st.markdown("### 📋 Reference Structures")
521
+ if ref_structure_data:
522
+ selected_ref_name = st.selectbox(
523
+ "Select structure to configure (excluding two bases in 5' and 3' by default)",
524
+ options=[s['name'] for s in ref_structure_data],
525
+ key="ref_dropdown",
526
+ help="Choose a reference structure to configure its residue selection"
527
+ )
 
 
 
 
 
 
 
 
 
 
 
 
528
 
529
+ selected_ref = next((s for s in ref_structure_data if s['name'] == selected_ref_name), None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
530
 
531
+ if selected_ref:
532
+ st.markdown(f"**{selected_ref['name']}** ({selected_ref['num_residues']} residues)")
 
 
 
533
 
534
+ # Display residue table
535
+ structure_info = get_structure_info(selected_ref['path'])
536
+ info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
537
+ info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
538
+ info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
539
+ info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]
540
+
541
+ with st.expander("📋 View Residue Table", expanded=False):
542
+ st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38))
543
+
544
+ # Selection method
545
+ selection_method = st.radio(
546
+ f"Selection method for {selected_ref['name']}",
547
+ ["Select by range", "Select specific residues", "Use all residues"],
548
+ key=f"method_ref_{selected_ref['name']}",
549
+ index=1,
550
+ horizontal=True
551
+ )
552
+
553
+ selected_indices = []
554
+
555
+ if selection_method == "Select by range":
556
+ current_selection = st.session_state['ref_selections'].get(selected_ref['name'], [])
557
+ default_start = current_selection[0] + 2 if current_selection else 3
558
+ default_end = current_selection[-1] + 1 if current_selection else max(2, len(structure_info) - 2)
559
+
560
+ c1, c2 = st.columns(2)
561
+ with c1:
562
+ start_idx = st.number_input(
563
+ "Start index (1-based)",
564
+ min_value=1,
565
+ max_value=len(structure_info),
566
+ value=default_start,
567
+ key=f"start_ref_{selected_ref['name']}"
568
+ )
569
+ with c2:
570
+ end_idx = st.number_input(
571
+ "End index (1-based, inclusive)",
572
+ min_value=1,
573
+ max_value=len(structure_info),
574
+ value=default_end,
575
+ key=f"end_ref_{selected_ref['name']}"
576
+ )
577
+
578
+ if start_idx <= end_idx:
579
+ selected_indices = list(range(start_idx - 1, end_idx))
580
+ st.success(f"✓ Selected residues: {[i+1 for i in selected_indices]}")
581
+ else:
582
+ st.error("Start index must be ≤ end index")
583
+
584
+ elif selection_method == "Select specific residues":
585
+ current_selection = st.session_state['ref_selections'].get(selected_ref['name'], [])
586
+ default_names = [structure_info[i]['full_name'] for i in current_selection] if current_selection else []
587
+
588
+ selected_names = st.multiselect(
589
+ "Select residues",
590
+ options=[info['full_name'] for info in structure_info],
591
+ default=[structure_info[i]['full_name'] for i in range(2, len(structure_info)-2)],
592
+ key=f"specific_ref_{selected_ref['name']}"
593
+ )
594
+
595
+ name_to_idx = {info['full_name']: info['index'] for info in structure_info}
596
+ selected_indices = [name_to_idx[name] for name in selected_names]
597
+ selected_indices.sort()
598
+
599
+ if selected_indices:
600
+ st.success(f"✓ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")
601
+
602
+ else: # Use all residues
603
+ selected_indices = list(range(len(structure_info)))
604
+ st.info(f"✓ Using all {len(selected_indices)} residues")
605
+
606
+ # Save button
607
+ if st.button(f"💾 Save Selection for {selected_ref['name']}", type="primary", key=f"save_ref_{selected_ref['name']}"):
608
+ st.session_state['ref_selections'][selected_ref['name']] = selected_indices
609
+ st.success(f"✅ Saved selection for {selected_ref['name']}")
610
+
611
+ # Show current saved selection
612
+ if selected_ref['name'] in st.session_state['ref_selections']:
613
+ saved_indices = st.session_state['ref_selections'][selected_ref['name']]
614
+ st.info(f"**Current saved selection:** {len(saved_indices)} residues: {[i+1 for i in saved_indices]}")
615
+ else:
616
+ st.info("Upload reference structures to configure")
617
+
618
+ with col2:
619
+ st.markdown("### 📋 Query Structures")
620
+ if query_structure_data:
621
+ selected_query_name = st.selectbox(
622
+ "Select structure to configure (excluding two bases in 5' and 3' by default)",
623
+ options=[s['name'] for s in query_structure_data],
624
+ key="query_dropdown",
625
+ help="Choose a query structure to configure its residue selection"
626
  )
627
 
628
+ selected_query = next((s for s in query_structure_data if s['name'] == selected_query_name), None)
 
 
629
 
630
+ if selected_query:
631
+ st.markdown(f"**{selected_query['name']}** ({selected_query['num_residues']} residues)")
632
 
633
+ # Display residue table
634
+ structure_info = get_structure_info(selected_query['path'])
635
+ info_df = pd.DataFrame(structure_info)[['index', 'resnum', 'resname']]
636
+ info_df.columns = ['Index (0-based)', 'Residue Number', 'Base Type']
637
+ info_df['Index (1-based)'] = info_df['Index (0-based)'] + 1
638
+ info_df = info_df[['Index (1-based)', 'Index (0-based)', 'Residue Number', 'Base Type']]
639
+
640
+ with st.expander("📋 View Residue Table", expanded=False):
641
+ st.dataframe(info_df, use_container_width=True, height=min(300, len(structure_info) * 35 + 38))
642
+
643
+ # Selection method
644
+ selection_method = st.radio(
645
+ f"Selection method for {selected_query['name']}",
646
+ ["Select by range", "Select specific residues", "Use all residues"],
647
+ key=f"method_query_{selected_query['name']}",
648
+ index=1,
649
+ horizontal=True
650
+ )
651
+
652
+ selected_indices = []
653
+
654
+ if selection_method == "Select by range":
655
+ current_selection = st.session_state['query_selections'].get(selected_query['name'], [])
656
+ default_start = current_selection[0] + 2 if current_selection else 3
657
+ default_end = current_selection[-1] + 1 if current_selection else max(2, len(structure_info) - 2)
658
+
659
+ c1, c2 = st.columns(2)
660
+ with c1:
661
+ start_idx = st.number_input(
662
+ "Start index (1-based)",
663
+ min_value=1,
664
+ max_value=len(structure_info),
665
+ value=default_start,
666
+ key=f"start_query_{selected_query['name']}"
667
+ )
668
+ with c2:
669
+ end_idx = st.number_input(
670
+ "End index (1-based, inclusive)",
671
+ min_value=1,
672
+ max_value=len(structure_info),
673
+ value=default_end,
674
+ key=f"end_query_{selected_query['name']}"
675
+ )
676
+
677
+ if start_idx <= end_idx:
678
+ selected_indices = list(range(start_idx - 1, end_idx))
679
+ st.success(f"✓ Selected residues: {[i+1 for i in selected_indices]}")
680
+ else:
681
+ st.error("Start index must be ≤ end index")
682
+
683
+ elif selection_method == "Select specific residues":
684
+ current_selection = st.session_state['query_selections'].get(selected_query['name'], [])
685
+ default_names = [structure_info[i]['full_name'] for i in current_selection] if current_selection else []
686
+
687
+ selected_names = st.multiselect(
688
+ "Select residues",
689
+ options=[info['full_name'] for info in structure_info],
690
+ default=[structure_info[i]['full_name'] for i in range(2, len(structure_info)-2)],
691
+ key=f"specific_query_{selected_query['name']}"
692
+ )
693
+
694
+ name_to_idx = {info['full_name']: info['index'] for info in structure_info}
695
+ selected_indices = [name_to_idx[name] for name in selected_names]
696
+ selected_indices.sort()
697
+
698
+ if selected_indices:
699
+ st.success(f"✓ Selected {len(selected_indices)} residues: {[i+1 for i in selected_indices]}")
700
+
701
+ else: # Use all residues
702
+ selected_indices = list(range(len(structure_info)))
703
+ st.info(f"✓ Using all {len(selected_indices)} residues")
704
+
705
+ # Save button
706
+ if st.button(f"💾 Save Selection for {selected_query['name']}", type="primary", key=f"save_query_{selected_query['name']}"):
707
+ st.session_state['query_selections'][selected_query['name']] = selected_indices
708
+ st.success(f"✅ Saved selection for {selected_query['name']}")
709
+
710
+ # Show current saved selection
711
+ if selected_query['name'] in st.session_state['query_selections']:
712
+ saved_indices = st.session_state['query_selections'][selected_query['name']]
713
+ st.info(f"**Current saved selection:** {len(saved_indices)} residues: {[i+1 for i in saved_indices]}")
714
+ else:
715
+ st.info("Upload query structures to configure")
716
 
717
+ # Step 4: Window Configuration
718
+ st.sidebar.subheader("3️⃣ Window Configuration")
 
719
 
720
  # Check if all structures have selections
721
+ all_ref_have_selections = all(s['name'] in st.session_state['ref_selections'] for s in ref_structure_data)
722
+ all_query_have_selections = all(s['name'] in st.session_state['query_selections'] for s in query_structure_data)
723
 
724
+ if all_ref_have_selections and all_query_have_selections and ref_structure_data and query_structure_data:
725
+ # Find minimum selection size
726
+ all_selections = list(st.session_state['ref_selections'].values()) + list(st.session_state['query_selections'].values())
727
+ min_selection_size = min(len(sel) for sel in all_selections)
728
 
729
  window_size = st.sidebar.number_input(
730
  "Window Size",
 
742
  help="Contiguous: sliding windows. Non-contiguous: all combinations"
743
  )
744
  else:
745
+ st.sidebar.warning("⚠️ Configure selections first")
746
  window_size = 4
747
  window_type = "contiguous"
748
 
749
+ # Step 5: Run Analysis
750
+ st.sidebar.subheader("4️⃣ Run Analysis")
 
751
 
752
+ can_run = (all_ref_have_selections and all_query_have_selections and
753
+ ref_structure_data and query_structure_data)
754
+
755
+ if st.sidebar.button("🚀 Run Pairwise Analysis", type="primary", disabled=not can_run):
756
+ if not can_run:
757
+ st.error("Please upload and configure both reference and query structures")
 
 
 
 
 
 
 
 
 
 
 
758
  return
759
 
760
  # Run comparisons
761
  with st.spinner("Analyzing structures..."):
762
  results = []
763
 
764
+ # For each reference structure
765
+ for ref_struct in ref_structure_data:
766
+ ref_indices = st.session_state['ref_selections'][ref_struct['name']]
767
+ ref_windows = generate_windows_from_selection(ref_indices, window_size, window_type)
 
 
768
 
769
+ if not ref_windows:
770
+ continue
771
+
772
+ # For each reference window
773
+ for ref_window in ref_windows:
774
+ # Extract reference coords
775
+ ref_coords = extract_window_coords(ref_struct['residues'], ref_window)
776
+ ref_com = calculate_COM(ref_coords)
777
+ ref_sequence = ''.join([ref_struct['residues'][i]['resname'] for i in ref_window])
778
 
779
+ # Compare against all query structures
780
+ for query_struct in query_structure_data:
781
+ query_indices = st.session_state['query_selections'][query_struct['name']]
782
+ query_windows = generate_windows_from_selection(query_indices, window_size, window_type)
 
 
 
 
 
 
 
 
783
 
784
+ for query_window in query_windows:
785
+ # Extract query coords
786
+ query_coords = extract_window_coords(query_struct['residues'], query_window)
787
+ query_com = calculate_COM(query_coords)
788
+ query_sequence = ''.join([query_struct['residues'][i]['resname'] for i in query_window])
789
+
790
+ # Calculate RMSD
791
+ U, RMSD = calculate_rotation_rmsd(ref_coords, query_coords, ref_com, query_com)
792
+
793
+ if U is None or RMSD is None:
794
+ RMSD = 999.0
795
+ U = np.eye(3)
796
+
797
+ results.append({
798
+ 'Reference': ref_struct['name'],
799
+ 'Ref_Window': ref_window,
800
+ 'Ref_Sequence': ref_sequence,
801
+ 'Query': query_struct['name'],
802
+ 'Query_Window': query_window,
803
+ 'Query_Sequence': query_sequence,
804
+ 'RMSD': RMSD,
805
+ 'Rotation_Matrix': U,
806
+ 'Ref_COM': ref_com,
807
+ 'Query_COM': query_com,
808
+ 'Ref_Path': ref_struct['path'],
809
+ 'Query_Path': query_struct['path']
810
+ })
811
 
812
  results_df = pd.DataFrame(results)
813
  st.session_state['results'] = results_df
814
+ st.session_state['ref_structure_data'] = ref_structure_data
815
+ st.session_state['query_structure_data'] = query_structure_data
816
 
817
  st.success(f"✅ Analysis complete! {len(results_df)} comparisons performed.")
818
 
819
  # Display results
820
  if 'results' in st.session_state:
821
  results_df = st.session_state['results']
 
 
822
 
823
  st.markdown("---")
824
  st.subheader("📊 Results Summary")
 
839
  with col2:
840
  st.metric("Comparisons Below Threshold", f"{len(filtered_df)} / {len(results_df)}")
841
 
842
+ # Best match per Reference-Query pair
843
+ st.markdown("### 🏆 Best Match per Reference-Query Pair")
 
844
 
845
+ if len(filtered_df) > 0:
846
+ # Group by Reference and Query to find best match for each pair
847
+ best_matches = filtered_df.loc[filtered_df.groupby(['Reference', 'Query'])['RMSD'].idxmin()]
848
+
849
+ best_display = best_matches[['Reference', 'Query', 'Ref_Sequence', 'Query_Sequence', 'RMSD']].copy()
850
+ best_display['RMSD'] = best_display['RMSD'].round(3)
851
+ best_display.columns = ['Reference', 'Query', 'Ref Sequence', 'Query Sequence', 'RMSD (Å)']
852
+ st.dataframe(best_display, use_container_width=True)
853
+ else:
854
+ st.warning("No matches found below threshold")
855
 
856
  # Full results
857
  with st.expander("📋 All Comparison Results"):
858
+ if len(filtered_df) > 0:
859
+ display_df = filtered_df[['Reference', 'Ref_Window', 'Ref_Sequence', 'Query', 'Query_Window', 'Query_Sequence', 'RMSD']].copy()
860
+
861
+ # Format the window indices to be 1-based
862
+ display_df['Ref_Residues'] = display_df['Ref_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
863
+ display_df['Query_Residues'] = display_df['Query_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
864
+
865
+ # Reorder columns
866
+ display_df = display_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'RMSD']]
867
+ display_df['RMSD'] = display_df['RMSD'].round(3)
868
+ display_df = display_df.sort_values('RMSD').reset_index(drop=True)
869
+
870
+ # Rename columns for better display
871
+ display_df.columns = ['Reference', 'Ref_Indices', 'Ref_Sequence', 'Query', 'Query_Indices', 'Query_Sequence', 'RMSD (Å)']
872
+
873
+ st.dataframe(display_df, use_container_width=True, height=400)
874
+ else:
875
+ st.info("No results to display")
876
 
877
  # Visualization
878
  st.markdown("---")
879
  st.subheader("🔬 3D Structure Visualization")
880
 
881
+ if len(filtered_df) > 0:
882
+ st.markdown("**Select a comparison to visualize:**")
883
+
884
+ # Create dropdown options
885
+ viz_options = []
886
+ for idx, row in filtered_df.iterrows():
887
+ ref_res_str = ','.join([str(i+1) for i in row['Ref_Window']])
888
+ query_res_str = ','.join([str(i+1) for i in row['Query_Window']])
889
+ option_text = f"{row['Reference']}[{ref_res_str}] ({row['Ref_Sequence']}) vs {row['Query']}[{query_res_str}] ({row['Query_Sequence']}) | RMSD: {row['RMSD']:.3f} Å"
890
+ viz_options.append((idx, option_text))
891
+
892
+ # Sort by RMSD
893
  viz_options.sort(key=lambda x: filtered_df.loc[x[0], 'RMSD'])
894
 
895
  selected_viz_idx = st.selectbox(
 
905
  # Import visualization function
906
  from visualization_multi import create_pairwise_visualization
907
 
908
+ # Create visualization
909
  try:
910
  viz_html = create_pairwise_visualization(
911
  ref_path=selected_comparison['Ref_Path'],
 
928
  st.code(traceback.format_exc())
929
  else:
930
  st.warning("No comparisons below RMSD threshold to visualize")
931
+
932
+ # Export Results
933
+ st.markdown("---")
934
+ st.subheader("💾 Export Results")
935
+
936
+ col1, col2 = st.columns(2)
937
+
938
+ with col1:
939
+ st.markdown("**Download Results Table**")
940
+ if len(filtered_df) > 0:
941
+ export_df = filtered_df[['Reference', 'Ref_Window', 'Ref_Sequence', 'Query', 'Query_Window', 'Query_Sequence', 'RMSD']].copy()
942
+ export_df['Ref_Residues'] = export_df['Ref_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
943
+ export_df['Query_Residues'] = export_df['Query_Window'].apply(lambda x: ','.join([str(i+1) for i in x]))
944
+ export_df = export_df[['Reference', 'Ref_Residues', 'Ref_Sequence', 'Query', 'Query_Residues', 'Query_Sequence', 'RMSD']]
945
+ export_df = export_df.sort_values('RMSD').reset_index(drop=True)
946
+
947
+ csv = export_df.to_csv(index=False)
948
+ st.download_button(
949
+ label="📥 Download Results (CSV)",
950
+ data=csv,
951
+ file_name="rna_pairwise_comparison_results.csv",
952
+ mime="text/csv"
953
+ )
954
+ else:
955
+ st.info("No results to export")
956
+
957
+ with col2:
958
+ st.markdown("**Download Aligned Structures**")
959
+ if len(filtered_df) > 0 and st.button("📦 Generate PDB Archive"):
960
+ with st.spinner("Creating archive..."):
961
+ import zipfile
962
+ from visualization_multi import extract_window_pdb, transform_pdb_string
963
+
964
+ zip_buffer = io.BytesIO()
965
+
966
+ with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
967
+ for idx, row in filtered_df.iterrows():
968
+ comp_name = f"comp_{idx:03d}_rmsd_{row['RMSD']:.3f}"
969
+
970
+ # Reference
971
+ ref_pdb = extract_window_pdb(row['Ref_Path'], row['Ref_Window'])
972
+ zip_file.writestr(f"{comp_name}/reference.pdb", ref_pdb)
973
+
974
+ # Query original
975
+ query_pdb = extract_window_pdb(row['Query_Path'], row['Query_Window'])
976
+ zip_file.writestr(f"{comp_name}/query_original.pdb", query_pdb)
977
+
978
+ # Query aligned
979
+ query_aligned = transform_pdb_string(
980
+ query_pdb,
981
+ row['Rotation_Matrix'],
982
+ row['Query_COM'],
983
+ row['Ref_COM']
984
+ )
985
+ zip_file.writestr(f"{comp_name}/query_aligned.pdb", query_aligned)
986
+
987
+ # README
988
+ readme = f"""Comparison #{idx}
989
+ RMSD: {row['RMSD']:.3f} Å
990
+ Atom Selection: Backbone + Sugar (default)
991
+
992
+ Reference: {row['Reference']}
993
+ Residues: {','.join([str(i+1) for i in row['Ref_Window']])}
994
+ Sequence: {row['Ref_Sequence']}
995
+
996
+ Query: {row['Query']}
997
+ Residues: {','.join([str(i+1) for i in row['Query_Window']])}
998
+ Sequence: {row['Query_Sequence']}
999
+ """
1000
+ zip_file.writestr(f"{comp_name}/README.txt", readme)
1001
+
1002
+ zip_buffer.seek(0)
1003
+
1004
+ st.download_button(
1005
+ label="📥 Download ZIP",
1006
+ data=zip_buffer.getvalue(),
1007
+ file_name="aligned_structures.zip",
1008
+ mime="application/zip"
1009
+ )
1010
 
1011
 
1012
  if __name__ == "__main__":