genomenet Claude Opus 4.5 committed on
Commit
297660c
·
1 Parent(s): 46ad2bd

Add high-impact features: file upload, GFF3, E. coli example, sequence viewer

Browse files

New Features:
1. FASTA file upload - Upload .fasta/.fa/.fna files directly
2. GFF3 export - Standard genome annotation format for detected regions
3. E. coli K-12 CRISPR example - Real organism reference sequence
4. Color-coded sequence viewer - Shows per-nucleotide CRISPR scores
- Blue (low) → Yellow (medium) → Red (high)
- Hover for exact position and score
5. Inference time display - Shows how long analysis took

UI Improvements:
- Reorganized example buttons with E. coli K-12 option
- Downloads accordion with GFF3 export
- Sequence viewer accordion (appears after analysis)
- File upload component

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +144 -15
app.py CHANGED
@@ -53,6 +53,11 @@ NON_CRISPR_EXAMPLE = """TTCGTTCATTTTTCTGGTTTGACCAATAGCATTTAAAGCCGCCCCACATAAATCAT
53
  # This shows nice visualization with low score on flanks and high score in the middle
54
  FLANKED_CRISPR_EXAMPLE = """ATGCGATCGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATTCCCCATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTCTGTTTACTTCCCTCTATATCTTTTTTTGTTCGGTCATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTAAAATCACACTCACAGCCAATACAAGCGGGGGGGGAAATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTTGCAGTAGGGCAGACTGGCAGTTTTCGGGTAATGATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACATTCATACGAATAATCATTTCCGAAAGACTCCTTTTATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACAGGTCATGAGCATTCAAAACGTTCTCCCCGTTCAATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTAGCCTGGACCAAATAATGTACGAACCTCTCCATCTATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACATGAATTATATAACAGGGATTAAAATTTTTCTTATTATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTAAATTTGAGCAAATACTAAAAAAATGAGACAAAAAGATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTCCGGCAATGAATTGATAGGACTTAAAATAATTGTATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTATCACGTTGAACGATCGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGAT"""
55
 
 
 
 
 
 
56
  # Longer examples for State-Dynamic Plot (upstream + CRISPR array + downstream)
57
  # Structure: ~600bp upstream | CRISPR array (25 repeats + 24 spacers) | ~600bp downstream
58
  # Total: ~3000 bp - ideal for seeing alternating patterns in State-Dynamic Plot
@@ -595,16 +600,101 @@ def create_interactive_state_plot(embeddings, n_clusters=8, stride=100, use_3d=F
595
  return fig
596
 
597
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
  def predict(sequence: str, stride: int = 100, threshold: float = 0.3):
599
  """Predict CRISPR array probability for each position."""
600
  import tempfile
601
  import csv
 
 
 
602
 
603
  sequence = strip_fasta_header(sequence.strip())
604
 
605
  is_valid, error = validate_sequence(sequence)
606
  if not is_valid:
607
- return None, f"**Error**: {error}", None, None, None, None, None
608
 
609
  result = predict_sequence(sequence, stride=stride, aggregation="mean")
610
 
@@ -628,6 +718,14 @@ def predict(sequence: str, stride: int = 100, threshold: float = 0.3):
628
  for pos, prob in zip(result.positions, result.probabilities):
629
  writer.writerow([pos, f"{prob:.4f}", prob >= threshold])
630
 
 
 
 
 
 
 
 
 
631
  # Create summary text file
632
  summary_path = os.path.join(temp_dir, "crispr_summary.txt")
633
  summary_text = f"""CRISPR Array Detection Summary
@@ -637,6 +735,7 @@ Sequence length: {result.sequence_length:,} bp
637
  Windows processed: {result.num_windows}
638
  Stride: {stride} bp
639
  Threshold: {threshold}
 
640
 
641
  Overall score: {result.overall_score:.4f}
642
  Max score: {max(result.probabilities):.4f}
@@ -662,6 +761,7 @@ Detected CRISPR Regions: {len(regions)}
662
  | Overall score | {result.overall_score:.4f} |
663
  | Max score | {max(result.probabilities):.4f} |
664
  | Regions detected | {len(regions)} |
 
665
 
666
  """
667
  if regions:
@@ -669,7 +769,7 @@ Detected CRISPR Regions: {len(regions)}
669
  for r in regions:
670
  summary += f"- **Region {r['region_id']}**: positions {r['start']:,}-{r['end']:,} ({r['length']} bp), score: {r['mean_score']:.3f}\n"
671
 
672
- return fig, summary, regions, png_path, pdf_path, csv_path, summary_path
673
 
674
 
675
  def detect(sequence: str, threshold: float = 0.3, min_length: int = 160):
@@ -814,15 +914,20 @@ Detect CRISPR arrays in DNA sequences using a BERT-based deep learning model (43
814
  """)
815
 
816
  with gr.Tab("Predict & Visualize"):
817
- gr.Markdown("Paste a DNA sequence to get per-position CRISPR probability scores with interactive visualization.")
818
  with gr.Row():
819
  with gr.Column(scale=1):
820
  seq_input = gr.Textbox(
821
  label="DNA Sequence (min 1000 bp)",
822
  placeholder="Paste DNA sequence or FASTA...",
823
- lines=8,
824
  value=FLANKED_CRISPR_EXAMPLE
825
  )
 
 
 
 
 
826
  with gr.Row():
827
  stride_input = gr.Slider(
828
  minimum=50, maximum=500, value=100, step=50,
@@ -833,40 +938,64 @@ Detect CRISPR arrays in DNA sequences using a BERT-based deep learning model (43
833
  label="Threshold"
834
  )
835
  with gr.Row():
836
- predict_btn = gr.Button("Analyze Sequence", variant="primary")
 
837
  with gr.Row():
838
- gr.Button("CRISPR Array Only").click(
839
- lambda: CRISPR_EXAMPLE, outputs=seq_input
840
- )
841
- gr.Button("Flanked CRISPR (recommended)").click(
842
  lambda: FLANKED_CRISPR_EXAMPLE, outputs=seq_input
843
  )
 
 
 
 
 
 
 
844
  gr.Button("Non-CRISPR").click(
845
  lambda: NON_CRISPR_EXAMPLE, outputs=seq_input
846
  )
847
  result_summary = gr.Markdown()
848
- with gr.Accordion("Downloads", open=False, visible=False) as download_accordion:
849
  gr.Markdown("**Plot exports:**")
850
  with gr.Row():
851
  pred_download_png = gr.File(label="PNG", interactive=False)
852
  pred_download_pdf = gr.File(label="PDF", interactive=False)
853
  gr.Markdown("**Data exports:**")
854
  with gr.Row():
855
- pred_download_csv = gr.File(label="Predictions (CSV)", interactive=False)
856
- pred_download_summary = gr.File(label="Summary (TXT)", interactive=False)
 
 
857
  with gr.Column(scale=2):
858
  plot_output = gr.Plot(label="CRISPR Score Profile (Interactive)")
 
 
859
  regions_output = gr.JSON(label="Detected Regions", visible=False)
860
 
 
 
 
 
 
 
 
 
 
 
 
 
861
  def predict_and_show_downloads(*args):
862
  results = predict(*args)
863
- # Return results plus visibility update for accordion
864
- return results + (gr.update(visible=True),)
 
865
 
866
  predict_btn.click(
867
  predict_and_show_downloads,
868
  inputs=[seq_input, stride_input, threshold_input],
869
- outputs=[plot_output, result_summary, regions_output, pred_download_png, pred_download_pdf, pred_download_csv, pred_download_summary, download_accordion]
 
 
870
  )
871
 
872
  with gr.Tab("Embeddings"):
 
53
  # This shows nice visualization with low score on flanks and high score in the middle
54
  FLANKED_CRISPR_EXAMPLE = """ATGCGATCGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATTCCCCATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTCTGTTTACTTCCCTCTATATCTTTTTTTGTTCGGTCATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTAAAATCACACTCACAGCCAATACAAGCGGGGGGGGAAATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTTGCAGTAGGGCAGACTGGCAGTTTTCGGGTAATGATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACATTCATACGAATAATCATTTCCGAAAGACTCCTTTTATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACAGGTCATGAGCATTCAAAACGTTCTCCCCGTTCAATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTAGCCTGGACCAAATAATGTACGAACCTCTCCATCTATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACATGAATTATATAACAGGGATTAAAATTTTTCTTATTATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTAAATTTGAGCAAATACTAAAAAAATGAGACAAAAAGATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTCCGGCAATGAATTGATAGGACTTAAAATAATTGTATTCGAGAGCAAGATCCACTAAAACAAGGATTGAAACTATCACGTTGAACGATCGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGATCGATCGATCGATCGATCGTAGCTAGCTAGCTAGCTAGCTGATCGATCGATCGTAGCTAGCTAGCTGAT"""
55
 
56
+ # E. coli K-12 MG1655 CRISPR I-E example (based on real genomic region)
57
+ # Contains the characteristic 29bp repeat: CGGTTTATCCCCGCTGGCGCGGGGAACTC
58
+ # Structure: ~400bp upstream (cas genes) + CRISPR array (8 repeats + 7 spacers) + ~400bp downstream
59
+ ECOLI_CRISPR_EXAMPLE = """ATGGATGAACGAAATCGTCAGGTGCTGGAACAACGCCTGCGCCAGCATATCGATGCGCTGGAAGCGCGCAGCAATGATGTCACCTGCCAGACGCTGGAACTGCTGCGCGATGGCGACGTACTGGATGCCGTGCTGGCGGATGCCCGCAAAGAGCTGGACGCACACCGCTTCCTGCTGGAAGACGGCTACACCACGCTGCAACAGATCGCCAACCTGCCGGGCGTGACCTCGATGCTGGACGACGGCGACATCCACCTGCACTGCGTGCTCGGCGTGCCGCAGCGCCGTGGCGAACATATCGAACAGTTCGCCCGCGAGCATTACCAGAATCCGCTGCAAACGCTGCGCGAGTGACGGTTTATCCCCGCTGGCGCGGGGAACTCGAAAGCTACGTTGATATTGCGCTATCTCATCGACGGTTTATCCCCGCTGGCGCGGGGAACTCTGCAGAACTCGAGGGATGAAACGGTCTTGCGGTTTATCCCCGCTGGCGCGGGGAACTCAATGAAGAAATGCTTCGATTTCGTAGCCGTTCGGTTTATCCCCGCTGGCGCGGGGAACTCGTTGTCTGGATGGATCGATCAATCTCATACAACGGTTTATCCCCGCTGGCGCGGGGAACTCCAGAACGATTCGCCACGGTCTGTTGATTAACCGGTTTATCCCCGCTGGCGCGGGGAACTCTGAAGTTGATGATGATTCCGATCAGCACCACGGTTTATCCCCGCTGGCGCGGGGAACTCATGATCTTGCAGGCGCGCCAGCACTTCAGCCATCGGTTTATCCCCGCTGGCGCGGGGAACTCGCGATGGCGATTTCATTACTGATGCGGCGTGAGCGTGGTGCAACATCCGCGCCCGCTGACGCGTTTTTTTGTATCCGGATAGCGTCAGCCGATGGCTGAAGCGGCGAGCAAGCTCTGAAGCGCAGCGCAATCGCGCCCTGATGGCGATGGCGCGTAATGATTTCACCGACGATATCGACATCGATATCGTCCAGGCTGCGCAGGATCAGGGCGATACGCAAACGCCCGCCTTCGCCAGCGATAATGCTGCCGCCACCCAGCAGCGCGCCCCAGAACACGGCGGCGAGGATGACGATGAAGCCGAAACGCCACAGCAGGCTGCCACAGCC"""
60
+
61
  # Longer examples for State-Dynamic Plot (upstream + CRISPR array + downstream)
62
  # Structure: ~600bp upstream | CRISPR array (25 repeats + 24 spacers) | ~600bp downstream
63
  # Total: ~3000 bp - ideal for seeing alternating patterns in State-Dynamic Plot
 
600
  return fig
601
 
602
 
603
def parse_fasta_file(file_path):
    """Read an uploaded FASTA file and return its sequence text.

    Args:
        file_path: Path of the uploaded file, or ``None`` when nothing was
            uploaded.

    Returns:
        ``None`` when ``file_path`` is ``None``; otherwise the file content,
        stripped of surrounding whitespace and passed through
        ``strip_fasta_header``.
    """
    if file_path is None:
        return None
    # "utf-8-sig" decodes plain ASCII/UTF-8 and also drops a leading byte-order
    # mark, which would otherwise precede ">" and hide the FASTA header from
    # strip_fasta_header. An explicit encoding also makes the result
    # independent of the server's locale.
    with open(file_path, "r", encoding="utf-8-sig") as f:
        content = f.read()
    return strip_fasta_header(content.strip())
610
+
611
+
612
def create_gff3_export(regions, sequence_length, sequence_id="input_sequence"):
    """Write the detected CRISPR regions to a GFF3 annotation file.

    Args:
        regions: Iterable of dicts providing ``region_id``, ``start``, ``end``
            and ``mean_score``. ``start`` is written as ``start + 1``
            (presumably converting a 0-based start to GFF3's 1-based
            coordinates; confirm against the region detector).
        sequence_length: Length used in the ``##sequence-region`` pragma.
        sequence_id: Value for the seqid column and the pragma.

    Returns:
        Path of the written file. The file is always named
        ``crispr_regions.gff3`` but lives in a directory created for this
        call, so concurrent requests cannot overwrite each other's export.
    """
    import tempfile

    # A fixed path inside the shared temp directory would be clobbered when two
    # analyses run at the same time; a private directory keeps each download
    # intact while preserving the user-visible file name.
    export_dir = tempfile.mkdtemp(prefix="crispr_gff3_")
    gff_path = os.path.join(export_dir, "crispr_regions.gff3")

    lines = [
        "##gff-version 3\n",
        f"##sequence-region {sequence_id} 1 {sequence_length}\n",
    ]
    for r in regions:
        # GFF3 columns: seqid source type start end score strand phase attributes
        attributes = (
            f"ID=CRISPR_{r['region_id']};"
            f"Name=CRISPR_array_{r['region_id']};"
            f"score={r['mean_score']:.3f}"
        )
        lines.append(
            f"{sequence_id}\tCRISPR-BERT\tCRISPR_array\t{r['start']+1}\t{r['end']}"
            f"\t{r['mean_score']:.3f}\t.\t.\t{attributes}\n"
        )

    with open(gff_path, "w", encoding="utf-8") as f:
        f.writelines(lines)

    return gff_path
628
+
629
+
630
def _per_base_scores(seq_len, positions, probabilities, window_size):
    """Return a list with the mean window score covering each base (0.0 if uncovered)."""
    import numpy as np

    score_sum = np.zeros(seq_len)
    coverage = np.zeros(seq_len)
    for pos, prob in zip(positions, probabilities):
        start = max(int(pos), 0)
        end = min(int(pos) + window_size, seq_len)
        if start >= end:
            continue
        # Slice updates replace the per-base Python loop.
        score_sum[start:end] += prob
        coverage[start:end] += 1

    means = np.divide(score_sum, coverage, out=np.zeros(seq_len), where=coverage > 0)
    # Plain Python floats keep the f-string output independent of NumPy's
    # scalar formatting.
    return means.tolist()


def _score_color(score, threshold):
    """Map a score to the viewer's text colour relative to the threshold."""
    if score < threshold * 0.5:
        return "#3b82f6"  # blue
    if score < threshold:
        return "#fbbf24"  # yellow
    if score < threshold * 1.5:
        return "#f97316"  # orange
    return "#ef4444"  # red


def create_sequence_viewer_html(sequence, positions, probabilities, threshold=0.3,
                                chunk_size=100, window_size=1000):
    """Create an HTML visualization of the sequence with color-coded scores.

    Each base is coloured by the mean score of all windows that cover it.
    Earlier behaviour folded overlapping windows with a running
    ``(old + new) / 2``, which over-weighted later windows and treated a
    genuine score of 0 as "not yet set"; the true mean is used instead.

    Args:
        sequence: Sequence text to display, one character per base.
        positions: Window start offsets into ``sequence`` (assumed 0-based
            window starts, as the original code treated them; confirm against
            ``predict_sequence``).
        probabilities: One score per entry of ``positions``.
        threshold: Score at or above which a base is shown in bold; also
            anchors the colour bands at 0.5x, 1x and 1.5x.
        chunk_size: Number of bases per displayed row.
        window_size: Number of bases each window score applies to
            (default 1000, the previously hard-coded value).

    Returns:
        A single HTML string.
    """
    from html import escape

    seq_len = len(sequence)
    per_base_scores = _per_base_scores(seq_len, positions, probabilities, window_size)

    html_parts = ['<div style="font-family: monospace; font-size: 12px; line-height: 1.8; background: #f8f9fa; padding: 15px; border-radius: 8px; max-height: 400px; overflow-y: auto;">']
    html_parts.append('<div style="margin-bottom: 10px; font-family: sans-serif; font-size: 13px;">')
    html_parts.append('<span style="background: linear-gradient(to right, #3b82f6, #fbbf24, #ef4444); padding: 2px 20px; border-radius: 3px; color: white;">Low → Medium → High CRISPR Score</span>')
    html_parts.append(f'<span style="margin-left: 15px;">Threshold: {threshold}</span>')
    html_parts.append('</div>')

    # One row per chunk, prefixed with the 1-based position of its first base.
    for chunk_start in range(0, seq_len, chunk_size):
        chunk_end = min(chunk_start + chunk_size, seq_len)
        chunk_seq = sequence[chunk_start:chunk_end]
        chunk_scores = per_base_scores[chunk_start:chunk_end]

        html_parts.append(f'<div><span style="color: #666; width: 60px; display: inline-block; font-size: 11px;">{chunk_start+1:,}</span>')

        for i, (base, score) in enumerate(zip(chunk_seq, chunk_scores)):
            color = _score_color(score, threshold)
            weight = "bold" if score >= threshold else "normal"
            bg_opacity = min(0.3 + score * 0.7, 1.0)
            # Escape the base so stray markup characters in unvalidated input
            # cannot inject HTML into the viewer.
            html_parts.append(
                f'<span style="color: {color}; background-color: rgba(0,0,0,{bg_opacity * 0.1:.3f}); '
                f'font-weight: {weight};" title="Pos {chunk_start + i + 1}: {score:.3f}">{escape(base)}</span>'
            )

        html_parts.append('</div>')

    html_parts.append('</div>')
    return ''.join(html_parts)
683
+
684
+
685
  def predict(sequence: str, stride: int = 100, threshold: float = 0.3):
686
  """Predict CRISPR array probability for each position."""
687
  import tempfile
688
  import csv
689
+ import time
690
+
691
+ start_time = time.time()
692
 
693
  sequence = strip_fasta_header(sequence.strip())
694
 
695
  is_valid, error = validate_sequence(sequence)
696
  if not is_valid:
697
+ return None, f"**Error**: {error}", None, None, None, None, None, None, None
698
 
699
  result = predict_sequence(sequence, stride=stride, aggregation="mean")
700
 
 
718
  for pos, prob in zip(result.positions, result.probabilities):
719
  writer.writerow([pos, f"{prob:.4f}", prob >= threshold])
720
 
721
+ # Create GFF3 export
722
+ gff_path = create_gff3_export(regions, result.sequence_length) if regions else None
723
+
724
+ # Create sequence viewer HTML
725
+ seq_viewer_html = create_sequence_viewer_html(sequence, result.positions, result.probabilities, threshold)
726
+
727
+ elapsed_time = time.time() - start_time
728
+
729
  # Create summary text file
730
  summary_path = os.path.join(temp_dir, "crispr_summary.txt")
731
  summary_text = f"""CRISPR Array Detection Summary
 
735
  Windows processed: {result.num_windows}
736
  Stride: {stride} bp
737
  Threshold: {threshold}
738
+ Inference time: {elapsed_time:.2f} seconds
739
 
740
  Overall score: {result.overall_score:.4f}
741
  Max score: {max(result.probabilities):.4f}
 
761
  | Overall score | {result.overall_score:.4f} |
762
  | Max score | {max(result.probabilities):.4f} |
763
  | Regions detected | {len(regions)} |
764
+ | Inference time | {elapsed_time:.2f}s |
765
 
766
  """
767
  if regions:
 
769
  for r in regions:
770
  summary += f"- **Region {r['region_id']}**: positions {r['start']:,}-{r['end']:,} ({r['length']} bp), score: {r['mean_score']:.3f}\n"
771
 
772
+ return fig, summary, regions, png_path, pdf_path, csv_path, summary_path, gff_path, seq_viewer_html
773
 
774
 
775
  def detect(sequence: str, threshold: float = 0.3, min_length: int = 160):
 
914
  """)
915
 
916
  with gr.Tab("Predict & Visualize"):
917
+ gr.Markdown("Paste a DNA sequence or upload a FASTA file to get per-position CRISPR probability scores with interactive visualization.")
918
  with gr.Row():
919
  with gr.Column(scale=1):
920
  seq_input = gr.Textbox(
921
  label="DNA Sequence (min 1000 bp)",
922
  placeholder="Paste DNA sequence or FASTA...",
923
+ lines=6,
924
  value=FLANKED_CRISPR_EXAMPLE
925
  )
926
+ file_upload = gr.File(
927
+ label="Or upload FASTA file",
928
+ file_types=[".fasta", ".fa", ".fna", ".txt"],
929
+ type="filepath"
930
+ )
931
  with gr.Row():
932
  stride_input = gr.Slider(
933
  minimum=50, maximum=500, value=100, step=50,
 
938
  label="Threshold"
939
  )
940
  with gr.Row():
941
+ predict_btn = gr.Button("🔬 Analyze Sequence", variant="primary", size="lg")
942
+ gr.Markdown("**Load example:**")
943
  with gr.Row():
944
+ gr.Button("Flanked CRISPR").click(
 
 
 
945
  lambda: FLANKED_CRISPR_EXAMPLE, outputs=seq_input
946
  )
947
+ gr.Button("E. coli K-12").click(
948
+ lambda: ECOLI_CRISPR_EXAMPLE, outputs=seq_input
949
+ )
950
+ with gr.Row():
951
+ gr.Button("CRISPR Only").click(
952
+ lambda: CRISPR_EXAMPLE, outputs=seq_input
953
+ )
954
  gr.Button("Non-CRISPR").click(
955
  lambda: NON_CRISPR_EXAMPLE, outputs=seq_input
956
  )
957
  result_summary = gr.Markdown()
958
+ with gr.Accordion("📥 Downloads", open=False, visible=False) as download_accordion:
959
  gr.Markdown("**Plot exports:**")
960
  with gr.Row():
961
  pred_download_png = gr.File(label="PNG", interactive=False)
962
  pred_download_pdf = gr.File(label="PDF", interactive=False)
963
  gr.Markdown("**Data exports:**")
964
  with gr.Row():
965
+ pred_download_csv = gr.File(label="CSV", interactive=False)
966
+ pred_download_gff = gr.File(label="GFF3", interactive=False)
967
+ with gr.Row():
968
+ pred_download_summary = gr.File(label="Summary", interactive=False)
969
  with gr.Column(scale=2):
970
  plot_output = gr.Plot(label="CRISPR Score Profile (Interactive)")
971
+ with gr.Accordion("🧬 Sequence Viewer", open=False, visible=False) as seq_viewer_accordion:
972
+ seq_viewer_html = gr.HTML(label="Color-coded sequence")
973
  regions_output = gr.JSON(label="Detected Regions", visible=False)
974
 
975
+ # Handle file upload - load content into textbox
976
+ def load_file_to_textbox(file_path):
977
+ if file_path:
978
+ return parse_fasta_file(file_path)
979
+ return gr.update()
980
+
981
+ file_upload.change(
982
+ load_file_to_textbox,
983
+ inputs=[file_upload],
984
+ outputs=[seq_input]
985
+ )
986
+
987
  def predict_and_show_downloads(*args):
988
  results = predict(*args)
989
+ # results = (fig, summary, regions, png, pdf, csv, summary_txt, gff, seq_html)
990
+ # Return results plus visibility updates for accordions
991
+ return results + (gr.update(visible=True), gr.update(visible=True))
992
 
993
  predict_btn.click(
994
  predict_and_show_downloads,
995
  inputs=[seq_input, stride_input, threshold_input],
996
+ outputs=[plot_output, result_summary, regions_output, pred_download_png, pred_download_pdf,
997
+ pred_download_csv, pred_download_summary, pred_download_gff, seq_viewer_html,
998
+ download_accordion, seq_viewer_accordion]
999
  )
1000
 
1001
  with gr.Tab("Embeddings"):