re-type committed on
Commit
dc0ba8b
·
verified ·
1 Parent(s): ff9eeb4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -85
app.py CHANGED
@@ -466,39 +466,39 @@ def build_maximum_likelihood_tree(f_gene_sequence):
466
  # --- NEW Tree Analysis Function (Using the new analyzer API) ---
467
  def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tuple:
468
  """
469
- Analyze sequence and create phylogenetic tree using the new analyzer API
470
 
471
  Args:
472
  sequence (str): DNA sequence to analyze
473
  matching_percentage (float): Similarity threshold percentage
474
 
475
  Returns:
476
- tuple: (status_message, html_file_path)
477
  """
478
  try:
479
  if not analyzer:
480
- return "❌ Error: Tree analyzer not initialized. Please check if the CSV data file is available.", None
481
 
482
  if not sequence:
483
- return "❌ Error: Please provide a sequence.", None
484
 
485
  if not (1 <= matching_percentage <= 99):
486
- return "❌ Error: Matching percentage must be between 1 and 99.", None
487
 
488
  # Validate inputs
489
  sequence = sequence.strip()
490
  if len(sequence) < 10:
491
- return "❌ Error: Invalid or missing sequence. Must be ≥10 nucleotides.", None
492
 
493
  # Find query sequence
494
  if not analyzer.find_query_sequence(sequence):
495
- return "❌ Error: Sequence not accepted.", None
496
 
497
  # Find similar sequences
498
  matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
499
 
500
  if not matched_ids:
501
- return f"❌ Error: No similar sequences found at {matching_percentage}% similarity threshold.", None
502
 
503
  logging.info(f"Found {len(matched_ids)} similar sequences at {actual_percentage:.2f}% similarity")
504
 
@@ -508,22 +508,28 @@ def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tupl
508
  # Create interactive tree
509
  fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
510
 
511
- # Save to temporary file that Gradio can access
512
  temp_dir = tempfile.gettempdir()
513
- output_path = os.path.join(temp_dir, 'phylogenetic_tree_interactive.html')
514
- fig.write_html(output_path)
 
 
 
 
 
 
 
515
 
516
  success_msg = f"✅ Analysis complete! Found {len(matched_ids)} similar sequences with {actual_percentage:.2f}% average similarity."
517
 
518
- return success_msg, output_path
519
 
520
  except Exception as e:
521
  error_msg = f"❌ Error during analysis: {str(e)}"
522
  logging.error(error_msg)
523
  import traceback
524
  logging.error(f"Full traceback: {traceback.format_exc()}")
525
- return error_msg, None
526
-
527
  # --- Keras Prediction ---
528
  def predict_with_keras(sequence):
529
  try:
@@ -577,19 +583,19 @@ def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
577
  try:
578
  dna_input = read_fasta_file(fasta_file_obj)
579
  if not dna_input:
580
- return "Failed to read FASTA file", "", "", "", "", None, None, None, "No input sequence"
581
  return run_pipeline(dna_input, similarity_score, build_ml_tree)
582
  except Exception as e:
583
  error_msg = f"Pipeline error: {str(e)}"
584
  logging.error(error_msg)
585
- return error_msg, "", "", "", "", None, None, None, error_msg
586
 
587
  def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
588
  try:
589
  # Clean input
590
  dna_input = dna_input.upper().strip()
591
  if not dna_input:
592
- return "Empty input", "", "", "", "", None, None, None, "No input provided"
593
 
594
  # Sanitize DNA sequence
595
  if not re.match('^[ACTGN]+$', dna_input):
@@ -602,7 +608,7 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
602
 
603
  if boundary_model:
604
  try:
605
- result = boundary_model.predict_sequence(dna_input) # Updated to use predict_sequence
606
  predictions = result['predictions']
607
  probs = result['probabilities']['gene']
608
  confidence = result['confidence']
@@ -628,12 +634,11 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
628
  keras_output = ""
629
  if processed_sequence and len(processed_sequence) >= 6:
630
  keras_prediction = predict_with_keras(processed_sequence)
631
- # Use the prediction directly as it's now a percentage
632
  keras_output = keras_prediction
633
  else:
634
  keras_output = "Skipped: sequence too short for F gene validation"
635
 
636
- # Step 3: Maximum Likelihood Tree (Phylogenetic Placement) - Using improved API
637
  aligned_file = None
638
  phy_file = None
639
  ml_tree_output = ""
@@ -648,7 +653,7 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
648
  aligned_file = ml_aligned
649
  phy_file = ml_tree
650
  else:
651
- ml_tree_output = ml_message # This now includes detailed error information
652
 
653
  except Exception as e:
654
  ml_tree_output = f"❌ Phylogenetic placement failed: {str(e)}"
@@ -658,54 +663,71 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
658
  else:
659
  ml_tree_output = "Phylogenetic placement skipped (not requested)"
660
 
661
- # Step 4: NEW Simplified Tree Analysis (using the new analyzer API)
662
- html_file = None
 
663
  tree_html_content = "No tree generated"
 
664
  simplified_ml_output = ""
665
 
666
  if analyzer and processed_sequence and len(processed_sequence) >= 10:
667
  try:
668
  logging.info(f"Starting simplified ML tree analysis with F gene sequence length: {len(processed_sequence)}")
669
 
670
- # Use the new analyze_sequence_for_tree function
671
- tree_result, html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
672
 
673
- if html_path and os.path.exists(html_path):
674
- # Success - copy the HTML file to a location Gradio can serve
675
  output_dir = "output"
676
  os.makedirs(output_dir, exist_ok=True)
677
-
678
- # Create a safe filename
679
  safe_seq_name = re.sub(r'[^a-zA-Z0-9_-]', '', processed_sequence[:20])
680
  timestamp = str(int(time.time()))
681
- html_filename = f"tree_{safe_seq_name}_{timestamp}.html"
682
- final_html_path = os.path.join(output_dir, html_filename)
683
-
684
- # Copy the HTML file
685
- shutil.copy2(html_path, final_html_path)
686
- html_file = final_html_path
687
 
688
- # Read HTML content for display
689
- with open(html_path, 'r', encoding='utf-8') as f:
690
  tree_html_content = f.read()
691
 
692
- simplified_ml_output = tree_result
693
- logging.info(f"Tree analysis completed successfully: {html_filename}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
 
695
- # Clean up temporary file
696
  try:
697
- os.unlink(html_path)
698
  except:
699
  pass
700
-
701
- else:
702
- simplified_ml_output = tree_result # Error message
703
  tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
704
-
 
 
 
 
705
  except Exception as e:
706
  error_msg = f"❌ Tree analysis failed: {str(e)}"
707
  simplified_ml_output = error_msg
708
  tree_html_content = f"<div style='color: red;'>{error_msg}</div>"
 
709
  logging.error(f"Tree analysis failed: {e}")
710
  else:
711
  if not analyzer:
@@ -716,6 +738,7 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
716
  simplified_ml_output = "❌ No processed sequence available for tree analysis"
717
 
718
  tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
 
719
 
720
  # Final summary
721
  summary_output = f"""
@@ -726,19 +749,22 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
726
  ✅ F GENE VALIDATION: {keras_output}
727
  🌳 PHYLOGENETIC PLACEMENT: {'✅ Completed' if 'successfully' in ml_tree_output else '❌ ' + ('Skipped' if 'skipped' in ml_tree_output else 'Failed')}
728
  🔬 TREE ANALYSIS: {'✅ Completed' if '✅' in simplified_ml_output else '❌ ' + ('Not available' if 'not available' in simplified_ml_output else 'Failed')}
 
729
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
730
  """
731
 
732
  return (
733
- boundary_output, # F gene sequence
734
- keras_output, # F gene validation
735
- ml_tree_output, # Phylogenetic placement
736
- simplified_ml_output, # Tree analysis
737
- summary_output, # Summary
738
- aligned_file, # Alignment file
739
- phy_file, # Tree file
740
- html_file, # HTML tree file
741
- tree_html_content # HTML content for display
 
 
742
  )
743
 
744
  except Exception as e:
@@ -746,14 +772,13 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
746
  logging.error(error_msg)
747
  import traceback
748
  logging.error(f"Full traceback: {traceback.format_exc()}")
749
- return error_msg, "", "", "", "", None, None, None, error_msg
750
 
751
 
752
  # --- Gradio Interface ---
753
  def create_interface():
754
  """Create and configure the Gradio interface"""
755
 
756
- # Custom CSS for better styling
757
  custom_css = """
758
  .gradio-container {
759
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
@@ -802,7 +827,6 @@ def create_interface():
802
 
803
  with gr.Blocks(css=custom_css, title="🧬 Advanced Gene Analysis Pipeline", theme=gr.themes.Soft()) as iface:
804
 
805
- # Header
806
  gr.HTML("""
807
  <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; margin-bottom: 20px;">
808
  <h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: 700;">🧬 Advanced Gene Analysis Pipeline</h1>
@@ -810,7 +834,6 @@ def create_interface():
810
  </div>
811
  """)
812
 
813
- # Instructions
814
  with gr.Accordion("📋 Instructions & Information", open=False):
815
  gr.HTML("""
816
  <div style="background: #f8fafc; padding: 20px; border-radius: 10px; border-left: 4px solid #3b82f6;">
@@ -820,6 +843,7 @@ def create_interface():
820
  <li><strong>Gene Validation:</strong> Validates extracted sequence as F gene using deep learning</li>
821
  <li><strong>Phylogenetic Placement:</strong> Places sequence in reference phylogenetic tree (MAFFT + IQ-TREE)</li>
822
  <li><strong>Interactive Tree Analysis:</strong> Creates interactive phylogenetic tree with similar sequences</li>
 
823
  </ol>
824
 
825
  <h3 style="color: #1e40af;">📁 Input Requirements</h3>
@@ -837,20 +861,17 @@ def create_interface():
837
  </div>
838
  """)
839
 
840
- # Main input section
841
  with gr.Row():
842
  with gr.Column(scale=2):
843
  gr.HTML("<h3 style='color: #1e40af; margin-bottom: 10px;'>📝 Sequence Input</h3>")
844
 
845
- # Input tabs
846
  with gr.Tabs():
847
  with gr.TabItem("✍️ Text Input"):
848
  dna_input = gr.Textbox(
849
  label="DNA Sequence",
850
  placeholder="Enter your DNA sequence here (A, T, C, G, N)...",
851
  lines=6,
852
- value="",
853
- info="Paste your DNA sequence or enter it manually"
854
  )
855
 
856
  with gr.TabItem("📁 File Upload"):
@@ -868,17 +889,14 @@ def create_interface():
868
  maximum=99.0,
869
  value=95.0,
870
  step=1.0,
871
- label="Similarity Threshold (%)",
872
- info="Minimum similarity for tree analysis"
873
  )
874
 
875
  build_ml_tree = gr.Checkbox(
876
  label="🌳 Enable Phylogenetic Placement",
877
- value=False,
878
- info="Requires MAFFT and IQ-TREE (slower but more accurate)"
879
  )
880
 
881
- # Action buttons
882
  with gr.Row():
883
  analyze_text_btn = gr.Button(
884
  "🚀 Analyze Text Input",
@@ -891,52 +909,50 @@ def create_interface():
891
  size="lg"
892
  )
893
 
894
- # Results section
895
  gr.HTML("<hr style='margin: 30px 0; border: none; height: 2px; background: linear-gradient(to right, #3b82f6, #8b5cf6);'>")
896
  gr.HTML("<h2 style='color: #1e40af; text-align: center; margin-bottom: 20px;'>📊 Analysis Results</h2>")
897
 
898
- # Output tabs
899
  with gr.Tabs():
900
  with gr.TabItem("🎯 F Gene Extraction"):
901
  f_gene_output = gr.Textbox(
902
  label="Extracted F Gene Sequence",
903
- lines=8,
904
- info="Boundary-detected F gene region"
905
  )
906
 
907
  with gr.TabItem("✅ Gene Validation"):
908
  keras_output = gr.Textbox(
909
  label="F Gene Validation Result",
910
- lines=3,
911
- info="Deep learning validation of F gene"
912
  )
913
 
914
  with gr.TabItem("🌳 Phylogenetic Placement"):
915
  ml_tree_output = gr.Textbox(
916
  label="Phylogenetic Placement Results",
917
- lines=10,
918
- info="MAFFT alignment + IQ-TREE placement results"
919
  )
920
 
921
  with gr.TabItem("🔬 Interactive Tree"):
922
  tree_analysis_output = gr.Textbox(
923
  label="Tree Analysis Status",
924
- lines=5,
925
- info="Interactive phylogenetic tree generation"
926
  )
927
  tree_html_display = gr.HTML(
928
  label="Interactive Phylogenetic Tree",
929
  value="<div style='text-align: center; color: #6b7280; padding: 40px;'>No tree generated yet. Run analysis to create interactive tree.</div>"
930
  )
931
 
 
 
 
 
 
 
932
  with gr.TabItem("📋 Summary"):
933
  summary_output = gr.Textbox(
934
  label="Analysis Summary",
935
- lines=12,
936
- info="Complete pipeline summary"
937
  )
938
 
939
- # Download section
940
  with gr.Accordion("💾 Download Results", open=False):
941
  with gr.Row():
942
  alignment_file = gr.File(
@@ -951,8 +967,11 @@ def create_interface():
951
  label="🌐 Download Interactive Tree (HTML)",
952
  visible=True
953
  )
 
 
 
 
954
 
955
- # Footer
956
  gr.HTML("""
957
  <div style="text-align: center; padding: 20px; margin-top: 30px; border-top: 2px solid #e5e7eb; color: #6b7280;">
958
  <p style="margin: 0;">🧬 Advanced Gene Analysis Pipeline | Powered by Deep Learning & Phylogenetics</p>
@@ -960,7 +979,6 @@ def create_interface():
960
  </div>
961
  """)
962
 
963
- # Event handlers
964
  analyze_text_btn.click(
965
  fn=run_pipeline,
966
  inputs=[dna_input, similarity_score, build_ml_tree],
@@ -973,7 +991,9 @@ def create_interface():
973
  alignment_file,
974
  tree_file,
975
  html_tree_file,
976
- tree_html_display
 
 
977
  ]
978
  )
979
 
@@ -989,12 +1009,13 @@ def create_interface():
989
  alignment_file,
990
  tree_file,
991
  html_tree_file,
992
- tree_html_display
 
 
993
  ]
994
  )
995
 
996
  return iface
997
-
998
  # --- Main Execution ---
999
  if __name__ == "__main__":
1000
  try:
 
466
  # --- NEW Tree Analysis Function (Using the new analyzer API) ---
467
  def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tuple:
468
  """
469
+ Analyze sequence and create phylogenetic tree and detailed report using the new analyzer API
470
 
471
  Args:
472
  sequence (str): DNA sequence to analyze
473
  matching_percentage (float): Similarity threshold percentage
474
 
475
  Returns:
476
+ tuple: (status_message, tree_html_path, report_html_path)
477
  """
478
  try:
479
  if not analyzer:
480
+ return "❌ Error: Tree analyzer not initialized. Please check if the CSV data file is available.", None, None
481
 
482
  if not sequence:
483
+ return "❌ Error: Please provide a sequence.", None, None
484
 
485
  if not (1 <= matching_percentage <= 99):
486
+ return "❌ Error: Matching percentage must be between 1 and 99.", None, None
487
 
488
  # Validate inputs
489
  sequence = sequence.strip()
490
  if len(sequence) < 10:
491
+ return "❌ Error: Invalid or missing sequence. Must be ≥10 nucleotides.", None, None
492
 
493
  # Find query sequence
494
  if not analyzer.find_query_sequence(sequence):
495
+ return "❌ Error: Sequence not accepted.", None, None
496
 
497
  # Find similar sequences
498
  matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
499
 
500
  if not matched_ids:
501
+ return f"❌ Error: No similar sequences found at {matching_percentage}% similarity threshold.", None, None
502
 
503
  logging.info(f"Found {len(matched_ids)} similar sequences at {actual_percentage:.2f}% similarity")
504
 
 
508
  # Create interactive tree
509
  fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
510
 
511
+ # Save tree to temporary file
512
  temp_dir = tempfile.gettempdir()
513
+ query_id = analyzer.query_id or f"query_{int(time.time())}"
514
+ tree_html_path = os.path.join(temp_dir, f'phylogenetic_tree_interactive_{query_id}.html')
515
+ fig.write_html(tree_html_path)
516
+
517
+ # Generate and save detailed report
518
+ report_html_content = analyzer.generate_detailed_report(matched_ids, actual_percentage)
519
+ report_html_path = os.path.join(temp_dir, f'detailed_report_{query_id}.html')
520
+ with open(report_html_path, 'w', encoding='utf-8') as f:
521
+ f.write(report_html_content)
522
 
523
  success_msg = f"✅ Analysis complete! Found {len(matched_ids)} similar sequences with {actual_percentage:.2f}% average similarity."
524
 
525
+ return success_msg, tree_html_path, report_html_path
526
 
527
  except Exception as e:
528
  error_msg = f"❌ Error during analysis: {str(e)}"
529
  logging.error(error_msg)
530
  import traceback
531
  logging.error(f"Full traceback: {traceback.format_exc()}")
532
+ return error_msg, None, None
 
533
  # --- Keras Prediction ---
534
  def predict_with_keras(sequence):
535
  try:
 
583
  try:
584
  dna_input = read_fasta_file(fasta_file_obj)
585
  if not dna_input:
586
+ return "Failed to read FASTA file", "", "", "", "", None, None, None, None, "No input sequence", "No input sequence"
587
  return run_pipeline(dna_input, similarity_score, build_ml_tree)
588
  except Exception as e:
589
  error_msg = f"Pipeline error: {str(e)}"
590
  logging.error(error_msg)
591
+ return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
592
 
593
  def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
594
  try:
595
  # Clean input
596
  dna_input = dna_input.upper().strip()
597
  if not dna_input:
598
+ return "Empty input", "", "", "", "", None, None, None, None, "No input provided"
599
 
600
  # Sanitize DNA sequence
601
  if not re.match('^[ACTGN]+$', dna_input):
 
608
 
609
  if boundary_model:
610
  try:
611
+ result = boundary_model.predict_sequence(dna_input)
612
  predictions = result['predictions']
613
  probs = result['probabilities']['gene']
614
  confidence = result['confidence']
 
634
  keras_output = ""
635
  if processed_sequence and len(processed_sequence) >= 6:
636
  keras_prediction = predict_with_keras(processed_sequence)
 
637
  keras_output = keras_prediction
638
  else:
639
  keras_output = "Skipped: sequence too short for F gene validation"
640
 
641
+ # Step 3: Maximum Likelihood Tree (Phylogenetic Placement)
642
  aligned_file = None
643
  phy_file = None
644
  ml_tree_output = ""
 
653
  aligned_file = ml_aligned
654
  phy_file = ml_tree
655
  else:
656
+ ml_tree_output = ml_message
657
 
658
  except Exception as e:
659
  ml_tree_output = f"❌ Phylogenetic placement failed: {str(e)}"
 
663
  else:
664
  ml_tree_output = "Phylogenetic placement skipped (not requested)"
665
 
666
+ # Step 4: Simplified Tree Analysis
667
+ tree_html_file = None
668
+ report_html_file = None
669
  tree_html_content = "No tree generated"
670
+ report_html_content = "No report generated"
671
  simplified_ml_output = ""
672
 
673
  if analyzer and processed_sequence and len(processed_sequence) >= 10:
674
  try:
675
  logging.info(f"Starting simplified ML tree analysis with F gene sequence length: {len(processed_sequence)}")
676
 
677
+ # Updated call to analyze_sequence_for_tree
678
+ tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
679
 
680
+ if tree_html_path and os.path.exists(tree_html_path):
681
+ # Copy tree HTML to output directory
682
  output_dir = "output"
683
  os.makedirs(output_dir, exist_ok=True)
 
 
684
  safe_seq_name = re.sub(r'[^a-zA-Z0-9_-]', '', processed_sequence[:20])
685
  timestamp = str(int(time.time()))
686
+ tree_html_filename = f"tree_{safe_seq_name}_{timestamp}.html"
687
+ tree_html_final_path = os.path.join(output_dir, tree_html_filename)
688
+ shutil.copy2(tree_html_path, tree_html_final_path)
689
+ tree_html_file = tree_html_final_path
 
 
690
 
691
+ # Read tree HTML content for display
692
+ with open(tree_html_path, 'r', encoding='utf-8') as f:
693
  tree_html_content = f.read()
694
 
695
+ # Clean up temporary tree file
696
+ try:
697
+ os.unlink(tree_html_path)
698
+ except:
699
+ pass
700
+
701
+ if report_html_path and os.path.exists(report_html_path):
702
+ # Copy report HTML to output directory
703
+ report_html_filename = f"report_{safe_seq_name}_{timestamp}.html"
704
+ report_html_final_path = os.path.join(output_dir, report_html_filename)
705
+ shutil.copy2(report_html_path, report_html_final_path)
706
+ report_html_file = report_html_final_path
707
+
708
+ # Read report HTML content for display
709
+ with open(report_html_path, 'r', encoding='utf-8') as f:
710
+ report_html_content = f.read()
711
 
712
+ # Clean up temporary report file
713
  try:
714
+ os.unlink(report_html_path)
715
  except:
716
  pass
717
+
718
+ simplified_ml_output = tree_result
719
+ if not tree_html_file:
720
  tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
721
+ if not report_html_file:
722
+ report_html_content = f"<div style='color: red;'>{tree_result}</div>"
723
+
724
+ logging.info(f"Tree analysis completed successfully: {tree_html_filename}")
725
+
726
  except Exception as e:
727
  error_msg = f"❌ Tree analysis failed: {str(e)}"
728
  simplified_ml_output = error_msg
729
  tree_html_content = f"<div style='color: red;'>{error_msg}</div>"
730
+ report_html_content = f"<div style='color: red;'>{error_msg}</div>"
731
  logging.error(f"Tree analysis failed: {e}")
732
  else:
733
  if not analyzer:
 
738
  simplified_ml_output = "❌ No processed sequence available for tree analysis"
739
 
740
  tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
741
+ report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
742
 
743
  # Final summary
744
  summary_output = f"""
 
749
  ✅ F GENE VALIDATION: {keras_output}
750
  🌳 PHYLOGENETIC PLACEMENT: {'✅ Completed' if 'successfully' in ml_tree_output else '❌ ' + ('Skipped' if 'skipped' in ml_tree_output else 'Failed')}
751
  🔬 TREE ANALYSIS: {'✅ Completed' if '✅' in simplified_ml_output else '❌ ' + ('Not available' if 'not available' in simplified_ml_output else 'Failed')}
752
+ 📝 DETAILED REPORT: {'✅ Generated' if report_html_file else '❌ Not generated'}
753
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
754
  """
755
 
756
  return (
757
+ boundary_output,
758
+ keras_output,
759
+ ml_tree_output,
760
+ simplified_ml_output,
761
+ summary_output,
762
+ aligned_file,
763
+ phy_file,
764
+ tree_html_file,
765
+ report_html_file,
766
+ tree_html_content,
767
+ report_html_content
768
  )
769
 
770
  except Exception as e:
 
772
  logging.error(error_msg)
773
  import traceback
774
  logging.error(f"Full traceback: {traceback.format_exc()}")
775
+ return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
776
 
777
 
778
  # --- Gradio Interface ---
779
  def create_interface():
780
  """Create and configure the Gradio interface"""
781
 
 
782
  custom_css = """
783
  .gradio-container {
784
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
 
827
 
828
  with gr.Blocks(css=custom_css, title="🧬 Advanced Gene Analysis Pipeline", theme=gr.themes.Soft()) as iface:
829
 
 
830
  gr.HTML("""
831
  <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; margin-bottom: 20px;">
832
  <h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: 700;">🧬 Advanced Gene Analysis Pipeline</h1>
 
834
  </div>
835
  """)
836
 
 
837
  with gr.Accordion("📋 Instructions & Information", open=False):
838
  gr.HTML("""
839
  <div style="background: #f8fafc; padding: 20px; border-radius: 10px; border-left: 4px solid #3b82f6;">
 
843
  <li><strong>Gene Validation:</strong> Validates extracted sequence as F gene using deep learning</li>
844
  <li><strong>Phylogenetic Placement:</strong> Places sequence in reference phylogenetic tree (MAFFT + IQ-TREE)</li>
845
  <li><strong>Interactive Tree Analysis:</strong> Creates interactive phylogenetic tree with similar sequences</li>
846
+ <li><strong>Detailed Report:</strong> Provides comprehensive analysis details</li>
847
  </ol>
848
 
849
  <h3 style="color: #1e40af;">📁 Input Requirements</h3>
 
861
  </div>
862
  """)
863
 
 
864
  with gr.Row():
865
  with gr.Column(scale=2):
866
  gr.HTML("<h3 style='color: #1e40af; margin-bottom: 10px;'>📝 Sequence Input</h3>")
867
 
 
868
  with gr.Tabs():
869
  with gr.TabItem("✍️ Text Input"):
870
  dna_input = gr.Textbox(
871
  label="DNA Sequence",
872
  placeholder="Enter your DNA sequence here (A, T, C, G, N)...",
873
  lines=6,
874
+ value=""
 
875
  )
876
 
877
  with gr.TabItem("📁 File Upload"):
 
889
  maximum=99.0,
890
  value=95.0,
891
  step=1.0,
892
+ label="Similarity Threshold (%)"
 
893
  )
894
 
895
  build_ml_tree = gr.Checkbox(
896
  label="🌳 Enable Phylogenetic Placement",
897
+ value=False
 
898
  )
899
 
 
900
  with gr.Row():
901
  analyze_text_btn = gr.Button(
902
  "🚀 Analyze Text Input",
 
909
  size="lg"
910
  )
911
 
 
912
  gr.HTML("<hr style='margin: 30px 0; border: none; height: 2px; background: linear-gradient(to right, #3b82f6, #8b5cf6);'>")
913
  gr.HTML("<h2 style='color: #1e40af; text-align: center; margin-bottom: 20px;'>📊 Analysis Results</h2>")
914
 
 
915
  with gr.Tabs():
916
  with gr.TabItem("🎯 F Gene Extraction"):
917
  f_gene_output = gr.Textbox(
918
  label="Extracted F Gene Sequence",
919
+ lines=8
 
920
  )
921
 
922
  with gr.TabItem("✅ Gene Validation"):
923
  keras_output = gr.Textbox(
924
  label="F Gene Validation Result",
925
+ lines=3
 
926
  )
927
 
928
  with gr.TabItem("🌳 Phylogenetic Placement"):
929
  ml_tree_output = gr.Textbox(
930
  label="Phylogenetic Placement Results",
931
+ lines=10
 
932
  )
933
 
934
  with gr.TabItem("🔬 Interactive Tree"):
935
  tree_analysis_output = gr.Textbox(
936
  label="Tree Analysis Status",
937
+ lines=5
 
938
  )
939
  tree_html_display = gr.HTML(
940
  label="Interactive Phylogenetic Tree",
941
  value="<div style='text-align: center; color: #6b7280; padding: 40px;'>No tree generated yet. Run analysis to create interactive tree.</div>"
942
  )
943
 
944
+ with gr.TabItem("📝 Detailed Report"):
945
+ report_html_display = gr.HTML(
946
+ label="Detailed Analysis Report",
947
+ value="<div style='text-align: center; color: #6b7280; padding: 40px;'>No report generated yet. Run analysis to create detailed report.</div>"
948
+ )
949
+
950
  with gr.TabItem("📋 Summary"):
951
  summary_output = gr.Textbox(
952
  label="Analysis Summary",
953
+ lines=12
 
954
  )
955
 
 
956
  with gr.Accordion("💾 Download Results", open=False):
957
  with gr.Row():
958
  alignment_file = gr.File(
 
967
  label="🌐 Download Interactive Tree (HTML)",
968
  visible=True
969
  )
970
+ report_file = gr.File(
971
+ label="📝 Download Detailed Report (HTML)",
972
+ visible=True
973
+ )
974
 
 
975
  gr.HTML("""
976
  <div style="text-align: center; padding: 20px; margin-top: 30px; border-top: 2px solid #e5e7eb; color: #6b7280;">
977
  <p style="margin: 0;">🧬 Advanced Gene Analysis Pipeline | Powered by Deep Learning & Phylogenetics</p>
 
979
  </div>
980
  """)
981
 
 
982
  analyze_text_btn.click(
983
  fn=run_pipeline,
984
  inputs=[dna_input, similarity_score, build_ml_tree],
 
991
  alignment_file,
992
  tree_file,
993
  html_tree_file,
994
+ report_file,
995
+ tree_html_display,
996
+ report_html_display
997
  ]
998
  )
999
 
 
1009
  alignment_file,
1010
  tree_file,
1011
  html_tree_file,
1012
+ report_file,
1013
+ tree_html_display,
1014
+ report_html_display
1015
  ]
1016
  )
1017
 
1018
  return iface
 
1019
  # --- Main Execution ---
1020
  if __name__ == "__main__":
1021
  try: