Spaces:
No application file
No application file
Update app.py
Browse files
app.py
CHANGED
|
@@ -466,39 +466,39 @@ def build_maximum_likelihood_tree(f_gene_sequence):
|
|
| 466 |
# --- NEW Tree Analysis Function (Using the new analyzer API) ---
|
| 467 |
def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tuple:
|
| 468 |
"""
|
| 469 |
-
Analyze sequence and create phylogenetic tree using the new analyzer API
|
| 470 |
|
| 471 |
Args:
|
| 472 |
sequence (str): DNA sequence to analyze
|
| 473 |
matching_percentage (float): Similarity threshold percentage
|
| 474 |
|
| 475 |
Returns:
|
| 476 |
-
tuple: (status_message,
|
| 477 |
"""
|
| 478 |
try:
|
| 479 |
if not analyzer:
|
| 480 |
-
return "❌ Error: Tree analyzer not initialized. Please check if the CSV data file is available.", None
|
| 481 |
|
| 482 |
if not sequence:
|
| 483 |
-
return "❌ Error: Please provide a sequence.", None
|
| 484 |
|
| 485 |
if not (1 <= matching_percentage <= 99):
|
| 486 |
-
return "❌ Error: Matching percentage must be between 1 and 99.", None
|
| 487 |
|
| 488 |
# Validate inputs
|
| 489 |
sequence = sequence.strip()
|
| 490 |
if len(sequence) < 10:
|
| 491 |
-
return "❌ Error: Invalid or missing sequence. Must be ≥10 nucleotides.", None
|
| 492 |
|
| 493 |
# Find query sequence
|
| 494 |
if not analyzer.find_query_sequence(sequence):
|
| 495 |
-
return "❌ Error: Sequence not accepted.", None
|
| 496 |
|
| 497 |
# Find similar sequences
|
| 498 |
matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
|
| 499 |
|
| 500 |
if not matched_ids:
|
| 501 |
-
return f"❌ Error: No similar sequences found at {matching_percentage}% similarity threshold.", None
|
| 502 |
|
| 503 |
logging.info(f"Found {len(matched_ids)} similar sequences at {actual_percentage:.2f}% similarity")
|
| 504 |
|
|
@@ -508,22 +508,28 @@ def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tupl
|
|
| 508 |
# Create interactive tree
|
| 509 |
fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
|
| 510 |
|
| 511 |
-
# Save to temporary file
|
| 512 |
temp_dir = tempfile.gettempdir()
|
| 513 |
-
|
| 514 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 515 |
|
| 516 |
success_msg = f"✅ Analysis complete! Found {len(matched_ids)} similar sequences with {actual_percentage:.2f}% average similarity."
|
| 517 |
|
| 518 |
-
return success_msg,
|
| 519 |
|
| 520 |
except Exception as e:
|
| 521 |
error_msg = f"❌ Error during analysis: {str(e)}"
|
| 522 |
logging.error(error_msg)
|
| 523 |
import traceback
|
| 524 |
logging.error(f"Full traceback: {traceback.format_exc()}")
|
| 525 |
-
return error_msg, None
|
| 526 |
-
|
| 527 |
# --- Keras Prediction ---
|
| 528 |
def predict_with_keras(sequence):
|
| 529 |
try:
|
|
@@ -577,19 +583,19 @@ def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
|
|
| 577 |
try:
|
| 578 |
dna_input = read_fasta_file(fasta_file_obj)
|
| 579 |
if not dna_input:
|
| 580 |
-
return "Failed to read FASTA file", "", "", "", "", None, None, None, "No input sequence"
|
| 581 |
return run_pipeline(dna_input, similarity_score, build_ml_tree)
|
| 582 |
except Exception as e:
|
| 583 |
error_msg = f"Pipeline error: {str(e)}"
|
| 584 |
logging.error(error_msg)
|
| 585 |
-
return error_msg, "", "", "", "", None, None, None, error_msg
|
| 586 |
|
| 587 |
def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
|
| 588 |
try:
|
| 589 |
# Clean input
|
| 590 |
dna_input = dna_input.upper().strip()
|
| 591 |
if not dna_input:
|
| 592 |
-
return "Empty input", "", "", "", "", None, None, None, "No input provided"
|
| 593 |
|
| 594 |
# Sanitize DNA sequence
|
| 595 |
if not re.match('^[ACTGN]+$', dna_input):
|
|
@@ -602,7 +608,7 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
|
|
| 602 |
|
| 603 |
if boundary_model:
|
| 604 |
try:
|
| 605 |
-
result = boundary_model.predict_sequence(dna_input)
|
| 606 |
predictions = result['predictions']
|
| 607 |
probs = result['probabilities']['gene']
|
| 608 |
confidence = result['confidence']
|
|
@@ -628,12 +634,11 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
|
|
| 628 |
keras_output = ""
|
| 629 |
if processed_sequence and len(processed_sequence) >= 6:
|
| 630 |
keras_prediction = predict_with_keras(processed_sequence)
|
| 631 |
-
# Use the prediction directly as it's now a percentage
|
| 632 |
keras_output = keras_prediction
|
| 633 |
else:
|
| 634 |
keras_output = "Skipped: sequence too short for F gene validation"
|
| 635 |
|
| 636 |
-
# Step 3: Maximum Likelihood Tree (Phylogenetic Placement)
|
| 637 |
aligned_file = None
|
| 638 |
phy_file = None
|
| 639 |
ml_tree_output = ""
|
|
@@ -648,7 +653,7 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
|
|
| 648 |
aligned_file = ml_aligned
|
| 649 |
phy_file = ml_tree
|
| 650 |
else:
|
| 651 |
-
ml_tree_output = ml_message
|
| 652 |
|
| 653 |
except Exception as e:
|
| 654 |
ml_tree_output = f"❌ Phylogenetic placement failed: {str(e)}"
|
|
@@ -658,54 +663,71 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
|
|
| 658 |
else:
|
| 659 |
ml_tree_output = "Phylogenetic placement skipped (not requested)"
|
| 660 |
|
| 661 |
-
# Step 4:
|
| 662 |
-
|
|
|
|
| 663 |
tree_html_content = "No tree generated"
|
|
|
|
| 664 |
simplified_ml_output = ""
|
| 665 |
|
| 666 |
if analyzer and processed_sequence and len(processed_sequence) >= 10:
|
| 667 |
try:
|
| 668 |
logging.info(f"Starting simplified ML tree analysis with F gene sequence length: {len(processed_sequence)}")
|
| 669 |
|
| 670 |
-
#
|
| 671 |
-
tree_result,
|
| 672 |
|
| 673 |
-
if
|
| 674 |
-
#
|
| 675 |
output_dir = "output"
|
| 676 |
os.makedirs(output_dir, exist_ok=True)
|
| 677 |
-
|
| 678 |
-
# Create a safe filename
|
| 679 |
safe_seq_name = re.sub(r'[^a-zA-Z0-9_-]', '', processed_sequence[:20])
|
| 680 |
timestamp = str(int(time.time()))
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
shutil.copy2(html_path, final_html_path)
|
| 686 |
-
html_file = final_html_path
|
| 687 |
|
| 688 |
-
# Read HTML content for display
|
| 689 |
-
with open(
|
| 690 |
tree_html_content = f.read()
|
| 691 |
|
| 692 |
-
|
| 693 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 694 |
|
| 695 |
-
# Clean up temporary file
|
| 696 |
try:
|
| 697 |
-
os.unlink(
|
| 698 |
except:
|
| 699 |
pass
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
|
| 704 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 705 |
except Exception as e:
|
| 706 |
error_msg = f"❌ Tree analysis failed: {str(e)}"
|
| 707 |
simplified_ml_output = error_msg
|
| 708 |
tree_html_content = f"<div style='color: red;'>{error_msg}</div>"
|
|
|
|
| 709 |
logging.error(f"Tree analysis failed: {e}")
|
| 710 |
else:
|
| 711 |
if not analyzer:
|
|
@@ -716,6 +738,7 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
|
|
| 716 |
simplified_ml_output = "❌ No processed sequence available for tree analysis"
|
| 717 |
|
| 718 |
tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
|
|
|
|
| 719 |
|
| 720 |
# Final summary
|
| 721 |
summary_output = f"""
|
|
@@ -726,19 +749,22 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
|
|
| 726 |
✅ F GENE VALIDATION: {keras_output}
|
| 727 |
🌳 PHYLOGENETIC PLACEMENT: {'✅ Completed' if 'successfully' in ml_tree_output else '❌ ' + ('Skipped' if 'skipped' in ml_tree_output else 'Failed')}
|
| 728 |
🔬 TREE ANALYSIS: {'✅ Completed' if '✅' in simplified_ml_output else '❌ ' + ('Not available' if 'not available' in simplified_ml_output else 'Failed')}
|
|
|
|
| 729 |
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 730 |
"""
|
| 731 |
|
| 732 |
return (
|
| 733 |
-
boundary_output,
|
| 734 |
-
keras_output,
|
| 735 |
-
ml_tree_output,
|
| 736 |
-
simplified_ml_output,
|
| 737 |
-
summary_output,
|
| 738 |
-
aligned_file,
|
| 739 |
-
phy_file,
|
| 740 |
-
|
| 741 |
-
|
|
|
|
|
|
|
| 742 |
)
|
| 743 |
|
| 744 |
except Exception as e:
|
|
@@ -746,14 +772,13 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
|
|
| 746 |
logging.error(error_msg)
|
| 747 |
import traceback
|
| 748 |
logging.error(f"Full traceback: {traceback.format_exc()}")
|
| 749 |
-
return error_msg, "", "", "", "", None, None, None, error_msg
|
| 750 |
|
| 751 |
|
| 752 |
# --- Gradio Interface ---
|
| 753 |
def create_interface():
|
| 754 |
"""Create and configure the Gradio interface"""
|
| 755 |
|
| 756 |
-
# Custom CSS for better styling
|
| 757 |
custom_css = """
|
| 758 |
.gradio-container {
|
| 759 |
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
|
@@ -802,7 +827,6 @@ def create_interface():
|
|
| 802 |
|
| 803 |
with gr.Blocks(css=custom_css, title="🧬 Advanced Gene Analysis Pipeline", theme=gr.themes.Soft()) as iface:
|
| 804 |
|
| 805 |
-
# Header
|
| 806 |
gr.HTML("""
|
| 807 |
<div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; margin-bottom: 20px;">
|
| 808 |
<h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: 700;">🧬 Advanced Gene Analysis Pipeline</h1>
|
|
@@ -810,7 +834,6 @@ def create_interface():
|
|
| 810 |
</div>
|
| 811 |
""")
|
| 812 |
|
| 813 |
-
# Instructions
|
| 814 |
with gr.Accordion("📋 Instructions & Information", open=False):
|
| 815 |
gr.HTML("""
|
| 816 |
<div style="background: #f8fafc; padding: 20px; border-radius: 10px; border-left: 4px solid #3b82f6;">
|
|
@@ -820,6 +843,7 @@ def create_interface():
|
|
| 820 |
<li><strong>Gene Validation:</strong> Validates extracted sequence as F gene using deep learning</li>
|
| 821 |
<li><strong>Phylogenetic Placement:</strong> Places sequence in reference phylogenetic tree (MAFFT + IQ-TREE)</li>
|
| 822 |
<li><strong>Interactive Tree Analysis:</strong> Creates interactive phylogenetic tree with similar sequences</li>
|
|
|
|
| 823 |
</ol>
|
| 824 |
|
| 825 |
<h3 style="color: #1e40af;">📁 Input Requirements</h3>
|
|
@@ -837,20 +861,17 @@ def create_interface():
|
|
| 837 |
</div>
|
| 838 |
""")
|
| 839 |
|
| 840 |
-
# Main input section
|
| 841 |
with gr.Row():
|
| 842 |
with gr.Column(scale=2):
|
| 843 |
gr.HTML("<h3 style='color: #1e40af; margin-bottom: 10px;'>📝 Sequence Input</h3>")
|
| 844 |
|
| 845 |
-
# Input tabs
|
| 846 |
with gr.Tabs():
|
| 847 |
with gr.TabItem("✍️ Text Input"):
|
| 848 |
dna_input = gr.Textbox(
|
| 849 |
label="DNA Sequence",
|
| 850 |
placeholder="Enter your DNA sequence here (A, T, C, G, N)...",
|
| 851 |
lines=6,
|
| 852 |
-
value=""
|
| 853 |
-
info="Paste your DNA sequence or enter it manually"
|
| 854 |
)
|
| 855 |
|
| 856 |
with gr.TabItem("📁 File Upload"):
|
|
@@ -868,17 +889,14 @@ def create_interface():
|
|
| 868 |
maximum=99.0,
|
| 869 |
value=95.0,
|
| 870 |
step=1.0,
|
| 871 |
-
label="Similarity Threshold (%)"
|
| 872 |
-
info="Minimum similarity for tree analysis"
|
| 873 |
)
|
| 874 |
|
| 875 |
build_ml_tree = gr.Checkbox(
|
| 876 |
label="🌳 Enable Phylogenetic Placement",
|
| 877 |
-
value=False
|
| 878 |
-
info="Requires MAFFT and IQ-TREE (slower but more accurate)"
|
| 879 |
)
|
| 880 |
|
| 881 |
-
# Action buttons
|
| 882 |
with gr.Row():
|
| 883 |
analyze_text_btn = gr.Button(
|
| 884 |
"🚀 Analyze Text Input",
|
|
@@ -891,52 +909,50 @@ def create_interface():
|
|
| 891 |
size="lg"
|
| 892 |
)
|
| 893 |
|
| 894 |
-
# Results section
|
| 895 |
gr.HTML("<hr style='margin: 30px 0; border: none; height: 2px; background: linear-gradient(to right, #3b82f6, #8b5cf6);'>")
|
| 896 |
gr.HTML("<h2 style='color: #1e40af; text-align: center; margin-bottom: 20px;'>📊 Analysis Results</h2>")
|
| 897 |
|
| 898 |
-
# Output tabs
|
| 899 |
with gr.Tabs():
|
| 900 |
with gr.TabItem("🎯 F Gene Extraction"):
|
| 901 |
f_gene_output = gr.Textbox(
|
| 902 |
label="Extracted F Gene Sequence",
|
| 903 |
-
lines=8
|
| 904 |
-
info="Boundary-detected F gene region"
|
| 905 |
)
|
| 906 |
|
| 907 |
with gr.TabItem("✅ Gene Validation"):
|
| 908 |
keras_output = gr.Textbox(
|
| 909 |
label="F Gene Validation Result",
|
| 910 |
-
lines=3
|
| 911 |
-
info="Deep learning validation of F gene"
|
| 912 |
)
|
| 913 |
|
| 914 |
with gr.TabItem("🌳 Phylogenetic Placement"):
|
| 915 |
ml_tree_output = gr.Textbox(
|
| 916 |
label="Phylogenetic Placement Results",
|
| 917 |
-
lines=10
|
| 918 |
-
info="MAFFT alignment + IQ-TREE placement results"
|
| 919 |
)
|
| 920 |
|
| 921 |
with gr.TabItem("🔬 Interactive Tree"):
|
| 922 |
tree_analysis_output = gr.Textbox(
|
| 923 |
label="Tree Analysis Status",
|
| 924 |
-
lines=5
|
| 925 |
-
info="Interactive phylogenetic tree generation"
|
| 926 |
)
|
| 927 |
tree_html_display = gr.HTML(
|
| 928 |
label="Interactive Phylogenetic Tree",
|
| 929 |
value="<div style='text-align: center; color: #6b7280; padding: 40px;'>No tree generated yet. Run analysis to create interactive tree.</div>"
|
| 930 |
)
|
| 931 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 932 |
with gr.TabItem("📋 Summary"):
|
| 933 |
summary_output = gr.Textbox(
|
| 934 |
label="Analysis Summary",
|
| 935 |
-
lines=12
|
| 936 |
-
info="Complete pipeline summary"
|
| 937 |
)
|
| 938 |
|
| 939 |
-
# Download section
|
| 940 |
with gr.Accordion("💾 Download Results", open=False):
|
| 941 |
with gr.Row():
|
| 942 |
alignment_file = gr.File(
|
|
@@ -951,8 +967,11 @@ def create_interface():
|
|
| 951 |
label="🌐 Download Interactive Tree (HTML)",
|
| 952 |
visible=True
|
| 953 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 954 |
|
| 955 |
-
# Footer
|
| 956 |
gr.HTML("""
|
| 957 |
<div style="text-align: center; padding: 20px; margin-top: 30px; border-top: 2px solid #e5e7eb; color: #6b7280;">
|
| 958 |
<p style="margin: 0;">🧬 Advanced Gene Analysis Pipeline | Powered by Deep Learning & Phylogenetics</p>
|
|
@@ -960,7 +979,6 @@ def create_interface():
|
|
| 960 |
</div>
|
| 961 |
""")
|
| 962 |
|
| 963 |
-
# Event handlers
|
| 964 |
analyze_text_btn.click(
|
| 965 |
fn=run_pipeline,
|
| 966 |
inputs=[dna_input, similarity_score, build_ml_tree],
|
|
@@ -973,7 +991,9 @@ def create_interface():
|
|
| 973 |
alignment_file,
|
| 974 |
tree_file,
|
| 975 |
html_tree_file,
|
| 976 |
-
|
|
|
|
|
|
|
| 977 |
]
|
| 978 |
)
|
| 979 |
|
|
@@ -989,12 +1009,13 @@ def create_interface():
|
|
| 989 |
alignment_file,
|
| 990 |
tree_file,
|
| 991 |
html_tree_file,
|
| 992 |
-
|
|
|
|
|
|
|
| 993 |
]
|
| 994 |
)
|
| 995 |
|
| 996 |
return iface
|
| 997 |
-
|
| 998 |
# --- Main Execution ---
|
| 999 |
if __name__ == "__main__":
|
| 1000 |
try:
|
|
|
|
| 466 |
# --- NEW Tree Analysis Function (Using the new analyzer API) ---
|
| 467 |
def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tuple:
|
| 468 |
"""
|
| 469 |
+
Analyze sequence and create phylogenetic tree and detailed report using the new analyzer API
|
| 470 |
|
| 471 |
Args:
|
| 472 |
sequence (str): DNA sequence to analyze
|
| 473 |
matching_percentage (float): Similarity threshold percentage
|
| 474 |
|
| 475 |
Returns:
|
| 476 |
+
tuple: (status_message, tree_html_path, report_html_path)
|
| 477 |
"""
|
| 478 |
try:
|
| 479 |
if not analyzer:
|
| 480 |
+
return "❌ Error: Tree analyzer not initialized. Please check if the CSV data file is available.", None, None
|
| 481 |
|
| 482 |
if not sequence:
|
| 483 |
+
return "❌ Error: Please provide a sequence.", None, None
|
| 484 |
|
| 485 |
if not (1 <= matching_percentage <= 99):
|
| 486 |
+
return "❌ Error: Matching percentage must be between 1 and 99.", None, None
|
| 487 |
|
| 488 |
# Validate inputs
|
| 489 |
sequence = sequence.strip()
|
| 490 |
if len(sequence) < 10:
|
| 491 |
+
return "❌ Error: Invalid or missing sequence. Must be ≥10 nucleotides.", None, None
|
| 492 |
|
| 493 |
# Find query sequence
|
| 494 |
if not analyzer.find_query_sequence(sequence):
|
| 495 |
+
return "❌ Error: Sequence not accepted.", None, None
|
| 496 |
|
| 497 |
# Find similar sequences
|
| 498 |
matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
|
| 499 |
|
| 500 |
if not matched_ids:
|
| 501 |
+
return f"❌ Error: No similar sequences found at {matching_percentage}% similarity threshold.", None, None
|
| 502 |
|
| 503 |
logging.info(f"Found {len(matched_ids)} similar sequences at {actual_percentage:.2f}% similarity")
|
| 504 |
|
|
|
|
| 508 |
# Create interactive tree
|
| 509 |
fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
|
| 510 |
|
| 511 |
+
# Save tree to temporary file
|
| 512 |
temp_dir = tempfile.gettempdir()
|
| 513 |
+
query_id = analyzer.query_id or f"query_{int(time.time())}"
|
| 514 |
+
tree_html_path = os.path.join(temp_dir, f'phylogenetic_tree_interactive_{query_id}.html')
|
| 515 |
+
fig.write_html(tree_html_path)
|
| 516 |
+
|
| 517 |
+
# Generate and save detailed report
|
| 518 |
+
report_html_content = analyzer.generate_detailed_report(matched_ids, actual_percentage)
|
| 519 |
+
report_html_path = os.path.join(temp_dir, f'detailed_report_{query_id}.html')
|
| 520 |
+
with open(report_html_path, 'w', encoding='utf-8') as f:
|
| 521 |
+
f.write(report_html_content)
|
| 522 |
|
| 523 |
success_msg = f"✅ Analysis complete! Found {len(matched_ids)} similar sequences with {actual_percentage:.2f}% average similarity."
|
| 524 |
|
| 525 |
+
return success_msg, tree_html_path, report_html_path
|
| 526 |
|
| 527 |
except Exception as e:
|
| 528 |
error_msg = f"❌ Error during analysis: {str(e)}"
|
| 529 |
logging.error(error_msg)
|
| 530 |
import traceback
|
| 531 |
logging.error(f"Full traceback: {traceback.format_exc()}")
|
| 532 |
+
return error_msg, None, None
|
|
|
|
| 533 |
# --- Keras Prediction ---
|
| 534 |
def predict_with_keras(sequence):
|
| 535 |
try:
|
|
|
|
| 583 |
try:
|
| 584 |
dna_input = read_fasta_file(fasta_file_obj)
|
| 585 |
if not dna_input:
|
| 586 |
+
return "Failed to read FASTA file", "", "", "", "", None, None, None, None, "No input sequence", "No input sequence"
|
| 587 |
return run_pipeline(dna_input, similarity_score, build_ml_tree)
|
| 588 |
except Exception as e:
|
| 589 |
error_msg = f"Pipeline error: {str(e)}"
|
| 590 |
logging.error(error_msg)
|
| 591 |
+
return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
|
| 592 |
|
| 593 |
def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
|
| 594 |
try:
|
| 595 |
# Clean input
|
| 596 |
dna_input = dna_input.upper().strip()
|
| 597 |
if not dna_input:
|
| 598 |
+
return "Empty input", "", "", "", "", None, None, None, None, "No input provided", "No input provided"
|
| 599 |
|
| 600 |
# Sanitize DNA sequence
|
| 601 |
if not re.match('^[ACTGN]+$', dna_input):
|
|
|
|
| 608 |
|
| 609 |
if boundary_model:
|
| 610 |
try:
|
| 611 |
+
result = boundary_model.predict_sequence(dna_input)
|
| 612 |
predictions = result['predictions']
|
| 613 |
probs = result['probabilities']['gene']
|
| 614 |
confidence = result['confidence']
|
|
|
|
| 634 |
keras_output = ""
|
| 635 |
if processed_sequence and len(processed_sequence) >= 6:
|
| 636 |
keras_prediction = predict_with_keras(processed_sequence)
|
|
|
|
| 637 |
keras_output = keras_prediction
|
| 638 |
else:
|
| 639 |
keras_output = "Skipped: sequence too short for F gene validation"
|
| 640 |
|
| 641 |
+
# Step 3: Maximum Likelihood Tree (Phylogenetic Placement)
|
| 642 |
aligned_file = None
|
| 643 |
phy_file = None
|
| 644 |
ml_tree_output = ""
|
|
|
|
| 653 |
aligned_file = ml_aligned
|
| 654 |
phy_file = ml_tree
|
| 655 |
else:
|
| 656 |
+
ml_tree_output = ml_message
|
| 657 |
|
| 658 |
except Exception as e:
|
| 659 |
ml_tree_output = f"❌ Phylogenetic placement failed: {str(e)}"
|
|
|
|
| 663 |
else:
|
| 664 |
ml_tree_output = "Phylogenetic placement skipped (not requested)"
|
| 665 |
|
| 666 |
+
# Step 4: Simplified Tree Analysis
|
| 667 |
+
tree_html_file = None
|
| 668 |
+
report_html_file = None
|
| 669 |
tree_html_content = "No tree generated"
|
| 670 |
+
report_html_content = "No report generated"
|
| 671 |
simplified_ml_output = ""
|
| 672 |
|
| 673 |
if analyzer and processed_sequence and len(processed_sequence) >= 10:
|
| 674 |
try:
|
| 675 |
logging.info(f"Starting simplified ML tree analysis with F gene sequence length: {len(processed_sequence)}")
|
| 676 |
|
| 677 |
+
# Updated call to analyze_sequence_for_tree
|
| 678 |
+
tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
|
| 679 |
|
| 680 |
+
if tree_html_path and os.path.exists(tree_html_path):
|
| 681 |
+
# Copy tree HTML to output directory
|
| 682 |
output_dir = "output"
|
| 683 |
os.makedirs(output_dir, exist_ok=True)
|
|
|
|
|
|
|
| 684 |
safe_seq_name = re.sub(r'[^a-zA-Z0-9_-]', '', processed_sequence[:20])
|
| 685 |
timestamp = str(int(time.time()))
|
| 686 |
+
tree_html_filename = f"tree_{safe_seq_name}_{timestamp}.html"
|
| 687 |
+
tree_html_final_path = os.path.join(output_dir, tree_html_filename)
|
| 688 |
+
shutil.copy2(tree_html_path, tree_html_final_path)
|
| 689 |
+
tree_html_file = tree_html_final_path
|
|
|
|
|
|
|
| 690 |
|
| 691 |
+
# Read tree HTML content for display
|
| 692 |
+
with open(tree_html_path, 'r', encoding='utf-8') as f:
|
| 693 |
tree_html_content = f.read()
|
| 694 |
|
| 695 |
+
# Clean up temporary tree file
|
| 696 |
+
try:
|
| 697 |
+
os.unlink(tree_html_path)
|
| 698 |
+
except:
|
| 699 |
+
pass
|
| 700 |
+
|
| 701 |
+
if report_html_path and os.path.exists(report_html_path):
|
| 702 |
+
# Copy report HTML to output directory
|
| 703 |
+
report_html_filename = f"report_{safe_seq_name}_{timestamp}.html"
|
| 704 |
+
report_html_final_path = os.path.join(output_dir, report_html_filename)
|
| 705 |
+
shutil.copy2(report_html_path, report_html_final_path)
|
| 706 |
+
report_html_file = report_html_final_path
|
| 707 |
+
|
| 708 |
+
# Read report HTML content for display
|
| 709 |
+
with open(report_html_path, 'r', encoding='utf-8') as f:
|
| 710 |
+
report_html_content = f.read()
|
| 711 |
|
| 712 |
+
# Clean up temporary report file
|
| 713 |
try:
|
| 714 |
+
os.unlink(report_html_path)
|
| 715 |
except:
|
| 716 |
pass
|
| 717 |
+
|
| 718 |
+
simplified_ml_output = tree_result
|
| 719 |
+
if not tree_html_file:
|
| 720 |
tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
|
| 721 |
+
if not report_html_file:
|
| 722 |
+
report_html_content = f"<div style='color: red;'>{tree_result}</div>"
|
| 723 |
+
|
| 724 |
+
logging.info(f"Tree analysis completed successfully: {tree_html_file}")
|
| 725 |
+
|
| 726 |
except Exception as e:
|
| 727 |
error_msg = f"❌ Tree analysis failed: {str(e)}"
|
| 728 |
simplified_ml_output = error_msg
|
| 729 |
tree_html_content = f"<div style='color: red;'>{error_msg}</div>"
|
| 730 |
+
report_html_content = f"<div style='color: red;'>{error_msg}</div>"
|
| 731 |
logging.error(f"Tree analysis failed: {e}")
|
| 732 |
else:
|
| 733 |
if not analyzer:
|
|
|
|
| 738 |
simplified_ml_output = "❌ No processed sequence available for tree analysis"
|
| 739 |
|
| 740 |
tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
|
| 741 |
+
report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
|
| 742 |
|
| 743 |
# Final summary
|
| 744 |
summary_output = f"""
|
|
|
|
| 749 |
✅ F GENE VALIDATION: {keras_output}
|
| 750 |
🌳 PHYLOGENETIC PLACEMENT: {'✅ Completed' if 'successfully' in ml_tree_output else '❌ ' + ('Skipped' if 'skipped' in ml_tree_output else 'Failed')}
|
| 751 |
🔬 TREE ANALYSIS: {'✅ Completed' if '✅' in simplified_ml_output else '❌ ' + ('Not available' if 'not available' in simplified_ml_output else 'Failed')}
|
| 752 |
+
📝 DETAILED REPORT: {'✅ Generated' if report_html_file else '❌ Not generated'}
|
| 753 |
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
| 754 |
"""
|
| 755 |
|
| 756 |
return (
|
| 757 |
+
boundary_output,
|
| 758 |
+
keras_output,
|
| 759 |
+
ml_tree_output,
|
| 760 |
+
simplified_ml_output,
|
| 761 |
+
summary_output,
|
| 762 |
+
aligned_file,
|
| 763 |
+
phy_file,
|
| 764 |
+
tree_html_file,
|
| 765 |
+
report_html_file,
|
| 766 |
+
tree_html_content,
|
| 767 |
+
report_html_content
|
| 768 |
)
|
| 769 |
|
| 770 |
except Exception as e:
|
|
|
|
| 772 |
logging.error(error_msg)
|
| 773 |
import traceback
|
| 774 |
logging.error(f"Full traceback: {traceback.format_exc()}")
|
| 775 |
+
return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
|
| 776 |
|
| 777 |
|
| 778 |
# --- Gradio Interface ---
|
| 779 |
def create_interface():
|
| 780 |
"""Create and configure the Gradio interface"""
|
| 781 |
|
|
|
|
| 782 |
custom_css = """
|
| 783 |
.gradio-container {
|
| 784 |
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
|
|
|
|
| 827 |
|
| 828 |
with gr.Blocks(css=custom_css, title="🧬 Advanced Gene Analysis Pipeline", theme=gr.themes.Soft()) as iface:
|
| 829 |
|
|
|
|
| 830 |
gr.HTML("""
|
| 831 |
<div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; margin-bottom: 20px;">
|
| 832 |
<h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: 700;">🧬 Advanced Gene Analysis Pipeline</h1>
|
|
|
|
| 834 |
</div>
|
| 835 |
""")
|
| 836 |
|
|
|
|
| 837 |
with gr.Accordion("📋 Instructions & Information", open=False):
|
| 838 |
gr.HTML("""
|
| 839 |
<div style="background: #f8fafc; padding: 20px; border-radius: 10px; border-left: 4px solid #3b82f6;">
|
|
|
|
| 843 |
<li><strong>Gene Validation:</strong> Validates extracted sequence as F gene using deep learning</li>
|
| 844 |
<li><strong>Phylogenetic Placement:</strong> Places sequence in reference phylogenetic tree (MAFFT + IQ-TREE)</li>
|
| 845 |
<li><strong>Interactive Tree Analysis:</strong> Creates interactive phylogenetic tree with similar sequences</li>
|
| 846 |
+
<li><strong>Detailed Report:</strong> Provides comprehensive analysis details</li>
|
| 847 |
</ol>
|
| 848 |
|
| 849 |
<h3 style="color: #1e40af;">📁 Input Requirements</h3>
|
|
|
|
| 861 |
</div>
|
| 862 |
""")
|
| 863 |
|
|
|
|
| 864 |
with gr.Row():
|
| 865 |
with gr.Column(scale=2):
|
| 866 |
gr.HTML("<h3 style='color: #1e40af; margin-bottom: 10px;'>📝 Sequence Input</h3>")
|
| 867 |
|
|
|
|
| 868 |
with gr.Tabs():
|
| 869 |
with gr.TabItem("✍️ Text Input"):
|
| 870 |
dna_input = gr.Textbox(
|
| 871 |
label="DNA Sequence",
|
| 872 |
placeholder="Enter your DNA sequence here (A, T, C, G, N)...",
|
| 873 |
lines=6,
|
| 874 |
+
value=""
|
|
|
|
| 875 |
)
|
| 876 |
|
| 877 |
with gr.TabItem("📁 File Upload"):
|
|
|
|
| 889 |
maximum=99.0,
|
| 890 |
value=95.0,
|
| 891 |
step=1.0,
|
| 892 |
+
label="Similarity Threshold (%)"
|
|
|
|
| 893 |
)
|
| 894 |
|
| 895 |
build_ml_tree = gr.Checkbox(
|
| 896 |
label="🌳 Enable Phylogenetic Placement",
|
| 897 |
+
value=False
|
|
|
|
| 898 |
)
|
| 899 |
|
|
|
|
| 900 |
with gr.Row():
|
| 901 |
analyze_text_btn = gr.Button(
|
| 902 |
"🚀 Analyze Text Input",
|
|
|
|
| 909 |
size="lg"
|
| 910 |
)
|
| 911 |
|
|
|
|
| 912 |
gr.HTML("<hr style='margin: 30px 0; border: none; height: 2px; background: linear-gradient(to right, #3b82f6, #8b5cf6);'>")
|
| 913 |
gr.HTML("<h2 style='color: #1e40af; text-align: center; margin-bottom: 20px;'>📊 Analysis Results</h2>")
|
| 914 |
|
|
|
|
| 915 |
with gr.Tabs():
|
| 916 |
with gr.TabItem("🎯 F Gene Extraction"):
|
| 917 |
f_gene_output = gr.Textbox(
|
| 918 |
label="Extracted F Gene Sequence",
|
| 919 |
+
lines=8
|
|
|
|
| 920 |
)
|
| 921 |
|
| 922 |
with gr.TabItem("✅ Gene Validation"):
|
| 923 |
keras_output = gr.Textbox(
|
| 924 |
label="F Gene Validation Result",
|
| 925 |
+
lines=3
|
|
|
|
| 926 |
)
|
| 927 |
|
| 928 |
with gr.TabItem("🌳 Phylogenetic Placement"):
|
| 929 |
ml_tree_output = gr.Textbox(
|
| 930 |
label="Phylogenetic Placement Results",
|
| 931 |
+
lines=10
|
|
|
|
| 932 |
)
|
| 933 |
|
| 934 |
with gr.TabItem("🔬 Interactive Tree"):
|
| 935 |
tree_analysis_output = gr.Textbox(
|
| 936 |
label="Tree Analysis Status",
|
| 937 |
+
lines=5
|
|
|
|
| 938 |
)
|
| 939 |
tree_html_display = gr.HTML(
|
| 940 |
label="Interactive Phylogenetic Tree",
|
| 941 |
value="<div style='text-align: center; color: #6b7280; padding: 40px;'>No tree generated yet. Run analysis to create interactive tree.</div>"
|
| 942 |
)
|
| 943 |
|
| 944 |
+
with gr.TabItem("📝 Detailed Report"):
|
| 945 |
+
report_html_display = gr.HTML(
|
| 946 |
+
label="Detailed Analysis Report",
|
| 947 |
+
value="<div style='text-align: center; color: #6b7280; padding: 40px;'>No report generated yet. Run analysis to create detailed report.</div>"
|
| 948 |
+
)
|
| 949 |
+
|
| 950 |
with gr.TabItem("📋 Summary"):
|
| 951 |
summary_output = gr.Textbox(
|
| 952 |
label="Analysis Summary",
|
| 953 |
+
lines=12
|
|
|
|
| 954 |
)
|
| 955 |
|
|
|
|
| 956 |
with gr.Accordion("💾 Download Results", open=False):
|
| 957 |
with gr.Row():
|
| 958 |
alignment_file = gr.File(
|
|
|
|
| 967 |
label="🌐 Download Interactive Tree (HTML)",
|
| 968 |
visible=True
|
| 969 |
)
|
| 970 |
+
report_file = gr.File(
|
| 971 |
+
label="📝 Download Detailed Report (HTML)",
|
| 972 |
+
visible=True
|
| 973 |
+
)
|
| 974 |
|
|
|
|
| 975 |
gr.HTML("""
|
| 976 |
<div style="text-align: center; padding: 20px; margin-top: 30px; border-top: 2px solid #e5e7eb; color: #6b7280;">
|
| 977 |
<p style="margin: 0;">🧬 Advanced Gene Analysis Pipeline | Powered by Deep Learning & Phylogenetics</p>
|
|
|
|
| 979 |
</div>
|
| 980 |
""")
|
| 981 |
|
|
|
|
| 982 |
analyze_text_btn.click(
|
| 983 |
fn=run_pipeline,
|
| 984 |
inputs=[dna_input, similarity_score, build_ml_tree],
|
|
|
|
| 991 |
alignment_file,
|
| 992 |
tree_file,
|
| 993 |
html_tree_file,
|
| 994 |
+
report_file,
|
| 995 |
+
tree_html_display,
|
| 996 |
+
report_html_display
|
| 997 |
]
|
| 998 |
)
|
| 999 |
|
|
|
|
| 1009 |
alignment_file,
|
| 1010 |
tree_file,
|
| 1011 |
html_tree_file,
|
| 1012 |
+
report_file,
|
| 1013 |
+
tree_html_display,
|
| 1014 |
+
report_html_display
|
| 1015 |
]
|
| 1016 |
)
|
| 1017 |
|
| 1018 |
return iface
|
|
|
|
| 1019 |
# --- Main Execution ---
|
| 1020 |
if __name__ == "__main__":
|
| 1021 |
try:
|