Spaces:

GGproject10
/

simplified_tree_AI

No application file

App Files Files Community

re-type committed on Jun 12, 2025

Commit

b03fbbd

verified ·

1 Parent(s): 4af0cff

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -80

app.py CHANGED Viewed

@@ -9,7 +9,6 @@ import logging
 import numpy as np
 from predictor import EnhancedGenePredictor
 from tensorflow.keras.models import load_model
-# Import the new analyzer
 from analyzer import PhylogeneticTreeAnalyzer
 import tempfile
 import shutil
@@ -23,6 +22,21 @@ from Bio.SeqRecord import SeqRecord
 import stat
 import time
 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 MAFFT_PATH = os.path.join(BASE_DIR, "binaries", "mafft", "mafft")  # Updated path
@@ -156,16 +170,15 @@ def check_tool_availability():
     mafft_available = False
     mafft_cmd = None
-    # Updated MAFFT candidates list based on your new API
     mafft_candidates = [
-        MAFFT_PATH,  # Primary path from your new API
         os.path.join(BASE_DIR, "binaries", "mafft", "mafft"),
-        os.path.join(BASE_DIR, "binaries", "mafft", "mafft.bat"),  # Windows fallback
         'mafft',
         '/usr/bin/mafft',
         '/usr/local/bin/mafft',
         os.path.join(BASE_DIR, "binaries", "mafft", "mafftdir", "bin", "mafft"),
-        # Add potential conda/miniconda paths
         os.path.expanduser("~/anaconda3/bin/mafft"),
         os.path.expanduser("~/miniconda3/bin/mafft"),
         "/opt/conda/bin/mafft",
@@ -176,9 +189,7 @@ def check_tool_availability():
         if not candidate:
             continue
-        # First check if file exists or is in PATH
         if os.path.exists(candidate) or shutil.which(candidate):
-            # Now test actual execution
             try:
                 test_cmd = [candidate, "--help"]
                 result = subprocess.run(
@@ -196,13 +207,13 @@ def check_tool_availability():
                 logging.debug(f"MAFFT test failed for {candidate}: {e}")
                 continue
-    # Check IQ-TREE with similar approach
     iqtree_available = False
     iqtree_cmd = None
     # Updated IQ-TREE candidates list
     iqtree_candidates = [
-        IQTREE_PATH,  # Primary path from your new API
         'iqtree2',
         'iqtree',
         'iqtree3',
@@ -210,11 +221,10 @@ def check_tool_availability():
         '/usr/local/bin/iqtree2',
         '/usr/bin/iqtree',
         '/usr/local/bin/iqtree',
-        'iqtree2.exe',  # Windows
-        'iqtree.exe',   # Windows
-        'iqtree3.exe',  # Windows
         os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree2"),
-        # Add potential conda paths
         os.path.expanduser("~/anaconda3/bin/iqtree2"),
         os.path.expanduser("~/miniconda3/bin/iqtree2"),
         "/opt/conda/bin/iqtree2",
@@ -303,7 +313,7 @@ def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
         if not os.path.exists(TREE_PATH):
             return False, f"Reference tree not found: {TREE_PATH}", None, None
-        # Save query sequence as FASTA (improved error handling)
         try:
             query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
             SeqIO.write([query_record], query_fasta, "fasta")
@@ -311,7 +321,7 @@ def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
         except Exception as e:
             return False, f"Error writing query sequence: {e}", None, None
-        # Step 1: Add query sequence to reference alignment using MAFFT (improved approach)
         logging.info("Adding query sequence to reference alignment...")
         try:
             with open(aligned_with_query, "w") as output_file:
@@ -319,7 +329,6 @@ def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
                     mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH
                 ], stdout=output_file, stderr=subprocess.PIPE, text=True, timeout=600, check=True)
-            # Verify alignment file was created and is not empty
             if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
                 return False, "MAFFT alignment failed: output file is empty", None, None
@@ -335,7 +344,7 @@ def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
         except Exception as e:
             return False, f"MAFFT execution error: {e}", None, None
-        # Step 2: Place sequence in phylogenetic tree using IQ-TREE (improved approach)
         logging.info("Placing sequence in phylogenetic tree...")
         try:
             iqtree_result = subprocess.run([
@@ -385,7 +394,6 @@ def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
         logging.error(f"Phylogenetic placement failed: {e}")
         return False, f"Phylogenetic placement failed: {str(e)}", None, None
     finally:
-        # Clean up temporary query file
         if 'query_fasta' in locals() and os.path.exists(query_fasta):
             try:
                 os.unlink(query_fasta)
@@ -397,10 +405,8 @@ def build_maximum_likelihood_tree(f_gene_sequence):
     Build maximum likelihood phylogenetic tree using the improved phylogenetic placement approach.
     """
     try:
-        # Check tool availability with enhanced detection
         mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
-        # Prepare status message
         status_msg = "🔍 Checking dependencies...\n"
         if not mafft_available:
@@ -413,7 +419,6 @@ def build_maximum_likelihood_tree(f_gene_sequence):
         else:
             status_msg += f"✅ IQ-TREE found and tested: {iqtree_cmd}\n"
-        # Check for reference files
         if not os.path.exists(ALIGNMENT_PATH):
             status_msg += f"❌ Reference alignment not found: {ALIGNMENT_PATH}\n"
         else:
@@ -424,7 +429,6 @@ def build_maximum_likelihood_tree(f_gene_sequence):
         else:
             status_msg += f"✅ Reference tree found\n"
-        # If any required component is missing, provide installation guide
         if not mafft_available or not iqtree_available:
             guide = install_dependencies_guide()
             return False, f"{status_msg}\n{guide}", None, None
@@ -434,7 +438,6 @@ def build_maximum_likelihood_tree(f_gene_sequence):
             status_msg += "Please ensure f_gene_sequences_aligned.fasta and f_gene_sequences.phy.treefile are available."
             return False, status_msg, None, None
-        # Perform phylogenetic placement using improved method
         logging.info("Starting phylogenetic placement...")
         placement_success, placement_message, aligned_file, tree_file = phylogenetic_placement(
             f_gene_sequence, mafft_cmd, iqtree_cmd
@@ -443,7 +446,6 @@ def build_maximum_likelihood_tree(f_gene_sequence):
         if placement_success:
             final_message = f"{status_msg}\n{placement_message}"
-            # Copy files to standard locations for compatibility
             if aligned_file and os.path.exists(aligned_file):
                 standard_aligned = "query_with_references_aligned.fasta"
                 shutil.copy2(aligned_file, standard_aligned)
@@ -463,19 +465,9 @@ def build_maximum_likelihood_tree(f_gene_sequence):
         logging.error(f"ML tree construction failed: {e}")
         return False, f"ML tree construction failed: {str(e)}", None, None
-# --- NEW Tree Analysis Function (Using the new analyzer API) ---
-# Replace this part in your analyze_sequence_for_tree function:
 def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tuple:
     """
     Analyze sequence and create phylogenetic tree and detailed report using the new analyzer API
-    Args:
-        sequence (str): DNA sequence to analyze
-        matching_percentage (float): Similarity threshold percentage
-    Returns:
-        tuple: (status_message, tree_html_path, report_html_path)
     """
     try:
         if not analyzer:
@@ -487,16 +479,13 @@ def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tupl
         if not (1 <= matching_percentage <= 99):
             return "❌ Error: Matching percentage must be between 1 and 99.", None, None
-        # Validate inputs
         sequence = sequence.strip()
         if len(sequence) < 10:
             return "❌ Error: Invalid or missing sequence. Must be ≥10 nucleotides.", None, None
-        # Find query sequence
         if not analyzer.find_query_sequence(sequence):
             return "❌ Error: Sequence not accepted.", None, None
-        # Find similar sequences
         matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
         if not matched_ids:
@@ -504,22 +493,17 @@ def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tupl
         logging.info(f"Found {len(matched_ids)} similar sequences at {actual_percentage:.2f}% similarity")
-        # Build tree structure
         analyzer.build_tree_structure_with_ml_safe(matched_ids)
-        # Create interactive tree
         fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
-        # Save tree to temporary file
         temp_dir = tempfile.gettempdir()
         query_id = analyzer.query_id or f"query_{int(time.time())}"
         tree_html_path = os.path.join(temp_dir, f'phylogenetic_tree_interactive_{query_id}.html')
         fig.write_html(tree_html_path)
-        # Ensure the analyzer has the correct user input threshold for the report
         analyzer.matching_percentage = matching_percentage
-        # Generate detailed report - FIXED: Only pass the two required parameters
         report_success = analyzer.generate_detailed_report(matched_ids, actual_percentage)
         report_html_path = None
         if report_success:
@@ -540,7 +524,8 @@ def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tupl
         logging.error(error_msg)
         import traceback
         logging.error(f"Full traceback: {traceback.format_exc()}")
-        return error_msg, None, None# --- Keras Prediction ---
 def predict_with_keras(sequence):
     try:
         if not keras_model or not kmer_to_index:
@@ -549,32 +534,26 @@ def predict_with_keras(sequence):
         if len(sequence) < 6:
             return "Skipped: sequence too short for F gene validation (minimum 6 nucleotides required)."
-        # Generate k-mers
         kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
         indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
-        # Prepare input
         input_arr = np.array([indices])
         prediction = keras_model.predict(input_arr, verbose=0)[0]
-        # Assume the last value is the F gene probability (adjust index if model outputs differ)
-        f_gene_prob = prediction[-1]  # Take the probability of the F gene class
-        # Convert to percentage with a buffer (e.g., add 5% to account for minor mismatches)
-        percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))  # Ensure 0-100% range
         return f"{percentage}% F gene"
     except Exception as e:
         logging.error(f"Keras prediction failed: {e}")
         return f"Keras prediction failed: {str(e)}"
-# --- FASTA Reader ---
 def read_fasta_file(file_obj):
     try:
         if file_obj is None:
             return ""
-        # Handle file object
         if hasattr(file_obj, 'name'):
             with open(file_obj.name, "r") as f:
                 content = f.read()
@@ -588,7 +567,21 @@ def read_fasta_file(file_obj):
         logging.error(f"Failed to read FASTA file: {e}")
         return ""
-# --- Full Pipeline ---
 def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
     try:
         dna_input = read_fasta_file(fasta_file_obj)
@@ -602,17 +595,14 @@ def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
 def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
     try:
-        # Clean input
         dna_input = dna_input.upper().strip()
         if not dna_input:
             return "Empty input", "", "", "", "", None, None, None, None, "No input provided"
-        # Sanitize DNA sequence
         if not re.match('^[ACTGN]+$', dna_input):
             dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
             logging.info("DNA sequence sanitized")
-        # Step 1: Boundary Prediction - Extract F gene sequence
         processed_sequence = dna_input
         boundary_output = ""
@@ -640,7 +630,6 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
             boundary_output = f"Boundary model not available. Using original input: {len(dna_input)} bp"
             processed_sequence = dna_input
-        # Step 2: Keras Prediction (F gene validation)
         keras_output = ""
         if processed_sequence and len(processed_sequence) >= 6:
             keras_prediction = predict_with_keras(processed_sequence)
@@ -648,7 +637,6 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
         else:
             keras_output = "Skipped: sequence too short for F gene validation"
-        # Step 3: Maximum Likelihood Tree (Phylogenetic Placement)
         aligned_file = None
         phy_file = None
         ml_tree_output = ""
@@ -673,7 +661,6 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
         else:
             ml_tree_output = "Phylogenetic placement skipped (not requested)"
-        # Step 4: Simplified Tree Analysis
         tree_html_file = None
         report_html_file = None
         tree_html_content = "No tree generated"
@@ -684,11 +671,9 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
             try:
                 logging.info(f"Starting simplified ML tree analysis with F gene sequence length: {len(processed_sequence)}")
-                # Updated call to analyze_sequence_for_tree
                 tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
                 if tree_html_path and os.path.exists(tree_html_path):
-                    # Copy tree HTML to output directory
                     output_dir = "output"
                     os.makedirs(output_dir, exist_ok=True)
                     safe_seq_name = re.sub(r'[^a-zA-Z0-9_-]', '', processed_sequence[:20])
@@ -698,28 +683,23 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
                     shutil.copy2(tree_html_path, tree_html_final_path)
                     tree_html_file = tree_html_final_path
-                    # Read tree HTML content for display
                     with open(tree_html_path, 'r', encoding='utf-8') as f:
                         tree_html_content = f.read()
-                    # Clean up temporary tree file
                     try:
                         os.unlink(tree_html_path)
                     except:
                         pass
                 if report_html_path and os.path.exists(report_html_path):
-                    # Copy report HTML to output directory
                     report_html_filename = f"report_{safe_seq_name}_{timestamp}.html"
                     report_html_final_path = os.path.join(output_dir, report_html_filename)
                     shutil.copy2(report_html_path, report_html_final_path)
                     report_html_file = report_html_final_path
-                    # Read report HTML content for display
                     with open(report_html_path, 'r', encoding='utf-8') as f:
                         report_html_content = f.read()
-                    # Clean up temporary report file
                     try:
                         os.unlink(report_html_path)
                     except:
@@ -750,7 +730,6 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
             tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
             report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
-        # Final summary
         summary_output = f"""
 🧬 ANALYSIS SUMMARY:
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
@@ -784,9 +763,82 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
         logging.error(f"Full traceback: {traceback.format_exc()}")
         return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
-# --- Gradio Interface ---
-def create_interface():
     """Create and configure the Gradio interface"""
     custom_css = """
@@ -1026,10 +1078,15 @@ def create_interface():
         )
     return iface
 # --- Main Execution ---
 if __name__ == "__main__":
     try:
-        # Print startup information
         print("🧬 Advanced Gene Analysis Pipeline")
         print("=" * 50)
         print(f"Base Directory: {BASE_DIR}")
@@ -1037,24 +1094,23 @@ if __name__ == "__main__":
         print(f"Keras Model: {'✅ Loaded' if keras_model else '❌ Not Available'}")
         print(f"Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Not Available'}")
-        # Check tool availability
         mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
         print(f"MAFFT: {'✅ Available' if mafft_available else '❌ Not Found'}")
         print(f"IQ-TREE: {'✅ Available' if iqtree_available else '❌ Not Found'}")
         if not mafft_available or not iqtree_available:
-            print("\n⚠️  Warning: Some phylogenetic tools are missing!")
             print("Install with: conda install -c bioconda mafft iqtree")
-        print("\n🚀 Starting Gradio interface...")
-        # Create and launch interface
-        iface = create_interface()
-        iface.launch(
-            share=True,            # Set to True if you want to create a public link
-            server_name="0.0.0.0",  # Allow connections from any IP
-            server_port=7860,       # Default Gradio port
-            show_error=True,         # Show errors in the interface
         )
     except Exception as e:
@@ -1062,4 +1118,4 @@ if __name__ == "__main__":
         import traceback
         print(f"Error: {e}")
         print(f"Traceback: {traceback.format_exc()}")
-        sys.exit(1)

 import numpy as np
 from predictor import EnhancedGenePredictor
 from tensorflow.keras.models import load_model
 from analyzer import PhylogeneticTreeAnalyzer
 import tempfile
 import shutil
 import stat
 import time
+# FastAPI imports
+from fastapi import FastAPI, File, UploadFile, HTTPException, Form
+from fastapi.responses import HTMLResponse, FileResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+from typing import Optional
+import uvicorn
+# --- FastAPI App Setup ---
+app = FastAPI(  # application object the @app.get / @app.post routes in this file register on
+    title="🧬 Advanced Gene Analysis Pipeline",
+    description="F Gene Boundary Detection • Validation • Phylogenetic Analysis",
+    version="1.0.0"
+)
 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 MAFFT_PATH = os.path.join(BASE_DIR, "binaries", "mafft", "mafft")  # Updated path
     mafft_available = False
     mafft_cmd = None
+    # Updated MAFFT candidates list
     mafft_candidates = [
+        MAFFT_PATH,
         os.path.join(BASE_DIR, "binaries", "mafft", "mafft"),
+        os.path.join(BASE_DIR, "binaries", "mafft", "mafft.bat"),
         'mafft',
         '/usr/bin/mafft',
         '/usr/local/bin/mafft',
         os.path.join(BASE_DIR, "binaries", "mafft", "mafftdir", "bin", "mafft"),
         os.path.expanduser("~/anaconda3/bin/mafft"),
         os.path.expanduser("~/miniconda3/bin/mafft"),
         "/opt/conda/bin/mafft",
         if not candidate:
             continue
         if os.path.exists(candidate) or shutil.which(candidate):
             try:
                 test_cmd = [candidate, "--help"]
                 result = subprocess.run(
                 logging.debug(f"MAFFT test failed for {candidate}: {e}")
                 continue
+    # Check IQ-TREE
     iqtree_available = False
     iqtree_cmd = None
     # Updated IQ-TREE candidates list
     iqtree_candidates = [
+        IQTREE_PATH,
         'iqtree2',
         'iqtree',
         'iqtree3',
         '/usr/local/bin/iqtree2',
         '/usr/bin/iqtree',
         '/usr/local/bin/iqtree',
+        'iqtree2.exe',
+        'iqtree.exe',
+        'iqtree3.exe',
         os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree2"),
         os.path.expanduser("~/anaconda3/bin/iqtree2"),
         os.path.expanduser("~/miniconda3/bin/iqtree2"),
         "/opt/conda/bin/iqtree2",
         if not os.path.exists(TREE_PATH):
             return False, f"Reference tree not found: {TREE_PATH}", None, None
+        # Save query sequence as FASTA
         try:
             query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
             SeqIO.write([query_record], query_fasta, "fasta")
         except Exception as e:
             return False, f"Error writing query sequence: {e}", None, None
+        # Step 1: Add query sequence to reference alignment using MAFFT
         logging.info("Adding query sequence to reference alignment...")
         try:
             with open(aligned_with_query, "w") as output_file:
                     mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH
                 ], stdout=output_file, stderr=subprocess.PIPE, text=True, timeout=600, check=True)
             if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
                 return False, "MAFFT alignment failed: output file is empty", None, None
         except Exception as e:
             return False, f"MAFFT execution error: {e}", None, None
+        # Step 2: Place sequence in phylogenetic tree using IQ-TREE
         logging.info("Placing sequence in phylogenetic tree...")
         try:
             iqtree_result = subprocess.run([
         logging.error(f"Phylogenetic placement failed: {e}")
         return False, f"Phylogenetic placement failed: {str(e)}", None, None
     finally:
         if 'query_fasta' in locals() and os.path.exists(query_fasta):
             try:
                 os.unlink(query_fasta)
     Build maximum likelihood phylogenetic tree using the improved phylogenetic placement approach.
     """
     try:
         mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
         status_msg = "🔍 Checking dependencies...\n"
         if not mafft_available:
         else:
             status_msg += f"✅ IQ-TREE found and tested: {iqtree_cmd}\n"
         if not os.path.exists(ALIGNMENT_PATH):
             status_msg += f"❌ Reference alignment not found: {ALIGNMENT_PATH}\n"
         else:
         else:
             status_msg += f"✅ Reference tree found\n"
         if not mafft_available or not iqtree_available:
             guide = install_dependencies_guide()
             return False, f"{status_msg}\n{guide}", None, None
             status_msg += "Please ensure f_gene_sequences_aligned.fasta and f_gene_sequences.phy.treefile are available."
             return False, status_msg, None, None
         logging.info("Starting phylogenetic placement...")
         placement_success, placement_message, aligned_file, tree_file = phylogenetic_placement(
             f_gene_sequence, mafft_cmd, iqtree_cmd
         if placement_success:
             final_message = f"{status_msg}\n{placement_message}"
             if aligned_file and os.path.exists(aligned_file):
                 standard_aligned = "query_with_references_aligned.fasta"
                 shutil.copy2(aligned_file, standard_aligned)
         logging.error(f"ML tree construction failed: {e}")
         return False, f"ML tree construction failed: {str(e)}", None, None
 def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tuple:
     """
     Analyze sequence and create phylogenetic tree and detailed report using the new analyzer API
     """
     try:
         if not analyzer:
         if not (1 <= matching_percentage <= 99):
             return "❌ Error: Matching percentage must be between 1 and 99.", None, None
         sequence = sequence.strip()
         if len(sequence) < 10:
             return "❌ Error: Invalid or missing sequence. Must be ≥10 nucleotides.", None, None
         if not analyzer.find_query_sequence(sequence):
             return "❌ Error: Sequence not accepted.", None, None
         matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
         if not matched_ids:
         logging.info(f"Found {len(matched_ids)} similar sequences at {actual_percentage:.2f}% similarity")
         analyzer.build_tree_structure_with_ml_safe(matched_ids)
         fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
         temp_dir = tempfile.gettempdir()
         query_id = analyzer.query_id or f"query_{int(time.time())}"
         tree_html_path = os.path.join(temp_dir, f'phylogenetic_tree_interactive_{query_id}.html')
         fig.write_html(tree_html_path)
         analyzer.matching_percentage = matching_percentage
         report_success = analyzer.generate_detailed_report(matched_ids, actual_percentage)
         report_html_path = None
         if report_success:
         logging.error(error_msg)
         import traceback
         logging.error(f"Full traceback: {traceback.format_exc()}")
+        return error_msg, None, None
 def predict_with_keras(sequence):
     try:
         if not keras_model or not kmer_to_index:
         if len(sequence) < 6:
             return "Skipped: sequence too short for F gene validation (minimum 6 nucleotides required)."
         kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
         indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
         input_arr = np.array([indices])
         prediction = keras_model.predict(input_arr, verbose=0)[0]
+        f_gene_prob = prediction[-1]
+        percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
         return f"{percentage}% F gene"
     except Exception as e:
         logging.error(f"Keras prediction failed: {e}")
         return f"Keras prediction failed: {str(e)}"
 def read_fasta_file(file_obj):
     try:
         if file_obj is None:
             return ""
         if hasattr(file_obj, 'name'):
             with open(file_obj.name, "r") as f:
                 content = f.read()
         logging.error(f"Failed to read FASTA file: {e}")
         return ""
+# --- Pydantic Models for FastAPI ---
+class AnalysisRequest(BaseModel):  # JSON request body accepted by the POST /analyze endpoint
+    sequence: str  # handed to run_pipeline as the DNA input
+    similarity_score: float = 95.0  # forwarded to run_pipeline; 95.0 when the client omits it
+    build_ml_tree: bool = False  # forwarded to run_pipeline; False when the client omits it
+class AnalysisResponse(BaseModel):  # payload built by the /analyze and /analyze-file endpoints
+    boundary_output: str  # element 0 of the pipeline result
+    keras_output: str  # element 1 of the pipeline result
+    ml_tree_output: str  # element 2 of the pipeline result
+    tree_analysis_output: str  # element 3 of the pipeline result
+    summary_output: str  # element 4 of the pipeline result
+    success: bool  # the endpoints visible in this diff always pass True
+    error_message: Optional[str] = None  # not populated by the endpoints visible in this diff
 def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
     try:
         dna_input = read_fasta_file(fasta_file_obj)
 def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
     try:
         dna_input = dna_input.upper().strip()
         if not dna_input:
             return "Empty input", "", "", "", "", None, None, None, None, "No input provided"
         if not re.match('^[ACTGN]+$', dna_input):
             dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
             logging.info("DNA sequence sanitized")
         processed_sequence = dna_input
         boundary_output = ""
             boundary_output = f"Boundary model not available. Using original input: {len(dna_input)} bp"
             processed_sequence = dna_input
         keras_output = ""
         if processed_sequence and len(processed_sequence) >= 6:
             keras_prediction = predict_with_keras(processed_sequence)
         else:
             keras_output = "Skipped: sequence too short for F gene validation"
         aligned_file = None
         phy_file = None
         ml_tree_output = ""
         else:
             ml_tree_output = "Phylogenetic placement skipped (not requested)"
         tree_html_file = None
         report_html_file = None
         tree_html_content = "No tree generated"
             try:
                 logging.info(f"Starting simplified ML tree analysis with F gene sequence length: {len(processed_sequence)}")
                 tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
                 if tree_html_path and os.path.exists(tree_html_path):
                     output_dir = "output"
                     os.makedirs(output_dir, exist_ok=True)
                     safe_seq_name = re.sub(r'[^a-zA-Z0-9_-]', '', processed_sequence[:20])
                     shutil.copy2(tree_html_path, tree_html_final_path)
                     tree_html_file = tree_html_final_path
                     with open(tree_html_path, 'r', encoding='utf-8') as f:
                         tree_html_content = f.read()
                     try:
                         os.unlink(tree_html_path)
                     except:
                         pass
                 if report_html_path and os.path.exists(report_html_path):
                     report_html_filename = f"report_{safe_seq_name}_{timestamp}.html"
                     report_html_final_path = os.path.join(output_dir, report_html_filename)
                     shutil.copy2(report_html_path, report_html_final_path)
                     report_html_file = report_html_final_path
                     with open(report_html_path, 'r', encoding='utf-8') as f:
                         report_html_content = f.read()
                     try:
                         os.unlink(report_html_path)
                     except:
             tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
             report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
         summary_output = f"""
 🧬 ANALYSIS SUMMARY:
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
         logging.error(f"Full traceback: {traceback.format_exc()}")
         return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
+# --- FastAPI Endpoints ---
+@app.get("/")
+async def root():
+    # Landing payload: a greeting plus the path of the interactive API docs.
+    # (Kept as a comment rather than a docstring so the generated API docs stay unchanged.)
+    landing_payload = {"message": "🧬 Advanced Gene Analysis Pipeline API", "docs": "/docs"}
+    return landing_payload
+@app.post("/analyze", response_model=AnalysisResponse)
+async def analyze_sequence(request: AnalysisRequest):
+    """
+    Analyze a DNA sequence through the complete pipeline
+    """
+    # Response fields, listed in the order they are read from the pipeline result.
+    output_fields = (
+        "boundary_output",
+        "keras_output",
+        "ml_tree_output",
+        "tree_analysis_output",
+        "summary_output",
+    )
+    try:
+        pipeline_result = run_pipeline(
+            request.sequence,
+            request.similarity_score,
+            request.build_ml_tree
+        )
+        # NOTE(review): success is reported as True unconditionally; the visible
+        # error paths of run_pipeline return message tuples instead of raising,
+        # so a failed analysis may still reach this line — confirm.
+        outputs = {
+            field: pipeline_result[position]
+            for position, field in enumerate(output_fields)
+        }
+        return AnalysisResponse(success=True, **outputs)
+    except Exception as e:
+        # Any failure (including a short result tuple) is surfaced as HTTP 500.
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/analyze-file")
+async def analyze_file(
+    file: UploadFile = File(...),
+    similarity_score: float = Form(95.0),
+    build_ml_tree: bool = Form(False)
+):
+    """
+    Analyze a FASTA file through the complete pipeline
+    """
+    # Path of the on-disk copy of the upload; stays None until that copy exists.
+    temp_file_path = None
+    try:
+        # Save uploaded file temporarily
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta") as temp_file:
+            # Record the path first so cleanup still runs if reading or writing fails.
+            temp_file_path = temp_file.name
+            content = await file.read()
+            temp_file.write(content)
+        # NOTE(review): a plain path string is passed here, while read_fasta_file
+        # branches on hasattr(file_obj, 'name'); its other branch is not visible
+        # in this diff — confirm that it accepts a path.
+        result = run_pipeline_from_file(
+            temp_file_path,
+            similarity_score,
+            build_ml_tree
+        )
+        return AnalysisResponse(
+            boundary_output=result[0],
+            keras_output=result[1],
+            ml_tree_output=result[2],
+            tree_analysis_output=result[3],
+            summary_output=result[4],
+            success=True
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+    finally:
+        # Clean up on every path: the file is created with delete=False, so it
+        # would otherwise be left behind whenever the pipeline raises.
+        if temp_file_path is not None:
+            try:
+                os.unlink(temp_file_path)
+            except OSError as cleanup_error:
+                # Best effort: a failed cleanup must not mask the analysis result.
+                logging.warning(f"Could not remove temporary upload {temp_file_path}: {cleanup_error}")
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    # Each flag is True when the corresponding module-level component is not None.
+    component_flags = {
+        "boundary_model": boundary_model is not None,
+        "keras_model": keras_model is not None,
+        "tree_analyzer": analyzer is not None,
+    }
+    # "status" stays first so the key order of the response is unchanged.
+    return {"status": "healthy", **component_flags}
+# --- Create Gradio Interface ---
+def create_gradio_interface():
     """Create and configure the Gradio interface"""
     custom_css = """
         )
     return iface
+# --- Mount Gradio App in FastAPI ---
+gradio_app = create_gradio_interface()  # module-level statement, so the interface is built at import time
+app = gr.mount_gradio_app(app, gradio_app, path="/gradio")  # rebinds `app`; the Gradio UI is mounted under /gradio
 # --- Main Execution ---
 if __name__ == "__main__":
     try:
         print("🧬 Advanced Gene Analysis Pipeline")
         print("=" * 50)
         print(f"Base Directory: {BASE_DIR}")
         print(f"Keras Model: {'✅ Loaded' if keras_model else '❌ Not Available'}")
         print(f"Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Not Available'}")
         mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
         print(f"MAFFT: {'✅ Available' if mafft_available else '❌ Not Found'}")
         print(f"IQ-TREE: {'✅ Available' if iqtree_available else '❌ Not Found'}")
         if not mafft_available or not iqtree_available:
+            print("\n⚠️ Warning: Some phylogenetic tools are missing!")
             print("Install with: conda install -c bioconda mafft iqtree")
+        print("\n🚀 Starting FastAPI + Gradio server...")
+        print("📖 API Documentation: http://localhost:8000/docs")
+        print("🎨 Gradio Interface: http://localhost:8000/gradio")
+        uvicorn.run(
+            app,
+            host="0.0.0.0",
+            port=8000,
+            reload=False  # Set to True for development
         )
     except Exception as e:
         import traceback
         print(f"Error: {e}")
         print(f"Traceback: {traceback.format_exc()}")
+        sys.exit(1)