Spaces:

GGproject10
/

simplified_tree_AI

No application file

App Files Files Community

re-type commited on Jun 10, 2025

Commit

8c08884

verified ·

1 Parent(s): 9ab9398

Update app.py

Browse files

Files changed (1) hide show

app.py +826 -275

app.py CHANGED Viewed

@@ -25,7 +25,7 @@ import time
 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-MAFFT_PATH = os.path.join(BASE_DIR, "binaries", "mafft", "mafft")
 IQTREE_PATH = os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
 ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
 TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
@@ -35,17 +35,20 @@ os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
 # --- Logging ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# --- Load Models (same as your original code) ---
 model_repo = "GGproject10/best_boundary_aware_model"
 csv_path = "f cleaned.csv"
 hf_token = os.getenv("HF_TOKEN")
 boundary_model = None
 keras_model = None
 kmer_to_index = None
-analyzer = None
-# [Include all your model loading code here - same as original]
 try:
     boundary_path = hf_hub_download(
         repo_id=model_repo,
@@ -55,9 +58,12 @@ try:
     if os.path.exists(boundary_path):
         boundary_model = GenePredictor(boundary_path)
         logging.info("Boundary model loaded successfully from Hugging Face Hub.")
 except Exception as e:
     logging.error(f"Failed to load boundary model from HF Hub: {e}")
 try:
     keras_path = hf_hub_download(
         repo_id=model_repo,
@@ -74,11 +80,56 @@ try:
         keras_model = load_model(keras_path)
         with open(kmer_path, "rb") as f:
             kmer_to_index = pickle.load(f)
-        logging.info("Keras model and k-mer index loaded successfully.")
 except Exception as e:
     logging.error(f"Failed to load Keras model from HF Hub: {e}")
-# [Include all your helper functions - same as original]
 def setup_binary_permissions():
     """Set executable permissions on MAFFT and IQ-TREE binaries"""
     binaries = [MAFFT_PATH, IQTREE_PATH]
@@ -86,92 +137,428 @@ def setup_binary_permissions():
     for binary in binaries:
         if os.path.exists(binary):
             try:
                 current_mode = os.stat(binary).st_mode
                 os.chmod(binary, current_mode | stat.S_IEXEC)
                 logging.info(f"Set executable permission on {binary}")
             except Exception as e:
                 logging.warning(f"Failed to set executable permission on {binary}: {e}")
 def check_tool_availability():
-    """Enhanced check for MAFFT and IQ-TREE availability"""
     setup_binary_permissions()
     # Check MAFFT
     mafft_available = False
     mafft_cmd = None
     mafft_candidates = [
-        MAFFT_PATH,
         'mafft',
         '/usr/bin/mafft',
         '/usr/local/bin/mafft',
     ]
     for candidate in mafft_candidates:
         if not candidate:
             continue
         if os.path.exists(candidate) or shutil.which(candidate):
             try:
-                result = subprocess.run([candidate, "--help"], capture_output=True, text=True, timeout=10)
                 if result.returncode == 0 or "mafft" in result.stderr.lower():
                     mafft_available = True
                     mafft_cmd = candidate
                     break
-            except:
                 continue
-    # Check IQ-TREE
     iqtree_available = False
     iqtree_cmd = None
     iqtree_candidates = [
-        IQTREE_PATH,
         'iqtree2',
         'iqtree',
         '/usr/bin/iqtree2',
         '/usr/local/bin/iqtree2',
     ]
     for candidate in iqtree_candidates:
         if not candidate:
             continue
         if os.path.exists(candidate) or shutil.which(candidate):
             try:
-                result = subprocess.run([candidate, "--help"], capture_output=True, text=True, timeout=10)
                 if result.returncode == 0 or "iqtree" in result.stderr.lower():
                     iqtree_available = True
                     iqtree_cmd = candidate
                     break
-            except:
                 continue
     return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
 def predict_with_keras(sequence):
     try:
         if not keras_model or not kmer_to_index:
-            return f"Keras model not available."
         if len(sequence) < 6:
-            return "Sequence too short for F gene validation."
         kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
         indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
         input_arr = np.array([indices])
         prediction = keras_model.predict(input_arr, verbose=0)[0]
-        f_gene_prob = prediction[-1]
-        percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
         return f"{percentage}% F gene"
     except Exception as e:
         return f"Keras prediction failed: {str(e)}"
 def read_fasta_file(file_obj):
     try:
         if file_obj is None:
             return ""
         if hasattr(file_obj, 'name'):
             with open(file_obj.name, "r") as f:
                 content = f.read()
@@ -185,58 +572,7 @@ def read_fasta_file(file_obj):
         logging.error(f"Failed to read FASTA file: {e}")
         return ""
-# API-friendly wrapper functions
-def api_analyze_sequence(sequence: str, similarity_threshold: float = 95.0, enable_phylogeny: bool = False):
-    """
-    API endpoint for analyzing a DNA sequence
-    Returns structured data suitable for API consumption
-    """
-    try:
-        results = run_pipeline(sequence, similarity_threshold, enable_phylogeny)
-        return {
-            "status": "success",
-            "input_length": len(sequence),
-            "f_gene_sequence": results[0] if results[0] else "",
-            "f_gene_validation": results[1] if results[1] else "",
-            "phylogenetic_placement": results[2] if results[2] else "",
-            "tree_analysis": results[3] if results[3] else "",
-            "summary": results[4] if results[4] else "",
-            "has_alignment_file": results[5] is not None,
-            "has_tree_file": results[6] is not None,
-            "has_html_tree": results[7] is not None
-        }
-    except Exception as e:
-        return {
-            "status": "error",
-            "error_message": str(e),
-            "input_length": len(sequence) if sequence else 0
-        }
-def api_analyze_fasta(file_content: str, similarity_threshold: float = 95.0, enable_phylogeny: bool = False):
-    """
-    API endpoint for analyzing a FASTA file content
-    """
-    try:
-        # Parse FASTA content
-        lines = file_content.strip().split("\n")
-        seq_lines = [line.strip() for line in lines if not line.startswith(">")]
-        sequence = ''.join(seq_lines)
-        if not sequence:
-            return {
-                "status": "error",
-                "error_message": "No valid sequence found in FASTA content"
-            }
-        return api_analyze_sequence(sequence, similarity_threshold, enable_phylogeny)
-    except Exception as e:
-        return {
-            "status": "error",
-            "error_message": f"FASTA parsing error: {str(e)}"
-        }
-# Main pipeline function (simplified version of your original)
 def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
     try:
         # Clean input
@@ -247,9 +583,10 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
         # Sanitize DNA sequence
         if not re.match('^[ACTGN]+$', dna_input):
             dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
-        # Step 1: Boundary Prediction
-        processed_sequence = dna_input
         boundary_output = ""
         if boundary_model:
@@ -257,257 +594,471 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
                 predictions, probs, confidence = boundary_model.predict(dna_input)
                 regions = boundary_model.extract_gene_regions(predictions, dna_input)
                 if regions:
-                    processed_sequence = regions[0]["sequence"]
-                    boundary_output = processed_sequence
                 else:
-                    boundary_output = "No F gene regions found"
             except Exception as e:
                 boundary_output = f"Boundary model error: {str(e)}"
         else:
-            boundary_output = f"Boundary model not available. Using input: {len(dna_input)} bp"
-        # Step 2: Keras Prediction
         keras_output = ""
         if processed_sequence and len(processed_sequence) >= 6:
-            keras_output = predict_with_keras(processed_sequence)
         else:
-            keras_output = "Sequence too short for validation"
-        # Step 3: ML Tree (simplified)
-        ml_tree_output = "Phylogenetic analysis skipped"
-        if build_ml_tree:
-            mafft_available, iqtree_available, _, _ = check_tool_availability()
-            if mafft_available and iqtree_available:
-                ml_tree_output = "Phylogenetic tools available - analysis would run here"
             else:
-                ml_tree_output = "Phylogenetic tools not available"
-        # Step 4: Tree Analysis (simplified)
-        tree_analysis_output = "Tree analysis not implemented in this version"
-        # Summary
-        summary_output = f"""
-ANALYSIS SUMMARY:
-Input: {len(dna_input)} bp
-F Gene: {len(processed_sequence)} bp
-Validation: {keras_output}
-Phylogeny: {ml_tree_output}
 """
         return (
             boundary_output,
-            keras_output,
             ml_tree_output,
-            tree_analysis_output,
-            summary_output,
-            None,  # alignment_file
-            None,  # tree_file
-            None,  # html_file
-            "No tree visualization available"
         )
     except Exception as e:
-        error_msg = f"Pipeline error: {str(e)}"
-        return error_msg, "", "", "", "", None, None, None, error_msg
-def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
     try:
-        dna_input = read_fasta_file(fasta_file_obj)
-        if not dna_input:
-            return "Failed to read FASTA file", "", "", "", "", None, None, None, "No sequence"
-        return run_pipeline(dna_input, similarity_score, build_ml_tree)
     except Exception as e:
-        error_msg = f"File pipeline error: {str(e)}"
-        return error_msg, "", "", "", "", None, None, None, error_msg
 def create_interface():
-    """Create Gradio interface with proper API configuration"""
-    with gr.Blocks(title="🧬 Gene Analysis Pipeline API") as iface:
-        gr.HTML("""
-        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;">
-            <h1 style="margin: 0; font-size: 2.5em;">🧬 Gene Analysis Pipeline</h1>
-            <p style="margin: 10px 0 0 0; font-size: 1.2em; opacity: 0.9;">Advanced DNA Sequence Analysis with API Access</p>
-        </div>
         """)
-        # API Information
-        with gr.Accordion("🔗 API Information", open=True):
-            gr.HTML("""
-            <div style="background: #f8fafc; padding: 15px; border-radius: 8px; border-left: 4px solid #3b82f6;">
-                <h3 style="color: #1e40af; margin-top: 0;">API Endpoints Available:</h3>
-                <ul style="line-height: 1.8;">
-                    <li><strong>POST /api/analyze_text</strong> - Analyze DNA sequence from text input</li>
-                    <li><strong>POST /api/analyze_file</strong> - Analyze DNA sequence from FASTA file</li>
-                    <li><strong>POST /api/api_analyze_sequence</strong> - Structured API response for sequence analysis</li>
-                    <li><strong>POST /api/api_analyze_fasta</strong> - Structured API response for FASTA content</li>
-                </ul>
-                <p style="margin: 15px 0 0 0; padding: 10px; background: #dbeafe; border-radius: 5px;">
-                    <strong>📝 Note:</strong> Access API documentation at <code>/docs</code> when the server is running
-                </p>
-            </div>
-            """)
-        # Input Section
         with gr.Row():
             with gr.Column(scale=2):
-                with gr.Tabs():
-                    with gr.TabItem("✍️ Text Input"):
-                        dna_input = gr.Textbox(
-                            label="DNA Sequence",
-                            placeholder="Enter DNA sequence (A, T, C, G, N)...",
-                            lines=6,
-                            info="Input your DNA sequence for analysis"
-                        )
-                    with gr.TabItem("📁 File Upload"):
-                        fasta_file = gr.File(
-                            label="Upload FASTA File",
-                            file_types=[".fasta", ".fa", ".fas", ".txt"]
-                        )
-            with gr.Column(scale=1):
-                similarity_score = gr.Slider(
-                    minimum=70.0,
-                    maximum=99.0,
-                    value=95.0,
-                    step=1.0,
-                    label="Similarity Threshold (%)"
                 )
-                build_ml_tree = gr.Checkbox(
-                    label="🌳 Enable Phylogenetic Analysis",
-                    value=False
                 )
-                with gr.Row():
-                    analyze_text_btn = gr.Button("🚀 Analyze Text", variant="primary")
-                    analyze_file_btn = gr.Button("📁 Analyze File", variant="secondary")
-        # Results Section
-        with gr.Tabs():
-            with gr.TabItem("🎯 F Gene"):
-                f_gene_output = gr.Textbox(label="F Gene Sequence", lines=5)
-            with gr.TabItem("✅ Validation"):
-                keras_output = gr.Textbox(label="Gene Validation", lines=3)
-            with gr.TabItem("🌳 Phylogeny"):
-                ml_tree_output = gr.Textbox(label="Phylogenetic Analysis", lines=5)
-            with gr.TabItem("📊 Summary"):
-                summary_output = gr.Textbox(label="Analysis Summary", lines=8)
-        # API Test Section
-        with gr.Accordion("🧪 API Testing", open=False):
-            gr.HTML("""
-            <div style="background: #fef7e7; padding: 15px; border-radius: 8px; border-left: 4px solid #f59e0b;">
-                <h4 style="color: #92400e; margin-top: 0;">Test API Endpoints:</h4>
-                <p>Use these functions to test structured API responses:</p>
-            </div>
-            """)
-            with gr.Row():
-                api_sequence_input = gr.Textbox(
-                    label="Test Sequence for API",
-                    placeholder="ATCGATCG...",
-                    lines=2
                 )
-                api_test_btn = gr.Button("Test API Response", variant="primary")
-            api_response = gr.JSON(label="API Response Structure")
-        # Event Handlers
-        analyze_text_btn.click(
-            fn=run_pipeline,
-            inputs=[dna_input, similarity_score, build_ml_tree],
-            outputs=[f_gene_output, keras_output, ml_tree_output, gr.Textbox(), summary_output,
-                    gr.File(), gr.File(), gr.File(), gr.HTML()],
-            api_name="analyze_text"
-        )
-        analyze_file_btn.click(
-            fn=run_pipeline_from_file,
-            inputs=[fasta_file, similarity_score, build_ml_tree],
-            outputs=[f_gene_output, keras_output, ml_tree_output, gr.Textbox(), summary_output,
-                    gr.File(), gr.File(), gr.File(), gr.HTML()],
-            api_name="analyze_file"
         )
-        # API Test Handler
-        api_test_btn.click(
-            fn=api_analyze_sequence,
-            inputs=[api_sequence_input, similarity_score, build_ml_tree],
-            outputs=[api_response],
-            api_name="api_analyze_sequence"
         )
-        # Additional API endpoint for FASTA content
-        gr.Interface(
-            fn=api_analyze_fasta,
-            inputs=[
-                gr.Textbox(label="FASTA Content", lines=5),
-                gr.Slider(70, 99, 95, label="Similarity %"),
-                gr.Checkbox(label="Enable Phylogeny")
-            ],
-            outputs=gr.JSON(label="API Response"),
-            title="FASTA API Endpoint",
-            api_name="api_analyze_fasta",
-            visible=False  # Hidden interface just for API
         )
         # Footer
-        gr.HTML("""
-        <div style="text-align: center; padding: 20px; margin-top: 20px; border-top: 2px solid #e5e7eb;">
-            <p style="color: #6b7280; margin: 0;">🧬 Gene Analysis Pipeline with API Access</p>
-            <p style="color: #9ca3af; font-size: 0.9em; margin: 5px 0 0 0;">
-                Access API at <code>/api/endpoint_name</code> • Documentation at <code>/docs</code>
-            </p>
-        </div>
         """)
-    return iface
-# Replace the launch section at the end of your app.py file with this:
-# Main execution
 if __name__ == "__main__":
     try:
-        print("🧬 Starting Gene Analysis Pipeline with API Access")
-        print("=" * 60)
-        print(f"Boundary Model: {'✅' if boundary_model else '❌'}")
-        print(f"Keras Model: {'✅' if keras_model else '❌'}")
-        # Check tools
-        mafft_available, iqtree_available, _, _ = check_tool_availability()
-        print(f"MAFFT: {'✅' if mafft_available else '❌'}")
-        print(f"IQ-TREE: {'✅' if iqtree_available else '❌'}")
-        print("\n🚀 Launching with API enabled...")
-        print("Access URLs:")
-        print("  - Local: http://localhost:7861")
-        print("  - Network: http://0.0.0.0:7861")
-        print("  - API Docs: http://localhost:7861/docs")
-        # Create and launch interface
-        iface = create_interface()
-        # Launch with broader accessibility
-        iface.launch(
-            share=False,           # Set to True if you want public sharing
-            server_name="0.0.0.0", # Allow external connections
-            server_port=8080,      # Your current port
-            show_error=True,
-            show_api=True,
-            quiet=False,
-            inbrowser=True,        # Try to open browser automatically
-            prevent_thread_lock=False
         )
     except Exception as e:
-        logging.error(f"Failed to start application: {e}")
-        import traceback
-        print(f"Error: {e}")
-        print(f"Traceback: {traceback.format_exc()}")
-        sys.exit(1)

 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+MAFFT_PATH = os.path.join(BASE_DIR, "binaries", "mafft", "mafft")  # Updated path
 IQTREE_PATH = os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
 ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
 TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
 # --- Logging ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# --- Paths ---
+# Model repository and file paths
 model_repo = "GGproject10/best_boundary_aware_model"
 csv_path = "f cleaned.csv"
+# Get HF token from environment (if available)
 hf_token = os.getenv("HF_TOKEN")
+# --- Load Models ---
 boundary_model = None
 keras_model = None
 kmer_to_index = None
+# Try to load boundary model from Hugging Face Hub
 try:
     boundary_path = hf_hub_download(
         repo_id=model_repo,
     if os.path.exists(boundary_path):
         boundary_model = GenePredictor(boundary_path)
         logging.info("Boundary model loaded successfully from Hugging Face Hub.")
+    else:
+        logging.warning(f"Boundary model file not found after download")
 except Exception as e:
     logging.error(f"Failed to load boundary model from HF Hub: {e}")
+# Try to load Keras model from Hugging Face Hub
 try:
     keras_path = hf_hub_download(
         repo_id=model_repo,
         keras_model = load_model(keras_path)
         with open(kmer_path, "rb") as f:
             kmer_to_index = pickle.load(f)
+        logging.info("Keras model and k-mer index loaded successfully from Hugging Face Hub.")
+    else:
+        logging.warning(f"Keras model or kmer files not found after download")
 except Exception as e:
     logging.error(f"Failed to load Keras model from HF Hub: {e}")
+# --- Initialize New Tree Analyzer ---
+analyzer = None
+try:
+    analyzer = PhylogeneticTreeAnalyzer()
+    # Try multiple potential locations for the CSV file
+    csv_candidates = [
+        csv_path,
+        os.path.join(BASE_DIR, csv_path),
+        os.path.join(BASE_DIR, "app", csv_path),
+        os.path.join(os.path.dirname(__file__), csv_path),
+        "f_cleaned.csv",  # Alternative naming
+        os.path.join(BASE_DIR, "f_cleaned.csv")
+    ]
+    csv_loaded = False
+    for csv_candidate in csv_candidates:
+        if os.path.exists(csv_candidate):
+            if analyzer.load_data(csv_candidate):
+                logging.info(f"Tree analyzer data loaded from: {csv_candidate}")
+                csv_loaded = True
+                csv_path = csv_candidate  # Update path for consistency
+                break
+            else:
+                logging.warning(f"Failed to load data from: {csv_candidate}")
+    if not csv_loaded:
+        logging.error("Failed to load CSV data from any candidate location")
+        analyzer = None
+    else:
+        # Try to train AI model (optional)
+        try:
+            if analyzer.train_ai_model():
+                logging.info("AI model training completed successfully")
+            else:
+                logging.warning("AI model training failed; proceeding with basic analysis.")
+        except Exception as e:
+            logging.warning(f"AI model training failed: {e}")
+except Exception as e:
+    logging.error(f"Failed to initialize tree analyzer: {e}")
+    analyzer = None
+# --- Enhanced Tool Detection with Binary Permission Setup ---
 def setup_binary_permissions():
     """Set executable permissions on MAFFT and IQ-TREE binaries"""
     binaries = [MAFFT_PATH, IQTREE_PATH]
     for binary in binaries:
         if os.path.exists(binary):
             try:
+                # Set executable permission
                 current_mode = os.stat(binary).st_mode
                 os.chmod(binary, current_mode | stat.S_IEXEC)
                 logging.info(f"Set executable permission on {binary}")
             except Exception as e:
                 logging.warning(f"Failed to set executable permission on {binary}: {e}")
+        else:
+            logging.warning(f"Binary not found: {binary}")
 def check_tool_availability():
+    """Enhanced check for MAFFT and IQ-TREE availability with improved path validation"""
+    # First, ensure binaries have executable permissions
     setup_binary_permissions()
     # Check MAFFT
     mafft_available = False
     mafft_cmd = None
+    # Updated MAFFT candidates list based on your new API
     mafft_candidates = [
+        MAFFT_PATH,  # Primary path from your new API
+        os.path.join(BASE_DIR, "binaries", "mafft", "mafft"),
+        os.path.join(BASE_DIR, "binaries", "mafft", "mafft.bat"),  # Windows fallback
         'mafft',
         '/usr/bin/mafft',
         '/usr/local/bin/mafft',
+        os.path.join(BASE_DIR, "binaries", "mafft", "mafftdir", "bin", "mafft"),
+        # Add potential conda/miniconda paths
+        os.path.expanduser("~/anaconda3/bin/mafft"),
+        os.path.expanduser("~/miniconda3/bin/mafft"),
+        "/opt/conda/bin/mafft",
+        "/usr/local/miniconda3/bin/mafft"
     ]
     for candidate in mafft_candidates:
         if not candidate:
             continue
+        # First check if file exists or is in PATH
         if os.path.exists(candidate) or shutil.which(candidate):
+            # Now test actual execution
             try:
+                test_cmd = [candidate, "--help"]
+                result = subprocess.run(
+                    test_cmd,
+                    capture_output=True,
+                    text=True,
+                    timeout=10
+                )
                 if result.returncode == 0 or "mafft" in result.stderr.lower():
                     mafft_available = True
                     mafft_cmd = candidate
+                    logging.info(f"MAFFT found and tested successfully at: {candidate}")
                     break
+            except (subprocess.TimeoutExpired, subprocess.CalledProcessError, FileNotFoundError) as e:
+                logging.debug(f"MAFFT test failed for {candidate}: {e}")
                 continue
+    # Check IQ-TREE with similar approach
     iqtree_available = False
     iqtree_cmd = None
+    # Updated IQ-TREE candidates list
     iqtree_candidates = [
+        IQTREE_PATH,  # Primary path from your new API
         'iqtree2',
         'iqtree',
+        'iqtree3',
         '/usr/bin/iqtree2',
         '/usr/local/bin/iqtree2',
+        '/usr/bin/iqtree',
+        '/usr/local/bin/iqtree',
+        'iqtree2.exe',  # Windows
+        'iqtree.exe',   # Windows
+        'iqtree3.exe',  # Windows
+        os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree2"),
+        # Add potential conda paths
+        os.path.expanduser("~/anaconda3/bin/iqtree2"),
+        os.path.expanduser("~/miniconda3/bin/iqtree2"),
+        "/opt/conda/bin/iqtree2",
+        "/usr/local/miniconda3/bin/iqtree2"
     ]
     for candidate in iqtree_candidates:
         if not candidate:
             continue
         if os.path.exists(candidate) or shutil.which(candidate):
             try:
+                test_cmd = [candidate, "--help"]
+                result = subprocess.run(
+                    test_cmd,
+                    capture_output=True,
+                    text=True,
+                    timeout=10
+                )
                 if result.returncode == 0 or "iqtree" in result.stderr.lower():
                     iqtree_available = True
                     iqtree_cmd = candidate
+                    logging.info(f"IQ-TREE found and tested successfully at: {candidate}")
                     break
+            except (subprocess.TimeoutExpired, subprocess.CalledProcessError, FileNotFoundError) as e:
+                logging.debug(f"IQ-TREE test failed for {candidate}: {e}")
                 continue
     return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
+def install_dependencies_guide():
+    """Provide installation guidance for missing dependencies"""
+    guide = """
+🔧 INSTALLATION GUIDE FOR MISSING DEPENDENCIES:
+For MAFFT:
+- Ubuntu/Debian: sudo apt-get install mafft
+- CentOS/RHEL: sudo yum install mafft
+- macOS: brew install mafft
+- Windows: Download from https://mafft.cbrc.jp/alignment/software/
+- Conda: conda install -c bioconda mafft
+For IQ-TREE:
+- Ubuntu/Debian: sudo apt-get install iqtree
+- CentOS/RHEL: sudo yum install iqtree
+- macOS: brew install iqtree
+- Windows: Download from http://www.iqtree.org/
+- Conda: conda install -c bioconda iqtree
+Alternative: Use conda/mamba (RECOMMENDED):
+- conda install -c bioconda mafft iqtree
+Docker option:
+- docker run -it --rm -v $(pwd):/data quay.io/biocontainers/mafft:7.490--h779adbc_0
+- docker run -it --rm -v $(pwd):/data quay.io/biocontainers/iqtree:2.1.4_beta--hdcc8f71_0
+TROUBLESHOOTING:
+If tools are installed but not detected, try:
+1. Add installation directory to PATH
+2. Use absolute paths in the configuration
+3. Check permissions on executable files
+4. Ensure binaries have executable permissions (chmod +x)
+"""
+    return guide
+def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
+    """
+    Improved phylogenetic placement using the new API approach.
+    This adds the query sequence to a reference alignment and tree.
+    """
+    try:
+        # Validate sequence
+        if len(sequence.strip()) < 100:
+            return False, "Error: Sequence is too short for phylogenetic placement (minimum 100 bp).", None, None
+        # Generate unique query ID
+        query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
+        query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
+        aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
+        output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
+        # Check if reference files exist
+        if not os.path.exists(ALIGNMENT_PATH):
+            return False, f"Reference alignment not found: {ALIGNMENT_PATH}", None, None
+        if not os.path.exists(TREE_PATH):
+            return False, f"Reference tree not found: {TREE_PATH}", None, None
+        # Save query sequence as FASTA (improved error handling)
+        try:
+            query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
+            SeqIO.write([query_record], query_fasta, "fasta")
+            logging.info(f"Query sequence saved: {query_fasta}")
+        except Exception as e:
+            return False, f"Error writing query sequence: {e}", None, None
+        # Step 1: Add query sequence to reference alignment using MAFFT (improved approach)
+        logging.info("Adding query sequence to reference alignment...")
+        try:
+            with open(aligned_with_query, "w") as output_file:
+                mafft_result = subprocess.run([
+                    mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH
+                ], stdout=output_file, stderr=subprocess.PIPE, text=True, timeout=600, check=True)
+            # Verify alignment file was created and is not empty
+            if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
+                return False, "MAFFT alignment failed: output file is empty", None, None
+            logging.info(f"MAFFT alignment completed: {aligned_with_query}")
+        except subprocess.CalledProcessError as e:
+            error_msg = e.stderr if e.stderr else "Unknown MAFFT error"
+            return False, f"MAFFT alignment failed: {error_msg}", None, None
+        except subprocess.TimeoutExpired:
+            return False, "MAFFT alignment timeout (>10 minutes)", None, None
+        except FileNotFoundError:
+            return False, f"MAFFT executable not found: {mafft_cmd}", None, None
+        except Exception as e:
+            return False, f"MAFFT execution error: {e}", None, None
+        # Step 2: Place sequence in phylogenetic tree using IQ-TREE (improved approach)
+        logging.info("Placing sequence in phylogenetic tree...")
+        try:
+            iqtree_result = subprocess.run([
+                iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH,
+                "-m", "GTR+G", "-pre", output_prefix, "-redo"
+            ], capture_output=True, text=True, timeout=1200, check=True)
+            # Check if treefile was generated
+            treefile = f"{output_prefix}.treefile"
+            if not os.path.exists(treefile) or os.path.getsize(treefile) == 0:
+                return False, "IQ-TREE placement failed: treefile not generated", aligned_with_query, None
+            logging.info(f"IQ-TREE placement completed: {treefile}")
+            # Generate success message with details
+            success_msg = "✅ Phylogenetic placement completed successfully!\n"
+            success_msg += f"- Query ID: {query_id}\n"
+            success_msg += f"- Alignment: {os.path.basename(aligned_with_query)}\n"
+            success_msg += f"- Tree: {os.path.basename(treefile)}\n"
+            # Try to extract model information from log
+            log_file = f"{output_prefix}.log"
+            if os.path.exists(log_file):
+                try:
+                    with open(log_file, 'r') as f:
+                        log_content = f.read()
+                        if "Log-likelihood" in log_content:
+                            log_lines = [line for line in log_content.split('\n') if "Log-likelihood" in line]
+                            if log_lines:
+                                success_msg += f"- {log_lines[0].strip()}\n"
+                except Exception as e:
+                    logging.warning(f"Could not read log file: {e}")
+            return True, success_msg, aligned_with_query, treefile
+        except subprocess.CalledProcessError as e:
+            error_msg = e.stderr if e.stderr else "Unknown IQ-TREE error"
+            return False, f"IQ-TREE placement failed: {error_msg}", aligned_with_query, None
+        except subprocess.TimeoutExpired:
+            return False, "IQ-TREE placement timeout (>20 minutes)", aligned_with_query, None
+        except FileNotFoundError:
+            return False, f"IQ-TREE executable not found: {iqtree_cmd}", aligned_with_query, None
+        except Exception as e:
+            return False, f"IQ-TREE execution error: {e}", aligned_with_query, None
+    except Exception as e:
+        logging.error(f"Phylogenetic placement failed: {e}")
+        return False, f"Phylogenetic placement failed: {str(e)}", None, None
+    finally:
+        # Clean up temporary query file
+        if 'query_fasta' in locals() and os.path.exists(query_fasta):
+            try:
+                os.unlink(query_fasta)
+            except:
+                pass
def build_maximum_likelihood_tree(f_gene_sequence):
    """
    Place an F gene sequence into the reference tree via MAFFT + IQ-TREE.

    The function first reports on the availability of both external tools and
    of the reference alignment/tree files, then delegates the actual work to
    phylogenetic_placement(). On success the resulting alignment and tree are
    copied to fixed file names in the current working directory.

    Args:
        f_gene_sequence: Sequence handed through to phylogenetic_placement().

    Returns:
        tuple: (success, message, aligned_file, tree_file). The file entries
        are None when the prerequisites are missing or an exception occurred.
    """
    def _copy_to_standard(source, destination):
        # Copy only when the source really exists; otherwise hand back the
        # original value (possibly None) untouched.
        if source and os.path.exists(source):
            shutil.copy2(source, destination)
            return destination
        return source

    try:
        mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
        have_alignment = os.path.exists(ALIGNMENT_PATH)
        have_tree = os.path.exists(TREE_PATH)

        # Assemble the dependency report, one line per checked component.
        report = ["🔍 Checking dependencies...\n"]
        report.append(
            f"✅ MAFFT found and tested: {mafft_cmd}\n"
            if mafft_available
            else "❌ MAFFT not found or not executable\n"
        )
        report.append(
            f"✅ IQ-TREE found and tested: {iqtree_cmd}\n"
            if iqtree_available
            else "❌ IQ-TREE not found or not executable\n"
        )
        report.append(
            "✅ Reference alignment found\n"
            if have_alignment
            else f"❌ Reference alignment not found: {ALIGNMENT_PATH}\n"
        )
        report.append(
            "✅ Reference tree found\n"
            if have_tree
            else f"❌ Reference tree not found: {TREE_PATH}\n"
        )
        status_msg = "".join(report)

        # Missing tools: append the installation guide and stop.
        if not (mafft_available and iqtree_available):
            return False, f"{status_msg}\n{install_dependencies_guide()}", None, None

        # Missing reference data: nothing to place the query into.
        if not (have_alignment and have_tree):
            status_msg += "\n❌ Reference alignment and/or tree files are missing.\n"
            status_msg += "Please ensure f_gene_sequences_aligned.fasta and f_gene_sequences.phy.treefile are available."
            return False, status_msg, None, None

        logging.info("Starting phylogenetic placement...")
        placed, placement_message, aligned_file, tree_file = phylogenetic_placement(
            f_gene_sequence, mafft_cmd, iqtree_cmd
        )
        combined_message = f"{status_msg}\n{placement_message}"
        if not placed:
            return False, combined_message, aligned_file, tree_file

        # Copy files to standard locations for compatibility
        aligned_file = _copy_to_standard(aligned_file, "query_with_references_aligned.fasta")
        tree_file = _copy_to_standard(tree_file, "query_placement_tree.treefile")
        logging.info("Phylogenetic placement completed successfully")
        return True, combined_message, aligned_file, tree_file
    except Exception as e:
        logging.error(f"ML tree construction failed: {e}")
        return False, f"ML tree construction failed: {str(e)}", None, None
+# --- NEW Tree Analysis Function (Using the new analyzer API) ---
def analyze_sequence_for_tree(sequence: str, matching_percentage: float) -> tuple:
    """
    Analyze sequence and create phylogenetic tree using the new analyzer API

    Args:
        sequence (str): DNA sequence to analyze
        matching_percentage (float): Similarity threshold percentage (1-99)

    Returns:
        tuple: (status_message, html_file_path). html_file_path is None on
        any failure; on success it points to a freshly created HTML file in
        the system temp directory. The caller owns that file.
    """
    try:
        if not analyzer:
            return "❌ Error: Tree analyzer not initialized. Please check if the CSV data file is available.", None

        # Normalize first so whitespace-only input is reported as missing.
        sequence = sequence.strip() if sequence else ""
        if not sequence:
            return "❌ Error: Please provide a sequence.", None
        if not (1 <= matching_percentage <= 99):
            return "❌ Error: Matching percentage must be between 1 and 99.", None
        if len(sequence) < 10:
            return "❌ Error: Invalid or missing sequence. Must be ≥10 nucleotides.", None

        # Find query sequence
        if not analyzer.find_query_sequence(sequence):
            return "❌ Error: Sequence not accepted.", None

        # Find similar sequences
        matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
        if not matched_ids:
            return f"❌ Error: No similar sequences found at {matching_percentage}% similarity threshold.", None
        logging.info(f"Found {len(matched_ids)} similar sequences at {actual_percentage:.2f}% similarity")

        # Build tree structure
        analyzer.build_tree_structure_with_ml_safe(matched_ids)

        # Create interactive tree
        fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)

        # Write to a unique temp file: a single fixed file name would let
        # concurrent requests overwrite each other's tree.
        fd, output_path = tempfile.mkstemp(
            prefix='phylogenetic_tree_interactive_', suffix='.html'
        )
        os.close(fd)  # write_html opens the path itself
        fig.write_html(output_path)

        success_msg = f"✅ Analysis complete! Found {len(matched_ids)} similar sequences with {actual_percentage:.2f}% average similarity."
        return success_msg, output_path
    except Exception as e:
        error_msg = f"❌ Error during analysis: {str(e)}"
        # logging.exception records the message together with the traceback.
        logging.exception(error_msg)
        return error_msg, None
+# --- Keras Prediction ---
 def predict_with_keras(sequence):
     try:
         if not keras_model or not kmer_to_index:
+            return f"Keras model not available. Input sequence: {sequence[:100]}..."
         if len(sequence) < 6:
+            return "Skipped: sequence too short for F gene validation (minimum 6 nucleotides required)."
+        # Generate k-mers
         kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
         indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
+        # Prepare input
         input_arr = np.array([indices])
         prediction = keras_model.predict(input_arr, verbose=0)[0]
+        # Assume the last value is the F gene probability (adjust index if model outputs differ)
+        f_gene_prob = prediction[-1]  # Take the probability of the F gene class
+        # Convert to percentage with a buffer (e.g., add 5% to account for minor mismatches)
+        percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))  # Ensure 0-100% range
         return f"{percentage}% F gene"
     except Exception as e:
+        logging.error(f"Keras prediction failed: {e}")
         return f"Keras prediction failed: {str(e)}"
+# --- FASTA Reader ---
 def read_fasta_file(file_obj):
     try:
         if file_obj is None:
             return ""
+        # Handle file object
         if hasattr(file_obj, 'name'):
             with open(file_obj.name, "r") as f:
                 content = f.read()
         logging.error(f"Failed to read FASTA file: {e}")
         return ""
+# --- Core Pipeline Function ---
 def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
     try:
         # Clean input
         # Sanitize DNA sequence
         if not re.match('^[ACTGN]+$', dna_input):
             dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
+            logging.info("DNA sequence sanitized")
+        # Step 1: Boundary Prediction - Extract F gene sequence
+        processed_sequence = dna_input  # This will be the sequence used for downstream analysis
         boundary_output = ""
         if boundary_model:
                 predictions, probs, confidence = boundary_model.predict(dna_input)
                 regions = boundary_model.extract_gene_regions(predictions, dna_input)
                 if regions:
+                    processed_sequence = regions[0]["sequence"]  # Use the extracted gene region
+                    boundary_output = processed_sequence  # Output the actual F gene sequence
+                    logging.info(f"F gene extracted: {len(processed_sequence)} bp (confidence: {confidence:.3f})")
                 else:
+                    boundary_output = f"No F gene regions found in input sequence"
+                    processed_sequence = dna_input
+                    logging.warning("No gene regions found, using full sequence")
+                logging.info("Boundary model prediction completed")
             except Exception as e:
+                logging.error(f"Boundary model failed: {e}")
                 boundary_output = f"Boundary model error: {str(e)}"
+                processed_sequence = dna_input  # Fall back to original sequence
         else:
+            boundary_output = f"Boundary model not available. Using original input: {len(dna_input)} bp"
+            processed_sequence = dna_input
+        # Step 2: Keras Prediction (F gene validation)
         keras_output = ""
         if processed_sequence and len(processed_sequence) >= 6:
+            keras_prediction = predict_with_keras(processed_sequence)
+            # Use the prediction directly as it's now a percentage
+            keras_output = keras_prediction
+        else:
+            keras_output = "Skipped: sequence too short for F gene validation"
+        # Step 3: Maximum Likelihood Tree (Phylogenetic Placement) - Using improved API
+        aligned_file = None
+        phy_file = None
+        ml_tree_output = ""
+        if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
+            try:
+                logging.info("Starting phylogenetic placement...")
+                ml_success, ml_message, ml_aligned, ml_tree = build_maximum_likelihood_tree(processed_sequence)
+                if ml_success:
+                    ml_tree_output = ml_message
+                    aligned_file = ml_aligned
+                    phy_file = ml_tree
+                else:
+                    ml_tree_output = ml_message  # This now includes detailed error information
+            except Exception as e:
+                ml_tree_output = f"❌ Phylogenetic placement failed: {str(e)}"
+                logging.error(f"Phylogenetic placement failed: {e}")
+        elif build_ml_tree:
+            ml_tree_output = "❌ F gene sequence too short for phylogenetic placement (minimum 100 bp)"
+        else:
+            ml_tree_output = "Phylogenetic placement skipped (not requested)"
+        # Step 4: NEW Simplified Tree Analysis (using the new analyzer API)
+        html_file = None
+        tree_html_content = "No tree generated"
+        simplified_ml_output = ""
+        if analyzer and processed_sequence and len(processed_sequence) >= 10:
+            try:
+                logging.info(f"Starting simplified ML tree analysis with F gene sequence length: {len(processed_sequence)}")
+                # Use the new analyze_sequence_for_tree function
+                tree_result, html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
+                if html_path and os.path.exists(html_path):
+                    # Success - copy the HTML file to a location Gradio can serve
+                    output_dir = "output"
+                    os.makedirs(output_dir, exist_ok=True)
+                    # Create a safe filename
+                    safe_seq_name = re.sub(r'[^a-zA-Z0-9_-]', '', processed_sequence[:20])
+                    timestamp = str(int(time.time()))
+                    html_filename = f"tree_{safe_seq_name}_{timestamp}.html"
+                    final_html_path = os.path.join(output_dir, html_filename)
+                    # Copy the HTML file
+                    shutil.copy2(html_path, final_html_path)
+                    html_file = final_html_path
+                    # Read HTML content for display
+                    with open(html_path, 'r', encoding='utf-8') as f:
+                    tree_html_content = f.read()
+                    simplified_ml_output = tree_result
+                    logging.info("Simplified ML tree analysis completed successfully")
+                else:
+                    simplified_ml_output = tree_result  # This contains the error message
+                    logging.warning(f"Simplified ML tree analysis failed: {tree_result}")
+            except Exception as e:
+                simplified_ml_output = f"❌ Simplified ML tree analysis failed: {str(e)}"
+                logging.error(f"Simplified ML tree analysis failed: {e}")
         else:
+            if not analyzer:
+                simplified_ml_output = "❌ Tree analyzer not available"
+            elif not processed_sequence:
+                simplified_ml_output = "❌ No sequence to analyze"
             else:
+                simplified_ml_output = "❌ Sequence too short for tree analysis (minimum 10 bp)"
+        # Prepare summary
+        summary = f"""
+=== ANALYSIS SUMMARY ===
+Input Length: {len(dna_input)} bp
+F Gene Length: {len(processed_sequence)} bp
+F Gene Validation: {keras_output}
+Phylogenetic Analysis: {'✅ Completed' if html_file else '❌ Not performed or failed'}
 """
         return (
             boundary_output,
+            keras_output,
             ml_tree_output,
+            simplified_ml_output,
+            summary,
+            aligned_file,
+            phy_file,
+            html_file,
+            tree_html_content
         )
     except Exception as e:
+        error_msg = f"Pipeline failed: {str(e)}"
+        logging.error(error_msg)
+        return error_msg, "", "", "", "", None, None, None, "Pipeline error occurred"
+# --- API-Compatible Wrapper Function ---
def run_pipeline_api(dna_input, similarity_score=95.0, build_ml_tree=False):
    """
    Run run_pipeline() and repackage its result as a JSON-friendly dict.

    Text outputs are passed through (with a placeholder when empty), file
    outputs are reduced to an "Available" / "Not generated" status, and the
    HTML content is cut to 1000 characters. Any exception yields a dict with
    an "error" entry and empty/"Error" values for the remaining keys.
    """
    def _file_status(path):
        # A file counts as available only if a path was returned and exists.
        return "Available" if path and os.path.exists(path) else "Not generated"

    try:
        results = run_pipeline(dna_input, similarity_score, build_ml_tree)

        # Positions 0-4 of the pipeline result are plain text.
        text_fields = (
            ("boundary_analysis", "No boundary analysis"),
            ("f_gene_validation", "No F gene validation"),
            ("ml_tree_analysis", "No ML tree analysis"),
            ("simplified_tree_analysis", "No simplified analysis"),
            ("summary", "No summary"),
        )
        payload = {}
        for position, (key, placeholder) in enumerate(text_fields):
            payload[key] = results[position] or placeholder

        # Positions 5-7 are file paths; expose only whether they exist.
        payload["aligned_file_status"] = _file_status(results[5])
        payload["phylogenetic_file_status"] = _file_status(results[6])
        payload["html_tree_status"] = _file_status(results[7])

        # Position 8 is the tree HTML; keep the API response small.
        preview = results[8] or "No HTML content"
        if len(preview) > 1000:
            preview = preview[:1000] + "... [truncated for API response]"
        payload["html_preview"] = preview
        return payload
    except Exception as e:
        failure = {"error": f"API pipeline failed: {str(e)}"}
        for key in (
            "boundary_analysis",
            "f_gene_validation",
            "ml_tree_analysis",
            "simplified_tree_analysis",
            "summary",
        ):
            failure[key] = ""
        for key in ("aligned_file_status", "phylogenetic_file_status", "html_tree_status"):
            failure[key] = "Error"
        failure["html_preview"] = ""
        return failure
+# --- File Upload Handler ---
def handle_file_upload_api(file_content):
    """
    API-compatible file upload handler.

    Extracts the nucleotide sequence from FASTA text (or raw sequence text).
    Header lines (starting with '>') are dropped, the remaining lines are
    joined, upper-cased, and every character outside ACTGN is replaced by N.
    If several records are present their sequences are concatenated.

    Args:
        file_content: FASTA content as str or bytes (UTF-8, BOM tolerated).

    Returns:
        str: The cleaned sequence, "No file provided" for empty input, or a
        "File processing error: ..." message if decoding/parsing fails.
    """
    try:
        if not file_content:
            return "No file provided"

        # utf-8-sig drops a leading byte-order mark; without that the BOM
        # hides the '>' of the first header and the header is read as sequence.
        if isinstance(file_content, (bytes, bytearray)):
            content = file_content.decode('utf-8-sig')
        else:
            content = str(file_content).lstrip('\ufeff')

        sequence_parts = []
        for raw_line in content.splitlines():  # handles \n, \r\n and \r
            line = raw_line.strip()
            # Strip before testing so indented headers are recognised too.
            if not line or line.startswith('>'):
                continue
            # Remove whitespace inside the line instead of turning it into N.
            sequence_parts.append(''.join(line.split()))

        sequence = ''.join(sequence_parts).upper()
        return ''.join(c if c in 'ACTGN' else 'N' for c in sequence)
    except Exception as e:
        # Errors are reported as text to keep the string-only return contract.
        return f"File processing error: {str(e)}"
+# --- Create Gradio Interface ---
 def create_interface():
+    """Create the Gradio interface with API support"""
+    # Custom CSS for better appearance
+    css = """
+    .gradio-container {
+        max-width: 1200px !important;
+        margin: auto !important;
+    }
+    .output-html {
+        height: 600px !important;
+        overflow: auto !important;
+    }
+    """
+    with gr.Blocks(css=css, title="F Gene Analysis Pipeline") as app:
+        gr.Markdown("""
+        # 🧬 F Gene Analysis Pipeline
+        **Comprehensive F gene boundary detection, validation, and phylogenetic analysis**
+        This tool performs:
+        1. **Boundary Detection**: Extracts F gene sequences from input DNA
+        2. **F Gene Validation**: Validates extracted sequences using ML models
+        3. **Phylogenetic Analysis**: Places sequences in evolutionary context
+        4. **Interactive Trees**: Generates interactive phylogenetic visualizations
         """)
         with gr.Row():
             with gr.Column(scale=2):
+                # Input section
+                gr.Markdown("### 📥 Input")
+                sequence_input = gr.Textbox(
+                    label="DNA Sequence",
+                    placeholder="Enter DNA sequence (ACTG) or upload FASTA file...",
+                    lines=5,
+                    max_lines=10
                 )
+                file_input = gr.File(
+                    label="Upload FASTA File (optional)",
+                    file_types=[".fasta", ".fa", ".txt"],
+                    type="filepath"
                 )
+                # Parameters
+                gr.Markdown("### ⚙️ Parameters")
+                similarity_slider = gr.Slider(
+                    minimum=50,
+                    maximum=99,
+                    value=95,
+                    step=1,
+                    label="Similarity Threshold (%)",
+                    info="Minimum similarity for phylogenetic grouping"
                 )
+                ml_tree_checkbox = gr.Checkbox(
+                    label="Build Maximum Likelihood Tree",
+                    value=False,
+                    info="Requires MAFFT and IQ-TREE (slower but more accurate)"
+                )
+                # Action buttons
+                analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary", size="lg")
+                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
+            with gr.Column(scale=3):
+                # Output section
+                gr.Markdown("### 📊 Results")
+                with gr.Tabs():
+                    with gr.TabItem("📈 Analysis Results"):
+                        boundary_output = gr.Textbox(
+                            label="1. Boundary Detection & F Gene Extraction",
+                            lines=3,
+                            interactive=False
+                        )
+                        keras_output = gr.Textbox(
+                            label="2. F Gene Validation",
+                            lines=2,
+                            interactive=False
+                        )
+                        ml_output = gr.Textbox(
+                            label="3. Maximum Likelihood Tree Analysis",
+                            lines=4,
+                            interactive=False
+                        )
+                        simplified_output = gr.Textbox(
+                            label="4. Simplified Phylogenetic Analysis",
+                            lines=3,
+                            interactive=False
+                        )
+                        summary_output = gr.Textbox(
+                            label="📋 Summary",
+                            lines=4,
+                            interactive=False
+                        )
+                    with gr.TabItem("🌳 Interactive Tree"):
+                        tree_html = gr.HTML(
+                            label="Phylogenetic Tree Visualization",
+                            elem_classes=["output-html"]
+                        )
+                    with gr.TabItem("📁 Downloads"):
+                        gr.Markdown("### Available Downloads")
+                        aligned_file = gr.File(
+                            label="Aligned Sequences (FASTA)",
+                            interactive=False
+                        )
+                        phy_file = gr.File(
+                            label="Phylogenetic Tree (Newick)",
+                            interactive=False
+                        )
+                        html_file = gr.File(
+                            label="Interactive Tree (HTML)",
+                            interactive=False
+                        )
+        # Event handlers
+        def handle_file_upload(file_obj):
+            if file_obj:
+                return read_fasta_file(file_obj)
+            return ""
+        def clear_all():
+            return (
+                "",  # sequence_input
+                None,  # file_input
+                95,  # similarity_slider
+                False,  # ml_tree_checkbox
+                "",  # boundary_output
+                "",  # keras_output
+                "",  # ml_output
+                "",  # simplified_output
+                "",  # summary_output
+                "",  # tree_html
+                None,  # aligned_file
+                None,  # phy_file
+                None   # html_file
+            )
+        # File upload handler
+        file_input.change(
+            fn=handle_file_upload,
+            inputs=[file_input],
+            outputs=[sequence_input]
         )
+        # Main analysis handler
+        analyze_btn.click(
+            fn=run_pipeline,
+            inputs=[sequence_input, similarity_slider, ml_tree_checkbox],
+            outputs=[
+                boundary_output,
+                keras_output,
+                ml_output,
+                simplified_output,
+                summary_output,
+                aligned_file,
+                phy_file,
+                html_file,
+                tree_html
+            ]
         )
+        # Clear handler
+        clear_btn.click(
+            fn=clear_all,
+            outputs=[
+                sequence_input,
+                file_input,
+                similarity_slider,
+                ml_tree_checkbox,
+                boundary_output,
+                keras_output,
+                ml_output,
+                simplified_output,
+                summary_output,
+                tree_html,
+                aligned_file,
+                phy_file,
+                html_file
+            ]
         )
         # Footer
+        gr.Markdown("""
+        ---
+        **💡 Tips:**
+        - For best results, use sequences > 100 bp
+        - ML tree analysis requires external tools (MAFFT, IQ-TREE)
+        - Interactive trees work best with 10-100 sequences
+        - API endpoint available at `/api/predict/`
         """)
+    return app
+# --- API Interface Creation ---
def create_api_interface():
    """
    Build the API-oriented gr.Interface around run_pipeline_api().

    Inputs are a sequence textbox, a similarity slider (50-99, default 95)
    and an ML-tree checkbox; the result dict is shown as JSON.
    """
    sequence_field = gr.Textbox(label="DNA Sequence", placeholder="Enter DNA sequence...")
    threshold_field = gr.Slider(minimum=50, maximum=99, value=95, label="Similarity Threshold (%)")
    ml_tree_field = gr.Checkbox(label="Build ML Tree", value=False)
    json_view = gr.JSON(label="Analysis Results")

    return gr.Interface(
        fn=run_pipeline_api,
        inputs=[sequence_field, threshold_field, ml_tree_field],
        outputs=json_view,
        title="F Gene Analysis API",
        description="API endpoint for F gene analysis pipeline",
        allow_flagging="never",
    )
+# --- Main Application Setup ---
 if __name__ == "__main__":
+    # Create the main interface
+    main_app = create_interface()
+    # Create API interface
+    api_app = create_api_interface()
+    # Try to launch with API enabled
     try:
+        # Mount both interfaces
+        app = gr.TabbedInterface(
+            [main_app, api_app],
+            ["Main Interface", "API"],
+            title="F Gene Analysis Pipeline"
+        )
+        # Launch with API enabled
+        app.launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=False,
+            enable_api=True,  # This should work now
+            api_open=True,
+            show_error=True
         )
     except Exception as e:
+        logging.error(f"Failed to launch with API: {e}")
+        logging.info("Falling back to main interface without API...")
+        # Fallback: launch main interface without API
+        main_app.launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=False,
+            show_error=True
+        )