Spaces:

GGproject10
/

simplified_tree_AI

No application file

App Files Files Community

re-type committed on Jun 13, 2025

Commit

1d23751

verified ·

1 Parent(s): 956abb6

Update app.py

Browse files

Files changed (1) hide show

app.py +288 -470

app.py CHANGED Viewed

@@ -1,9 +1,14 @@
 import gradio as gr
 import torch
 import pickle
 import subprocess
 import pandas as pd
-import os
 import re
 import logging
 import numpy as np
@@ -22,14 +27,16 @@ from Bio.SeqRecord import SeqRecord
 import stat
 import time
 import asyncio
-# FastAPI imports
 from fastapi import FastAPI, File, UploadFile, Form, HTTPException
 from fastapi.responses import HTMLResponse
 from pydantic import BaseModel
 from typing import Optional
 import uvicorn
 # Set event loop policy for Spaces
 try:
     asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
@@ -43,8 +50,6 @@ app = FastAPI(title="🧬 Gene Analysis Pipeline", version="1.0.0")
 log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 log_handler = logging.StreamHandler()
 log_handler.setFormatter(log_formatter)
-# File handler with error handling
 try:
     file_handler = logging.FileHandler('/tmp/app.log')
     file_handler.setFormatter(log_formatter)
@@ -52,23 +57,18 @@ try:
 except Exception:
     logging.basicConfig(level=logging.INFO, handlers=[log_handler])
-logger = logging.getLogger(__name__)
 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-MODELS_DIR = os.path.join(BASE_DIR, "models")  # Local models directory
-MAFFT_PATH = shutil.which("mafft") or os.path.join(BASE_DIR, "binaries", "mafft", "mafft")
-IQTREE_PATH = shutil.which("iqtree") or os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
 ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
 TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
-QUERY_OUTPUT_DIR = os.path.join("/tmp", "queries")
 os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
-# --- Corrected Paths ---
-boundary_model_repo = "GGproject10/best_boundary_aware_model"
-other_models_repo = "GGproject10/simplified_tree_AI"
-csv_path = os.path.join(BASE_DIR, "f_cleaned.csv")
-hf_token = os.getenv("HF_TOKEN")
 # Initialize models as None
 boundary_model = None
@@ -76,128 +76,85 @@ keras_model = None
 kmer_to_index = None
 analyzer = None
-# --- Enhanced Model Loading with Correct Paths ---
 def load_models_safely():
     global boundary_model, keras_model, kmer_to_index, analyzer
-    logger.info(f"🔍 Looking for models in: {MODELS_DIR}")
-    logger.info(f"📁 Models directory exists: {os.path.exists(MODELS_DIR)}")
-    if os.path.exists(MODELS_DIR):
-        logger.info(f"📂 Contents of models directory: {os.listdir(MODELS_DIR)}")
-    # Load Boundary Model - Try local first, then HF from correct repo
     try:
-        # Local model paths
-        local_boundary_path = os.path.join(MODELS_DIR, "best_boundary_aware_model.pth")
-        if os.path.exists(local_boundary_path):
-            logger.info(f"✅ Loading boundary model from local path: {local_boundary_path}")
-            boundary_model = EnhancedGenePredictor(local_boundary_path)
-            logger.info("✅ Boundary model loaded successfully from local directory")
-        elif hf_token:
-            logger.info("🌐 Attempting to load boundary model from Hugging Face...")
-            boundary_path = hf_hub_download(
-                repo_id=boundary_model_repo,  # Correct repo for boundary model
-                filename="best_boundary_aware_model.pth",
-                token=hf_token,
-                cache_dir="/tmp/hf_cache"
-            )
-            if os.path.exists(boundary_path):
-                boundary_model = EnhancedGenePredictor(boundary_path)
-                logger.info("✅ Boundary model loaded successfully from HF")
-            else:
-                logger.warning("❌ Boundary model file not found after HF download")
         else:
-            logger.warning("❌ No local boundary model found and no HF_TOKEN available")
     except Exception as e:
-        logger.error(f"❌ Failed to load boundary model: {e}")
-        boundary_model = None
-    # Load Keras Model - Try local first, then HF from correct repo
     try:
-        # Local model paths
-        local_keras_path = os.path.join(MODELS_DIR, "best_model.keras")
-        local_kmer_path = os.path.join(MODELS_DIR, "kmer_to_index.pkl")
-        if os.path.exists(local_keras_path) and os.path.exists(local_kmer_path):
-            logger.info(f"✅ Loading Keras model from local paths:")
-            logger.info(f"   - Keras model: {local_keras_path}")
-            logger.info(f"   - K-mer index: {local_kmer_path}")
-            keras_model = load_model(local_keras_path)
-            with open(local_kmer_path, "rb") as f:
                 kmer_to_index = pickle.load(f)
-            logger.info("✅ Keras model loaded successfully from local directory")
-        elif hf_token:
-            logger.info("🌐 Attempting to load Keras model from Hugging Face...")
-            keras_path = hf_hub_download(
-                repo_id=other_models_repo,  # Correct repo for other models
-                filename="best_model.keras",
-                token=hf_token,
-                cache_dir="/tmp/hf_cache"
-            )
-            kmer_path = hf_hub_download(
-                repo_id=other_models_repo,  # Correct repo for other models
-                filename="kmer_to_index.pkl",
-                token=hf_token,
-                cache_dir="/tmp/hf_cache"
-            )
-            if os.path.exists(keras_path) and os.path.exists(kmer_path):
-                keras_model = load_model(keras_path)
-                with open(kmer_path, "rb") as f:
-                    kmer_to_index = pickle.load(f)
-                logger.info("✅ Keras model loaded successfully from HF")
-            else:
-                logger.warning("❌ Keras model files not found after HF download")
         else:
-            logger.warning("❌ No local Keras model found and no HF_TOKEN available")
     except Exception as e:
-        logger.error(f"❌ Failed to load Keras model: {e}")
-        keras_model = None
-        kmer_to_index = None
     # Initialize Tree Analyzer
     try:
         logger.info("🌳 Initializing tree analyzer...")
         analyzer = PhylogeneticTreeAnalyzer()
-        # Try multiple CSV locations
         csv_candidates = [
-            csv_path,
-            os.path.join(BASE_DIR, "f cleaned.csv"),
             "f_cleaned.csv",
-            os.path.join(BASE_DIR, "data", "f_cleaned.csv"),
-            os.path.join(MODELS_DIR, "f_cleaned.csv")  # Also check models directory
         ]
         csv_loaded = False
         for csv_candidate in csv_candidates:
             if os.path.exists(csv_candidate):
                 try:
-                    logger.info(f"📊 Trying to load CSV from: {csv_candidate}")
                     if analyzer.load_data(csv_candidate):
-                        logger.info(f"✅ Tree analyzer loaded CSV from: {csv_candidate}")
                         csv_loaded = True
                         break
                 except Exception as e:
-                    logger.warning(f"Failed to load CSV from {csv_candidate}: {e}")
                     continue
         if not csv_loaded:
-            logger.error("❌ Failed to load CSV data from any location")
-            logger.info("📂 Available files in base directory:")
-            try:
-                for file in os.listdir(BASE_DIR):
-                    if file.endswith('.csv'):
-                        logger.info(f"   - {file}")
-            except:
-                pass
             analyzer = None
     except Exception as e:
-        logger.error(f"❌ Failed to initialize tree analyzer: {e}")
         analyzer = None
 # Load models at startup
@@ -215,19 +172,16 @@ def setup_binary_permissions():
 def check_tool_availability():
     setup_binary_permissions()
-    # Check MAFFT
     mafft_available = False
     mafft_cmd = None
     mafft_candidates = ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH]
     for candidate in mafft_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
                 result = subprocess.run(
-                    [candidate, "--help"],
-                    capture_output=True,
-                    text=True,
                     timeout=5
                 )
                 if result.returncode == 0 or "mafft" in result.stderr.lower():
@@ -237,19 +191,16 @@ def check_tool_availability():
                     break
             except Exception as e:
                 logger.debug(f"MAFFT test failed for {candidate}: {e}")
-    # Check IQ-TREE
     iqtree_available = False
     iqtree_cmd = None
     iqtree_candidates = ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH]
     for candidate in iqtree_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
                 result = subprocess.run(
-                    [candidate, "--help"],
-                    capture_output=True,
-                    text=True,
                     timeout=5
                 )
                 if result.returncode == 0 or "iqtree" in result.stderr.lower():
@@ -259,46 +210,36 @@ def check_tool_availability():
                     break
             except Exception as e:
                 logger.debug(f"IQ-TREE test failed for {candidate}: {e}")
     return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
-# --- Pipeline Functions (keeping your original logic) ---
 def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
     try:
         if len(sequence.strip()) < 100:
             return False, "Sequence too short (<100 bp).", None, None
         query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
         query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
         aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
         output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
         if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
             return False, "Reference alignment or tree not found.", None, None
         query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
         SeqIO.write([query_record], query_fasta, "fasta")
         with open(aligned_with_query, "w") as output_file:
             subprocess.run([
                 mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH
             ], stdout=output_file, stderr=subprocess.PIPE, text=True, timeout=600, check=True)
         if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
             return False, "MAFFT alignment failed.", None, None
         subprocess.run([
-            iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH,
             "-m", "GTR+G", "-pre", output_prefix, "-redo"
         ], capture_output=True, text=True, timeout=1200, check=True)
         treefile = f"{output_prefix}.treefile"
         if not os.path.exists(treefile):
             return False, "IQ-TREE placement failed.", aligned_with_query, None
         success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
         return True, success_msg, aligned_with_query, treefile
     except Exception as e:
         logger.error(f"Phylogenetic placement failed: {e}")
         return False, f"Error: {str(e)}", None, None
@@ -309,40 +250,73 @@ def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
             except:
                 pass
 def predict_with_keras(sequence):
     try:
         if not keras_model or not kmer_to_index:
             return "❌ Keras model not available."
         if len(sequence) < 6:
             return "❌ Sequence too short (<6 bp)."
         kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
         indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
         input_arr = np.array([indices])
         prediction = keras_model.predict(input_arr, verbose=0)[0]
         f_gene_prob = prediction[-1]
         percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
         return f"✅ {percentage}% F gene confidence"
     except Exception as e:
         logger.error(f"Keras prediction failed: {e}")
         return f"❌ Error: {str(e)}"
 def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
     try:
         dna_input = dna_input.upper().strip()
         if not dna_input:
             return "❌ Empty input", "", "", "", "", None, None, None, None, "No input", "No input"
-        # Clean sequence
         if not re.match('^[ACTGN]+$', dna_input):
             dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
         processed_sequence = dna_input
-        # Boundary prediction
         boundary_output = ""
         if boundary_model:
             try:
@@ -359,15 +333,10 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
                 processed_sequence = dna_input
         else:
             boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"
-        # Keras prediction
         keras_output = predict_with_keras(processed_sequence) if processed_sequence and len(processed_sequence) >= 6 else "❌ Sequence too short."
-        # ML Tree (keeping your original logic)
         aligned_file = None
         phy_file = None
         ml_tree_output = ""
         if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
             try:
                 mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
@@ -384,29 +353,23 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
             ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
         else:
             ml_tree_output = "⚠️ Phylogenetic placement skipped."
-        # Tree analysis
         tree_html_content = "No tree generated."
         report_html_content = "No report generated."
         simplified_ml_output = ""
         if analyzer and processed_sequence and len(processed_sequence) >= 10:
             try:
                 tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
                 simplified_ml_output = tree_result
                 if tree_html_path and os.path.exists(tree_html_path):
                     with open(tree_html_path, 'r', encoding='utf-8') as f:
                         tree_html_content = f.read()
                 else:
                     tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
                 if report_html_path and os.path.exists(report_html_path):
                     with open(report_html_path, 'r', encoding='utf-8') as f:
                         report_html_content = f.read()
                 else:
                     report_html_content = f"<div style='color: red;'>{tree_result}</div>"
             except Exception as e:
                 simplified_ml_output = f"❌ Tree analysis error: {str(e)}"
                 tree_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
@@ -415,8 +378,6 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
             simplified_ml_output = "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
             tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
             report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
-        # Summary
         summary_output = f"""
 📊 ANALYSIS SUMMARY:
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
@@ -427,72 +388,15 @@ Placement: {'✅ OK' if '✅' in ml_tree_output else '⚠️ Skipped' if 'skippe
 Tree Analysis: {'✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'}
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 """
         return (
             boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
             aligned_file, phy_file, None, None, tree_html_content, report_html_content
         )
     except Exception as e:
         logger.error(f"Pipeline error: {e}")
         error_msg = f"❌ Pipeline Error: {str(e)}"
         return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
-# Keep your other functions (analyze_sequence_for_tree, build_maximum_likelihood_tree, etc.)
-def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
-    try:
-        if not analyzer:
-            return "❌ Tree analyzer not initialized.", None, None
-        if not sequence or len(sequence.strip()) < 10:
-            return "❌ Invalid sequence.", None, None
-        if not (1 <= matching_percentage <= 99):
-            return "❌ Matching percentage must be 1-99.", None, None
-        if not analyzer.find_query_sequence(sequence):
-            return "❌ Sequence not accepted.", None, None
-        matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
-        if not matched_ids:
-            return f"❌ No similar sequences at {matching_percentage}% threshold.", None, None
-        analyzer.build_tree_structure_with_ml_safe(matched_ids)
-        fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
-        query_id = analyzer.query_id or f"query_{int(time.time())}"
-        tree_html_path = os.path.join("/tmp", f'phylogenetic_tree_{query_id}.html')
-        fig.write_html(tree_html_path)
-        analyzer.matching_percentage = matching_percentage
-        report_success = analyzer.generate_detailed_report(matched_ids, actual_percentage)
-        report_html_path = os.path.join("/tmp", f"detailed_report_{query_id}.html") if report_success else None
-        return f"✅ Found {len(matched_ids)} sequences at {actual_percentage:.2f}% similarity.", tree_html_path, report_html_path
-    except Exception as e:
-        logger.error(f"Tree analysis failed: {e}")
-        return f"❌ Error: {str(e)}", None, None
-def read_fasta_file(file_obj):
-    try:
-        if file_obj is None:
-            return ""
-        if isinstance(file_obj, str):
-            with open(file_obj, "r") as f:
-                content = f.read()
-        else:
-            content = file_obj.read().decode("utf-8")
-        lines = content.strip().split("\n")
-        seq_lines = [line.strip() for line in lines if not line.startswith(">")]
-        return ''.join(seq_lines)
-    except Exception as e:
-        logger.error(f"Failed to read FASTA file: {e}")
-        return ""
 async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
     try:
         dna_input = read_fasta_file(fasta_file_obj)
@@ -549,18 +453,7 @@ async def health_check():
             },
             "paths": {
                 "base_dir": BASE_DIR,
-                "models_dir": MODELS_DIR,
-                "models_dir_exists": os.path.exists(MODELS_DIR),
-                "csv_path": csv_path,
-                "csv_exists": os.path.exists(csv_path)
-            },
-            "model_repos": {
-                "boundary_model": boundary_model_repo,
-                "other_models": other_models_repo
-            },
-            "recommendations": {
-                "models": "Models loaded from local directory" if (boundary_model and keras_model) else "Check models directory",
-                "bioinformatics_tools": "Install MAFFT and IQ-TREE" if not (mafft_available and iqtree_available) else "OK"
             }
         }
     except Exception as e:
@@ -582,15 +475,15 @@ async def analyze_sequence(request: AnalysisRequest):
     except Exception as e:
         logger.error(f"Analyze error: {e}")
         return AnalysisResponse(
-            boundary_output="", keras_output="", ml_tree_output="",
             tree_analysis_output="", summary_output="",
             success=False, error_message=str(e)
         )
 @app.post("/analyze-file")
 async def analyze_file(
-    file: UploadFile = File(...),
-    similarity_score: float = Form(95.0),
     build_ml_tree: bool = Form(False)
 ):
     temp_file_path = None
@@ -599,9 +492,7 @@ async def analyze_file(
             content = await file.read()
             temp_file.write(content)
             temp_file_path = temp_file.name
         result = await run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
         return AnalysisResponse(
             boundary_output=result[0] or "",
             keras_output=result[1] or "",
@@ -613,7 +504,7 @@ async def analyze_file(
     except Exception as e:
         logger.error(f"Analyze-file error: {e}")
         return AnalysisResponse(
-            boundary_output="", keras_output="", ml_tree_output="",
             tree_analysis_output="", summary_output="",
             success=False, error_message=str(e)
         )
@@ -624,7 +515,7 @@ async def analyze_file(
             except:
                 pass
-# --- Fixed Gradio Interface ---
 def create_gradio_interface():
     try:
         with gr.Blocks(
@@ -638,10 +529,7 @@ def create_gradio_interface():
             .error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
             """
         ) as iface:
             gr.Markdown("# 🧬 Gene Analysis Pipeline")
-            # Status display
             with gr.Row():
                 with gr.Column():
                     status_display = gr.HTML(value=f"""
@@ -649,281 +537,211 @@ def create_gradio_interface():
                         <h3>🔧 System Status</h3>
                         <p>🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}</p>
                         <p>🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}</p>
-                        <p>🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}
-                        <p>🔬 MAFFT/IQ-TREE: {'✅ Available' if check_tool_availability()[0] and check_tool_availability()[1] else '❌ Missing'}</p>
                     """)
-            # Input tabs
             with gr.Tabs():
                 with gr.TabItem("📝 Text Input"):
-                    dna_input = gr.Textbox(
-                        label="🧬 DNA Sequence",
-                        placeholder="Enter DNA sequence (ATCG format)...",
-                        lines=5,
-                        max_lines=10
-                    )
                 with gr.TabItem("📁 File Upload"):
-                    fasta_file = gr.File(
-                        label="📄 Upload FASTA File",
-                        file_types=[".fasta", ".fa", ".txt"],
-                        file_count="single"
-                    )
-            # Analysis options
             with gr.Row():
                 with gr.Column():
-                    similarity_slider = gr.Slider(
-                        minimum=1,
-                        maximum=99,
-                        value=95,
-                        step=1,
-                        label="🎯 Similarity Threshold (%)",
-                        info="Minimum similarity for phylogenetic analysis"
-                    )
-                with gr.Column():
-                    ml_tree_checkbox = gr.Checkbox(
-                        label="🌲 Build ML Tree",
-                        value=False,
-                        info="Perform phylogenetic placement (slower)"
-                    )
-            # Action buttons
-            with gr.Row():
-                analyze_text_btn = gr.Button("🔍 Analyze Text", variant="primary", size="lg")
-                analyze_file_btn = gr.Button("📁 Analyze File", variant="secondary", size="lg")
-                clear_btn = gr.Button("🗑️ Clear", variant="stop")
-            # Results section
-            gr.Markdown("## 📊 Analysis Results")
-            with gr.Tabs():
-                with gr.TabItem("🎯 Boundary Prediction"):
                     boundary_output = gr.Textbox(
-                        label="🔍 F Gene Boundary Detection",
-                        lines=3,
-                        interactive=False
                     )
-                with gr.TabItem("🧠 Keras Validation"):
                     keras_output = gr.Textbox(
-                        label="🤖 Neural Network Validation",
-                        lines=3,
-                        interactive=False
                     )
-                with gr.TabItem("🌲 ML Tree Placement"):
                     ml_tree_output = gr.Textbox(
-                        label="🌳 Maximum Likelihood Tree",
-                        lines=5,
-                        interactive=False
                     )
-                with gr.TabItem("📈 Tree Analysis"):
                     tree_analysis_output = gr.Textbox(
-                        label="📊 Phylogenetic Analysis",
-                        lines=5,
-                        interactive=False
-                    )
-                with gr.TabItem("📋 Summary"):
-                    summary_output = gr.Textbox(
-                        label="📝 Analysis Summary",
-                        lines=10,
-                        interactive=False
                     )
-            # Visualization section
             with gr.Tabs():
                 with gr.TabItem("🌳 Interactive Tree"):
                     tree_html = gr.HTML(
-                        label="Phylogenetic Tree Visualization",
-                        value="<div style='text-align: center; padding: 20px; color: #666;'>Tree visualization will appear here after analysis</div>"
                     )
                 with gr.TabItem("📊 Detailed Report"):
                     report_html = gr.HTML(
                         label="Analysis Report",
-                        value="<div style='text-align: center; padding: 20px; color: #666;'>Detailed report will appear here after analysis</div>"
                     )
-            # File downloads
-            gr.Markdown("## 📥 Download Results")
-            with gr.Row():
-                aligned_file = gr.File(
-                    label="📄 Aligned Sequences",
-                    interactive=False
-                )
-                tree_file = gr.File(
-                    label="🌳 Tree File",
-                    interactive=False
-                )
-            # Event handlers
-            def clear_all():
-                return (
-                    "",  # dna_input
-                    None,  # fasta_file
-                    "",  # boundary_output
-                    "",  # keras_output
-                    "",  # ml_tree_output
-                    "",  # tree_analysis_output
-                    "",  # summary_output
-                    "<div style='text-align: center; padding: 20px; color: #666;'>Tree visualization will appear here after analysis</div>",  # tree_html
-                    "<div style='text-align: center; padding: 20px; color: #666;'>Detailed report will appear here after analysis</div>",  # report_html
-                    None,  # aligned_file
-                    None   # tree_file
-                )
-            # Text analysis
-            analyze_text_btn.click(
                 fn=run_pipeline,
-                inputs=[dna_input, similarity_slider, ml_tree_checkbox],
                 outputs=[
-                    boundary_output,
-                    keras_output,
-                    ml_tree_output,
-                    tree_analysis_output,
-                    summary_output,
-                    aligned_file,
-                    tree_file,
-                    gr.State(),  # placeholder for additional outputs
-                    gr.State(),  # placeholder for additional outputs
-                    tree_html,
-                    report_html
                 ]
             )
-            # File analysis
             analyze_file_btn.click(
                 fn=run_pipeline_from_file,
-                inputs=[fasta_file, similarity_slider, ml_tree_checkbox],
                 outputs=[
-                    boundary_output,
-                    keras_output,
-                    ml_tree_output,
-                    tree_analysis_output,
-                    summary_output,
-                    aligned_file,
-                    tree_file,
-                    gr.State(),  # placeholder for additional outputs
-                    gr.State(),  # placeholder for additional outputs
-                    tree_html,
-                    report_html
                 ]
             )
-            # Clear button
-            clear_btn.click(
-                fn=clear_all,
-                outputs=[
-                    dna_input,
-                    fasta_file,
-                    boundary_output,
-                    keras_output,
-                    ml_tree_output,
-                    tree_analysis_output,
-                    summary_output,
-                    tree_html,
-                    report_html,
-                    aligned_file,
-                    tree_file
-                ]
-            )
-            # Examples
-            gr.Markdown("## 🧪 Example Sequences")
             gr.Examples(
-                examples=[
-                    ["ATGAAACTGCAGCTGAGGTCCCTGGTGGTGAACAAGCTCAGCAGCAAGTGCTGAACTGGATGGGCGAGAAGAGCAACTGCATCCAGTGCAAGCGCCTGAAGAGGAACTGCAAGAAGGTGGTGGACCTGCAGTGCAGCAGCAGCAGCAGCAGCAGCAGCAGC", 95.0, False],
-                    ["ATGAAACTGCAGCTGAGGTCCCTGGTGGTGAACAAGCTCAGCAGCAAGTGCTGAACTGGATGGGCGAGAAGAGCAACTGCATCCAGTGCAAGCGCCTGAAGAGGAACTGCAAGAAGGTGGTGGACCTGCAGTGCAGCAGCAGCAGCAGCAGCAGCAGCAGC", 85.0, True],
-                    ["ATGGAGCTGCAGCTGAGGTCCCTGGTGGTGAACAAGCTCAGCAGCAAGTGCTGAACTGGATGGGCGAGAAGAGCAACTGCATCCAGTGCAAGCGCCTGAAGAGGAACTGCAAGAAGGTGGTGGACCTGCAG", 90.0, False]
-                ],
-                inputs=[dna_input, similarity_slider, ml_tree_checkbox],
                 label="Click to load example sequences"
             )
-            # Footer
-            gr.Markdown("""
-            ---
-            ### 🔬 About This Pipeline
-            This tool performs comprehensive analysis of DNA sequences using multiple approaches:
-            - **🎯 Boundary Detection**: Identifies F gene regions using ML models
-            - **🧠 Keras Validation**: Neural network-based sequence validation
-            - **🌲 ML Tree Placement**: Phylogenetic placement using MAFFT + IQ-TREE
-            - **📈 Tree Analysis**: Interactive phylogenetic analysis and visualization
-            ### 📝 Usage Notes
-            - Sequences should be in ATCG format (other characters will be converted to N)
-            - Minimum 100 bp recommended for phylogenetic placement
-            - Higher similarity thresholds = fewer but more similar sequences
-            - ML tree building requires MAFFT and IQ-TREE (slower but more accurate)
-            ### ⚠️ System Requirements
-            - Python packages: gradio, torch, tensorflow, biopython, plotly
-            - Bioinformatics tools: MAFFT, IQ-TREE (optional for ML placement)
-            - Pre-trained models: boundary detection + keras validation models
-            """)
         return iface
     except Exception as e:
         logger.error(f"Failed to create Gradio interface: {e}")
-        # Fallback simple interface
-        with gr.Blocks() as fallback_iface:
-            gr.Markdown("# 🧬 Gene Analysis Pipeline (Fallback Mode)")
-            gr.Markdown(f"⚠️ Error creating full interface: {str(e)}")
-            dna_input = gr.Textbox(label="DNA Sequence", lines=5)
-            analyze_btn = gr.Button("Analyze")
-            output = gr.Textbox(label="Results", lines=10)
-            analyze_btn.click(
-                fn=lambda seq: run_pipeline(seq, 95.0, False)[4],  # Just return summary
-                inputs=[dna_input],
-                outputs=[output]
-            )
-        return fallback_iface
 # --- Application Startup ---
-if __name__ == "__main__":
     try:
-        # Create Gradio interface
-        gr_interface = create_gradio_interface()
-        # Mount Gradio app to FastAPI
-        gr_app = gr.mount_gradio_app(app, gr_interface, path="/gradio")
-        # Log startup info
-        logger.info("🚀 Starting Gene Analysis Pipeline...")
-        logger.info(f"📁 Base directory: {BASE_DIR}")
-        logger.info(f"🤖 Models loaded: Boundary={boundary_model is not None}, Keras={keras_model is not None}")
-        logger.info(f"🌳 Tree analyzer: {analyzer is not None}")
-        mafft_available, iqtree_available, _, _ = check_tool_availability()
-        logger.info(f"🔬 Tools available: MAFFT={mafft_available}, IQ-TREE={iqtree_available}")
-        # Start server
-        logger.info("🌐 Starting server on http://0.0.0.0:7860")
-        logger.info("📊 FastAPI docs: http://0.0.0.0:7860/docs")
-        logger.info("🎮 Gradio interface: http://0.0.0.0:7860/gradio")
-        uvicorn.run(
-            app,
-            host="0.0.0.0",
-            port=7860,
-            log_level="info",
-            access_log=True
-        )
     except Exception as e:
-        logger.error(f"❌ Startup failed: {e}")
-        print(f"❌ Failed to start application: {e}")
-        sys.exit(1)

+import os
+# Disable GPU to avoid CUDA errors
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+# Suppress TensorFlow warnings
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 import gradio as gr
 import torch
 import pickle
 import subprocess
 import pandas as pd
 import re
 import logging
 import numpy as np
 import stat
 import time
 import asyncio
 from fastapi import FastAPI, File, UploadFile, Form, HTTPException
 from fastapi.responses import HTMLResponse
 from pydantic import BaseModel
 from typing import Optional
 import uvicorn
+# Log Gradio version
+logger = logging.getLogger(__name__)
+logger.info(f"Gradio version: {gr.__version__}")
 # Set event loop policy for Spaces
 try:
     asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
 log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 log_handler = logging.StreamHandler()
 log_handler.setFormatter(log_formatter)
 try:
     file_handler = logging.FileHandler('/tmp/app.log')
     file_handler.setFormatter(log_formatter)
 except Exception:
     logging.basicConfig(level=logging.INFO, handlers=[log_handler])
 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+MAFFT_PATH = os.path.join(BASE_DIR, "binaries", "mafft", "mafft")
+IQTREE_PATH = os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
 ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
 TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
+QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
 os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
+# --- Model Configuration ---
+MODEL_REPO = "GGproject10/best_boundary_aware_model"
+CSV_PATH = "f cleaned.csv"
 # Initialize models as None
 boundary_model = None
 kmer_to_index = None
 analyzer = None
+# --- Model Loading ---
 def load_models_safely():
     # Populate the module-level handles boundary_model, keras_model,
     # kmer_to_index and analyzer. Each of the three stages below runs in its
     # own try/except, so a failure is logged and the remaining stages still run.
     global boundary_model, keras_model, kmer_to_index, analyzer
+    logger.info("🔍 Loading models...")
+    # Load Boundary Model
     try:
+        boundary_path = hf_hub_download(
+            repo_id=MODEL_REPO,
+            filename="best_boundary_aware_model.pth",
+            token=None
+        )
         # The value returned by hf_hub_download is used as a local file path.
+        if os.path.exists(boundary_path):
+            boundary_model = EnhancedGenePredictor(boundary_path)
+            logger.info("✅ Boundary model loaded successfully from Hugging Face Hub.")
         else:
+            logger.error(f"❌ Boundary model file not found after download from {MODEL_REPO}")
     except Exception as e:
+        logger.error(f"❌ Failed to load boundary model from HF Hub: {e}. Ensure {MODEL_REPO} is public and accessible.")
+    # Load Keras Model
     try:
+        keras_path = hf_hub_download(
+            repo_id=MODEL_REPO,
+            filename="best_model.keras",
+            token=None
+        )
+        kmer_path = hf_hub_download(
+            repo_id=MODEL_REPO,
+            filename="kmer_to_index.pkl",
+            token=None
+        )
         # Both files must be present; the model and its k-mer index are only
         # assigned together inside this branch.
+        if os.path.exists(keras_path) and os.path.exists(kmer_path):
+            keras_model = load_model(keras_path)
+            with open(kmer_path, "rb") as f:
                 # NOTE(review): pickle.load can execute arbitrary code from the
                 # file it reads; this relies on MODEL_REPO content being trusted.
                 kmer_to_index = pickle.load(f)
+            logger.info("✅ Keras model and k-mer index loaded successfully from Hugging Face Hub.")
         else:
+            logger.error(f"❌ Keras model or kmer files not found after download from {MODEL_REPO}")
     except Exception as e:
+        logger.error(f"❌ Failed to load Keras model from HF Hub: {e}. Ensure {MODEL_REPO} is public and accessible.")
     # Initialize Tree Analyzer
     try:
         logger.info("🌳 Initializing tree analyzer...")
         analyzer = PhylogeneticTreeAnalyzer()
         # Candidate CSV locations, tried in this order; the first one that
         # exists and for which analyzer.load_data() returns truthy wins.
         csv_candidates = [
+            CSV_PATH,
+            os.path.join(BASE_DIR, CSV_PATH),
+            os.path.join(BASE_DIR, "app", CSV_PATH),
+            os.path.join(os.path.dirname(__file__), CSV_PATH),
             "f_cleaned.csv",
+            os.path.join(BASE_DIR, "f_cleaned.csv")
         ]
         csv_loaded = False
         for csv_candidate in csv_candidates:
             if os.path.exists(csv_candidate):
+                logger.info(f"📊 Trying CSV: {csv_candidate}")
                 try:
                     if analyzer.load_data(csv_candidate):
+                        logger.info(f"✅ CSV loaded from: {csv_candidate}")
                         csv_loaded = True
                         break
                 except Exception as e:
                     # A candidate that fails to load is skipped, not fatal.
+                    logger.warning(f"CSV load failed for {csv_candidate}: {e}")
                     continue
         if not csv_loaded:
             # Without data the analyzer is reset to None, which the rest of
             # the module treats as "tree analysis unavailable".
+            logger.error("❌ Failed to load CSV data from any candidate location. Place 'f cleaned.csv' in project root.")
             analyzer = None
+        else:
             # Training failure only produces a warning; the analyzer stays set.
+            try:
+                if analyzer.train_ai_model():
+                    logger.info("✅ AI model training completed successfully")
+                else:
+                    logger.warning("⚠️ AI model training failed; proceeding with basic analysis.")
+            except Exception as e:
+                logger.warning(f"⚠️ AI model training failed: {e}")
     except Exception as e:
+        logger.error(f"❌ Tree analyzer initialization failed: {e}")
         analyzer = None
 # Load models at startup
 def check_tool_availability():
     # Probe for MAFFT and IQ-TREE executables and return the 4-tuple
     # (mafft_available, iqtree_available, mafft_cmd, iqtree_cmd).
     setup_binary_permissions()
     mafft_available = False
     mafft_cmd = None
     # Names resolved through PATH first, then fixed system paths, then the
     # copy bundled under BASE_DIR (MAFFT_PATH).
     mafft_candidates = ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH]
     for candidate in mafft_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
                 result = subprocess.run(
+                    [candidate, "--help"],
+                    capture_output=True,
+                    text=True,
                     timeout=5
                 )
                 # A non-zero exit is still accepted when stderr mentions the tool name.
                 # NOTE(review): in the lines visible here nothing assigns
                 # mafft_available / mafft_cmd before the break; confirm against
                 # the full file that the working candidate is recorded.
                 if result.returncode == 0 or "mafft" in result.stderr.lower():
                     break
             except Exception as e:
                 # Includes the 5 s timeout; the next candidate is tried.
                 logger.debug(f"MAFFT test failed for {candidate}: {e}")
     iqtree_available = False
     iqtree_cmd = None
     iqtree_candidates = ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH]
     for candidate in iqtree_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
                 result = subprocess.run(
+                    [candidate, "--help"],
+                    capture_output=True,
+                    text=True,
                     timeout=5
                 )
                 # NOTE(review): same as above, no visible assignment of
                 # iqtree_available / iqtree_cmd before the break.
                 if result.returncode == 0 or "iqtree" in result.stderr.lower():
                     break
             except Exception as e:
                 logger.debug(f"IQ-TREE test failed for {candidate}: {e}")
     return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
+# --- Pipeline Functions ---
 def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
     # Add the query to the reference alignment with MAFFT, then run IQ-TREE on
     # the combined alignment. Returns (success, message, aligned_path, treefile_path);
     # the two paths are None when the corresponding step did not complete.
     try:
         # NOTE(review): the length check strips whitespace, but the unstripped
         # sequence is what gets written to the query FASTA below.
         if len(sequence.strip()) < 100:
             return False, "Sequence too short (<100 bp).", None, None
         # A random 8-hex-digit suffix keeps per-query file names distinct.
         query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
         query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
         aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
         output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
         if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
             return False, "Reference alignment or tree not found.", None, None
         query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
         SeqIO.write([query_record], query_fasta, "fasta")
         # MAFFT writes the alignment to stdout, which is redirected into the
         # output file. check=True turns a non-zero exit into an exception that
         # the handler at the bottom converts into a failure tuple.
         with open(aligned_with_query, "w") as output_file:
             subprocess.run([
                 mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH
             ], stdout=output_file, stderr=subprocess.PIPE, text=True, timeout=600, check=True)
         if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
             return False, "MAFFT alignment failed.", None, None
         # TREE_PATH is passed via -g (IQ-TREE's constraint-tree option);
         # -redo overwrites any earlier output sharing the same -pre prefix.
         subprocess.run([
+            iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH,
             "-m", "GTR+G", "-pre", output_prefix, "-redo"
         ], capture_output=True, text=True, timeout=1200, check=True)
         treefile = f"{output_prefix}.treefile"
         if not os.path.exists(treefile):
             # The alignment succeeded, so its path is still returned.
             return False, "IQ-TREE placement failed.", aligned_with_query, None
         success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
         return True, success_msg, aligned_with_query, treefile
     except Exception as e:
         # Covers tool failures (CalledProcessError), timeouts and I/O errors.
         logger.error(f"Phylogenetic placement failed: {e}")
         return False, f"Error: {str(e)}", None, None
             except:
                 pass
def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
    """Run the similarity-based tree analysis for one query sequence.

    Returns a 3-tuple ``(status_message, tree_html_path, report_html_path)``.
    On any validation failure or exception both paths are ``None`` and the
    message starts with "❌". ``report_html_path`` is also ``None`` when the
    analyzer reports that no detailed report was generated.
    """

    def _rejected(message):
        # Every failure shares the same shape: a message and no artefacts.
        return message, None, None

    try:
        if not analyzer:
            return _rejected("❌ Tree analyzer not initialized.")
        if not sequence or len(sequence.strip()) < 10:
            return _rejected("❌ Invalid sequence.")
        if not (1 <= matching_percentage <= 99):
            return _rejected("❌ Matching percentage must be 1-99.")
        if not analyzer.find_query_sequence(sequence):
            return _rejected("❌ Sequence not accepted.")

        hits, achieved_pct = analyzer.find_similar_sequences(matching_percentage)
        if not hits:
            return _rejected(f"❌ No similar sequences at {matching_percentage}% threshold.")

        # The analyzer is stateful: build the tree before rendering it.
        analyzer.build_tree_structure_with_ml_safe(hits)
        figure = analyzer.create_interactive_tree(hits, achieved_pct)

        # Fall back to a timestamp-based id when the analyzer has none.
        run_id = analyzer.query_id or f"query_{int(time.time())}"
        tree_html_path = os.path.join("/tmp", f'phylogenetic_tree_{run_id}.html')
        figure.write_html(tree_html_path)

        # The threshold is stored on the analyzer before the report is generated.
        analyzer.matching_percentage = matching_percentage
        if analyzer.generate_detailed_report(hits, achieved_pct):
            report_html_path = os.path.join("/tmp", f'detailed_report_{run_id}.html')
        else:
            report_html_path = None

        status = f"✅ Found {len(hits)} sequences at {achieved_pct:.2f}% similarity."
        return status, tree_html_path, report_html_path
    except Exception as e:
        logger.error(f"Tree analysis failed: {e}")
        return _rejected(f"❌ Error: {str(e)}")
 def predict_with_keras(sequence):
     """Score *sequence* with the Keras model and return a status string.

     The sequence is split into overlapping 6-mers, each mapped through
     ``kmer_to_index`` (unknown k-mers map to 0). The last element of the
     first prediction row is turned into a percentage, 5 is added, and the
     result is clamped to the 0-100 range. Failures yield a "❌" message.
     """
     window = 6
     try:
         if not keras_model or not kmer_to_index:
             return "❌ Keras model not available."
         if len(sequence) < window:
             return "❌ Sequence too short (<6 bp)."
         encoded = [
             kmer_to_index.get(sequence[start:start + window], 0)
             for start in range(len(sequence) - window + 1)
         ]
         # A single-row batch goes in; only the first output row is used.
         batch = np.array([encoded])
         scores = keras_model.predict(batch, verbose=0)[0]
         shifted = int(scores[-1] * 100 + 5)
         percentage = min(100, max(0, shifted))
         return f"✅ {percentage}% F gene confidence"
     except Exception as e:
         logger.error(f"Keras prediction failed: {e}")
         return f"❌ Error: {str(e)}"
def read_fasta_file(file_obj):
    """Return the sequence text contained in a FASTA source.

    Args:
        file_obj: ``None``, a filesystem path (``str``), or a file-like object
            whose ``read()`` returns either ``bytes`` (decoded as UTF-8) or
            ``str``.

    Returns:
        All non-header lines, stripped and concatenated. When the input holds
        several records their sequences are joined into one string. An empty
        string is returned for ``None`` or when reading fails (the error is
        logged).
    """
    if file_obj is None:
        return ""
    try:
        if isinstance(file_obj, str):
            with open(file_obj, "r", encoding="utf-8") as handle:
                content = handle.read()
        else:
            raw = file_obj.read()
            # Text-mode handles already return str; only bytes need decoding.
            if isinstance(raw, (bytes, bytearray)):
                content = raw.decode("utf-8")
            else:
                content = raw
        # Strip before testing for '>' so an indented header line is still
        # recognised; splitlines() also copes with CRLF line endings.
        stripped_lines = (line.strip() for line in content.splitlines())
        return ''.join(line for line in stripped_lines if not line.startswith(">"))
    except Exception as e:
        logger.error(f"Failed to read FASTA file: {e}")
        return ""
 def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
     try:
         dna_input = dna_input.upper().strip()
         if not dna_input:
             return "❌ Empty input", "", "", "", "", None, None, None, None, "No input", "No input"
         if not re.match('^[ACTGN]+$', dna_input):
             dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
         processed_sequence = dna_input
         boundary_output = ""
         if boundary_model:
             try:
                 processed_sequence = dna_input
         else:
             boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"
         keras_output = predict_with_keras(processed_sequence) if processed_sequence and len(processed_sequence) >= 6 else "❌ Sequence too short."
         aligned_file = None
         phy_file = None
         ml_tree_output = ""
         if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
             try:
                 mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
             ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
         else:
             ml_tree_output = "⚠️ Phylogenetic placement skipped."
         tree_html_content = "No tree generated."
         report_html_content = "No report generated."
         simplified_ml_output = ""
         if analyzer and processed_sequence and len(processed_sequence) >= 10:
             try:
                 tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
                 simplified_ml_output = tree_result
                 if tree_html_path and os.path.exists(tree_html_path):
                     with open(tree_html_path, 'r', encoding='utf-8') as f:
                         tree_html_content = f.read()
                 else:
                     tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
                 if report_html_path and os.path.exists(report_html_path):
                     with open(report_html_path, 'r', encoding='utf-8') as f:
                         report_html_content = f.read()
                 else:
                     report_html_content = f"<div style='color: red;'>{tree_result}</div>"
             except Exception as e:
                 simplified_ml_output = f"❌ Tree analysis error: {str(e)}"
                 tree_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
             simplified_ml_output = "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
             tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
             report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
         summary_output = f"""
 📊 ANALYSIS SUMMARY:
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 Tree Analysis: {'✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'}
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 """
         return (
             boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
             aligned_file, phy_file, None, None, tree_html_content, report_html_content
         )
     except Exception as e:
         logger.error(f"Pipeline error: {e}")
         error_msg = f"❌ Pipeline Error: {str(e)}"
         return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
 async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
     try:
         dna_input = read_fasta_file(fasta_file_obj)
             },
             "paths": {
                 "base_dir": BASE_DIR,
+                "query_output_dir": QUERY_OUTPUT_DIR
             }
         }
     except Exception as e:
     except Exception as e:
         logger.error(f"Analyze error: {e}")
         return AnalysisResponse(
+            boundary_output="", keras_output="", ml_tree_output="",
             tree_analysis_output="", summary_output="",
             success=False, error_message=str(e)
         )
 @app.post("/analyze-file")
 async def analyze_file(
+    file: UploadFile = File(...),
+    similarity_score: float = Form(95.0),
     build_ml_tree: bool = Form(False)
 ):
     temp_file_path = None
             content = await file.read()
             temp_file.write(content)
             temp_file_path = temp_file.name
         result = await run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
         return AnalysisResponse(
             boundary_output=result[0] or "",
             keras_output=result[1] or "",
     except Exception as e:
         logger.error(f"Analyze-file error: {e}")
         return AnalysisResponse(
+            boundary_output="", keras_output="", ml_tree_output="",
             tree_analysis_output="", summary_output="",
             success=False, error_message=str(e)
         )
             except:
                 pass
+# --- Gradio Interface ---
 def create_gradio_interface():
     try:
         with gr.Blocks(
             .error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
             """
         ) as iface:
             gr.Markdown("# 🧬 Gene Analysis Pipeline")
             with gr.Row():
                 with gr.Column():
                     status_display = gr.HTML(value=f"""
                         <h3>🔧 System Status</h3>
                         <p>🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}</p>
                         <p>🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}</p>
+                        <p>🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}</p>
+                        <p>🧬 MAFFT: {'✅ Available' if check_tool_availability()[0] else '❌ Missing'}</p>
+                        <p>🌲 IQ-TREE: {'✅ Available' if check_tool_availability()[1] else '❌ Missing'}</p>
+                    </div>
                     """)
             with gr.Tabs():
                 with gr.TabItem("📝 Text Input"):
+                    with gr.Row():
+                        with gr.Column(scale=2):
+                            dna_input = gr.Textbox(
+                                label="🧬 DNA Sequence",
+                                placeholder="Enter DNA sequence (ATCG format)...",
+                                lines=5,
+                                description="Paste your DNA sequence here"
+                            )
+                        with gr.Column(scale=1):
+                            similarity_score = gr.Slider(
+                                minimum=1,
+                                maximum=99,
+                                value=95.0,
+                                step=1.0,
+                                label="🎯 Similarity Threshold (%)",
+                                description="Minimum similarity for tree analysis"
+                            )
+                            build_ml_tree = gr.Checkbox(
+                                label="🌲 Build ML Tree",
+                                value=False,
+                                description="Generate phylogenetic placement (slower)"
+                            )
+                            analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
                 with gr.TabItem("📁 File Upload"):
+                    with gr.Row():
+                        with gr.Column(scale=2):
+                            file_input = gr.File(
+                                label="📄 Upload FASTA File",
+                                file_types=[".fasta", ".fa", ".fas", ".txt"],
+                                description="Upload a FASTA file containing your sequence"
+                            )
+                        with gr.Column(scale=1):
+                            file_similarity_score = gr.Slider(
+                                minimum=1,
+                                maximum=99,
+                                value=95.0,
+                                step=1.0,
+                                label="🎯 Similarity Threshold (%)",
+                                description="Minimum similarity for tree analysis"
+                            )
+                            file_build_ml_tree = gr.Checkbox(
+                                label="🌲 Build ML Tree",
+                                value=False,
+                                description="Generate phylogenetic placement (slower)"
+                            )
+                            analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary")
+            gr.Markdown("## 📊 Analysis Results")
             with gr.Row():
                 with gr.Column():
                     boundary_output = gr.Textbox(
+                        label="🎯 Boundary Detection",
+                        interactive=False,
+                        lines=2
                     )
                     keras_output = gr.Textbox(
+                        label="🧠 F Gene Validation",
+                        interactive=False,
+                        lines=2
                     )
+                with gr.Column():
                     ml_tree_output = gr.Textbox(
+                        label="🌲 Phylogenetic Placement",
+                        interactive=False,
+                        lines=2
                     )
                     tree_analysis_output = gr.Textbox(
+                        label="🌳 Tree Analysis",
+                        interactive=False,
+                        lines=2
                     )
+            summary_output = gr.Textbox(
+                label="📋 Summary",
+                interactive=False,
+                lines=8
+            )
+            with gr.Row():
+                aligned_file = gr.File(label="📄 Alignment File", visible=False)
+                tree_file = gr.File(label="🌲 Tree File", visible=False)
             with gr.Tabs():
                 with gr.TabItem("🌳 Interactive Tree"):
                     tree_html = gr.HTML(
+                        label="Phylogenetic Tree",
+                        value="<div style='text-align: center; padding: 20px; color: #666;'>No tree generated yet.</div>"
                     )
                 with gr.TabItem("📊 Detailed Report"):
                     report_html = gr.HTML(
                         label="Analysis Report",
+                        value="<div style='text-align: center; padding: 20px; color: #666;'>No report generated yet.</div>"
                     )
+            analyze_btn.click(
                 fn=run_pipeline,
+                inputs=[dna_input, similarity_score, build_ml_tree],
                 outputs=[
+                    boundary_output, keras_output, ml_tree_output,
+                    tree_analysis_output, summary_output,
+                    aligned_file, tree_file, gr.State(), gr.State(),
+                    tree_html, report_html
                 ]
             )
             analyze_file_btn.click(
                 fn=run_pipeline_from_file,
+                inputs=[file_input, file_similarity_score, file_build_ml_tree],
                 outputs=[
+                    boundary_output, keras_output, ml_tree_output,
+                    tree_analysis_output, summary_output,
+                    aligned_file, tree_file, gr.State(), gr.State(),
+                    tree_html, report_html
                 ]
             )
+            gr.Markdown("## 🔬 Example Sequences")
+            example_sequences = [
+                ["ATGGACTTCCAAATTAACAACCTCAACAACCTCAACAACATCAACAACATCAACAACATCAACAACATCAACAAC", 90.0, False],
+                ["ATGAAACAAATTAACAACCTCAACAACCTCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAAC", 85.0, True]
+            ]
             gr.Examples(
+                examples=example_sequences,
+                inputs=[dna_input, similarity_score, build_ml_tree],
                 label="Click to load example sequences"
             )
+            with gr.Accordion("❓ Help & Information", open=False):
+                gr.Markdown("""
+                ### 🧬 Gene Analysis Pipeline
+                This tool performs comprehensive analysis of F gene sequences:
+                **🎯 Boundary Detection**: Identifies F gene regions within your sequence
+                **🧠 F Gene Validation**: Validates sequence as F gene using deep learning
+                **🌲 Phylogenetic Placement**: Places sequence in reference phylogeny
+                **🌳 Tree Analysis**: Finds similar sequences and builds interactive trees
+                ### 📋 Input Requirements
+                - DNA sequences in ATCG format
+                - Minimum 10 bp for basic analysis
+                - Minimum 100 bp for phylogenetic placement
+                - FASTA files supported for upload
+                ### ⚙️ Parameters
+                - **Similarity Threshold**: Minimum % similarity for tree analysis (1-99%)
+                - **Build ML Tree**: Enable phylogenetic placement (requires MAFFT/IQ-TREE)
+                ### 📊 Output Files
+                - Alignment files (.fa format)
+                - Tree files (.treefile format)
+                - Interactive HTML visualizations
+                """)
         return iface
     except Exception as e:
         logger.error(f"Failed to create Gradio interface: {e}")
+        return None
 # --- Application Startup ---
def mount_gradio_app():
    """Build the Gradio UI and mount it on the FastAPI app at ``/gradio``.

    Failures are logged rather than raised, so the API can still start
    without the UI.
    """
    # `app` is rebound below. Without this declaration Python treats it as a
    # local name, the read inside gr.mount_gradio_app(app, ...) raises
    # UnboundLocalError, and the handler below would hide the failure.
    global app
    try:
        gradio_app = create_gradio_interface()
        # create_gradio_interface() returns None when construction failed.
        if gradio_app is not None:
            app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
            logger.info("✅ Gradio interface mounted at /gradio")
        else:
            logger.error("❌ Failed to create Gradio interface")
    except Exception as e:
        logger.error(f"❌ Failed to mount Gradio app: {e}")
+# Initialize Gradio
+mount_gradio_app()
+# --- Main Application ---
if __name__ == "__main__":
    # Script entry point: parse the CLI options, then serve either the
    # standalone Gradio UI or the FastAPI application through uvicorn.
    import argparse
    import sys  # local import so the sys.exit() calls below always resolve

    parser = argparse.ArgumentParser(description="🧬 Gene Analysis Pipeline")
    parser.add_argument("--host", default="0.0.0.0", help="Host address")
    parser.add_argument("--port", type=int, default=7860, help="Port number")
    parser.add_argument("--reload", action="store_true", help="Enable auto-reload")
    parser.add_argument("--gradio-only", action="store_true", help="Run Gradio interface only")
    args = parser.parse_args()

    if args.gradio_only:
        logger.info("🚖 Starting Gradio interface only...")
        iface = create_gradio_interface()
        if iface:
            iface.launch(
                server_name=args.host,
                server_port=args.port,
                share=False,
                show_error=True
            )
        else:
            logger.error("Failed to create Gradio interface")
            sys.exit(1)
    else:
        # Advertise URLs that follow --host/--port instead of a fixed
        # localhost:7860. Wildcard bind addresses cannot be browsed to, so
        # they are shown as localhost.
        display_host = "localhost" if args.host in ("0.0.0.0", "::") else args.host
        base_url = f"http://{display_host}:{args.port}"
        logger.info(f"🚖 Starting Gene Analysis Pipeline on {args.host}:{args.port}")
        logger.info(f"📖 API Documentation: {base_url}/docs")
        logger.info(f"🧬 Gradio Interface: {base_url}/gradio")
        try:
            uvicorn.run(
                # uvicorn can only reload when given an import string.
                "app:app" if args.reload else app,
                host=args.host,
                port=args.port,
                reload=args.reload,
                log_level="info"
            )
        except KeyboardInterrupt:
            logger.info("🛑 Application stopped by user")
        except Exception as e:
            logger.error(f"❌ Application failed: {e}")
            sys.exit(1)