re-type committed on
Commit
7537055
·
verified ·
1 Parent(s): d5239ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +515 -373
app.py CHANGED
@@ -22,8 +22,6 @@ from Bio.SeqRecord import SeqRecord
22
  import stat
23
  import time
24
  import asyncio
25
-
26
- # FastAPI imports
27
  from fastapi import FastAPI, File, UploadFile, Form, HTTPException
28
  from fastapi.responses import HTMLResponse
29
  from pydantic import BaseModel
@@ -36,19 +34,19 @@ try:
36
  except Exception:
37
  pass
38
 
 
 
 
39
  # --- Enhanced Logging ---
40
  log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
41
  log_handler = logging.StreamHandler()
42
  log_handler.setFormatter(log_formatter)
43
-
44
- # File handler with error handling
45
  try:
46
  file_handler = logging.FileHandler('/tmp/app.log')
47
  file_handler.setFormatter(log_formatter)
48
  logging.basicConfig(level=logging.INFO, handlers=[log_handler, file_handler])
49
  except Exception:
50
  logging.basicConfig(level=logging.INFO, handlers=[log_handler])
51
-
52
  logger = logging.getLogger(__name__)
53
 
54
  # --- Global Variables ---
@@ -60,21 +58,13 @@ ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
60
  TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
61
  QUERY_OUTPUT_DIR = os.path.join("/tmp", "queries")
62
  os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
 
 
63
 
64
  # --- Model Configuration ---
65
- boundary_model_repo = "GGproject10/best_boundary_aware_model"
66
- other_models_repo = "GGproject10/simplified_tree_AI"
67
-
68
- # Try multiple CSV locations
69
- csv_candidates = [
70
- os.path.join(BASE_DIR, "f_cleaned.csv"),
71
- os.path.join(BASE_DIR, "f cleaned.csv"),
72
- "f_cleaned.csv",
73
- os.path.join(BASE_DIR, "data", "f_cleaned.csv"),
74
- os.path.join(MODELS_DIR, "f_cleaned.csv")
75
- ]
76
-
77
- hf_token = os.getenv("HF_TOKEN")
78
 
79
  # Initialize models as None
80
  boundary_model = None
@@ -82,44 +72,35 @@ keras_model = None
82
  kmer_to_index = None
83
  analyzer = None
84
 
85
- # --- Create directories ---
86
- os.makedirs(MODELS_DIR, exist_ok=True)
87
- os.makedirs("/tmp/hf_cache", exist_ok=True)
88
-
89
- # --- Enhanced Model Loading ---
90
  def load_models_safely():
91
  global boundary_model, keras_model, kmer_to_index, analyzer
92
-
93
  logger.info(f"🔍 Looking for models in: {MODELS_DIR}")
94
  logger.info(f"📁 Models directory exists: {os.path.exists(MODELS_DIR)}")
95
- logger.info(f"🔑 HF_TOKEN available: {hf_token is not None}")
96
-
97
  # Load Boundary Model
98
  try:
99
  local_boundary_path = os.path.join(MODELS_DIR, "best_boundary_aware_model.pth")
100
-
101
  if os.path.exists(local_boundary_path):
102
  logger.info(f"✅ Loading boundary model from local: {local_boundary_path}")
103
  boundary_model = EnhancedGenePredictor(local_boundary_path)
104
  logger.info("✅ Boundary model loaded successfully")
105
- elif hf_token:
106
- logger.info("🌐 Downloading boundary model from HF...")
107
- try:
108
- boundary_path = hf_hub_download(
109
- repo_id=boundary_model_repo,
110
- filename="best_boundary_aware_model.pth",
111
- token=hf_token,
112
- cache_dir="/tmp/hf_cache",
113
- local_dir=MODELS_DIR,
114
- local_dir_use_symlinks=False
115
- )
116
- if os.path.exists(boundary_path):
117
- boundary_model = EnhancedGenePredictor(boundary_path)
118
- logger.info("✅ Boundary model downloaded and loaded")
119
- else:
120
- logger.warning("❌ Boundary model download failed")
121
- except Exception as e:
122
- logger.error(f"❌ HF download failed: {e}")
123
  else:
124
  logger.warning("❌ No boundary model found and no HF_TOKEN")
125
  except Exception as e:
@@ -130,43 +111,37 @@ def load_models_safely():
130
  try:
131
  local_keras_path = os.path.join(MODELS_DIR, "best_model.keras")
132
  local_kmer_path = os.path.join(MODELS_DIR, "kmer_to_index.pkl")
133
-
134
  if os.path.exists(local_keras_path) and os.path.exists(local_kmer_path):
135
- logger.info(f"��� Loading Keras model from local files")
136
  keras_model = load_model(local_keras_path)
137
  with open(local_kmer_path, "rb") as f:
138
  kmer_to_index = pickle.load(f)
139
  logger.info("✅ Keras model loaded successfully")
140
-
141
- elif hf_token:
142
- logger.info("🌐 Downloading Keras model from HF...")
143
- try:
144
- keras_path = hf_hub_download(
145
- repo_id=other_models_repo,
146
- filename="best_model.keras",
147
- token=hf_token,
148
- cache_dir="/tmp/hf_cache",
149
- local_dir=MODELS_DIR,
150
- local_dir_use_symlinks=False
151
- )
152
- kmer_path = hf_hub_download(
153
- repo_id=other_models_repo,
154
- filename="kmer_to_index.pkl",
155
- token=hf_token,
156
- cache_dir="/tmp/hf_cache",
157
- local_dir=MODELS_DIR,
158
- local_dir_use_symlinks=False
159
- )
160
-
161
- if os.path.exists(keras_path) and os.path.exists(kmer_path):
162
- keras_model = load_model(keras_path)
163
- with open(kmer_path, "rb") as f:
164
- kmer_to_index = pickle.load(f)
165
- logger.info("✅ Keras model downloaded and loaded")
166
- else:
167
- logger.warning("❌ Keras model download failed")
168
- except Exception as e:
169
- logger.error(f"❌ Keras HF download failed: {e}")
170
  else:
171
  logger.warning("❌ No Keras model found and no HF_TOKEN")
172
  except Exception as e:
@@ -178,12 +153,18 @@ def load_models_safely():
178
  try:
179
  logger.info("🌳 Initializing tree analyzer...")
180
  analyzer = PhylogeneticTreeAnalyzer()
181
-
 
 
 
 
 
 
182
  csv_loaded = False
183
  for csv_candidate in csv_candidates:
184
  if os.path.exists(csv_candidate):
 
185
  try:
186
- logger.info(f"📊 Trying CSV: {csv_candidate}")
187
  if analyzer.load_data(csv_candidate):
188
  logger.info(f"✅ CSV loaded from: {csv_candidate}")
189
  csv_loaded = True
@@ -191,29 +172,27 @@ def load_models_safely():
191
  except Exception as e:
192
  logger.warning(f"CSV load failed for {csv_candidate}: {e}")
193
  continue
194
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  if not csv_loaded:
196
  logger.error("❌ No CSV data loaded")
197
- if hf_token:
198
- try:
199
- logger.info("🌐 Downloading CSV from HF...")
200
- csv_path = hf_hub_download(
201
- repo_id=other_models_repo,
202
- filename="f_cleaned.csv",
203
- token=hf_token,
204
- cache_dir="/tmp/hf_cache",
205
- local_dir=BASE_DIR,
206
- local_dir_use_symlinks=False
207
- )
208
- if analyzer.load_data(csv_path):
209
- logger.info("✅ CSV downloaded and loaded")
210
- csv_loaded = True
211
- except Exception as e:
212
- logger.error(f"❌ CSV HF download failed: {e}")
213
-
214
- if not csv_loaded:
215
- analyzer = None
216
-
217
  except Exception as e:
218
  logger.error(f"❌ Tree analyzer initialization failed: {e}")
219
  analyzer = None
@@ -233,187 +212,254 @@ def setup_binary_permissions():
233
 
234
  def check_tool_availability():
235
  setup_binary_permissions()
236
-
237
- # Check MAFFT
238
  mafft_available = False
239
  mafft_cmd = None
240
  mafft_candidates = ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH]
241
-
242
  for candidate in mafft_candidates:
243
  if shutil.which(candidate) or os.path.exists(candidate):
244
  try:
245
  result = subprocess.run(
246
- [candidate, "--help"],
247
- capture_output=True,
248
- text=True,
249
  timeout=5
250
  )
251
  if result.returncode == 0 or "mafft" in result.stderr.lower():
252
  mafft_available = True
253
  mafft_cmd = candidate
254
- logger.info(f"✅ MAFFT found: {candidate}")
255
  break
256
  except Exception as e:
257
  logger.debug(f"MAFFT test failed for {candidate}: {e}")
258
-
259
- # Check IQ-TREE
260
  iqtree_available = False
261
  iqtree_cmd = None
262
- iqtree_candidates = ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', IQTREE_PATH]
263
-
264
  for candidate in iqtree_candidates:
265
  if shutil.which(candidate) or os.path.exists(candidate):
266
  try:
267
  result = subprocess.run(
268
- [candidate, "--help"],
269
- capture_output=True,
270
- text=True,
271
  timeout=5
272
  )
273
  if result.returncode == 0 or "iqtree" in result.stderr.lower():
274
  iqtree_available = True
275
  iqtree_cmd = candidate
276
- logger.info(f"✅ IQ-TREE found: {candidate}")
277
  break
278
  except Exception as e:
279
  logger.debug(f"IQ-TREE test failed for {candidate}: {e}")
280
-
281
  return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
282
 
283
- # --- Core Pipeline Functions ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  def predict_with_keras(sequence):
285
  try:
286
  if not keras_model or not kmer_to_index:
287
- return "❌ Keras model not available"
288
-
289
  if len(sequence) < 6:
290
- return "❌ Sequence too short (<6 bp)"
291
-
292
  kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
293
  indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
294
  input_arr = np.array([indices])
295
-
296
  prediction = keras_model.predict(input_arr, verbose=0)[0]
297
  f_gene_prob = prediction[-1]
298
  percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
299
-
300
  return f"✅ {percentage}% F gene confidence"
301
  except Exception as e:
302
  logger.error(f"Keras prediction failed: {e}")
303
  return f"❌ Error: {str(e)}"
304
 
305
- def run_simple_pipeline(dna_input, similarity_score=95.0):
306
- """Simplified pipeline that avoids complex Gradio components"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  try:
308
- if not dna_input or not dna_input.strip():
309
- return "❌ Empty input"
310
-
311
  dna_input = dna_input.upper().strip()
312
-
313
- # Clean sequence
314
  if not re.match('^[ACTGN]+$', dna_input):
315
  dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
316
-
317
  processed_sequence = dna_input
318
- results = []
319
-
320
- # Boundary prediction
321
  if boundary_model:
322
  try:
323
  result = boundary_model.predict_sequence(dna_input)
324
  regions = result['gene_regions']
325
  if regions:
326
  processed_sequence = regions[0]["sequence"]
327
- results.append(f"✅ F gene region: {len(processed_sequence)} bp")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  else:
329
- results.append("⚠️ No F gene regions found")
330
  except Exception as e:
331
- results.append(f"❌ Boundary error: {str(e)}")
 
 
332
  else:
333
- results.append("⚠️ Boundary model not available")
334
-
335
- # Keras prediction
336
- keras_result = predict_with_keras(processed_sequence)
337
- results.append(keras_result)
338
-
339
- # Tree analysis
340
- if analyzer and len(processed_sequence) >= 10:
341
  try:
342
- tree_result = analyze_sequence_simple(processed_sequence, similarity_score)
343
- results.append(tree_result)
 
 
 
 
 
 
 
 
 
 
344
  except Exception as e:
345
- results.append(f"❌ Tree analysis error: {str(e)}")
 
 
346
  else:
347
- results.append("❌ Tree analyzer not available" if not analyzer else "❌ Sequence too short")
348
-
349
- # Summary
350
- summary = f"""
351
  📊 ANALYSIS SUMMARY:
352
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
353
  Input: {len(dna_input)} bp
354
  F Gene: {len(processed_sequence)} bp
355
- Results:
356
- {chr(10).join(f" - {r}" for r in results)}
 
357
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
358
  """
359
-
360
- return summary
361
-
 
362
  except Exception as e:
363
- logger.error(f"Simple pipeline error: {e}")
364
- return f"❌ Pipeline Error: {str(e)}"
 
365
 
366
- def analyze_sequence_simple(sequence: str, matching_percentage: float):
367
- """Simplified tree analysis"""
368
  try:
369
- if not analyzer:
370
- return "❌ Tree analyzer not available"
371
-
372
- if not analyzer.find_query_sequence(sequence):
373
- return "❌ Sequence not accepted"
374
-
375
- matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
376
- if not matched_ids:
377
- return f"❌ No similar sequences at {matching_percentage}% threshold"
378
-
379
- return f"✅ Found {len(matched_ids)} sequences at {actual_percentage:.2f}% similarity"
380
-
381
  except Exception as e:
382
- logger.error(f"Simple tree analysis failed: {e}")
383
- return f"❌ Tree analysis error: {str(e)}"
384
-
385
- def read_fasta_simple(file_obj):
386
- """Simplified FASTA reader"""
387
- try:
388
- if file_obj is None:
389
- return ""
390
-
391
- if hasattr(file_obj, 'name'):
392
- with open(file_obj.name, "r") as f:
393
- content = f.read()
394
- else:
395
- content = file_obj.read()
396
- if isinstance(content, bytes):
397
- content = content.decode("utf-8")
398
-
399
- lines = content.strip().split("\n")
400
- seq_lines = [line.strip() for line in lines if not line.startswith(">")]
401
- return ''.join(seq_lines)
402
-
403
- except Exception as e:
404
- logger.error(f"FASTA read failed: {e}")
405
- return ""
406
-
407
- # --- FastAPI App Setup ---
408
- app = FastAPI(title="🧬 Gene Analysis Pipeline", version="1.0.0")
409
 
410
  # --- Pydantic Models ---
411
  class AnalysisRequest(BaseModel):
412
  sequence: str
413
  similarity_score: float = 95.0
 
414
 
415
  class AnalysisResponse(BaseModel):
416
- result: str
 
 
 
 
417
  success: bool
418
  error_message: Optional[str] = None
419
 
@@ -427,7 +473,8 @@ async def root():
427
  "docs": "/docs",
428
  "health": "/health",
429
  "gradio": "/gradio",
430
- "analyze": "/analyze"
 
431
  }
432
  }
433
 
@@ -443,202 +490,297 @@ async def health_check():
443
  "tree_analyzer": analyzer is not None,
444
  "mafft_available": mafft_available,
445
  "iqtree_available": iqtree_available
 
 
 
 
 
 
 
446
  }
447
  }
448
  except Exception as e:
 
449
  return {"status": "unhealthy", "error": str(e)}
450
 
451
  @app.post("/analyze", response_model=AnalysisResponse)
452
  async def analyze_sequence(request: AnalysisRequest):
453
  try:
454
- result = run_simple_pipeline(request.sequence, request.similarity_score)
455
- return AnalysisResponse(result=result, success=True)
 
 
 
 
 
 
 
456
  except Exception as e:
457
- logger.error(f"API analyze error: {e}")
458
- return AnalysisResponse(result="", success=False, error_message=str(e))
 
 
 
 
459
 
460
- # --- Simplified Gradio Interface ---
461
- def create_simple_gradio_interface():
462
- """Create a simple, robust Gradio interface"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
  try:
464
- # Get system status
465
- status_info = []
466
- status_info.append(f"🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}")
467
- status_info.append(f"🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}")
468
- status_info.append(f"🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}")
469
-
470
- mafft_available, iqtree_available, _, _ = check_tool_availability()
471
- status_info.append(f"🔬 MAFFT: {'✅ Available' if mafft_available else '❌ Missing'}")
472
- status_info.append(f"🔬 IQ-TREE: {'✅ Available' if iqtree_available else '❌ Missing'}")
473
-
474
- status_text = "\n".join(status_info)
475
-
476
  with gr.Blocks(
477
  title="🧬 Gene Analysis Pipeline",
478
- theme=gr.themes.Default()
479
- ) as interface:
480
-
 
 
 
 
 
 
481
  gr.Markdown("# 🧬 Gene Analysis Pipeline")
482
-
483
- # System status
484
- gr.Markdown("## 🔧 System Status")
485
- gr.Textbox(
486
- value=status_text,
487
- label="Component Status",
488
- lines=6,
489
- interactive=False
490
- )
491
-
492
- # Input section
493
- gr.Markdown("## 📝 Input")
494
-
495
- with gr.Tab("Text Input"):
496
- dna_input = gr.Textbox(
497
- label="🧬 DNA Sequence",
498
- placeholder="Enter DNA sequence (ATCG format)...",
499
- lines=5
500
- )
501
-
502
- with gr.Tab("File Upload"):
503
- fasta_file = gr.File(
504
- label="📄 Upload FASTA File",
505
- file_types=[".fasta", ".fa", ".txt"]
506
- )
507
-
508
- # Parameters
509
- similarity_slider = gr.Slider(
510
- minimum=1,
511
- maximum=99,
512
- value=95,
513
- step=1,
514
- label="🎯 Similarity Threshold (%)"
515
- )
516
-
517
- # Buttons
518
  with gr.Row():
519
- analyze_text_btn = gr.Button("🔍 Analyze Text", variant="primary")
520
- analyze_file_btn = gr.Button("📁 Analyze File", variant="secondary")
521
- clear_btn = gr.Button("🗑️ Clear")
522
-
523
- # Output
524
- gr.Markdown("## 📊 Results")
525
- output_text = gr.Textbox(
526
- label="Analysis Results",
527
- lines=15,
528
- interactive=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
  )
530
-
531
- # Event handlers
532
- def analyze_text(sequence, similarity):
533
- if not sequence or not sequence.strip():
534
- return " Please enter a DNA sequence"
535
- return run_simple_pipeline(sequence, similarity)
536
-
537
- def analyze_file(file_obj, similarity):
538
- if file_obj is None:
539
- return " Please upload a file"
540
- sequence = read_fasta_simple(file_obj)
541
- if not sequence:
542
- return " Failed to read sequence from file"
543
- return run_simple_pipeline(sequence, similarity)
544
-
545
- def clear_all():
546
- return "", None, ""
547
-
548
- # Connect events
549
- analyze_text_btn.click(
550
- fn=analyze_text,
551
- inputs=[dna_input, similarity_slider],
552
- outputs=[output_text]
553
  )
554
-
555
  analyze_file_btn.click(
556
- fn=analyze_file,
557
- inputs=[fasta_file, similarity_slider],
558
- outputs=[output_text]
559
- )
560
-
561
- clear_btn.click(
562
- fn=clear_all,
563
- outputs=[dna_input, fasta_file, output_text]
564
  )
565
-
566
- # Examples
567
- gr.Markdown("## 🧪 Examples")
 
 
568
  gr.Examples(
569
- examples=[
570
- ["ATGAAACTGCAGCTGAGGTCCCTGGTGGTGAACAAGCTCAGCAGCAAGTGCTGAACTGGATGGGCGAGAAGAGCAACTGCATCCAGTGCAAGCGCCTGAAGAGGAACTGCAAGAAGGTGGTGGACCTGCAGTGC", 95],
571
- ["ATGGAGCTGCAGCTGAGGTCCCTGGTGGTGAACAAGCTCAGCAGCAAGTGCTGAACTGGATGGGCGAGAAGAGCAACTGCATCCAGTGCAAGCGCCTGAAGAGGAACTGCAAGAAGGTGGTGGACCTGCAG", 85]
572
- ],
573
- inputs=[dna_input, similarity_slider]
574
  )
575
-
576
- # Info
577
- gr.Markdown("""
578
- ## ℹ️ About
579
-
580
- This tool analyzes DNA sequences for F gene characteristics using:
581
- - **Boundary Detection**: ML-based F gene region identification
582
- - **Keras Validation**: Neural network sequence validation
583
- - **Tree Analysis**: Phylogenetic similarity analysis
584
-
585
- **Requirements**: Sequences should be in ATCG format, minimum 10 bp recommended.
586
- """)
587
-
588
- return interface
589
-
 
 
 
 
 
 
 
590
  except Exception as e:
591
  logger.error(f"Failed to create Gradio interface: {e}")
592
-
593
- # Ultra-simple fallback interface
594
- with gr.Blocks() as fallback:
595
- gr.Markdown("# 🧬 Gene Analysis Pipeline (Safe Mode)")
596
- gr.Markdown(f"⚠️ Interface error: {str(e)}")
597
-
598
- sequence_input = gr.Textbox(label="DNA Sequence", lines=3)
599
- analyze_btn = gr.Button("Analyze")
600
- result_output = gr.Textbox(label="Result", lines=10)
601
-
602
- analyze_btn.click(
603
- fn=lambda seq: run_simple_pipeline(seq, 95.0),
604
- inputs=[sequence_input],
605
- outputs=[result_output]
606
- )
607
-
608
- return fallback
609
 
610
  # --- Application Startup ---
611
- if __name__ == "__main__":
612
  try:
613
- # Create simplified Gradio interface
614
- gr_interface = create_simple_gradio_interface()
615
-
616
- # Mount to FastAPI with error handling
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
617
  try:
618
- gr_app = gr.mount_gradio_app(app, gr_interface, path="/gradio")
 
 
 
 
 
 
 
 
619
  except Exception as e:
620
- logger.error(f"Failed to mount Gradio: {e}")
621
- # Continue with just FastAPI
622
-
623
- # Log startup info
624
- logger.info("🚀 Starting Gene Analysis Pipeline...")
625
- logger.info(f"📁 Base directory: {BASE_DIR}")
626
- logger.info(f"🤖 Models: Boundary={boundary_model is not None}, Keras={keras_model is not None}")
627
- logger.info(f"🌳 Tree analyzer: {analyzer is not None}")
628
-
629
- # Start server
630
- logger.info("🌐 Server starting on http://0.0.0.0:7860")
631
- logger.info("📊 FastAPI docs: http://0.0.0.0:7860/docs")
632
- logger.info("🎮 Gradio: http://0.0.0.0:7860/gradio")
633
-
634
- uvicorn.run(
635
- app,
636
- host="0.0.0.0",
637
- port=7860,
638
- log_level="info"
639
- )
640
-
641
- except Exception as e:
642
- logger.error(f"❌ Startup failed: {e}")
643
- print(f"❌ Application failed to start: {e}")
644
- sys.exit(1)
 
22
  import stat
23
  import time
24
  import asyncio
 
 
25
  from fastapi import FastAPI, File, UploadFile, Form, HTTPException
26
  from fastapi.responses import HTMLResponse
27
  from pydantic import BaseModel
 
34
  except Exception:
35
  pass
36
 
37
+ # --- FastAPI App Setup ---
38
+ app = FastAPI(title="🧬 Gene Analysis Pipeline", version="1.0.0")
39
+
40
  # --- Enhanced Logging ---
41
  log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
42
  log_handler = logging.StreamHandler()
43
  log_handler.setFormatter(log_formatter)
 
 
44
  try:
45
  file_handler = logging.FileHandler('/tmp/app.log')
46
  file_handler.setFormatter(log_formatter)
47
  logging.basicConfig(level=logging.INFO, handlers=[log_handler, file_handler])
48
  except Exception:
49
  logging.basicConfig(level=logging.INFO, handlers=[log_handler])
 
50
  logger = logging.getLogger(__name__)
51
 
52
  # --- Global Variables ---
 
58
  TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
59
  QUERY_OUTPUT_DIR = os.path.join("/tmp", "queries")
60
  os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
61
+ os.makedirs(MODELS_DIR, exist_ok=True)
62
+ os.makedirs("/tmp/hf_cache", exist_ok=True)
63
 
64
  # --- Model Configuration ---
65
+ BOUNDARY_MODEL_REPO = "GGproject10/best_boundary_aware_model"
66
+ OTHER_MODELS_REPO = "GGproject10/simplified_tree_AI"
67
+ HF_TOKEN = os.getenv("HF_TOKEN")
 
 
 
 
 
 
 
 
 
 
68
 
69
  # Initialize models as None
70
  boundary_model = None
 
72
  kmer_to_index = None
73
  analyzer = None
74
 
75
+ # --- Model Loading ---
 
 
 
 
76
  def load_models_safely():
77
  global boundary_model, keras_model, kmer_to_index, analyzer
 
78
  logger.info(f"🔍 Looking for models in: {MODELS_DIR}")
79
  logger.info(f"📁 Models directory exists: {os.path.exists(MODELS_DIR)}")
80
+ logger.info(f"🔑 HF_TOKEN available: {HF_TOKEN is not None}")
81
+
82
  # Load Boundary Model
83
  try:
84
  local_boundary_path = os.path.join(MODELS_DIR, "best_boundary_aware_model.pth")
 
85
  if os.path.exists(local_boundary_path):
86
  logger.info(f"✅ Loading boundary model from local: {local_boundary_path}")
87
  boundary_model = EnhancedGenePredictor(local_boundary_path)
88
  logger.info("✅ Boundary model loaded successfully")
89
+ elif HF_TOKEN:
90
+ logger.info(f"🌐 Downloading boundary model from {BOUNDARY_MODEL_REPO}")
91
+ boundary_path = hf_hub_download(
92
+ repo_id=BOUNDARY_MODEL_REPO,
93
+ filename="best_boundary_aware_model.pth",
94
+ token=HF_TOKEN,
95
+ cache_dir="/tmp/hf_cache",
96
+ local_dir=MODELS_DIR,
97
+ local_dir_use_symlinks=False
98
+ )
99
+ if os.path.exists(boundary_path):
100
+ boundary_model = EnhancedGenePredictor(boundary_path)
101
+ logger.info("✅ Boundary model downloaded and loaded")
102
+ else:
103
+ logger.warning(f"❌ Boundary model download failed from {BOUNDARY_MODEL_REPO}")
 
 
 
104
  else:
105
  logger.warning("❌ No boundary model found and no HF_TOKEN")
106
  except Exception as e:
 
111
  try:
112
  local_keras_path = os.path.join(MODELS_DIR, "best_model.keras")
113
  local_kmer_path = os.path.join(MODELS_DIR, "kmer_to_index.pkl")
 
114
  if os.path.exists(local_keras_path) and os.path.exists(local_kmer_path):
115
+ logger.info(f" Loading Keras model from local: {local_keras_path}")
116
  keras_model = load_model(local_keras_path)
117
  with open(local_kmer_path, "rb") as f:
118
  kmer_to_index = pickle.load(f)
119
  logger.info("✅ Keras model loaded successfully")
120
+ elif HF_TOKEN:
121
+ logger.info(f"🌐 Downloading Keras model from {OTHER_MODELS_REPO}")
122
+ keras_path = hf_hub_download(
123
+ repo_id=OTHER_MODELS_REPO,
124
+ filename="best_model.keras",
125
+ token=HF_TOKEN,
126
+ cache_dir="/tmp/hf_cache",
127
+ local_dir=MODELS_DIR,
128
+ local_dir_use_symlinks=False
129
+ )
130
+ kmer_path = hf_hub_download(
131
+ repo_id=OTHER_MODELS_REPO,
132
+ filename="kmer_to_index.pkl",
133
+ token=HF_TOKEN,
134
+ cache_dir="/tmp/hf_cache",
135
+ local_dir=MODELS_DIR,
136
+ local_dir_use_symlinks=False
137
+ )
138
+ if os.path.exists(keras_path) and os.path.exists(kmer_path):
139
+ keras_model = load_model(keras_path)
140
+ with open(kmer_path, "rb") as f:
141
+ kmer_to_index = pickle.load(f)
142
+ logger.info("✅ Keras model downloaded and loaded")
143
+ else:
144
+ logger.warning(f"❌ Keras model download failed from {OTHER_MODELS_REPO}")
 
 
 
 
 
145
  else:
146
  logger.warning("❌ No Keras model found and no HF_TOKEN")
147
  except Exception as e:
 
153
  try:
154
  logger.info("🌳 Initializing tree analyzer...")
155
  analyzer = PhylogeneticTreeAnalyzer()
156
+ csv_candidates = [
157
+ os.path.join(BASE_DIR, "f_cleaned.csv"),
158
+ os.path.join(BASE_DIR, "f cleaned.csv"),
159
+ os.path.join(MODELS_DIR, "f_cleaned.csv"),
160
+ os.path.join(BASE_DIR, "data", "f_cleaned.csv"),
161
+ "f_cleaned.csv"
162
+ ]
163
  csv_loaded = False
164
  for csv_candidate in csv_candidates:
165
  if os.path.exists(csv_candidate):
166
+ logger.info(f"📊 Trying CSV: {csv_candidate}")
167
  try:
 
168
  if analyzer.load_data(csv_candidate):
169
  logger.info(f"✅ CSV loaded from: {csv_candidate}")
170
  csv_loaded = True
 
172
  except Exception as e:
173
  logger.warning(f"CSV load failed for {csv_candidate}: {e}")
174
  continue
175
+ if not csv_loaded and HF_TOKEN:
176
+ logger.info(f"🌐 Downloading CSV from {OTHER_MODELS_REPO}")
177
+ try:
178
+ csv_path = hf_hub_download(
179
+ repo_id=OTHER_MODELS_REPO,
180
+ filename="f_cleaned.csv",
181
+ token=HF_TOKEN,
182
+ cache_dir="/tmp/hf_cache",
183
+ local_dir=BASE_DIR,
184
+ local_dir_use_symlinks=False
185
+ )
186
+ if os.path.exists(csv_path) and analyzer.load_data(csv_path):
187
+ logger.info("✅ CSV downloaded and loaded")
188
+ csv_loaded = True
189
+ else:
190
+ logger.warning(f"❌ CSV download failed from {OTHER_MODELS_REPO}")
191
+ except Exception as e:
192
+ logger.error(f"❌ CSV HF download failed: {e}")
193
  if not csv_loaded:
194
  logger.error("❌ No CSV data loaded")
195
+ analyzer = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  except Exception as e:
197
  logger.error(f"❌ Tree analyzer initialization failed: {e}")
198
  analyzer = None
 
212
 
213
def _probe_tool(candidates, marker, label):
    """Return (found, command) for the first candidate binary that answers --help.

    A candidate counts as working when it exits 0 or prints its own name
    (`marker`) on stderr -- MAFFT and IQ-TREE print usage text to stderr
    with a non-zero exit code, so the return code alone is not enough.
    """
    for candidate in candidates:
        # Skip names that are neither on PATH nor present on disk.
        if not (shutil.which(candidate) or os.path.exists(candidate)):
            continue
        try:
            result = subprocess.run(
                [candidate, "--help"],
                capture_output=True,
                text=True,
                timeout=5
            )
        except Exception as e:
            logger.debug(f"{label} test failed for {candidate}: {e}")
            continue
        if result.returncode == 0 or marker in result.stderr.lower():
            logger.info(f"✅ {label} found at: {candidate}")
            return True, candidate
    return False, None

def check_tool_availability():
    """Locate working MAFFT and IQ-TREE executables.

    Ensures bundled binaries are executable first, then probes a list of
    well-known locations for each tool.

    Returns:
        Tuple (mafft_available, iqtree_available, mafft_cmd, iqtree_cmd);
        the *_cmd entries are the first working candidate, or None.
    """
    setup_binary_permissions()
    mafft_available, mafft_cmd = _probe_tool(
        ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH],
        "mafft",
        "MAFFT",
    )
    iqtree_available, iqtree_cmd = _probe_tool(
        ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH],
        "iqtree",
        "IQ-TREE",
    )
    return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
254
 
255
+ # --- Pipeline Functions ---
256
def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
    """Place a query sequence into the reference phylogenetic tree.

    Aligns the query against the reference alignment with MAFFT (--add),
    then runs a topology-constrained IQ-TREE search (-g) to place it.

    Args:
        sequence: Raw nucleotide sequence; must be >= 100 bp after stripping.
        mafft_cmd: MAFFT executable to invoke.
        iqtree_cmd: IQ-TREE executable to invoke.

    Returns:
        Tuple (success, message, aligned_fasta_path_or_None, treefile_path_or_None).
    """
    try:
        if len(sequence.strip()) < 100:
            return False, "Sequence too short (<100 bp).", None, None
        query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
        query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
        aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
        output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
        if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
            return False, "Reference alignment or tree not found.", None, None
        query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
        SeqIO.write([query_record], query_fasta, "fasta")
        # MAFFT --add keeps the reference alignment fixed and aligns only the query.
        with open(aligned_with_query, "w") as output_file:
            subprocess.run([
                mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH
            ], stdout=output_file, stderr=subprocess.PIPE, text=True, timeout=600, check=True)
        if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
            return False, "MAFFT alignment failed.", None, None
        # -g constrains the search to the reference topology so the query is "placed".
        subprocess.run([
            iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH,
            "-m", "GTR+G", "-pre", output_prefix, "-redo"
        ], capture_output=True, text=True, timeout=1200, check=True)
        treefile = f"{output_prefix}.treefile"
        if not os.path.exists(treefile):
            return False, "IQ-TREE placement failed.", aligned_with_query, None
        success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
        return True, success_msg, aligned_with_query, treefile
    except Exception as e:
        logger.error(f"Phylogenetic placement failed: {e}")
        return False, f"Error: {str(e)}", None, None
    finally:
        # Best-effort cleanup of the temporary query FASTA.
        if 'query_fasta' in locals() and os.path.exists(query_fasta):
            try:
                os.unlink(query_fasta)
            except OSError:  # bug fix: was a bare `except:`; narrow to file errors
                pass
292
+
293
def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
    """Run the phylogenetic similarity analysis for one query sequence.

    Returns:
        Tuple (status_message, tree_html_path, report_html_path); the two
        paths are None whenever the corresponding artifact was not produced.
    """
    try:
        # Guard clauses: bail out early with a user-facing message.
        if not analyzer:
            return "❌ Tree analyzer not initialized.", None, None
        if not sequence or len(sequence.strip()) < 10:
            return "❌ Invalid sequence.", None, None
        if not (1 <= matching_percentage <= 99):
            return "❌ Matching percentage must be 1-99.", None, None
        if not analyzer.find_query_sequence(sequence):
            return "❌ Sequence not accepted.", None, None

        hit_ids, achieved_pct = analyzer.find_similar_sequences(matching_percentage)
        if not hit_ids:
            return f"❌ No similar sequences at {matching_percentage}% threshold.", None, None

        # Build and render the interactive tree for the matched set.
        analyzer.build_tree_structure_with_ml_safe(hit_ids)
        tree_figure = analyzer.create_interactive_tree(hit_ids, achieved_pct)
        query_id = analyzer.query_id or f"query_{int(time.time())}"
        tree_html_path = os.path.join("/tmp", f'phylogenetic_tree_{query_id}.html')
        tree_figure.write_html(tree_html_path)

        # The detailed report is optional: only expose its path on success.
        analyzer.matching_percentage = matching_percentage
        report_success = analyzer.generate_detailed_report(hit_ids, achieved_pct)
        if report_success:
            report_html_path = os.path.join("/tmp", f'detailed_report_{query_id}.html')
        else:
            report_html_path = None

        return (f"✅ Found {len(hit_ids)} sequences at {achieved_pct:.2f}% similarity.",
                tree_html_path, report_html_path)
    except Exception as e:
        logger.error(f"Tree analysis failed: {e}")
        return f"❌ Error: {str(e)}", None, None
318
+
319
def predict_with_keras(sequence):
    """Score `sequence` with the 6-mer Keras classifier.

    Returns a human-readable status string: a confidence percentage on
    success, or an error/unavailable message.
    """
    try:
        if not keras_model or not kmer_to_index:
            return "❌ Keras model not available."
        if len(sequence) < 6:
            return "❌ Sequence too short (<6 bp)."
        # Encode every overlapping 6-mer through the lookup table (unknown -> 0).
        encoded = [kmer_to_index.get(sequence[i:i + 6], 0) for i in range(len(sequence) - 5)]
        batch = np.array([encoded])
        probs = keras_model.predict(batch, verbose=0)[0]
        # Last output unit is the F-gene class; add 5 then clamp into 0-100.
        confidence = min(100, max(0, int(probs[-1] * 100 + 5)))
        return f"✅ {confidence}% F gene confidence"
    except Exception as e:
        logger.error(f"Keras prediction failed: {e}")
        return f"❌ Error: {str(e)}"
335
 
336
def read_fasta_file(file_obj):
    """Extract the raw sequence from a FASTA file path or file-like object.

    Accepts a filesystem path (str) or an open file-like object. Header
    lines (starting with '>') are dropped and all remaining lines are
    concatenated into one sequence string.

    Returns the sequence, or "" when the input is missing or unreadable.

    Fix: the original unconditionally called ``.decode("utf-8")`` on
    ``file_obj.read()``, which raises AttributeError for text-mode file
    objects (read() returns str); the error was swallowed and the data
    silently lost. Bytes are now decoded only when actually present.
    """
    try:
        if file_obj is None:
            return ""
        if isinstance(file_obj, str):
            # Treat a string as a path on disk.
            with open(file_obj, "r") as f:
                content = f.read()
        else:
            content = file_obj.read()
            # Binary-mode uploads hand back bytes; decode only then.
            if isinstance(content, bytes):
                content = content.decode("utf-8")
        lines = content.strip().split("\n")
        # Keep only sequence lines; FASTA headers begin with '>'.
        seq_lines = [line.strip() for line in lines if not line.startswith(">")]
        return ''.join(seq_lines)
    except Exception as e:
        logger.error(f"Failed to read FASTA file: {e}")
        return ""
351
+
352
def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
    """Run the full analysis pipeline on a raw DNA string.

    Stages: input sanitization -> boundary detection (optional model) ->
    Keras F-gene validation -> optional MAFFT/IQ-TREE placement ->
    similarity-tree analysis with HTML report.

    Returns an 11-tuple consumed by both the Gradio UI and the REST API:
    (boundary_output, keras_output, ml_tree_output, simplified_ml_output,
     summary_output, aligned_file, phy_file, None, None,
     tree_html_content, report_html_content).
    Never raises: any failure is reported inside the tuple.
    """
    try:
        dna_input = dna_input.upper().strip()
        if not dna_input:
            return "❌ Empty input", "", "", "", "", None, None, None, None, "No input", "No input"
        # Replace any non-ACTGN character with the ambiguity code 'N'.
        if not re.match('^[ACTGN]+$', dna_input):
            dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
        processed_sequence = dna_input
        boundary_output = ""
        # Stage 1: boundary model extracts the first detected F-gene region;
        # any failure falls back to the full input sequence.
        if boundary_model:
            try:
                result = boundary_model.predict_sequence(dna_input)
                regions = result['gene_regions']
                if regions:
                    processed_sequence = regions[0]["sequence"]
                    boundary_output = f"✅ F gene region found: {len(processed_sequence)} bp"
                else:
                    boundary_output = "⚠️ No F gene regions found."
                    processed_sequence = dna_input
            except Exception as e:
                boundary_output = f"❌ Boundary prediction error: {str(e)}"
                processed_sequence = dna_input
        else:
            boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"
        # Stage 2: Keras validation needs at least one 6-mer window.
        keras_output = predict_with_keras(processed_sequence) if processed_sequence and len(processed_sequence) >= 6 else "❌ Sequence too short."
        aligned_file = None
        phy_file = None
        ml_tree_output = ""
        # Stage 3: ML phylogenetic placement (opt-in, requires >=100 bp and
        # both external tools).
        if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
            try:
                mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
                if mafft_available and iqtree_available:
                    ml_success, ml_message, ml_aligned, ml_tree = phylogenetic_placement(processed_sequence, mafft_cmd, iqtree_cmd)
                    ml_tree_output = ml_message
                    aligned_file = ml_aligned
                    phy_file = ml_tree
                else:
                    ml_tree_output = " MAFFT or IQ-TREE not available"
            except Exception as e:
                ml_tree_output = f"❌ ML tree error: {str(e)}"
        elif build_ml_tree:
            ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
        else:
            ml_tree_output = "⚠️ Phylogenetic placement skipped."
        tree_html_content = "No tree generated."
        report_html_content = "No report generated."
        simplified_ml_output = ""
        # Stage 4: similarity-tree analysis; the generated HTML files are
        # read back into strings for embedding in the UI/response.
        if analyzer and processed_sequence and len(processed_sequence) >= 10:
            try:
                tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
                simplified_ml_output = tree_result
                if tree_html_path and os.path.exists(tree_html_path):
                    with open(tree_html_path, 'r', encoding='utf-8') as f:
                        tree_html_content = f.read()
                else:
                    tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
                if report_html_path and os.path.exists(report_html_path):
                    with open(report_html_path, 'r', encoding='utf-8') as f:
                        report_html_content = f.read()
                else:
                    report_html_content = f"<div style='color: red;'>{tree_result}</div>"
            except Exception as e:
                simplified_ml_output = f"❌ Tree analysis error: {str(e)}"
                tree_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
                report_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
        else:
            simplified_ml_output = "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
            tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
            report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
        # Summary is derived by substring-matching the stage messages above,
        # so the emoji/keywords in those strings are load-bearing.
        summary_output = f"""
📊 ANALYSIS SUMMARY:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Input: {len(dna_input)} bp
F Gene: {len(processed_sequence)} bp
Validation: {keras_output.split(':')[-1].strip() if ':' in keras_output else keras_output}
Placement: {'✅ OK' if '✅' in ml_tree_output else '⚠️ Skipped' if 'skipped' in ml_tree_output else '❌ Failed'}
Tree Analysis: {'✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
"""
        return (
            boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
            aligned_file, phy_file, None, None, tree_html_content, report_html_content
        )
    except Exception as e:
        logger.error(f"Pipeline error: {e}")
        error_msg = f"❌ Pipeline Error: {str(e)}"
        return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
439
 
440
async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
    """Read a FASTA upload and feed its sequence through run_pipeline().

    Returns the same 11-tuple as run_pipeline(); failures are reported
    inside the tuple rather than raised.
    """
    try:
        sequence = read_fasta_file(fasta_file_obj)
        if not sequence:
            return "❌ Failed to read FASTA file", "", "", "", "", None, None, None, None, "No input", "No input"
        return run_pipeline(sequence, similarity_score, build_ml_tree)
    except Exception as e:
        logger.error(f"Pipeline from file error: {e}")
        error_msg = f"❌ Error: {str(e)}"
        return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
 
451
  # --- Pydantic Models ---
452
class AnalysisRequest(BaseModel):
    """Request body for POST /analyze."""
    sequence: str  # raw DNA sequence (ACTGN; other characters are coerced to N)
    similarity_score: float = 95.0  # similarity threshold (%) for tree matching
    build_ml_tree: bool = False  # also run MAFFT/IQ-TREE phylogenetic placement
456
 
457
class AnalysisResponse(BaseModel):
    """Response body for /analyze and /analyze-file."""
    boundary_output: str  # F-gene boundary detection message
    keras_output: str  # Keras validation/confidence message
    ml_tree_output: str  # phylogenetic placement status message
    tree_analysis_output: str  # similarity-tree analysis status message
    summary_output: str  # human-readable overall summary
    success: bool  # False only when the request handler itself raised
    error_message: Optional[str] = None  # exception text when success is False
465
 
 
473
  "docs": "/docs",
474
  "health": "/health",
475
  "gradio": "/gradio",
476
+ "analyze": "/analyze",
477
+ "analyze_file": "/analyze-file"
478
  }
479
  }
480
 
 
490
  "tree_analyzer": analyzer is not None,
491
  "mafft_available": mafft_available,
492
  "iqtree_available": iqtree_available
493
+ },
494
+ "paths": {
495
+ "base_dir": BASE_DIR,
496
+ "models_dir": MODELS_DIR,
497
+ "hf_cache": "/tmp/hf_cache",
498
+ "models_dir_exists": os.path.exists(MODELS_DIR),
499
+ "hf_cache_exists": os.path.exists("/tmp/hf_cache")
500
  }
501
  }
502
  except Exception as e:
503
+ logger.error(f"Health check error: {e}")
504
  return {"status": "unhealthy", "error": str(e)}
505
 
506
@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_sequence(request: AnalysisRequest):
    """Run the full analysis pipeline on a JSON-supplied sequence."""
    try:
        # run_pipeline returns an 11-tuple; the API exposes the first five
        # (text) fields only.
        outputs = run_pipeline(request.sequence, request.similarity_score, request.build_ml_tree)
        boundary, keras, ml_tree, tree_analysis, summary = outputs[:5]
        return AnalysisResponse(
            boundary_output=boundary or "",
            keras_output=keras or "",
            ml_tree_output=ml_tree or "",
            tree_analysis_output=tree_analysis or "",
            summary_output=summary or "",
            success=True
        )
    except Exception as e:
        logger.error(f"Analyze error: {e}")
        return AnalysisResponse(
            boundary_output="", keras_output="", ml_tree_output="",
            tree_analysis_output="", summary_output="",
            success=False, error_message=str(e)
        )
525
 
526
@app.post("/analyze-file")
async def analyze_file(
    file: UploadFile = File(...),
    similarity_score: float = Form(95.0),
    build_ml_tree: bool = Form(False)
):
    """Run the analysis pipeline on an uploaded FASTA file.

    The upload is spooled to a temporary file under /tmp, analyzed via
    run_pipeline_from_file(), and the temp file is always removed.

    Fix: the cleanup used a bare ``except:`` which also swallows
    SystemExit/KeyboardInterrupt; it now catches OSError only.
    """
    temp_file_path = None
    try:
        # Persist the upload so downstream path-based readers can open it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
            content = await file.read()
            temp_file.write(content)
            temp_file_path = temp_file.name
        result = await run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
        return AnalysisResponse(
            boundary_output=result[0] or "",
            keras_output=result[1] or "",
            ml_tree_output=result[2] or "",
            tree_analysis_output=result[3] or "",
            summary_output=result[4] or "",
            success=True
        )
    except Exception as e:
        logger.error(f"Analyze-file error: {e}")
        return AnalysisResponse(
            boundary_output="", keras_output="", ml_tree_output="",
            tree_analysis_output="", summary_output="",
            success=False, error_message=str(e)
        )
    finally:
        # Best-effort cleanup of the spooled upload.
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.unlink(temp_file_path)
            except OSError:
                pass
560
+
561
+ # --- Gradio Interface ---
562
def create_gradio_interface():
    """Build the Gradio Blocks UI for the pipeline.

    Returns the Blocks app on success, or None if construction fails
    (the caller logs the failure and skips mounting).
    """
    try:
        with gr.Blocks(
            title="🧬 Gene Analysis Pipeline",
            theme=gr.themes.Soft(),
            css="""
            .gradio-container { max-width: 1200px !important; }
            .status-box { padding: 10px; border-radius: 5px; margin: 5px 0; }
            .success { background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
            .warning { background-color: #fff3cd; border: 1px solid #ffeaa7; color: #856404; }
            .error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
            """
        ) as iface:
            gr.Markdown("# 🧬 Gene Analysis Pipeline")
            # Status panel rendered once at UI build time.
            # NOTE(review): check_tool_availability() is invoked twice below;
            # harmless but could be hoisted to a single call.
            with gr.Row():
                with gr.Column():
                    status_display = gr.HTML(value=f"""
                    <div class="status-box">
                    <h3>🔧 System Status</h3>
                    <p>🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}</p>
                    <p>🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}</p>
                    <p>🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}</p>
                    <p>🧬 MAFFT: {'✅ Available' if check_tool_availability()[0] else '❌ Missing'}</p>
                    <p>🌲 IQ-TREE: {'✅ Available' if check_tool_availability()[1] else '❌ Missing'}</p>
                    </div>
                    """)
            # Two input modes: pasted text or an uploaded FASTA file.
            with gr.Tabs() as tabs:
                with gr.TabItem("📝 Text Input"):
                    with gr.Row():
                        with gr.Column(scale=2):
                            dna_input = gr.Textbox(
                                label="🧬 DNA Sequence",
                                placeholder="Enter DNA sequence (ATCG format)...",
                                lines=5,
                                info="Paste your DNA sequence here"
                            )
                        with gr.Column(scale=1):
                            similarity_score = gr.Slider(
                                minimum=1,
                                maximum=99,
                                value=95.0,
                                step=1.0,
                                label="🎯 Similarity Threshold (%)",
                                info="Minimum similarity for tree analysis"
                            )
                            build_ml_tree = gr.Checkbox(
                                label="🌲 Build ML Tree",
                                value=False,
                                info="Generate phylogenetic placement (slower)"
                            )
                            analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
                with gr.TabItem("📁 File Upload"):
                    with gr.Row():
                        with gr.Column(scale=2):
                            # NOTE(review): gr.File may not accept an `info`
                            # kwarg in all Gradio versions — confirm against
                            # the pinned gradio release.
                            file_input = gr.File(
                                label="📄 Upload FASTA File",
                                file_types=[".fasta", ".fa", ".fas", ".txt"],
                                info="Upload a FASTA file containing your sequence"
                            )
                        with gr.Column(scale=1):
                            file_similarity_score = gr.Slider(
                                minimum=1,
                                maximum=99,
                                value=95.0,
                                step=1.0,
                                label="🎯 Similarity Threshold (%)"
                            )
                            file_build_ml_tree = gr.Checkbox(
                                label="🌲 Build ML Tree",
                                value=False
                            )
                            analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary")
            # Shared output widgets for both input modes.
            gr.Markdown("## 📊 Analysis Results")
            with gr.Row():
                with gr.Column():
                    boundary_output = gr.Textbox(
                        label="🎯 Boundary Detection",
                        interactive=False,
                        lines=2
                    )
                    keras_output = gr.Textbox(
                        label="🧠 F Gene Validation",
                        interactive=False,
                        lines=2
                    )
                with gr.Column():
                    ml_tree_output = gr.Textbox(
                        label="🌲 Phylogenetic Placement",
                        interactive=False,
                        lines=2
                    )
                    tree_analysis_output = gr.Textbox(
                        label="🌳 Tree Analysis",
                        interactive=False,
                        lines=2
                    )
                    summary_output = gr.Textbox(
                        label="📋 Summary",
                        interactive=False,
                        lines=8
                    )
            with gr.Row():
                aligned_file = gr.File(label="📄 Alignment File", visible=False)
                tree_file = gr.File(label="🌲 Tree File", visible=False)
            with gr.Tabs():
                with gr.TabItem("🌳 Interactive Tree"):
                    tree_html = gr.HTML(
                        label="Phylogenetic Tree",
                        value="<div style='text-align: center; padding: 20px; color: #666;'>No tree generated yet.</div>"
                    )
                with gr.TabItem("📊 Detailed Report"):
                    report_html = gr.HTML(
                        label="Analysis Report",
                        value="<div style='text-align: center; padding: 20px; color: #666;'>No report generated yet.</div>"
                    )
            # run_pipeline returns 11 values; slots 8-9 are unused here and
            # absorbed by throwaway gr.State() outputs.
            analyze_btn.click(
                fn=run_pipeline,
                inputs=[dna_input, similarity_score, build_ml_tree],
                outputs=[
                    boundary_output, keras_output, ml_tree_output,
                    tree_analysis_output, summary_output,
                    aligned_file, tree_file, gr.State(), gr.State(),
                    tree_html, report_html
                ]
            )
            analyze_file_btn.click(
                fn=run_pipeline_from_file,
                inputs=[file_input, file_similarity_score, file_build_ml_tree],
                outputs=[
                    boundary_output, keras_output, ml_tree_output,
                    tree_analysis_output, summary_output,
                    aligned_file, tree_file, gr.State(), gr.State(),
                    tree_html, report_html
                ]
            )
            gr.Markdown("## 🔬 Example Sequences")
            # Each example: [sequence, similarity threshold, build ML tree].
            example_sequences = [
                ["ATGGACTTCCAAATTAACAACCTCAACAACCTCAACAACATCAACAACATCAACAACATCAACAACATCAACAAC", 90.0, False],
                ["ATGAAACAAATTAACAACCTCAACAACCTCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAAC", 85.0, True]
            ]
            gr.Examples(
                examples=example_sequences,
                inputs=[dna_input, similarity_score, build_ml_tree],
                label="Click to load example sequences"
            )
            with gr.Accordion("❓ Help & Information", open=False):
                gr.Markdown("""
                ### 🧬 Gene Analysis Pipeline
                This tool performs comprehensive analysis of F gene sequences:
                **🎯 Boundary Detection**: Identifies F gene regions within your sequence
                **🧠 F Gene Validation**: Validates sequence as F gene using deep learning
                **🌲 Phylogenetic Placement**: Places sequence in reference phylogeny
                **🌳 Tree Analysis**: Finds similar sequences and builds interactive trees
                ### 📋 Input Requirements
                - DNA sequences in ATCG format
                - Minimum 10 bp for basic analysis
                - Minimum 100 bp for phylogenetic placement
                - FASTA files supported for upload
                ### ⚙️ Parameters
                - **Similarity Threshold**: Minimum % similarity for tree analysis (1-99%)
                - **Build ML Tree**: Enable phylogenetic placement (requires MAFFT/IQ-TREE)
                ### 📊 Output Files
                - Alignment files (.fa format)
                - Tree files (.treefile format)
                - Interactive HTML visualizations
                """)
        return iface
    except Exception as e:
        logger.error(f"Failed to create Gradio interface: {e}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
732
 
733
  # --- Application Startup ---
734
def mount_gradio_app():
    """Create the Gradio UI and mount it on the FastAPI app at /gradio.

    Fix: the original did ``app = gr.mount_gradio_app(app, ...)`` without
    declaring ``app`` global. The assignment makes ``app`` function-local,
    so reading it as the first argument raises UnboundLocalError and the
    Gradio UI was never mounted. Declaring ``global app`` restores the
    intended rebinding of the module-level FastAPI app.
    """
    global app  # rebind the module-level FastAPI app with the mounted UI
    try:
        gradio_app = create_gradio_interface()
        if gradio_app:
            app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
            logger.info("✅ Gradio interface mounted at /gradio")
        else:
            logger.error("❌ Failed to create Gradio interface")
    except Exception as e:
        logger.error(f"❌ Failed to mount Gradio app: {e}")

# Initialize Gradio at import time so /gradio is available under uvicorn.
mount_gradio_app()
747
+
748
+ # --- Main Application ---
749
if __name__ == "__main__":
    # CLI entry point: either launch the standalone Gradio UI, or serve the
    # combined FastAPI + mounted-Gradio app via uvicorn.
    import argparse
    parser = argparse.ArgumentParser(description="🧬 Gene Analysis Pipeline")
    parser.add_argument("--host", default="0.0.0.0", help="Host address")
    parser.add_argument("--port", type=int, default=7860, help="Port number")
    parser.add_argument("--reload", action="store_true", help="Enable auto-reload")
    parser.add_argument("--gradio-only", action="store_true", help="Run Gradio interface only")
    args = parser.parse_args()
    if args.gradio_only:
        logger.info("🚀 Starting Gradio interface only...")
        iface = create_gradio_interface()
        if iface:
            iface.launch(
                server_name=args.host,
                server_port=args.port,
                share=False,
                show_error=True
            )
        else:
            logger.error("❌ Failed to create Gradio interface")
            sys.exit(1)
    else:
        logger.info(f"🚀 Starting Gene Analysis Pipeline on {args.host}:{args.port}")
        logger.info("📊 API Documentation: http://localhost:7860/docs")
        logger.info("🧬 Gradio Interface: http://localhost:7860/gradio")
        try:
            # uvicorn's reload mode requires an import string, not an object.
            uvicorn.run(
                "app:app" if args.reload else app,
                host=args.host,
                port=args.port,
                reload=args.reload,
                log_level="info"
            )
        except KeyboardInterrupt:
            logger.info("🛑 Application stopped by user")
        except Exception as e:
            logger.error(f" Application failed: {e}")
            sys.exit(1)