Spaces:

GGproject10
/

simplified_tree_AI

No application file

App Files Files Community

re-type committed on Jun 13, 2025

Commit

5ff3d5b

verified ·

1 Parent(s): 664ad2e

Update app.py

Browse files

Files changed (1) hide show

app.py +415 -227

app.py CHANGED Viewed

@@ -22,6 +22,8 @@ from Bio.SeqRecord import SeqRecord
 import stat
 import time
 import asyncio
 from fastapi import FastAPI, File, UploadFile, Form, HTTPException
 from fastapi.responses import HTMLResponse
 from pydantic import BaseModel
@@ -41,16 +43,20 @@ app = FastAPI(title="🧬 Gene Analysis Pipeline", version="1.0.0")
 log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 log_handler = logging.StreamHandler()
 log_handler.setFormatter(log_formatter)
 try:
     file_handler = logging.FileHandler('/tmp/app.log')
     file_handler.setFormatter(log_formatter)
     logging.basicConfig(level=logging.INFO, handlers=[log_handler, file_handler])
 except Exception:
     logging.basicConfig(level=logging.INFO, handlers=[log_handler])
 logger = logging.getLogger(__name__)
 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 MAFFT_PATH = shutil.which("mafft") or os.path.join(BASE_DIR, "binaries", "mafft", "mafft")
 IQTREE_PATH = shutil.which("iqtree") or os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
 ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
@@ -58,10 +64,11 @@ TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
 QUERY_OUTPUT_DIR = os.path.join("/tmp", "queries")
 os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
-# --- Hugging Face Repositories ---
-MODEL_REPO = "GGproject10/best_boundary_aware_model"
-DATA_REPO = "GGproject10/simplified_tree_AI"
-HF_TOKEN = os.getenv("HF_TOKEN")
 # Initialize models as None
 boundary_model = None
@@ -69,58 +76,84 @@ keras_model = None
 kmer_to_index = None
 analyzer = None
-# --- Model Loading ---
 def load_models_safely():
     global boundary_model, keras_model, kmer_to_index, analyzer
-    logger.info("🔍 Loading models and data from Hugging Face repositories")
-    if not HF_TOKEN:
-        logger.error("❌ HF_TOKEN environment variable not set")
-        return
-    # Load Boundary Model
     try:
-        logger.info(f"🌐 Downloading boundary model from {MODEL_REPO}")
-        boundary_path = hf_hub_download(
-            repo_id=MODEL_REPO,
-            filename="best_boundary_aware_model.pth",
-            token=HF_TOKEN,
-            cache_dir="/tmp/hf_cache"
-        )
-        if os.path.exists(boundary_path):
-            logger.info(f"✅ Boundary model downloaded to: {boundary_path}")
-            boundary_model = EnhancedGenePredictor(boundary_path)
-            logger.info("✅ Boundary model loaded successfully")
         else:
-            logger.warning(f"❌ Boundary model not found at: {boundary_path}")
     except Exception as e:
         logger.error(f"❌ Failed to load boundary model: {e}")
         boundary_model = None
-    # Load Keras Model
     try:
-        logger.info(f"🌐 Downloading Keras model and kmer index from {MODEL_REPO}")
-        keras_path = hf_hub_download(
-            repo_id=MODEL_REPO,
-            filename="best_model.keras",
-            token=HF_TOKEN,
-            cache_dir="/tmp/hf_cache"
-        )
-        kmer_path = hf_hub_download(
-            repo_id=MODEL_REPO,
-            filename="kmer_to_index.pkl",
-            token=HF_TOKEN,
-            cache_dir="/tmp/hf_cache"
-        )
-        if os.path.exists(keras_path) and os.path.exists(kmer_path):
-            logger.info(f"✅ Keras model downloaded to: {keras_path}")
-            logger.info(f"✅ Kmer index downloaded to: {kmer_path}")
-            keras_model = load_model(keras_path)
-            with open(kmer_path, "rb") as f:
                 kmer_to_index = pickle.load(f)
-            logger.info("✅ Keras model loaded successfully")
         else:
-            logger.warning(f"❌ Keras model files not found: keras={os.path.exists(keras_path)}, kmer={os.path.exists(kmer_path)}")
     except Exception as e:
         logger.error(f"❌ Failed to load Keras model: {e}")
         keras_model = None
@@ -130,22 +163,38 @@ def load_models_safely():
     try:
         logger.info("🌳 Initializing tree analyzer...")
         analyzer = PhylogeneticTreeAnalyzer()
-        logger.info(f"🌐 Downloading CSV from {DATA_REPO}")
-        csv_path = hf_hub_download(
-            repo_id=DATA_REPO,
-            filename="f_cleaned.csv",
-            token=HF_TOKEN,
-            cache_dir="/tmp/hf_cache"
-        )
-        if os.path.exists(csv_path):
-            logger.info(f"📊 CSV downloaded to: {csv_path}")
-            if analyzer.load_data(csv_path):
-                logger.info(f"✅ Tree analyzer loaded CSV successfully")
-            else:
-                logger.error("❌ Failed to load CSV data")
-                analyzer = None
-        else:
-            logger.warning(f"❌ CSV not found at: {csv_path}")
             analyzer = None
     except Exception as e:
         logger.error(f"❌ Failed to initialize tree analyzer: {e}")
@@ -166,9 +215,12 @@ def setup_binary_permissions():
 def check_tool_availability():
     setup_binary_permissions()
     mafft_available = False
     mafft_cmd = None
     mafft_candidates = ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH]
     for candidate in mafft_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
@@ -185,9 +237,12 @@ def check_tool_availability():
                     break
             except Exception as e:
                 logger.debug(f"MAFFT test failed for {candidate}: {e}")
     iqtree_available = False
     iqtree_cmd = None
     iqtree_candidates = ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH]
     for candidate in iqtree_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
@@ -204,36 +259,46 @@ def check_tool_availability():
                     break
             except Exception as e:
                 logger.debug(f"IQ-TREE test failed for {candidate}: {e}")
     return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
-# --- Pipeline Functions ---
 def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
     """Add a query sequence to the reference alignment and reference tree.

     The query is written as FASTA under QUERY_OUTPUT_DIR, merged into
     ALIGNMENT_PATH with ``mafft --add ... --reorder``, and IQ-TREE is then
     run on the merged alignment with TREE_PATH supplied through ``-g`` and
     the ``GTR+G`` model.

     Args:
         sequence: Query nucleotide sequence. Surrounding whitespace is
             ignored and the sequence is upper-cased before it is written.
         mafft_cmd: MAFFT executable to invoke.
         iqtree_cmd: IQ-TREE executable to invoke.

     Returns:
         A ``(success, message, aligned_path, treefile_path)`` tuple. A path
         is ``None`` when the corresponding file was not produced. This
         function does not raise; every failure is reported in the message.
     """
     # Set once MAFFT has produced a non-empty alignment so that a later
     # IQ-TREE error can still hand the alignment back to the caller, the
     # same way the "treefile missing" branch below already does.
     finished_alignment = None
     try:
         cleaned = sequence.strip().upper()
         if len(cleaned) < 100:
             return False, "Sequence too short (<100 bp).", None, None
         query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
         query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
         aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
         output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
         if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
             return False, "Reference alignment or tree not found.", None, None
         # Write exactly the sequence that was length-checked above, so stray
         # leading/trailing whitespace never ends up inside the FASTA record.
         query_record = SeqRecord(Seq(cleaned), id=query_id, description="")
         SeqIO.write([query_record], query_fasta, "fasta")
         with open(aligned_with_query, "w") as output_file:
             subprocess.run([
                 mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH
             ], stdout=output_file, stderr=subprocess.PIPE, text=True, timeout=600, check=True)
         if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
             return False, "MAFFT alignment failed.", None, None
         finished_alignment = aligned_with_query
         subprocess.run([
             iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH,
             "-m", "GTR+G", "-pre", output_prefix, "-redo"
         ], capture_output=True, text=True, timeout=1200, check=True)
         treefile = f"{output_prefix}.treefile"
         if not os.path.exists(treefile):
             return False, "IQ-TREE placement failed.", aligned_with_query, None
         success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
         return True, success_msg, aligned_with_query, treefile
     except subprocess.CalledProcessError as e:
         # str(e) only carries the exit status; the tool's stderr is what
         # actually explains the failure, so surface its tail.
         cmd = e.cmd
         tool = os.path.basename(str(cmd[0] if isinstance(cmd, (list, tuple)) and cmd else cmd))
         detail = (e.stderr or "").strip()[-500:]
         message = f"Error: {tool} exited with status {e.returncode}"
         if detail:
             message = f"{message}: {detail}"
         logger.error(f"Phylogenetic placement failed: {message}")
         return False, message, finished_alignment, None
     except subprocess.TimeoutExpired as e:
         cmd = e.cmd
         tool = os.path.basename(str(cmd[0] if isinstance(cmd, (list, tuple)) and cmd else cmd))
         message = f"Error: {tool} timed out after {e.timeout} seconds"
         logger.error(f"Phylogenetic placement failed: {message}")
         return False, message, finished_alignment, None
     except Exception as e:
         logger.error(f"Phylogenetic placement failed: {e}")
         return False, f"Error: {str(e)}", finished_alignment, None
@@ -248,14 +313,18 @@ def predict_with_keras(sequence):
     try:
         if not keras_model or not kmer_to_index:
             return "❌ Keras model not available."
         if len(sequence) < 6:
             return "❌ Sequence too short (<6 bp)."
         kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
         indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
         input_arr = np.array([indices])
         prediction = keras_model.predict(input_arr, verbose=0)[0]
         f_gene_prob = prediction[-1]
         percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
         return f"✅ {percentage}% F gene confidence"
     except Exception as e:
         logger.error(f"Keras prediction failed: {e}")
@@ -266,9 +335,14 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
         dna_input = dna_input.upper().strip()
         if not dna_input:
             return "❌ Empty input", "", "", "", "", None, None, None, None, "No input", "No input"
         if not re.match('^[ACTGN]+$', dna_input):
             dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
         processed_sequence = dna_input
         boundary_output = ""
         if boundary_model:
             try:
@@ -285,10 +359,15 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
                 processed_sequence = dna_input
         else:
             boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"
         keras_output = predict_with_keras(processed_sequence) if processed_sequence and len(processed_sequence) >= 6 else "❌ Sequence too short."
         aligned_file = None
         phy_file = None
         ml_tree_output = ""
         if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
             try:
                 mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
@@ -305,23 +384,29 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
             ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
         else:
             ml_tree_output = "⚠️ Phylogenetic placement skipped."
         tree_html_content = "No tree generated."
         report_html_content = "No report generated."
         simplified_ml_output = ""
         if analyzer and processed_sequence and len(processed_sequence) >= 10:
             try:
                 tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
                 simplified_ml_output = tree_result
                 if tree_html_path and os.path.exists(tree_html_path):
                     with open(tree_html_path, 'r', encoding='utf-8') as f:
                         tree_html_content = f.read()
                 else:
                     tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
                 if report_html_path and os.path.exists(report_html_path):
                     with open(report_html_path, 'r', encoding='utf-8') as f:
                         report_html_content = f.read()
                 else:
                     report_html_content = f"<div style='color: red;'>{tree_result}</div>"
             except Exception as e:
                 simplified_ml_output = f"❌ Tree analysis error: {str(e)}"
                 tree_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
@@ -330,6 +415,8 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
             simplified_ml_output = "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
             tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
             report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
         summary_output = f"""
 📊 ANALYSIS SUMMARY:
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
@@ -340,37 +427,49 @@ Placement: {'✅ OK' if '✅' in ml_tree_output else '⚠️ Skipped' if 'skippe
 Tree Analysis: {'✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'}
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 """
         return (
             boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
             aligned_file, phy_file, None, None, tree_html_content, report_html_content
         )
     except Exception as e:
         logger.error(f"Pipeline error: {e}")
         error_msg = f"❌ Pipeline Error: {str(e)}"
         return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
 def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
     """Run the similarity-tree analysis for one query sequence.

     Uses the module-level ``analyzer`` to register the query, collect
     sequences at the requested similarity threshold, build the tree, and
     write the interactive tree HTML under ``/tmp``. A detailed report is
     requested as well; its path is returned only when the analyzer reports
     success.

     Args:
         sequence: Query sequence; must be at least 10 characters after
             stripping whitespace.
         matching_percentage: Similarity threshold, accepted in 1..99.

     Returns:
         ``(message, tree_html_path, report_html_path)``. Both paths are
         ``None`` on any failure; errors are reported in the message rather
         than raised.
     """
     try:
         # Guard clauses: each one short-circuits with a user-facing message.
         if not analyzer:
             return "❌ Tree analyzer not initialized.", None, None
         if not sequence or len(sequence.strip()) < 10:
             return "❌ Invalid sequence.", None, None
         if not (1 <= matching_percentage <= 99):
             return "❌ Matching percentage must be 1-99.", None, None
         accepted = analyzer.find_query_sequence(sequence)
         if not accepted:
             return "❌ Sequence not accepted.", None, None
         hits, achieved_pct = analyzer.find_similar_sequences(matching_percentage)
         if not hits:
             return f"❌ No similar sequences at {matching_percentage}% threshold.", None, None
         analyzer.build_tree_structure_with_ml_safe(hits)
         figure = analyzer.create_interactive_tree(hits, achieved_pct)
         # Fall back to a timestamp-based id when the analyzer has none.
         run_id = analyzer.query_id or f"query_{int(time.time())}"
         tree_out = os.path.join("/tmp", f'phylogenetic_tree_{run_id}.html')
         figure.write_html(tree_out)
         analyzer.matching_percentage = matching_percentage
         if analyzer.generate_detailed_report(hits, achieved_pct):
             report_out = os.path.join("/tmp", f"detailed_report_{run_id}.html")
         else:
             report_out = None
         summary = f"✅ Found {len(hits)} sequences at {achieved_pct:.2f}% similarity."
         return summary, tree_out, report_out
     except Exception as e:
         logger.error(f"Tree analysis failed: {e}")
         return f"❌ Error: {str(e)}", None, None
@@ -379,14 +478,17 @@ def read_fasta_file(file_obj):
     try:
         if file_obj is None:
             return ""
         if isinstance(file_obj, str):
             with open(file_obj, "r") as f:
                 content = f.read()
         else:
             content = file_obj.read().decode("utf-8")
         lines = content.strip().split("\n")
         seq_lines = [line.strip() for line in lines if not line.startswith(">")]
         return ''.join(seq_lines)
     except Exception as e:
         logger.error(f"Failed to read FASTA file: {e}")
         return ""
@@ -447,11 +549,17 @@ async def health_check():
             },
             "paths": {
                 "base_dir": BASE_DIR,
-                "hf_cache": "/tmp/hf_cache",
-                "hf_cache_exists": os.path.exists("/tmp/hf_cache")
             },
             "recommendations": {
-                "models": "Models loaded from Hugging Face" if (boundary_model and keras_model) else "Check HF_TOKEN and repository",
                 "bioinformatics_tools": "Install MAFFT and IQ-TREE" if not (mafft_available and iqtree_available) else "OK"
             }
         }
@@ -491,7 +599,9 @@ async def analyze_file(
             content = await file.read()
             temp_file.write(content)
             temp_file_path = temp_file.name
         result = await run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
         return AnalysisResponse(
             boundary_output=result[0] or "",
             keras_output=result[1] or "",
@@ -514,7 +624,7 @@ async def analyze_file(
             except:
                 pass
-# --- Enhanced Gradio Interface ---
 def create_gradio_interface():
     try:
         with gr.Blocks(
@@ -528,7 +638,10 @@ def create_gradio_interface():
             .error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
             """
         ) as iface:
             gr.Markdown("# 🧬 Gene Analysis Pipeline")
             with gr.Row():
                 with gr.Column():
                     status_display = gr.HTML(value=f"""
@@ -536,206 +649,281 @@ def create_gradio_interface():
                         <h3>🔧 System Status</h3>
                         <p>🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}</p>
                         <p>🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}</p>
-                        <p>🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}</p>
-                        <p>🧬 MAFFT: {'✅ Available' if check_tool_availability()[0] else '❌ Missing'}</p>
-                        <p>🌲 IQ-TREE: {'✅ Available' if check_tool_availability()[1] else '❌ Missing'}</p>
                     """)
-            with gr.Tabs() as tabs:
                 with gr.TabItem("📝 Text Input"):
-                    with gr.Row():
-                        with gr.Column(scale=2):
-                            dna_input = gr.Textbox(
-                                label="🧬 DNA Sequence",
-                                placeholder="Enter DNA sequence (ATCG format)...",
-                                lines=5,
-                                info="Paste your DNA sequence here"
-                            )
-                        with gr.Column(scale=1):
-                            similarity_score = gr.Slider(
-                                minimum=1,
-                                maximum=99,
-                                value=95.0,
-                                step=1.0,
-                                label="🎯 Similarity Threshold (%)",
-                                info="Minimum similarity for tree analysis"
-                            )
-                            build_ml_tree = gr.Checkbox(
-                                label="🌲 Build ML Tree",
-                                value=False,
-                                info="Generate phylogenetic placement (slower)"
-                            )
-                            analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
                 with gr.TabItem("📁 File Upload"):
-                    with gr.Row():
-                        with gr.Column(scale=2):
-                            file_input = gr.File(
-                                label="📄 Upload FASTA File",
-                                file_types=[".fasta", ".fa", ".fas", ".txt"],
-                                info="Upload a FASTA file containing your sequence"
-                            )
-                        with gr.Column(scale=1):
-                            file_similarity_score = gr.Slider(
-                                minimum=1,
-                                maximum=99,
-                                value=95.0,
-                                step=1.0,
-                                label="🎯 Similarity Threshold (%)"
-                            )
-                            file_build_ml_tree = gr.Checkbox(
-                                label="🌲 Build ML Tree",
-                                value=False
-                            )
-                            analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary")
-            gr.Markdown("## 📊 Analysis Results")
             with gr.Row():
                 with gr.Column():
                     boundary_output = gr.Textbox(
-                        label="🎯 Boundary Detection",
-                        interactive=False,
-                        lines=2
                     )
                     keras_output = gr.Textbox(
-                        label="🧠 F Gene Validation",
-                        interactive=False,
-                        lines=2
                     )
-                with gr.Column():
                     ml_tree_output = gr.Textbox(
-                        label="🌲 Phylogenetic Placement",
-                        interactive=False,
-                        lines=2
                     )
                     tree_analysis_output = gr.Textbox(
-                        label="🌳 Tree Analysis",
-                        interactive=False,
-                        lines=2
                     )
-            summary_output = gr.Textbox(
-                label="📋 Summary",
-                interactive=False,
-                lines=8
-            )
-            with gr.Row():
-                aligned_file = gr.File(label="📄 Alignment File", visible=False)
-                tree_file = gr.File(label="🌲 Tree File", visible=False)
             with gr.Tabs():
                 with gr.TabItem("🌳 Interactive Tree"):
                     tree_html = gr.HTML(
-                        label="Phylogenetic Tree",
-                        value="<div style='text-align: center; padding: 20px; color: #666;'>No tree generated yet.</div>"
                     )
                 with gr.TabItem("📊 Detailed Report"):
                     report_html = gr.HTML(
                         label="Analysis Report",
-                        value="<div style='text-align: center; padding: 20px; color: #666;'>No report generated yet.</div>"
                     )
-            analyze_btn.click(
                 fn=run_pipeline,
-                inputs=[dna_input, similarity_score, build_ml_tree],
                 outputs=[
-                    boundary_output, keras_output, ml_tree_output,
-                    tree_analysis_output, summary_output,
-                    aligned_file, tree_file, gr.State(), gr.State(),
-                    tree_html, report_html
                 ]
             )
             analyze_file_btn.click(
                 fn=run_pipeline_from_file,
-                inputs=[file_input, file_similarity_score, file_build_ml_tree],
                 outputs=[
-                    boundary_output, keras_output, ml_tree_output,
-                    tree_analysis_output, summary_output,
-                    aligned_file, tree_file, gr.State(), gr.State(),
-                    tree_html, report_html
                 ]
             )
-            gr.Markdown("## 🔬 Example Sequences")
-            example_sequences = [
-                ["ATGGACTTCCAAATTAACAACCTCAACAACCTCAACAACATCAACAACATCAACAACATCAACAACATCAACAAC", 90.0, False],
-                ["ATGAAACAAATTAACAACCTCAACAACCTCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAACATCAACAAC", 85.0, True],
-            ]
             gr.Examples(
-                examples=example_sequences,
-                inputs=[dna_input, similarity_score, build_ml_tree],
                 label="Click to load example sequences"
             )
-            with gr.Accordion("❓ Help & Information", open=False):
-                gr.Markdown("""
-                ### 🧬 Gene Analysis Pipeline
-                This tool performs comprehensive analysis of F gene sequences:
-                **🎯 Boundary Detection**: Identifies F gene regions within your sequence
-                **🧠 F Gene Validation**: Validates sequence as F gene using deep learning
-                **🌲 Phylogenetic Placement**: Places sequence in reference phylogeny
-                **🌳 Tree Analysis**: Finds similar sequences and builds interactive trees
-                ### 📋 Input Requirements
-                - DNA sequences in ATCG format
-                - Minimum 10 bp for basic analysis
-                - Minimum 100 bp for phylogenetic placement
-                - FASTA files supported for upload
-                ### ⚙️ Parameters
-                - **Similarity Threshold**: Minimum % similarity for tree analysis (1-99%)
-                - **Build ML Tree**: Enable phylogenetic placement (requires MAFFT/IQ-TREE)
-                ### 📊 Output Files
-                - Alignment files (.fa format)
-                - Tree files (.treefile format)
-                - Interactive HTML visualizations
-                """)
         return iface
     except Exception as e:
         logger.error(f"Failed to create Gradio interface: {e}")
-        return None
 # --- Application Startup ---
-def mount_gradio_app():
     try:
-        gradio_app = create_gradio_interface()
-        if gradio_app:
-            app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
-            logger.info("✅ Gradio interface mounted at /gradio")
-        else:
-            logger.error("❌ Failed to create Gradio interface")
     except Exception as e:
-        logger.error(f"❌ Failed to mount Gradio app: {e}")
-# Initialize Gradio
-mount_gradio_app()
-# --- Main Application ---
-if __name__ == "__main__":
-    import argparse
-    parser = argparse.ArgumentParser(description="🧬 Gene Analysis Pipeline")
-    parser.add_argument("--host", default="0.0.0.0", help="Host address")
-    parser.add_argument("--port", type=int, default=7860, help="Port number")
-    parser.add_argument("--reload", action="store_true", help="Enable auto-reload")
-    parser.add_argument("--gradio-only", action="store_true", help="Run Gradio interface only")
-    args = parser.parse_args()
-    if args.gradio_only:
-        logger.info("🚀 Starting Gradio interface only...")
-        iface = create_gradio_interface()
-        if iface:
-            iface.launch(
-                server_name=args.host,
-                server_port=args.port,
-                share=False,
-                show_error=True
-            )
-        else:
-            logger.error("❌ Failed to create Gradio interface")
-            sys.exit(1)
-    else:
-        logger.info(f"🚀 Starting Gene Analysis Pipeline on {args.host}:{args.port}")
-        logger.info("📊 API Documentation: http://localhost:7860/docs")
-        logger.info("🧬 Gradio Interface: http://localhost:7860/gradio")
-        try:
-            uvicorn.run(
-                "app:app" if args.reload else app,
-                host=args.host,
-                port=args.port,
-                reload=args.reload,
-                log_level="info"
-            )
-        except KeyboardInterrupt:
-            logger.info("🛑 Application stopped by user")
-        except Exception as e:
-            logger.error(f"❌ Application failed: {e}")
-            sys.exit(1)

 import stat
 import time
 import asyncio
+# FastAPI imports
 from fastapi import FastAPI, File, UploadFile, Form, HTTPException
 from fastapi.responses import HTMLResponse
 from pydantic import BaseModel
 log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
 log_handler = logging.StreamHandler()
 log_handler.setFormatter(log_formatter)
+# File handler with error handling
 try:
     file_handler = logging.FileHandler('/tmp/app.log')
     file_handler.setFormatter(log_formatter)
     logging.basicConfig(level=logging.INFO, handlers=[log_handler, file_handler])
 except Exception:
     logging.basicConfig(level=logging.INFO, handlers=[log_handler])
 logger = logging.getLogger(__name__)
 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+MODELS_DIR = os.path.join(BASE_DIR, "models")  # Local models directory
 MAFFT_PATH = shutil.which("mafft") or os.path.join(BASE_DIR, "binaries", "mafft", "mafft")
 IQTREE_PATH = shutil.which("iqtree") or os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
 ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
 QUERY_OUTPUT_DIR = os.path.join("/tmp", "queries")
 os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
+# --- Corrected Paths ---
+boundary_model_repo = "GGproject10/best_boundary_aware_model"
+other_models_repo = "GGproject10/simplified_tree_AI"
+csv_path = os.path.join(BASE_DIR, "f_cleaned.csv")
+hf_token = os.getenv("HF_TOKEN")
 # Initialize models as None
 boundary_model = None
 kmer_to_index = None
 analyzer = None
+# --- Enhanced Model Loading with Correct Paths ---
 def load_models_safely():
     global boundary_model, keras_model, kmer_to_index, analyzer
+    logger.info(f"🔍 Looking for models in: {MODELS_DIR}")
+    logger.info(f"📁 Models directory exists: {os.path.exists(MODELS_DIR)}")
+    if os.path.exists(MODELS_DIR):
+        logger.info(f"📂 Contents of models directory: {os.listdir(MODELS_DIR)}")
+    # Load Boundary Model - Try local first, then HF from correct repo
     try:
+        # Local model paths
+        local_boundary_path = os.path.join(MODELS_DIR, "best_boundary_aware_model.pth")
+        if os.path.exists(local_boundary_path):
+            logger.info(f"✅ Loading boundary model from local path: {local_boundary_path}")
+            boundary_model = EnhancedGenePredictor(local_boundary_path)
+            logger.info("✅ Boundary model loaded successfully from local directory")
+        elif hf_token:
+            logger.info("🌐 Attempting to load boundary model from Hugging Face...")
+            boundary_path = hf_hub_download(
+                repo_id=boundary_model_repo,  # Correct repo for boundary model
+                filename="best_boundary_aware_model.pth",
+                token=hf_token,
+                cache_dir="/tmp/hf_cache"
+            )
+            if os.path.exists(boundary_path):
+                boundary_model = EnhancedGenePredictor(boundary_path)
+                logger.info("✅ Boundary model loaded successfully from HF")
+            else:
+                logger.warning("❌ Boundary model file not found after HF download")
         else:
+            logger.warning("❌ No local boundary model found and no HF_TOKEN available")
     except Exception as e:
         logger.error(f"❌ Failed to load boundary model: {e}")
         boundary_model = None
+    # Load Keras Model - Try local first, then HF from correct repo
     try:
+        # Local model paths
+        local_keras_path = os.path.join(MODELS_DIR, "best_model.keras")
+        local_kmer_path = os.path.join(MODELS_DIR, "kmer_to_index.pkl")
+        if os.path.exists(local_keras_path) and os.path.exists(local_kmer_path):
+            logger.info(f"✅ Loading Keras model from local paths:")
+            logger.info(f"   - Keras model: {local_keras_path}")
+            logger.info(f"   - K-mer index: {local_kmer_path}")
+            keras_model = load_model(local_keras_path)
+            with open(local_kmer_path, "rb") as f:
                 kmer_to_index = pickle.load(f)
+            logger.info("✅ Keras model loaded successfully from local directory")
+        elif hf_token:
+            logger.info("🌐 Attempting to load Keras model from Hugging Face...")
+            keras_path = hf_hub_download(
+                repo_id=other_models_repo,  # Correct repo for other models
+                filename="best_model.keras",
+                token=hf_token,
+                cache_dir="/tmp/hf_cache"
+            )
+            kmer_path = hf_hub_download(
+                repo_id=other_models_repo,  # Correct repo for other models
+                filename="kmer_to_index.pkl",
+                token=hf_token,
+                cache_dir="/tmp/hf_cache"
+            )
+            if os.path.exists(keras_path) and os.path.exists(kmer_path):
+                keras_model = load_model(keras_path)
+                with open(kmer_path, "rb") as f:
+                    kmer_to_index = pickle.load(f)
+                logger.info("✅ Keras model loaded successfully from HF")
+            else:
+                logger.warning("❌ Keras model files not found after HF download")
         else:
+            logger.warning("❌ No local Keras model found and no HF_TOKEN available")
     except Exception as e:
         logger.error(f"❌ Failed to load Keras model: {e}")
         keras_model = None
     try:
         logger.info("🌳 Initializing tree analyzer...")
         analyzer = PhylogeneticTreeAnalyzer()
+        # Try multiple CSV locations
+        csv_candidates = [
+            csv_path,
+            os.path.join(BASE_DIR, "f cleaned.csv"),
+            "f_cleaned.csv",
+            os.path.join(BASE_DIR, "data", "f_cleaned.csv"),
+            os.path.join(MODELS_DIR, "f_cleaned.csv")  # Also check models directory
+        ]
+        csv_loaded = False
+        for csv_candidate in csv_candidates:
+            if os.path.exists(csv_candidate):
+                try:
+                    logger.info(f"📊 Trying to load CSV from: {csv_candidate}")
+                    if analyzer.load_data(csv_candidate):
+                        logger.info(f"✅ Tree analyzer loaded CSV from: {csv_candidate}")
+                        csv_loaded = True
+                        break
+                except Exception as e:
+                    logger.warning(f"Failed to load CSV from {csv_candidate}: {e}")
+                    continue
+        if not csv_loaded:
+            logger.error("❌ Failed to load CSV data from any location")
+            logger.info("📂 Available files in base directory:")
+            try:
+                for file in os.listdir(BASE_DIR):
+                    if file.endswith('.csv'):
+                        logger.info(f"   - {file}")
+            except:
+                pass
             analyzer = None
     except Exception as e:
         logger.error(f"❌ Failed to initialize tree analyzer: {e}")
 def check_tool_availability():
     setup_binary_permissions()
+    # Check MAFFT
     mafft_available = False
     mafft_cmd = None
     mafft_candidates = ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH]
     for candidate in mafft_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
                     break
             except Exception as e:
                 logger.debug(f"MAFFT test failed for {candidate}: {e}")
+    # Check IQ-TREE
     iqtree_available = False
     iqtree_cmd = None
     iqtree_candidates = ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH]
     for candidate in iqtree_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
                     break
             except Exception as e:
                 logger.debug(f"IQ-TREE test failed for {candidate}: {e}")
     return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
+# --- Pipeline Functions (original logic retained) ---
 def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
     """Place a query sequence on the reference tree using MAFFT and IQ-TREE.

     The query is added to the reference alignment with ``mafft --add`` and
     the combined alignment is then given to IQ-TREE together with the
     reference tree (``-g``) under the ``GTR+G`` model. All intermediate and
     result files are written to ``QUERY_OUTPUT_DIR`` under a random query id.

     Args:
         sequence: Query nucleotide sequence. Leading/trailing whitespace is
             removed and the text is upper-cased before it is written out.
         mafft_cmd: MAFFT executable to invoke.
         iqtree_cmd: IQ-TREE executable to invoke.

     Returns:
         A ``(success, message, aligned_path, treefile_path)`` tuple. On
         failure ``success`` is ``False`` and ``treefile_path`` is ``None``;
         ``aligned_path`` is only set on failure when the alignment succeeded
         but IQ-TREE produced no tree file.
     """
     try:
         # Validate and write the *same* trimmed text, so stray whitespace
         # around the input never ends up inside the FASTA record.
         sequence = sequence.strip()
         if len(sequence) < 100:
             return False, "Sequence too short (<100 bp).", None, None
         if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
             return False, "Reference alignment or tree not found.", None, None
         query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
         query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
         aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
         output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
         query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
         SeqIO.write([query_record], query_fasta, "fasta")
         # MAFFT prints the merged alignment on stdout; stream it to the file.
         with open(aligned_with_query, "w") as output_file:
             subprocess.run(
                 [mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH],
                 stdout=output_file, stderr=subprocess.PIPE, text=True,
                 timeout=600, check=True,
             )
         if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
             return False, "MAFFT alignment failed.", None, None
         subprocess.run(
             [
                 iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH,
                 "-m", "GTR+G", "-pre", output_prefix, "-redo",
             ],
             capture_output=True, text=True, timeout=1200, check=True,
         )
         treefile = f"{output_prefix}.treefile"
         if not os.path.exists(treefile):
             return False, "IQ-TREE placement failed.", aligned_with_query, None
         success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
         return True, success_msg, aligned_with_query, treefile
     except subprocess.CalledProcessError as e:
         # str(e) only carries the exit status; the tool's own diagnostics
         # live in the captured streams, so surface their tail as well.
         detail = (e.stderr or e.stdout or "").strip()[-500:]
         logger.error(f"Phylogenetic placement failed: {e}; tool output: {detail}")
         message = f"Error: {str(e)}"
         if detail:
             message += f"\n{detail}"
         return False, message, None, None
     except Exception as e:
         logger.error(f"Phylogenetic placement failed: {e}")
         return False, f"Error: {str(e)}", None, None
     try:
         if not keras_model or not kmer_to_index:
             return "❌ Keras model not available."
         if len(sequence) < 6:
             return "❌ Sequence too short (<6 bp)."
         kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
         indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
         input_arr = np.array([indices])
         prediction = keras_model.predict(input_arr, verbose=0)[0]
         f_gene_prob = prediction[-1]
         percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
         return f"✅ {percentage}% F gene confidence"
     except Exception as e:
         logger.error(f"Keras prediction failed: {e}")
         dna_input = dna_input.upper().strip()
         if not dna_input:
             return "❌ Empty input", "", "", "", "", None, None, None, None, "No input", "No input"
+        # Clean sequence
         if not re.match('^[ACTGN]+$', dna_input):
             dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
         processed_sequence = dna_input
+        # Boundary prediction
         boundary_output = ""
         if boundary_model:
             try:
                 processed_sequence = dna_input
         else:
             boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"
+        # Keras prediction
         keras_output = predict_with_keras(processed_sequence) if processed_sequence and len(processed_sequence) >= 6 else "❌ Sequence too short."
+        # ML Tree (keeping your original logic)
         aligned_file = None
         phy_file = None
         ml_tree_output = ""
         if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
             try:
                 mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
             ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
         else:
             ml_tree_output = "⚠️ Phylogenetic placement skipped."
+        # Tree analysis
         tree_html_content = "No tree generated."
         report_html_content = "No report generated."
         simplified_ml_output = ""
         if analyzer and processed_sequence and len(processed_sequence) >= 10:
             try:
                 tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
                 simplified_ml_output = tree_result
                 if tree_html_path and os.path.exists(tree_html_path):
                     with open(tree_html_path, 'r', encoding='utf-8') as f:
                         tree_html_content = f.read()
                 else:
                     tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
                 if report_html_path and os.path.exists(report_html_path):
                     with open(report_html_path, 'r', encoding='utf-8') as f:
                         report_html_content = f.read()
                 else:
                     report_html_content = f"<div style='color: red;'>{tree_result}</div>"
             except Exception as e:
                 simplified_ml_output = f"❌ Tree analysis error: {str(e)}"
                 tree_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
             simplified_ml_output = "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
             tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
             report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
+        # Summary
         summary_output = f"""
 📊 ANALYSIS SUMMARY:
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 Tree Analysis: {'✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'}
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 """
         return (
             boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
             aligned_file, phy_file, None, None, tree_html_content, report_html_content
         )
     except Exception as e:
         logger.error(f"Pipeline error: {e}")
         error_msg = f"❌ Pipeline Error: {str(e)}"
         return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
+# --- Other pipeline helpers (analyze_sequence_for_tree, build_maximum_likelihood_tree, etc.) ---
 def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
     """Search for similar sequences and render the tree and report as HTML.

     Uses the module-level ``analyzer``: registers the query, looks up
     sequences at the requested threshold, builds the tree structure, writes
     the interactive figure to ``/tmp`` and asks the analyzer to generate its
     detailed report.

     Args:
         sequence: Query sequence; must be at least 10 characters after
             trimming.
         matching_percentage: Similarity threshold, accepted in the range 1-99.

     Returns:
         A ``(message, tree_html_path, report_html_path)`` tuple. Both paths
         are ``None`` on any failure; ``report_html_path`` is also ``None``
         when the analyzer reports that no report was generated.
     """
     try:
         # Guard clauses: reject unusable state or input before doing any work.
         if not analyzer:
             return "❌ Tree analyzer not initialized.", None, None
         if not sequence or len(sequence.strip()) < 10:
             return "❌ Invalid sequence.", None, None
         threshold_ok = 1 <= matching_percentage <= 99
         if not threshold_ok:
             return "❌ Matching percentage must be 1-99.", None, None
         accepted = analyzer.find_query_sequence(sequence)
         if not accepted:
             return "❌ Sequence not accepted.", None, None
         hits, achieved_pct = analyzer.find_similar_sequences(matching_percentage)
         if not hits:
             return f"❌ No similar sequences at {matching_percentage}% threshold.", None, None
         # Build the tree and render the interactive figure.
         analyzer.build_tree_structure_with_ml_safe(hits)
         figure = analyzer.create_interactive_tree(hits, achieved_pct)
         # Fall back to a timestamp-based tag when the analyzer has no query id.
         tag = analyzer.query_id
         if not tag:
             tag = f"query_{int(time.time())}"
         tree_html_path = os.path.join("/tmp", f'phylogenetic_tree_{tag}.html')
         figure.write_html(tree_html_path)
         analyzer.matching_percentage = matching_percentage
         # The report path is only returned when the analyzer says it succeeded.
         if analyzer.generate_detailed_report(hits, achieved_pct):
             report_html_path = os.path.join("/tmp", f"detailed_report_{tag}.html")
         else:
             report_html_path = None
         summary = f"✅ Found {len(hits)} sequences at {achieved_pct:.2f}% similarity."
         return summary, tree_html_path, report_html_path
     except Exception as e:
         logger.error(f"Tree analysis failed: {e}")
         return f"❌ Error: {str(e)}", None, None
     try:
         if file_obj is None:
             return ""
         if isinstance(file_obj, str):
             with open(file_obj, "r") as f:
                 content = f.read()
         else:
             content = file_obj.read().decode("utf-8")
         lines = content.strip().split("\n")
         seq_lines = [line.strip() for line in lines if not line.startswith(">")]
         return ''.join(seq_lines)
     except Exception as e:
         logger.error(f"Failed to read FASTA file: {e}")
         return ""
             },
             "paths": {
                 "base_dir": BASE_DIR,
+                "models_dir": MODELS_DIR,
+                "models_dir_exists": os.path.exists(MODELS_DIR),
+                "csv_path": csv_path,
+                "csv_exists": os.path.exists(csv_path)
+            },
+            "model_repos": {
+                "boundary_model": boundary_model_repo,
+                "other_models": other_models_repo
             },
             "recommendations": {
+                "models": "Models loaded from local directory" if (boundary_model and keras_model) else "Check models directory",
                 "bioinformatics_tools": "Install MAFFT and IQ-TREE" if not (mafft_available and iqtree_available) else "OK"
             }
         }
             content = await file.read()
             temp_file.write(content)
             temp_file_path = temp_file.name
         result = await run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
         return AnalysisResponse(
             boundary_output=result[0] or "",
             keras_output=result[1] or "",
             except:
                 pass
+# --- Fixed Gradio Interface ---
 def create_gradio_interface():
     try:
         with gr.Blocks(
             .error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
             """
         ) as iface:
             gr.Markdown("# 🧬 Gene Analysis Pipeline")
+            # Status display
             with gr.Row():
                 with gr.Column():
                     status_display = gr.HTML(value=f"""
                         <h3>🔧 System Status</h3>
                         <p>🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}</p>
                         <p>🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}</p>
+                        <p>🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}
+                        <p>🔬 MAFFT/IQ-TREE: {'✅ Available' if check_tool_availability()[0] and check_tool_availability()[1] else '❌ Missing'}</p>
                     """)
+            # Input tabs
+            with gr.Tabs():
                 with gr.TabItem("📝 Text Input"):
+                    dna_input = gr.Textbox(
+                        label="🧬 DNA Sequence",
+                        placeholder="Enter DNA sequence (ATCG format)...",
+                        lines=5,
+                        max_lines=10
+                    )
                 with gr.TabItem("📁 File Upload"):
+                    fasta_file = gr.File(
+                        label="📄 Upload FASTA File",
+                        file_types=[".fasta", ".fa", ".txt"],
+                        file_count="single"
+                    )
+            # Analysis options
             with gr.Row():
                 with gr.Column():
+                    similarity_slider = gr.Slider(
+                        minimum=1,
+                        maximum=99,
+                        value=95,
+                        step=1,
+                        label="🎯 Similarity Threshold (%)",
+                        info="Minimum similarity for phylogenetic analysis"
+                    )
+                with gr.Column():
+                    ml_tree_checkbox = gr.Checkbox(
+                        label="🌲 Build ML Tree",
+                        value=False,
+                        info="Perform phylogenetic placement (slower)"
+                    )
+            # Action buttons
+            with gr.Row():
+                analyze_text_btn = gr.Button("🔍 Analyze Text", variant="primary", size="lg")
+                analyze_file_btn = gr.Button("📁 Analyze File", variant="secondary", size="lg")
+                clear_btn = gr.Button("🗑️ Clear", variant="stop")
+            # Results section
+            gr.Markdown("## 📊 Analysis Results")
+            with gr.Tabs():
+                with gr.TabItem("🎯 Boundary Prediction"):
                     boundary_output = gr.Textbox(
+                        label="🔍 F Gene Boundary Detection",
+                        lines=3,
+                        interactive=False
                     )
+                with gr.TabItem("🧠 Keras Validation"):
                     keras_output = gr.Textbox(
+                        label="🤖 Neural Network Validation",
+                        lines=3,
+                        interactive=False
                     )
+                with gr.TabItem("🌲 ML Tree Placement"):
                     ml_tree_output = gr.Textbox(
+                        label="🌳 Maximum Likelihood Tree",
+                        lines=5,
+                        interactive=False
                     )
+                with gr.TabItem("📈 Tree Analysis"):
                     tree_analysis_output = gr.Textbox(
+                        label="📊 Phylogenetic Analysis",
+                        lines=5,
+                        interactive=False
                     )
+                with gr.TabItem("📋 Summary"):
+                    summary_output = gr.Textbox(
+                        label="📝 Analysis Summary",
+                        lines=10,
+                        interactive=False
+                    )
+            # Visualization section
             with gr.Tabs():
                 with gr.TabItem("🌳 Interactive Tree"):
                     tree_html = gr.HTML(
+                        label="Phylogenetic Tree Visualization",
+                        value="<div style='text-align: center; padding: 20px; color: #666;'>Tree visualization will appear here after analysis</div>"
                     )
                 with gr.TabItem("📊 Detailed Report"):
                     report_html = gr.HTML(
                         label="Analysis Report",
+                        value="<div style='text-align: center; padding: 20px; color: #666;'>Detailed report will appear here after analysis</div>"
                     )
+            # File downloads
+            gr.Markdown("## 📥 Download Results")
+            with gr.Row():
+                aligned_file = gr.File(
+                    label="📄 Aligned Sequences",
+                    interactive=False
+                )
+                tree_file = gr.File(
+                    label="🌳 Tree File",
+                    interactive=False
+                )
+            # Event handlers
+            def clear_all():
+                return (
+                    "",  # dna_input
+                    None,  # fasta_file
+                    "",  # boundary_output
+                    "",  # keras_output
+                    "",  # ml_tree_output
+                    "",  # tree_analysis_output
+                    "",  # summary_output
+                    "<div style='text-align: center; padding: 20px; color: #666;'>Tree visualization will appear here after analysis</div>",  # tree_html
+                    "<div style='text-align: center; padding: 20px; color: #666;'>Detailed report will appear here after analysis</div>",  # report_html
+                    None,  # aligned_file
+                    None   # tree_file
+                )
+            # Text analysis
+            analyze_text_btn.click(
                 fn=run_pipeline,
+                inputs=[dna_input, similarity_slider, ml_tree_checkbox],
                 outputs=[
+                    boundary_output,
+                    keras_output,
+                    ml_tree_output,
+                    tree_analysis_output,
+                    summary_output,
+                    aligned_file,
+                    tree_file,
+                    gr.State(),  # placeholder for additional outputs
+                    gr.State(),  # placeholder for additional outputs
+                    tree_html,
+                    report_html
                 ]
             )
+            # File analysis
             analyze_file_btn.click(
                 fn=run_pipeline_from_file,
+                inputs=[fasta_file, similarity_slider, ml_tree_checkbox],
                 outputs=[
+                    boundary_output,
+                    keras_output,
+                    ml_tree_output,
+                    tree_analysis_output,
+                    summary_output,
+                    aligned_file,
+                    tree_file,
+                    gr.State(),  # placeholder for additional outputs
+                    gr.State(),  # placeholder for additional outputs
+                    tree_html,
+                    report_html
                 ]
             )
+            # Clear button
+            clear_btn.click(
+                fn=clear_all,
+                outputs=[
+                    dna_input,
+                    fasta_file,
+                    boundary_output,
+                    keras_output,
+                    ml_tree_output,
+                    tree_analysis_output,
+                    summary_output,
+                    tree_html,
+                    report_html,
+                    aligned_file,
+                    tree_file
+                ]
+            )
+            # Examples
+            gr.Markdown("## 🧪 Example Sequences")
             gr.Examples(
+                examples=[
+                    ["ATGAAACTGCAGCTGAGGTCCCTGGTGGTGAACAAGCTCAGCAGCAAGTGCTGAACTGGATGGGCGAGAAGAGCAACTGCATCCAGTGCAAGCGCCTGAAGAGGAACTGCAAGAAGGTGGTGGACCTGCAGTGCAGCAGCAGCAGCAGCAGCAGCAGCAGC", 95.0, False],
+                    ["ATGAAACTGCAGCTGAGGTCCCTGGTGGTGAACAAGCTCAGCAGCAAGTGCTGAACTGGATGGGCGAGAAGAGCAACTGCATCCAGTGCAAGCGCCTGAAGAGGAACTGCAAGAAGGTGGTGGACCTGCAGTGCAGCAGCAGCAGCAGCAGCAGCAGCAGC", 85.0, True],
+                    ["ATGGAGCTGCAGCTGAGGTCCCTGGTGGTGAACAAGCTCAGCAGCAAGTGCTGAACTGGATGGGCGAGAAGAGCAACTGCATCCAGTGCAAGCGCCTGAAGAGGAACTGCAAGAAGGTGGTGGACCTGCAG", 90.0, False]
+                ],
+                inputs=[dna_input, similarity_slider, ml_tree_checkbox],
                 label="Click to load example sequences"
             )
+            # Footer
+            gr.Markdown("""
+            ---
+            ### 🔬 About This Pipeline
+            This tool performs comprehensive analysis of DNA sequences using multiple approaches:
+            - **🎯 Boundary Detection**: Identifies F gene regions using ML models
+            - **🧠 Keras Validation**: Neural network-based sequence validation
+            - **🌲 ML Tree Placement**: Phylogenetic placement using MAFFT + IQ-TREE
+            - **📈 Tree Analysis**: Interactive phylogenetic analysis and visualization
+            ### 📝 Usage Notes
+            - Sequences should be in ATCG format (other characters will be converted to N)
+            - Minimum 100 bp recommended for phylogenetic placement
+            - Higher similarity thresholds = fewer but more similar sequences
+            - ML tree building requires MAFFT and IQ-TREE (slower but more accurate)
+            ### ⚠️ System Requirements
+            - Python packages: gradio, torch, tensorflow, biopython, plotly
+            - Bioinformatics tools: MAFFT, IQ-TREE (optional for ML placement)
+            - Pre-trained models: boundary detection + keras validation models
+            """)
         return iface
     except Exception as e:
         logger.error(f"Failed to create Gradio interface: {e}")
+        # Fallback simple interface
+        with gr.Blocks() as fallback_iface:
+            gr.Markdown("# 🧬 Gene Analysis Pipeline (Fallback Mode)")
+            gr.Markdown(f"⚠️ Error creating full interface: {str(e)}")
+            dna_input = gr.Textbox(label="DNA Sequence", lines=5)
+            analyze_btn = gr.Button("Analyze")
+            output = gr.Textbox(label="Results", lines=10)
+            analyze_btn.click(
+                fn=lambda seq: run_pipeline(seq, 95.0, False)[4],  # Just return summary
+                inputs=[dna_input],
+                outputs=[output]
+            )
+        return fallback_iface
 # --- Application Startup ---
+if __name__ == "__main__":
     try:
+        # Build the Gradio UI; create_gradio_interface() returns a minimal fallback UI if the full one cannot be built
+        gr_interface = create_gradio_interface()
+        # Mount the Gradio UI on the FastAPI app under the /gradio path
+        gr_app = gr.mount_gradio_app(app, gr_interface, path="/gradio")
+        # Log which models, tree analyzer and external tools are available at startup
+        logger.info("🚀 Starting Gene Analysis Pipeline...")
+        logger.info(f"📁 Base directory: {BASE_DIR}")
+        logger.info(f"🤖 Models loaded: Boundary={boundary_model is not None}, Keras={keras_model is not None}")
+        logger.info(f"🌳 Tree analyzer: {analyzer is not None}")
+        mafft_available, iqtree_available, _, _ = check_tool_availability()
+        logger.info(f"🔬 Tools available: MAFFT={mafft_available}, IQ-TREE={iqtree_available}")
+        # Serve the FastAPI app (with the mounted Gradio UI) via uvicorn on 0.0.0.0:7860
+        logger.info("🌐 Starting server on http://0.0.0.0:7860")
+        logger.info("📊 FastAPI docs: http://0.0.0.0:7860/docs")
+        logger.info("🎮 Gradio interface: http://0.0.0.0:7860/gradio")
+        uvicorn.run(
+            app,
+            host="0.0.0.0",
+            port=7860,
+            log_level="info",
+            access_log=True
+        )
     except Exception as e:
+        logger.error(f"❌ Startup failed: {e}")
+        print(f"❌ Failed to start application: {e}")
+        sys.exit(1)  # exit with a non-zero status after reporting the startup failure